From 4bb08e4bbf5a849a02e5af550f3e071c7c7ebb79 Mon Sep 17 00:00:00 2001 From: Pablo Brubeck Date: Wed, 14 Feb 2024 16:22:31 +0000 Subject: [PATCH] review comments --- firedrake/preconditioners/fdm.py | 764 ++++++++++++++++--------------- firedrake/preconditioners/pmg.py | 18 + 2 files changed, 423 insertions(+), 359 deletions(-) diff --git a/firedrake/preconditioners/fdm.py b/firedrake/preconditioners/fdm.py index f14928e2a1..b27542a446 100644 --- a/firedrake/preconditioners/fdm.py +++ b/firedrake/preconditioners/fdm.py @@ -1,3 +1,4 @@ +from textwrap import dedent from functools import partial from itertools import chain, product from firedrake.petsc import PETSc @@ -369,6 +370,33 @@ def destroy(self, pc): self.pc.destroy() def condense(self, A, J, bcs, fcp, pc_type="icc"): + """ + Construct block matrices used for matrix-free static condensation. + The inversion of the interior-interior block is replaced with a local + KSP object that is reused on each cell within an MPI rank. + + Parameters + ---------- + A : + The :class:`PETSc.Mat` to statically condense. + J : + The bilinear :class:`ufl.Form` to statically condense. + bcs : + An iteratable of boundary conditions. + fcp : + A dict with the form compiler parameters. + pc_type : + The preconditioner type for the interior solver. + + Returns + ------- + Smat : + A :class:`PETSc.Mat` with the original blocks of A, except that + the matrix-free Schur complement replaces the interface-interface block. + Pmat : + A `dict` mapping pairs of function spaces to the preconditioner blocks + [[inv(A00), A01], [A10, inv(S)]]. + """ Smats = {} V = J.arguments()[0].function_space() V0 = next((Vi for Vi in V if is_restricted(Vi.finat_element)[0]), None) @@ -642,14 +670,14 @@ def _element_mass_matrix(self): @PETSc.Log.EventDecorator("FDMSetValues") def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): """ - Assemble the stiffness matrix in the FDM basis using sparse reference + Assemble the auxiliary operator in the FDM basis using sparse reference tensors and diagonal mass matrices. :arg A: the :class:`PETSc.Mat` to assemble :arg Vrow: the :class:`.FunctionSpace` test space :arg Vcol: the :class:`.FunctionSpace` trial space :arg addv: a `PETSc.Mat.InsertMode` - :arg mat_type: a `PETSc.Mat.Type` + :arg mat_type: the `PETSc.Mat.Type` of the auxiliary operator """ key = (Vrow.ufl_element(), Vcol.ufl_element()) on_diag = Vrow == Vcol @@ -683,6 +711,7 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): *indices_acc) self.assemblers.setdefault(key, assembler) if A.getType() == "preallocator": + # Determine the global sparsity pattern by inserting a constant sparse element matrix args = assembler.arguments[:2] kernel = ElementKernel(PETSc.Mat(), name="preallocate").kernel(mat_type=mat_type, on_diag=on_diag) assembler = op2.ParLoop(kernel, Vrow.mesh().cell_set, @@ -693,12 +722,14 @@ def set_values(self, A, Vrow, Vcol, addv, mat_type="aij"): class ElementKernel(object): - code = """ -PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { - PetscFunctionBeginUser; - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" + """Base class for sparse element kernel builders. + By default, it inserts the same matrix on each cell.""" + code = dedent(""" + PetscErrorCode %(name)s(const Mat A, const Mat B, %(indices)s) { + PetscFunctionBeginUser; + PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscFunctionReturn(PETSC_SUCCESS); + }""") def __init__(self, A, name=None): self.result = A @@ -715,49 +746,49 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): indices = ("rindices",) if on_diag else ("rindices", "cindices") code = "" if "MatSetValuesArray" in self.code: - code = """ -static inline PetscErrorCode MatSetValuesArray(Mat A, const PetscScalar *restrict values) { - PetscBool done; - PetscInt m; - const PetscInt *ai; - PetscScalar *vals; - PetscFunctionBeginUser; - PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscCall(MatSeqAIJGetArrayWrite(A, &vals)); - PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); - PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals)); - PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" + code = dedent(""" + static inline PetscErrorCode MatSetValuesArray(Mat A, const PetscScalar *restrict values) { + PetscBool done; + PetscInt m; + const PetscInt *ai; + PetscScalar *vals; + PetscFunctionBeginUser; + PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + PetscCall(MatSeqAIJGetArrayWrite(A, &vals)); + PetscCall(PetscMemcpy(vals, values, ai[m] * sizeof(*vals))); + PetscCall(MatSeqAIJRestoreArrayWrite(A, &vals)); + PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + PetscFunctionReturn(PETSC_SUCCESS); + }""") if mat_type != "matfree": select_cols = """ for (PetscInt j = ai[i]; j < ai[i + 1]; j++) indices[j] -= (indices[j] < rindices[i]) * (indices[j] + 1);""" - code += """ -static inline PetscErrorCode MatSetValuesSparse(const Mat A, const Mat B, - const PetscInt *restrict rindices, - const PetscInt *restrict cindices, - InsertMode addv) { - PetscBool done; - PetscInt m, ncols, istart, *indices; - const PetscInt *ai, *aj; - const PetscScalar *vals; - PetscFunctionBeginUser; - PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); - PetscCall(PetscMalloc1(ai[m], &indices)); - for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]]; - PetscCall(MatSeqAIJGetArrayRead(B, &vals)); - for (PetscInt i = 0; i < m; i++) { - istart = ai[i]; - ncols = ai[i + 1] - istart; - %(select_cols)s - PetscCall(MatSetValues(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); - } - PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); - PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); - PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""} + code += dedent(""" + static inline PetscErrorCode MatSetValuesSparse(const Mat A, const Mat B, + const PetscInt *restrict rindices, + const PetscInt *restrict cindices, + InsertMode addv) { + PetscBool done; + PetscInt m, ncols, istart, *indices; + const PetscInt *ai, *aj; + const PetscScalar *vals; + PetscFunctionBeginUser; + PetscCall(MatGetRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); + PetscCall(PetscMalloc1(ai[m], &indices)); + for (PetscInt j = 0; j < ai[m]; j++) indices[j] = cindices[aj[j]]; + PetscCall(MatSeqAIJGetArrayRead(B, &vals)); + for (PetscInt i = 0; i < m; i++) { + istart = ai[i]; + ncols = ai[i + 1] - istart; + %(select_cols)s + PetscCall(MatSetValues(A, 1, &rindices[i], ncols, &indices[istart], &vals[istart], addv)); + } + PetscCall(MatSeqAIJRestoreArrayRead(B, &vals)); + PetscCall(MatRestoreRowIJ(B, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, &aj, &done)); + PetscCall(PetscFree(indices)); + PetscFunctionReturn(PETSC_SUCCESS); + }""" % {"select_cols": select_cols if mat_type.endswith("sbaij") else ""}) code += self.code % dict(self.rules, name=self.name, indices=", ".join("const PetscInt *restrict %s" % s for s in indices), rows=indices[0], cols=indices[-1], addv=addv) @@ -765,18 +796,20 @@ def kernel(self, mat_type="aij", on_diag=False, addv=None): class TripleProductKernel(ElementKernel): - code = """ -PetscErrorCode %(name)s(const Mat A, const Mat B, - const PetscScalar *restrict coefficients, - %(indices)s) { - Mat C; - PetscFunctionBeginUser; - PetscCall(MatProductGetMats(B, NULL, &C, NULL)); - PetscCall(MatSetValuesArray(C, coefficients)); - PetscCall(MatProductNumeric(B)); - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" + """Kernel builder that inserts a triple product of the form L * C * R, + where L, C, R are sparse matrices and the entries of C are updated on each cell.""" + code = dedent(""" + PetscErrorCode %(name)s(const Mat A, const Mat B, + const PetscScalar *restrict coefficients, + %(indices)s) { + Mat C; + PetscFunctionBeginUser; + PetscCall(MatProductGetMats(B, NULL, &C, NULL)); + PetscCall(MatSetValuesArray(C, coefficients)); + PetscCall(MatProductNumeric(B)); + PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscFunctionReturn(PETSC_SUCCESS); + }""") def __init__(self, A, B, C, name=None): self.product = partial(A.matMatMult, B, C) @@ -784,20 +817,21 @@ def __init__(self, A, B, C, name=None): class SchurComplementKernel(ElementKernel): + """Base class for Schur complement kernel builders.""" condense_code = "" - code = """ -#include -PetscErrorCode %(name)s(const Mat A, const Mat B, - const Mat A11, const Mat A10, const Mat A01, const Mat A00, - const PetscScalar *restrict coefficients, %(indices)s) { - Mat C; - PetscFunctionBeginUser; - PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); - PetscCall(MatSetValuesArray(C, coefficients)); - %(condense)s - PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" + code = dedent(""" + #include + PetscErrorCode %(name)s(const Mat A, const Mat B, + const Mat A11, const Mat A10, const Mat A01, const Mat A00, + const PetscScalar *restrict coefficients, %(indices)s) { + Mat C; + PetscFunctionBeginUser; + PetscCall(MatProductGetMats(A11, NULL, &C, NULL)); + PetscCall(MatSetValuesArray(C, coefficients)); + %(condense)s + PetscCall(MatSetValuesSparse(A, B, %(rows)s, %(cols)s, %(addv)d)); + PetscFunctionReturn(PETSC_SUCCESS); + }""") def __init__(self, *kernels, name=None): self.children = kernels @@ -824,10 +858,11 @@ def condense(self, result=None): class SchurComplementPattern(SchurComplementKernel): - condense_code = """ - PetscCall(MatProductNumeric(A11)); - PetscCall(MatAYPX(B, 0.0, A11, SUBSET_NONZERO_PATTERN)); - """ + """Kernel builder to pad with zeros the Schur complement sparsity pattern.""" + condense_code = dedent(""" + PetscCall(MatProductNumeric(A11)); + PetscCall(MatAYPX(B, 0.0, A11, SUBSET_NONZERO_PATTERN)); + """) def condense(self, result=None): """Pad with zeros the statically condensed pattern""" @@ -840,27 +875,28 @@ def condense(self, result=None): class SchurComplementDiagonal(SchurComplementKernel): - condense_code = """ - Vec vec; - PetscInt n; - PetscScalar *vals; - PetscCall(MatProductNumeric(A11)); - PetscCall(MatProductNumeric(A10)); - PetscCall(MatProductNumeric(A01)); - PetscCall(MatProductNumeric(A00)); - - PetscCall(MatGetSize(A00, &n, NULL)); - PetscCall(MatSeqAIJGetArray(A00, &vals)); - PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n, vals, &vec)); - PetscCall(VecReciprocal(vec)); - PetscCall(VecScale(vec, -1.0)); - PetscCall(MatDiagonalScale(A01, vec, NULL)); - PetscCall(VecDestroy(&vec)); - PetscCall(MatSeqAIJRestoreArray(A00, &vals)); - - PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); - """ + """Schur complement kernel builder that assumes a diagonal interior block.""" + condense_code = dedent(""" + Vec vec; + PetscInt n; + PetscScalar *vals; + PetscCall(MatProductNumeric(A11)); + PetscCall(MatProductNumeric(A10)); + PetscCall(MatProductNumeric(A01)); + PetscCall(MatProductNumeric(A00)); + + PetscCall(MatGetSize(A00, &n, NULL)); + PetscCall(MatSeqAIJGetArray(A00, &vals)); + PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n, vals, &vec)); + PetscCall(VecReciprocal(vec)); + PetscCall(VecScale(vec, -1.0)); + PetscCall(MatDiagonalScale(A01, vec, NULL)); + PetscCall(VecDestroy(&vec)); + PetscCall(MatSeqAIJRestoreArray(A00, &vals)); + + PetscCall(MatProductNumeric(B)); + PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); + """) def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None @@ -875,47 +911,49 @@ def condense(self, result=None): class SchurComplementBlockCholesky(SchurComplementKernel): - condense_code = """ - PetscBLASInt bn, lierr; - PetscBool done; - PetscInt m, bsize, irow; - const PetscInt *ai; - PetscScalar *vals, *U; - Mat X; - PetscFunctionBeginUser; - PetscCall(MatProductNumeric(A11)); - PetscCall(MatProductNumeric(A01)); - PetscCall(MatProductNumeric(A00)); - PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscCall(MatSeqAIJGetArray(A00, &vals)); - irow = 0; - while (irow < m && ai[irow + 1] - ai[irow] == 1) { - vals[irow] = PetscSqrtReal(1.0 / vals[irow]); - irow++; - } - U = &vals[irow]; - while (irow < m) { - bsize = ai[irow + 1] - ai[irow]; - PetscCall(PetscBLASIntCast(bsize, &bn)); - PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &bn, U, &bn, &lierr)); - PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); - for (PetscInt j = 0; j < bsize - 1; j++) - for (PetscInt i = j + 1; i < bsize; i++) - U[i + bsize * j] = 0.0; - U += bsize * bsize; - irow += bsize; - } - PetscCall(MatSeqAIJRestoreArray(A00, &vals)); - PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscCall(MatProductGetMats(B, &X, NULL, NULL)); - PetscCall(MatProductNumeric(X)); - PetscCall(MatProductNumeric(B)); - PetscCall(MatAYPX(B, -1.0, A11, SUBSET_NONZERO_PATTERN)); - """ + """Schur complement kernel builder that assumes a block-diagonal interior block, + and uses its Cholesky factorization to compute S = A11 - (L^-1 A01)^T (L^-1 A01).""" + condense_code = dedent(""" + PetscBLASInt bn, lierr; + PetscBool done; + PetscInt m, bsize, irow; + const PetscInt *ai; + PetscScalar *vals, *U; + Mat X; + PetscFunctionBeginUser; + PetscCall(MatProductNumeric(A11)); + PetscCall(MatProductNumeric(A01)); + PetscCall(MatProductNumeric(A00)); + PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + PetscCall(MatSeqAIJGetArray(A00, &vals)); + irow = 0; + while (irow < m && ai[irow + 1] - ai[irow] == 1) { + vals[irow] = PetscSqrtReal(1.0 / vals[irow]); + irow++; + } + U = &vals[irow]; + while (irow < m) { + bsize = ai[irow + 1] - ai[irow]; + PetscCall(PetscBLASIntCast(bsize, &bn)); + PetscCallBLAS("LAPACKpotrf", LAPACKpotrf_("U", &bn, U, &bn, &lierr)); + PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); + for (PetscInt j = 0; j < bsize - 1; j++) + for (PetscInt i = j + 1; i < bsize; i++) + U[i + bsize * j] = 0.0; + U += bsize * bsize; + irow += bsize; + } + PetscCall(MatSeqAIJRestoreArray(A00, &vals)); + PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + PetscCall(MatProductGetMats(B, &X, NULL, NULL)); + PetscCall(MatProductNumeric(X)); + PetscCall(MatProductNumeric(B)); + PetscCall(MatAYPX(B, -1.0, A11, SUBSET_NONZERO_PATTERN)); + """) def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None - # asssume that K10 = K01^T + # asssume that A10 = A01^T A11, _, A01, A00 = self.submats indptr, indices, R = A00.getValuesCSR() @@ -940,76 +978,78 @@ def condense(self, result=None): class SchurComplementBlockLU(SchurComplementKernel): - condense_code = """ - PetscBLASInt bn, lierr, lwork; - PetscBool done; - PetscInt m, bsize, irow, icol, nnz, iswap, *ipiv, *perm; - const PetscInt *ai; - PetscScalar *vals, *work, *L, *U; - Mat X; - PetscFunctionBeginUser; - PetscCall(MatProductNumeric(A11)); - PetscCall(MatProductNumeric(A10)); - PetscCall(MatProductNumeric(A01)); - PetscCall(MatProductNumeric(A00)); - PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - PetscCall(MatSeqAIJGetArray(A00, &vals)); - - // A00 = (U^T) * (L^T) - nnz = ai[m]; - bsize = ai[m] - ai[m - 1]; - PetscCall(PetscMalloc2(bsize, &ipiv, bsize, &perm)); - PetscCall(PetscCalloc1(nnz, &work)); - irow = 0; - while (irow < m && ai[irow + 1] - ai[irow] == 1) { - work[irow] = 1.0; - vals[irow] = 1.0 / vals[irow]; - irow++; - } - L = &work[irow]; - U = &vals[irow]; - while (irow < m) { - bsize = ai[irow + 1] - ai[irow]; - PetscCall(PetscBLASIntCast(bsize, &bn)); - PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, U, &bn, ipiv, &lierr)); - PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); - PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("L", "U", &bn, U, &bn, &lierr)); - for (PetscInt j = 0; j < bsize; j++) perm[j] = j; - for (PetscInt j = 0; j < bsize; j++) { - icol = ipiv[j] - 1; - iswap = perm[icol]; - perm[icol] = perm[j]; - perm[j] = iswap; + """Schur complement kernel builder that assumes a block-diagonal interior block, + and uses its LU factorization to compute S = A11 - (A10 U^-1) (L^-1 A01).""" + condense_code = dedent(""" + PetscBLASInt bn, lierr, lwork; + PetscBool done; + PetscInt m, bsize, irow, icol, nnz, iswap, *ipiv, *perm; + const PetscInt *ai; + PetscScalar *vals, *work, *L, *U; + Mat X; + PetscFunctionBeginUser; + PetscCall(MatProductNumeric(A11)); + PetscCall(MatProductNumeric(A10)); + PetscCall(MatProductNumeric(A01)); + PetscCall(MatProductNumeric(A00)); + PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + PetscCall(MatSeqAIJGetArray(A00, &vals)); + + // A00 = (U^T) * (L^T) + nnz = ai[m]; + bsize = ai[m] - ai[m - 1]; + PetscCall(PetscMalloc2(bsize, &ipiv, bsize, &perm)); + PetscCall(PetscCalloc1(nnz, &work)); + irow = 0; + while (irow < m && ai[irow + 1] - ai[irow] == 1) { + work[irow] = 1.0; + vals[irow] = 1.0 / vals[irow]; + irow++; } - for (PetscInt j = 0; j < bsize; j++) { - L[j + bsize * perm[j]] = 1.0; - for (PetscInt i = j + 1; i < bsize; i++) { - L[i + bsize * perm[j]] = U[i + bsize * j]; - U[i + bsize * j] = 0.0; + L = &work[irow]; + U = &vals[irow]; + while (irow < m) { + bsize = ai[irow + 1] - ai[irow]; + PetscCall(PetscBLASIntCast(bsize, &bn)); + PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, U, &bn, ipiv, &lierr)); + PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("U", "N", &bn, U, &bn, &lierr)); + PetscCallBLAS("LAPACKtrtri", LAPACKtrtri_("L", "U", &bn, U, &bn, &lierr)); + for (PetscInt j = 0; j < bsize; j++) perm[j] = j; + for (PetscInt j = 0; j < bsize; j++) { + icol = ipiv[j] - 1; + iswap = perm[icol]; + perm[icol] = perm[j]; + perm[j] = iswap; } + for (PetscInt j = 0; j < bsize; j++) { + L[j + bsize * perm[j]] = 1.0; + for (PetscInt i = j + 1; i < bsize; i++) { + L[i + bsize * perm[j]] = U[i + bsize * j]; + U[i + bsize * j] = 0.0; + } + } + L += bsize * bsize; + U += bsize * bsize; + irow += bsize; } - L += bsize * bsize; - U += bsize * bsize; - irow += bsize; - } - PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - - // A00 = inv(U^T) - PetscCall(MatSeqAIJRestoreArray(A00, &vals)); - // X = inv(U^T) * A01 - PetscCall(MatProductGetMats(B, NULL, NULL, &X)); - PetscCall(MatProductNumeric(X)); - - // A00 = -inv(L^T) - PetscCall(MatSeqAIJGetArray(A00, &vals)); - for (PetscInt i = 0; i < nnz; i++) vals[i] = -work[i]; - PetscCall(MatSeqAIJRestoreArray(A00, &vals)); - PetscCall(PetscFree3(ipiv, perm, work)); - - // B = A11 - A10 * inv(L^T) * X - PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); - """ + PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + + // A00 = inv(U^T) + PetscCall(MatSeqAIJRestoreArray(A00, &vals)); + // X = inv(U^T) * A01 + PetscCall(MatProductGetMats(B, NULL, NULL, &X)); + PetscCall(MatProductNumeric(X)); + + // A00 = -inv(L^T) + PetscCall(MatSeqAIJGetArray(A00, &vals)); + for (PetscInt i = 0; i < nnz; i++) vals[i] = -work[i]; + PetscCall(MatSeqAIJRestoreArray(A00, &vals)); + PetscCall(PetscFree3(ipiv, perm, work)); + + // B = A11 - A10 * inv(L^T) * X + PetscCall(MatProductNumeric(B)); + PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); + """) def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None @@ -1042,50 +1082,52 @@ def condense(self, result=None): class SchurComplementBlockInverse(SchurComplementKernel): - condense_code = """ - PetscBLASInt bn, lierr, lwork; - PetscBool done; - PetscInt m, irow, bsize, *ipiv; - const PetscInt *ai; - PetscScalar *vals, *work, *ainv, swork; - PetscFunctionBeginUser; - PetscCall(MatProductNumeric(A11)); - PetscCall(MatProductNumeric(A10)); - PetscCall(MatProductNumeric(A01)); - PetscCall(MatProductNumeric(A00)); - PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - - lwork = -1; - bsize = ai[m] - ai[m - 1]; - PetscCall(PetscMalloc1(bsize, &ipiv)); - PetscCall(PetscBLASIntCast(bsize, &bn)); - PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, &swork, &lwork, &lierr)); - bsize = (PetscInt)swork; - PetscCall(PetscBLASIntCast(bsize, &lwork)); - PetscCall(PetscMalloc1(bsize, &work)); - PetscCall(MatSeqAIJGetArray(A00, &vals)); - irow = 0; - while (irow < m && ai[irow + 1] - ai[irow] == 1) { - vals[irow] = 1.0 / vals[irow]; - irow++; - } - ainv = &vals[irow]; - while (irow < m) { - bsize = ai[irow + 1] - ai[irow]; + """Schur complement kernel builder that assumes a block-diagonal interior block, + and uses its inverse to compute S = A11 - A10 A00^-1 A01.""" + condense_code = dedent(""" + PetscBLASInt bn, lierr, lwork; + PetscBool done; + PetscInt m, irow, bsize, *ipiv; + const PetscInt *ai; + PetscScalar *vals, *work, *ainv, swork; + PetscFunctionBeginUser; + PetscCall(MatProductNumeric(A11)); + PetscCall(MatProductNumeric(A10)); + PetscCall(MatProductNumeric(A01)); + PetscCall(MatProductNumeric(A00)); + PetscCall(MatGetRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + + lwork = -1; + bsize = ai[m] - ai[m - 1]; + PetscCall(PetscMalloc1(bsize, &ipiv)); PetscCall(PetscBLASIntCast(bsize, &bn)); - PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, ainv, &bn, ipiv, &lierr)); - PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, work, &lwork, &lierr)); - ainv += bsize * bsize; - irow += bsize; - } - PetscCall(PetscFree2(ipiv, work)); - PetscCall(MatSeqAIJRestoreArray(A00, &vals)); - PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); - - PetscCall(MatScale(A00, -1.0)); - PetscCall(MatProductNumeric(B)); - PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); - """ + PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, &swork, &lwork, &lierr)); + bsize = (PetscInt)swork; + PetscCall(PetscBLASIntCast(bsize, &lwork)); + PetscCall(PetscMalloc1(bsize, &work)); + PetscCall(MatSeqAIJGetArray(A00, &vals)); + irow = 0; + while (irow < m && ai[irow + 1] - ai[irow] == 1) { + vals[irow] = 1.0 / vals[irow]; + irow++; + } + ainv = &vals[irow]; + while (irow < m) { + bsize = ai[irow + 1] - ai[irow]; + PetscCall(PetscBLASIntCast(bsize, &bn)); + PetscCallBLAS("LAPACKgetrf", LAPACKgetrf_(&bn, &bn, ainv, &bn, ipiv, &lierr)); + PetscCallBLAS("LAPACKgetri", LAPACKgetri_(&bn, ainv, &bn, ipiv, work, &lwork, &lierr)); + ainv += bsize * bsize; + irow += bsize; + } + PetscCall(PetscFree2(ipiv, work)); + PetscCall(MatSeqAIJRestoreArray(A00, &vals)); + PetscCall(MatRestoreRowIJ(A00, 0, PETSC_FALSE, PETSC_FALSE, &m, &ai, NULL, &done)); + + PetscCall(MatScale(A00, -1.0)); + PetscCall(MatProductNumeric(B)); + PetscCall(MatAXPY(B, 1.0, A11, SUBSET_NONZERO_PATTERN)); + """) def condense(self, result=None): structure = PETSc.Mat.Structure.SUBSET if result else None @@ -1134,50 +1176,52 @@ def wrap_form(a, prefix="form", fcp=None, matshell=False): cache[key] = (matmult_struct, matmult_call, ctx_struct, ctx_pack) if matshell: - matmult_struct += """ -static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) { - PetscScalar **appctx, *y; - const PetscScalar *x; - PetscFunctionBeginUser; - PetscCall(MatShellGetContext(A, &appctx)); - PetscCall(VecZeroEntries(Y)); - PetscCall(VecGetArray(Y, &y)); - PetscCall(VecGetArrayRead(X, &x)); - %(matmult_call)s - PetscCall(VecRestoreArrayRead(X, &x)); - PetscCall(VecRestoreArray(Y, &y)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")} + matmult_struct += dedent(""" + static PetscErrorCode %(prefix)s(Mat A, Vec X, Vec Y) { + PetscScalar **appctx, *y; + const PetscScalar *x; + PetscFunctionBeginUser; + PetscCall(MatShellGetContext(A, &appctx)); + PetscCall(VecZeroEntries(Y)); + PetscCall(VecGetArray(Y, &y)); + PetscCall(VecGetArrayRead(X, &x)); + %(matmult_call)s + PetscCall(VecRestoreArrayRead(X, &x)); + PetscCall(VecRestoreArray(Y, &y)); + PetscFunctionReturn(PETSC_SUCCESS); + }""" % {"prefix": prefix, "matmult_call": matmult_call("x", "y")}) return matmult_struct, matmult_call, ctx_struct, ctx_pack class InteriorSolveKernel(ElementKernel): - code = """ -%(A_struct)s -PetscErrorCode %(name)s(const KSP ksp, - const PetscScalar *restrict coefficients, - %(ctx_struct)s - const PetscScalar *restrict y, - PetscScalar *restrict x){ - %(ctx_pack)s - PetscInt m; - Mat A, B, C; - Vec X, Y; - PetscFunctionBeginUser; - PetscCall(KSPGetOperators(ksp, &A, &B)); - PetscCall(MatShellSetContext(A, &appctx)); - PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); - PetscCall(MatProductGetMats(B, NULL, &C, NULL)); - PetscCall(MatSetValuesArray(C, coefficients)); - PetscCall(MatProductNumeric(B)); - PetscCall(MatGetSize(B, &m, NULL)); - PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, y, &Y)); - PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, x, &X)); - PetscCall(KSPSolve(ksp, Y, X)); - PetscCall(VecDestroy(&X)); - PetscCall(VecDestroy(&Y)); - PetscFunctionReturn(PETSC_SUCCESS); -}""" + """Kernel builder that solves the interior block using a local KSP + accross cells owned by an MPI rank.""" + code = dedent(""" + %(A_struct)s + PetscErrorCode %(name)s(const KSP ksp, + const PetscScalar *restrict coefficients, + %(ctx_struct)s + const PetscScalar *restrict y, + PetscScalar *restrict x){ + %(ctx_pack)s + PetscInt m; + Mat A, B, C; + Vec X, Y; + PetscFunctionBeginUser; + PetscCall(KSPGetOperators(ksp, &A, &B)); + PetscCall(MatShellSetContext(A, &appctx)); + PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); + PetscCall(MatProductGetMats(B, NULL, &C, NULL)); + PetscCall(MatSetValuesArray(C, coefficients)); + PetscCall(MatProductNumeric(B)); + PetscCall(MatGetSize(B, &m, NULL)); + PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, y, &Y)); + PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, x, &X)); + PetscCall(KSPSolve(ksp, Y, X)); + PetscCall(VecDestroy(&X)); + PetscCall(VecDestroy(&Y)); + PetscFunctionReturn(PETSC_SUCCESS); + }""") def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_type="icc"): self.child = kernel @@ -1217,50 +1261,52 @@ def __init__(self, kernel, form, name=None, prefix="interior_", fcp=None, pc_typ class ImplicitSchurComplementKernel(ElementKernel): - code = """ -%(A_struct)s -%(A00_struct)s -PetscErrorCode %(name)s(const KSP ksp, - const PetscScalar *restrict coefficients, - %(ctx_struct)s - const PetscScalar *restrict xf, - PetscScalar *restrict yf) { - %(ctx_pack)s - static const PetscInt idofs[%(isize)d] = {%(idofs)s}; - static const PetscInt fdofs[%(fsize)d] = {%(fdofs)s}; - static PetscScalar xi[%(isize)d], yi[%(isize)d], x[%(size)d], y[%(size)d]; - PetscInt i; - Mat A, B, C; - Vec X, Y; - PetscFunctionBeginUser; - PetscCall(KSPGetOperators(ksp, &A, &B)); - PetscCall(MatShellSetContext(A, &appctx)); - PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); - PetscCall(MatProductGetMats(B, NULL, &C, NULL)); - PetscCall(MatSetValuesArray(C, coefficients)); - PetscCall(MatProductNumeric(B)); - - // x[fdofs] = x1; y = A * x; - for (i = 0; i < %(size)d; i++) y[i] = 0.0; - for (i = 0; i < %(size)d; i++) x[i] = 0.0; - for (i = 0; i < %(fsize)d; i++) x[fdofs[i]] = xf[i]; - %(A_call)s - - // x[idofs] = -inv(Aii) * y[idofs]; - for (i = 0; i < %(isize)d; i++) yi[i] = y[idofs[i]]; - PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, yi, &Y)); - PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, xi, &X)); - PetscCall(KSPSolve(ksp, Y, X)); - PetscCall(VecDestroy(&X)); - PetscCall(VecDestroy(&Y)); - for (i = 0; i < %(isize)d; i++) x[idofs[i]] = -xi[i]; - - // y = A * x; y1 += y[fdofs]; - for (i = 0; i < %(size)d; i++) y[i] = 0.0; - %(A_call)s - for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]]; - PetscFunctionReturn(PETSC_SUCCESS); -}""" + """Kernel builder that applies the matrix-free Schur complement matvec + reusing a local KSP to invert the interior blocks.""" + code = dedent(""" + %(A_struct)s + %(A00_struct)s + PetscErrorCode %(name)s(const KSP ksp, + const PetscScalar *restrict coefficients, + %(ctx_struct)s + const PetscScalar *restrict xf, + PetscScalar *restrict yf) { + %(ctx_pack)s + static const PetscInt idofs[%(isize)d] = {%(idofs)s}; + static const PetscInt fdofs[%(fsize)d] = {%(fdofs)s}; + static PetscScalar xi[%(isize)d], yi[%(isize)d], x[%(size)d], y[%(size)d]; + PetscInt i; + Mat A, B, C; + Vec X, Y; + PetscFunctionBeginUser; + PetscCall(KSPGetOperators(ksp, &A, &B)); + PetscCall(MatShellSetContext(A, &appctx)); + PetscCall(MatShellSetOperation(A, MATOP_MULT, (void(*)(void))A_interior)); + PetscCall(MatProductGetMats(B, NULL, &C, NULL)); + PetscCall(MatSetValuesArray(C, coefficients)); + PetscCall(MatProductNumeric(B)); + + // x[fdofs] = x1; y = A * x; + for (i = 0; i < %(size)d; i++) y[i] = 0.0; + for (i = 0; i < %(size)d; i++) x[i] = 0.0; + for (i = 0; i < %(fsize)d; i++) x[fdofs[i]] = xf[i]; + %(A_call)s + + // x[idofs] = -inv(Aii) * y[idofs]; + for (i = 0; i < %(isize)d; i++) yi[i] = y[idofs[i]]; + PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, yi, &Y)); + PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, %(isize)d, xi, &X)); + PetscCall(KSPSolve(ksp, Y, X)); + PetscCall(VecDestroy(&X)); + PetscCall(VecDestroy(&Y)); + for (i = 0; i < %(isize)d; i++) x[idofs[i]] = -xi[i]; + + // y = A * x; y1 += y[fdofs]; + for (i = 0; i < %(size)d; i++) y[i] = 0.0; + %(A_call)s + for (i = 0; i < %(fsize)d; i++) yf[i] += y[fdofs[i]]; + PetscFunctionReturn(PETSC_SUCCESS); + }""") def __init__(self, kernel, name=None): self.child = kernel @@ -1634,37 +1680,37 @@ def load_setSubMatCSR(comm, triu=False): else: name = "setSubMatCSR_AIJ" select_cols = "" - code = f""" - #include - - PetscErrorCode {name}(Mat A, - Mat B, - PetscInt *rindices, - PetscInt *cindices, - InsertMode addv) - {{ - PetscInt m, ncols, irow, icol; - PetscInt *cols, *indices; - PetscScalar *vals; - PetscFunctionBeginUser; - PetscCall(MatGetSize(B, &m, NULL)); - PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols)); - PetscCall(PetscMalloc1(ncols, &indices)); - for (PetscInt i = 0; i < m; i++) {{ - PetscCall(MatGetRow(B, i, &ncols, &cols, &vals)); - irow = rindices[i]; - for (PetscInt j = 0; j < ncols; j++) {{ - icol = cindices[cols[j]]; - {select_cols} - indices[j] = icol; + code = dedent(f""" + #include + + PetscErrorCode {name}(Mat A, + Mat B, + PetscInt *rindices, + PetscInt *cindices, + InsertMode addv) + {{ + PetscInt m, ncols, irow, icol; + PetscInt *cols, *indices; + PetscScalar *vals; + PetscFunctionBeginUser; + PetscCall(MatGetSize(B, &m, NULL)); + PetscCall(MatSeqAIJGetMaxRowNonzeros(B, &ncols)); + PetscCall(PetscMalloc1(ncols, &indices)); + for (PetscInt i = 0; i < m; i++) {{ + PetscCall(MatGetRow(B, i, &ncols, &cols, &vals)); + irow = rindices[i]; + for (PetscInt j = 0; j < ncols; j++) {{ + icol = cindices[cols[j]]; + {select_cols} + indices[j] = icol; + }} + PetscCall(MatSetValues(A, 1, &irow, ncols, indices, vals, addv)); + PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals)); + }} + PetscCall(PetscFree(indices)); + PetscFunctionReturn(PETSC_SUCCESS); }} - PetscCall(MatSetValues(A, 1, &irow, ncols, indices, vals, addv)); - PetscCall(MatRestoreRow(B, i, &ncols, &cols, &vals)); - }} - PetscCall(PetscFree(indices)); - PetscFunctionReturn(PETSC_SUCCESS); - }} - """ + """) argtypes = [ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_voidp, ctypes.c_int] funptr = SparseAssembler.load_c_code(code, name, comm=comm, argtypes=argtypes, diff --git a/firedrake/preconditioners/pmg.py b/firedrake/preconditioners/pmg.py index 73c3e37a3d..542fa1a977 100644 --- a/firedrake/preconditioners/pmg.py +++ b/firedrake/preconditioners/pmg.py @@ -592,6 +592,20 @@ def evaluate_dual(source, target, derivative=None): """Evaluate the action of a set of dual functionals of the target element on the (derivative of the) basis functions of the source element. + Parameters + ---------- + source : + A :class:`FIAT.CiarletElement` to interpolate. + target : + A :class:`FIAT.CiarletElement` defining the interpolation space. + derivative : ``str`` or ``None`` + An optional differential operator to apply on the source expression, + (currently only "grad" is supported). + + Returns + ------- + A read-only :class:`numpy.ndarray` with the evaluation of the target + dual basis on the (derivative of the) source primal basis. """ primal = source.get_nodal_basis() dual = target.get_dual_set() @@ -604,6 +618,10 @@ def evaluate_dual(source, target, derivative=None): for i in range(len(alpha)): for j in range(alpha[i]): B = numpy.dot(dmats[i], B) + elif derivative in ("curl", "div"): + raise NotImplementedError(f"Dual evaluation of {derivative} is not yet implemented.") + elif derivative is not None: + raise ValueError(f"Invalid derivaitve type {derivative}.") return get_readonly_view(numpy.dot(A, B))