From bf5647003cbe1ba9910eff74404dc157e48ab80e Mon Sep 17 00:00:00 2001
From: Christian Glusa
Date: Tue, 12 Oct 2021 11:08:00 -0600
Subject: [PATCH] release

---
 .gitattributes | 2 +
 Dockerfile | 89 +
 LICENSE | 26 +
 MANIFEST.in | 3 +
 Makefile | 135 +
 PyNucleus/__init__.py | 27 +
 PyNucleus/_version.py | 652 ++++
 README.rst | 160 +
 base/.gitattributes | 2 +
 base/MANIFEST.in | 3 +
 .../CSR_LinearOperator_decl_{SCALAR}.pxi | 29 +
 .../CSR_LinearOperator_{SCALAR}.pxi | 503 +++
 .../DenseLinearOperator_decl_{SCALAR}.pxi | 29 +
 .../DenseLinearOperator_{SCALAR}.pxi | 186 +
 .../DiagonalLinearOperator_decl_{SCALAR}.pxi | 21 +
 .../DiagonalLinearOperator_{SCALAR}.pxi | 95 +
 .../IJOperator_decl_{SCALAR}.pxi | 16 +
 base/PyNucleus_base/IJOperator_{SCALAR}.pxi | 90 +
 .../LinearOperatorWrapper_decl_{SCALAR}.pxi | 11 +
 .../LinearOperatorWrapper_{SCALAR}.pxi | 17 +
 .../LinearOperator_decl_{SCALAR}.pxi | 95 +
 .../LinearOperator_{SCALAR}.pxi | 502 +++
 .../SSS_LinearOperator_decl_{SCALAR}.pxi | 25 +
 .../SSS_LinearOperator_{SCALAR}.pxi | 309 ++
 base/PyNucleus_base/__init__.py | 55 +
 base/PyNucleus_base/_version.py | 652 ++++
 base/PyNucleus_base/blas.pxd | 45 +
 base/PyNucleus_base/blas.pyx | 654 ++++
 base/PyNucleus_base/convergence.pxd | 92 +
 base/PyNucleus_base/convergence.pyx | 156 +
 base/PyNucleus_base/factory.py | 83 +
 base/PyNucleus_base/intTuple.pxd | 29 +
 base/PyNucleus_base/intTuple.pyx | 129 +
 base/PyNucleus_base/ip_norm.pxd | 89 +
 base/PyNucleus_base/ip_norm.pyx | 302 ++
 base/PyNucleus_base/linalg.pxd | 41 +
 base/PyNucleus_base/linalg.pyx | 1147 ++++++
 base/PyNucleus_base/linear_operators.pxd | 170 +
 base/PyNucleus_base/linear_operators.pyx | 1555 ++++++++
 base/PyNucleus_base/memProfile.py | 18 +
 base/PyNucleus_base/myTypes32.h | 10 +
 base/PyNucleus_base/myTypes32.pxd | 16 +
 base/PyNucleus_base/myTypes32.pyx | 16 +
 base/PyNucleus_base/myTypes64.h | 10 +
 base/PyNucleus_base/myTypes64.pxd | 16 +
 base/PyNucleus_base/myTypes64.pyx | 16 +
 base/PyNucleus_base/performanceLogger.pxd | 62 +
 base/PyNucleus_base/performanceLogger.pyx | 193 +
 base/PyNucleus_base/plot_utils.py | 222 ++
 base/PyNucleus_base/setupUtils.py | 16 +
 base/PyNucleus_base/solver_factory.py | 100 +
 base/PyNucleus_base/solvers.pxd | 220 ++
 base/PyNucleus_base/solvers.pyx | 1411 +++++++
 base/PyNucleus_base/sparseGraph.pxd | 15 +
 base/PyNucleus_base/sparseGraph.pyx | 263 ++
 base/PyNucleus_base/sparsityPattern.pxd | 24 +
 base/PyNucleus_base/sparsityPattern.pyx | 126 +
 base/PyNucleus_base/tupleDict.pxd | 63 +
 base/PyNucleus_base/tupleDict.pyx | 516 +++
 .../PyNucleus_base/tupleDict_decl_{VALUE}.pxi | 36 +
 base/PyNucleus_base/tupleDict_{VALUE}.pxi | 296 ++
 base/PyNucleus_base/utilsCy.pyx | 76 +
 base/PyNucleus_base/utilsFem.py | 1437 +++++++
 base/setup.cfg | 7 +
 base/setup.py | 153 +
 base/versioneer.py | 2116 +++++++++++
 data/matnip.png | Bin 0 -> 27610 bytes
 docs/Makefile | 15 +
 docs/PyNucleus_base.rst | 118 +
 docs/PyNucleus_fem.rst | 102 +
 docs/PyNucleus_metisCy.rst | 18 +
 docs/PyNucleus_multilevelSolver.rst | 54 +
 docs/PyNucleus_nl.rst | 122 +
 docs/conf.py | 80 +
 docs/example1.py | 151 +
 docs/example1.rst | 140 +
 docs/example2.py | 147 +
 docs/example2.rst | 127 +
 docs/features.rst | 38 +
 docs/index.rst | 107 +
 docs/installation.rst | 76 +
 drivers/example1.py | 124 +
 drivers/example2.py | 125 +
 drivers/interfaceProblem.py | 322 ++
 drivers/runHelmholtz.py | 193 +
 drivers/runNonlocal.py | 207 +
 drivers/runParallelGMG.py | 377 ++
 drivers/runSerialGMG.py | 204 +
 drivers/variableOrder.py | 218 ++
 fem/.gitattributes | 2 +
 fem/MANIFEST.in | 3 +
 fem/PyNucleus_fem/DoFMaps.pxd | 81 +
 fem/PyNucleus_fem/DoFMaps.pyx | 1919 ++++++++++
 fem/PyNucleus_fem/__init__.py | 177 +
 fem/PyNucleus_fem/_version.py | 652 ++++
 fem/PyNucleus_fem/algebraicOverlaps.pxd | 160 +
 fem/PyNucleus_fem/algebraicOverlaps.pyx | 1981 ++++++++++
 fem/PyNucleus_fem/boundaryLayerCy.pyx | 353 ++
 fem/PyNucleus_fem/distributed_operators.pxd | 14 +
 fem/PyNucleus_fem/distributed_operators.pyx | 18 +
 .../distributed_operators_decl_{SCALAR}.pxi | 30 +
 .../distributed_operators_{SCALAR}.pxi | 195 +
 fem/PyNucleus_fem/femCy.pxd | 28 +
 fem/PyNucleus_fem/femCy.pyx | 2383 ++++++++++++
 fem/PyNucleus_fem/functions.pxd | 34 +
 fem/PyNucleus_fem/functions.pyx | 2075 ++++++++++
 fem/PyNucleus_fem/mass_1D_P0.pxi | 22 +
 fem/PyNucleus_fem/mass_1D_P0_P1.pxi | 23 +
 fem/PyNucleus_fem/mass_1D_P1.pxi | 24 +
 fem/PyNucleus_fem/mass_1D_P2.pxi | 27 +
 fem/PyNucleus_fem/mass_1D_P3.pxi | 31 +
 fem/PyNucleus_fem/mass_2D_P0.pxi | 22 +
 fem/PyNucleus_fem/mass_2D_P0_P1.pxi | 24 +
 fem/PyNucleus_fem/mass_2D_P1.pxi | 27 +
 fem/PyNucleus_fem/mass_2D_P2.pxi | 42 +
 fem/PyNucleus_fem/mass_2D_P3.pxi | 76 +
 fem/PyNucleus_fem/mass_3D_P0.pxi | 22 +
 fem/PyNucleus_fem/mass_3D_P0_P1.pxi | 25 +
 fem/PyNucleus_fem/mass_3D_P1.pxi | 31 +
 fem/PyNucleus_fem/mass_3D_P2.pxi | 76 +
 fem/PyNucleus_fem/mass_3D_P3.pxi | 231 ++
 fem/PyNucleus_fem/mesh.py | 3346 +++++++++++++++++
 fem/PyNucleus_fem/meshConstruction.py | 294 ++
 fem/PyNucleus_fem/meshCy.pxd | 122 +
 fem/PyNucleus_fem/meshCy.pyx | 2374 ++++++++++++
 fem/PyNucleus_fem/meshOverlaps.pxd | 47 +
 fem/PyNucleus_fem/meshOverlaps.pyx | 2188 +++++++++++
 fem/PyNucleus_fem/meshPartitioning.pyx | 426 +++
 fem/PyNucleus_fem/pdeProblems.py | 251 ++
 fem/PyNucleus_fem/quadrature.pxd | 165 +
 fem/PyNucleus_fem/quadrature.pyx | 617 +++
 fem/PyNucleus_fem/repartitioner.pyx | 1649 ++++++++
 .../scalar_coefficient_stiffness_1D_P1.pxi | 35 +
 .../scalar_coefficient_stiffness_1D_P2.pxi | 48 +
 .../scalar_coefficient_stiffness_2D_P1.pxi | 45 +
 .../scalar_coefficient_stiffness_2D_P2.pxi | 100 +
 .../scalar_coefficient_stiffness_3D_P1.pxi | 53 +
 .../scalar_coefficient_stiffness_3D_P2.pxi | 206 +
 fem/PyNucleus_fem/simplexMapper.pxd | 117 +
 fem/PyNucleus_fem/simplexMapper.pyx | 721 ++++
 fem/PyNucleus_fem/splitting.py | 153 +
 fem/PyNucleus_fem/stiffness_1D_P1.pxi | 24 +
 fem/PyNucleus_fem/stiffness_1D_P2.pxi | 27 +
 fem/PyNucleus_fem/stiffness_1D_P3.pxi | 31 +
 fem/PyNucleus_fem/stiffness_2D_P1.pxi | 34 +
 fem/PyNucleus_fem/stiffness_2D_P2.pxi | 49 +
 fem/PyNucleus_fem/stiffness_2D_P3.pxi | 83 +
 fem/PyNucleus_fem/stiffness_3D_P1.pxi | 42 +
 fem/PyNucleus_fem/stiffness_3D_P2.pxi | 87 +
 fem/PyNucleus_fem/stiffness_3D_P3.pxi | 242 ++
 fem/PyNucleus_fem/vector_decl_{SCALAR}.pxi | 17 +
 fem/PyNucleus_fem/vector_{SCALAR}.pxi | 270 ++
 fem/setup.cfg | 7 +
 fem/setup.py | 72 +
 fem/versioneer.py | 2116 +++++++++++
 metisCy/.gitattributes | 2 +
 metisCy/MANIFEST.in | 3 +
 metisCy/PyNucleus_metisCy/__init__.py | 89 +
 metisCy/PyNucleus_metisCy/_version.py | 652 ++++
 metisCy/PyNucleus_metisCy/metisCy.pxd | 21 +
 metisCy/PyNucleus_metisCy/metisCy.pyx | 382 ++
 metisCy/PyNucleus_metisCy/parmetisCy.pyx | 230 ++
 metisCy/setup.cfg | 7 +
 metisCy/setup.py | 55 +
 metisCy/versioneer.py | 2116 +++++++++++
 multilevelSolver/.gitattributes | 2 +
 multilevelSolver/MANIFEST.in | 3 +
 .../PyNucleus_multilevelSolver/__init__.py | 25 +
 .../PyNucleus_multilevelSolver/_version.py | 652 ++++
 .../coarseSolvers.pxd | 16 +
 .../coarseSolvers.pyx | 33 +
 .../coarseSolvers_decl_{SCALAR}.pxi | 35 +
 .../coarseSolvers_{SCALAR}.pxi | 181 +
 .../PyNucleus_multilevelSolver/connectors.py | 366 ++
 .../PyNucleus_multilevelSolver/geometricMG.py | 207 +
 .../PyNucleus_multilevelSolver/hierarchies.py | 480 +++
 .../PyNucleus_multilevelSolver/levels.py | 577 +++
 .../PyNucleus_multilevelSolver/multigrid.pxd | 17 +
 .../PyNucleus_multilevelSolver/multigrid.pyx | 10 +
 .../multigrid_decl_{SCALAR}.pxi | 31 +
 .../multigrid_{SCALAR}.pxi | 516 +++
 .../restrictionProlongation.pyx | 159 +
 .../restriction_1D_P0.pxi | 41 +
 .../restriction_1D_P1.pxi | 49 +
 .../restriction_1D_P1_P2.pxi | 47 +
 .../restriction_1D_P1_P3.pxi | 51 +
 .../restriction_1D_P2.pxi | 63 +
 .../restriction_1D_P2_P3.pxi | 59 +
 .../restriction_1D_P3.pxi | 81 +
 .../restriction_2D_P0.pxi | 49 +
 .../restriction_2D_P1.pxi | 67 +
 .../restriction_2D_P1_P2.pxi | 61 +
 .../restriction_2D_P1_P3.pxi | 79 +
 .../restriction_2D_P2.pxi | 139 +
 .../restriction_2D_P2_P3.pxi | 109 +
 .../restriction_2D_P3.pxi | 307 ++
 .../restriction_3D_P0.pxi | 103 +
 .../restriction_3D_P1.pxi | 95 +
 .../restriction_3D_P1_P2.pxi | 79 +
 .../restriction_3D_P1_P3.pxi | 127 +
 .../restriction_3D_P2.pxi | 487 +++
 .../restriction_3D_P2_P3.pxi | 199 +
 .../restriction_3D_P3.pxi | 1719 +++++++++
 .../PyNucleus_multilevelSolver/smoothers.pxd | 36 +
 .../PyNucleus_multilevelSolver/smoothers.pyx | 442 +++
 .../smoothers_decl_{SCALAR}.pxi | 65 +
 .../smoothers_{SCALAR}.pxi | 288 ++
 multilevelSolver/setup.cfg | 7 +
 multilevelSolver/setup.py | 53 +
 multilevelSolver/versioneer.py | 2116 +++++++++++
 nl/.gitattributes | 2 +
 nl/MANIFEST.in | 3 +
 nl/PyNucleus_nl/__init__.py | 605 +++
 nl/PyNucleus_nl/_version.py | 652 ++++
 nl/PyNucleus_nl/adaptiveQuad.pxd | 64 +
 nl/PyNucleus_nl/adaptiveQuad.pyx | 1072 ++++++
 nl/PyNucleus_nl/clusterMethodCy.pxd | 73 +
 nl/PyNucleus_nl/clusterMethodCy.pyx | 2037 ++++++++++
 nl/PyNucleus_nl/fractionalLaplacian1D.pxd | 51 +
 nl/PyNucleus_nl/fractionalLaplacian1D.pyx | 757 ++++
 nl/PyNucleus_nl/fractionalLaplacian2D.pxd | 43 +
 nl/PyNucleus_nl/fractionalLaplacian2D.pyx | 1182 ++++++
 nl/PyNucleus_nl/fractionalOrders.pxd | 60 +
 nl/PyNucleus_nl/fractionalOrders.pyx | 685 ++++
 nl/PyNucleus_nl/interactionDomains.pxd | 48 +
 nl/PyNucleus_nl/interactionDomains.pyx | 890 +++++
 nl/PyNucleus_nl/kernel_params.pxi | 67 +
 nl/PyNucleus_nl/kernel_params_decl.pxi | 9 +
 nl/PyNucleus_nl/kernels.cpp | 41 +
 nl/PyNucleus_nl/kernels.hpp | 44 +
 nl/PyNucleus_nl/kernels.py | 164 +
 nl/PyNucleus_nl/kernels2.pxd | 51 +
 nl/PyNucleus_nl/kernels2.pyx | 566 +++
 nl/PyNucleus_nl/kernelsCy.pxd | 22 +
 nl/PyNucleus_nl/kernelsCy.pyx | 48 +
 nl/PyNucleus_nl/nonlocalLaplacian.pxd | 72 +
 nl/PyNucleus_nl/nonlocalLaplacian.pyx | 2567 +++++++++++++
 nl/PyNucleus_nl/nonlocalLaplacianBase.pxd | 127 +
 nl/PyNucleus_nl/nonlocalLaplacianBase.pyx | 763 ++++
 nl/PyNucleus_nl/nonlocalLaplacianND.pxd | 54 +
 nl/PyNucleus_nl/nonlocalLaplacianND.pyx | 1196 ++++++
 nl/PyNucleus_nl/nonlocalProblems.py | 833 ++++
 nl/PyNucleus_nl/panelTypes.pxi | 15 +
 nl/PyNucleus_nl/twoPointFunctions.pxd | 63 +
 nl/PyNucleus_nl/twoPointFunctions.pyx | 407 ++
 nl/setup.cfg | 7 +
 nl/setup.py | 49 +
 nl/versioneer.py | 2116 +++++++++++
 packageTools/.gitattributes | 2 +
 packageTools/MANIFEST.in | 3 +
 .../PyNucleus_packageTools/__init__.py | 335 ++
 .../PyNucleus_packageTools/_version.py | 652 ++++
 .../PyNucleus_packageTools/sphinxTools.py | 85 +
 packageTools/setup.cfg | 7 +
 packageTools/setup.py | 20 +
 packageTools/versioneer.py | 2116 +++++++++++
 setup.cfg | 7 +
 setup.py | 50 +
 tests/bitArray.py | 116 +
 ...Problem.py--domaindoubleInterval--noRef101 | 2 +
 ...aceProblem.py--domaindoubleSquare--noRef51 | 2 +
 tests/cache_runHelmholtz.py--domaincube1 | 6 +
 tests/cache_runHelmholtz.py--domaincube4 | 6 +
 tests/cache_runHelmholtz.py--domaininterval1 | 6 +
 tests/cache_runHelmholtz.py--domaininterval4 | 6 +
 tests/cache_runHelmholtz.py--domainsquare1 | 6 +
 tests/cache_runHelmholtz.py--domainsquare4 | 6 +
 ...elfractional--problempoly-Dirichlet--dense | 13 +
 ...rnelfractional--problempoly-Neumann--dense | 13 +
 ...nelindicator--problempoly-Dirichlet--dense | 13 +
 ...ernelindicator--problempoly-Neumann--dense | 13 +
 ...lperidynamic--problempoly-Dirichlet--dense | 13 +
 ...nelperidynamic--problempoly-Neumann--dense | 13 +
 ...elfractional--problempoly-Dirichlet--dense | 13 +
 ...nelindicator--problempoly-Dirichlet--dense | 13 +
 ...lperidynamic--problempoly-Dirichlet--dense | 13 +
 ...lGMG.py--domaincube--elementP1--symmetric1 | 62 +
 ...lGMG.py--domaincube--elementP1--symmetric4 | 62 +
 ..._runParallelGMG.py--domaincube--elementP11 | 62 +
 ..._runParallelGMG.py--domaincube--elementP14 | 62 +
 ...lGMG.py--domaincube--elementP2--symmetric1 | 91 +
 ...lGMG.py--domaincube--elementP2--symmetric4 | 91 +
 ..._runParallelGMG.py--domaincube--elementP21 | 91 +
 ..._runParallelGMG.py--domaincube--elementP24 | 91 +
 ...lGMG.py--domaincube--elementP3--symmetric1 | 107 +
 ...lGMG.py--domaincube--elementP3--symmetric4 | 107 +
 ..._runParallelGMG.py--domaincube--elementP31 | 107 +
 ..._runParallelGMG.py--domaincube--elementP34 | 107 +
 ....py--domaininterval--elementP1--symmetric1 | 69 +
 ....py--domaininterval--elementP1--symmetric4 | 69 +
 ...ParallelGMG.py--domaininterval--elementP11 | 69 +
 ...ParallelGMG.py--domaininterval--elementP14 | 69 +
 ....py--domaininterval--elementP2--symmetric1 | 66 +
 ....py--domaininterval--elementP2--symmetric4 | 66 +
 ...ParallelGMG.py--domaininterval--elementP21 | 66 +
 ...ParallelGMG.py--domaininterval--elementP24 | 66 +
 ....py--domaininterval--elementP3--symmetric1 | 63 +
 ....py--domaininterval--elementP3--symmetric4 | 65 +
 ...ParallelGMG.py--domaininterval--elementP31 | 63 +
 ...ParallelGMG.py--domaininterval--elementP34 | 65 +
 ...MG.py--domainsquare--elementP1--symmetric1 | 67 +
 ...MG.py--domainsquare--elementP1--symmetric4 | 67 +
 ...unParallelGMG.py--domainsquare--elementP11 | 67 +
 ...unParallelGMG.py--domainsquare--elementP14 | 67 +
 ...MG.py--domainsquare--elementP2--symmetric1 | 89 +
 ...MG.py--domainsquare--elementP2--symmetric4 | 91 +
 ...unParallelGMG.py--domainsquare--elementP21 | 89 +
 ...unParallelGMG.py--domainsquare--elementP24 | 91 +
 ...MG.py--domainsquare--elementP3--symmetric1 | 97 +
 ...MG.py--domainsquare--elementP3--symmetric4 | 104 +
 ...unParallelGMG.py--domainsquare--elementP31 | 97 +
 ...unParallelGMG.py--domainsquare--elementP34 | 104 +
 tests/cache_runSerialGMG.py | 168 +
 tests/cache_variableOrder.py | 1 +
 tests/drivers_base.py | 93 +
 tests/test.py | 12 +
 tests/test_drivers_intFracLapl.py | 55 +
 tests/test_fracLapl.py | 246 ++
 tests/test_h2finiteHorizon.py | 284 ++
 tests/test_nearField.py | 467 +++
 tests/tupleDict.py | 79 +
 versioneer.py | 2116 +++++++++++
 322 files changed, 86938 insertions(+)
 create mode 100644 .gitattributes
 create mode 100644 Dockerfile
 create mode 100644 LICENSE
 create mode 100644 MANIFEST.in
 create mode 100644 Makefile
 create mode 100644 PyNucleus/__init__.py
 create mode 100644 PyNucleus/_version.py
 create mode 100644 README.rst
 create mode 100644 base/.gitattributes
 create mode 100644 base/MANIFEST.in
 create mode 100644 base/PyNucleus_base/CSR_LinearOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/DenseLinearOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/DenseLinearOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/DiagonalLinearOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/DiagonalLinearOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/IJOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/IJOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/LinearOperatorWrapper_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/LinearOperatorWrapper_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/SSS_LinearOperator_decl_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi
 create mode 100644 base/PyNucleus_base/__init__.py
 create mode 100644 base/PyNucleus_base/_version.py
 create mode 100644 base/PyNucleus_base/blas.pxd
 create mode 100644 base/PyNucleus_base/blas.pyx
 create mode 100644 base/PyNucleus_base/convergence.pxd
 create mode 100644 base/PyNucleus_base/convergence.pyx
 create mode 100644 base/PyNucleus_base/factory.py
 create mode 100644 base/PyNucleus_base/intTuple.pxd
 create mode 100644 base/PyNucleus_base/intTuple.pyx
 create mode 100644 base/PyNucleus_base/ip_norm.pxd
 create mode 100644 base/PyNucleus_base/ip_norm.pyx
 create mode 100644 base/PyNucleus_base/linalg.pxd
 create mode 100644 base/PyNucleus_base/linalg.pyx
 create mode 100644 base/PyNucleus_base/linear_operators.pxd
 create mode 100644 base/PyNucleus_base/linear_operators.pyx
 create mode 100644 base/PyNucleus_base/memProfile.py
 create mode 100644 base/PyNucleus_base/myTypes32.h
 create mode 100644 base/PyNucleus_base/myTypes32.pxd
 create mode 100644 base/PyNucleus_base/myTypes32.pyx
 create mode 100644 base/PyNucleus_base/myTypes64.h
 create mode 100644 base/PyNucleus_base/myTypes64.pxd
 create mode 100644 base/PyNucleus_base/myTypes64.pyx
 create mode 100644 base/PyNucleus_base/performanceLogger.pxd
 create mode 100644 base/PyNucleus_base/performanceLogger.pyx
 create mode 100644 base/PyNucleus_base/plot_utils.py
 create mode 100644 base/PyNucleus_base/setupUtils.py
 create mode 100644 base/PyNucleus_base/solver_factory.py
 create mode 100644 base/PyNucleus_base/solvers.pxd
 create mode 100644 base/PyNucleus_base/solvers.pyx
 create mode 100644 base/PyNucleus_base/sparseGraph.pxd
 create mode 100644 base/PyNucleus_base/sparseGraph.pyx
 create mode 100644 base/PyNucleus_base/sparsityPattern.pxd
 create mode 100644 base/PyNucleus_base/sparsityPattern.pyx
 create mode 100644 base/PyNucleus_base/tupleDict.pxd
 create mode 100644 base/PyNucleus_base/tupleDict.pyx
 create mode 100644 base/PyNucleus_base/tupleDict_decl_{VALUE}.pxi
 create mode 100644 base/PyNucleus_base/tupleDict_{VALUE}.pxi
 create mode 100644 base/PyNucleus_base/utilsCy.pyx
 create mode 100644 base/PyNucleus_base/utilsFem.py
 create mode 100644 base/setup.cfg
 create mode 100644 base/setup.py
 create mode 100644 base/versioneer.py
 create mode 100644 data/matnip.png
 create mode 100644 docs/Makefile
 create mode 100644 docs/PyNucleus_base.rst
 create mode 100644 docs/PyNucleus_fem.rst
 create mode 100644 docs/PyNucleus_metisCy.rst
 create mode 100644 docs/PyNucleus_multilevelSolver.rst
 create mode 100644 docs/PyNucleus_nl.rst
 create mode 100644 docs/conf.py
 create mode 100644 docs/example1.py
 create mode 100644 docs/example1.rst
 create mode 100644 docs/example2.py
 create mode 100644 docs/example2.rst
 create mode 100644 docs/features.rst
 create mode 100644 docs/index.rst
 create mode 100644 docs/installation.rst
 create mode 100644 drivers/example1.py
 create mode 100644 drivers/example2.py
 create mode 100644 drivers/interfaceProblem.py
 create mode 100644 drivers/runHelmholtz.py
 create mode 100644 drivers/runNonlocal.py
 create mode 100644 drivers/runParallelGMG.py
 create mode 100644 drivers/runSerialGMG.py
 create mode 100644 drivers/variableOrder.py
 create mode 100644 fem/.gitattributes
 create mode 100644 fem/MANIFEST.in
 create mode 100644 fem/PyNucleus_fem/DoFMaps.pxd
 create mode 100644 fem/PyNucleus_fem/DoFMaps.pyx
 create mode 100644 fem/PyNucleus_fem/__init__.py
 create mode 100644 fem/PyNucleus_fem/_version.py
 create mode 100644 fem/PyNucleus_fem/algebraicOverlaps.pxd
 create mode 100644 fem/PyNucleus_fem/algebraicOverlaps.pyx
 create mode 100644 fem/PyNucleus_fem/boundaryLayerCy.pyx
 create mode 100644 fem/PyNucleus_fem/distributed_operators.pxd
 create mode 100644 fem/PyNucleus_fem/distributed_operators.pyx
 create mode 100644 fem/PyNucleus_fem/distributed_operators_decl_{SCALAR}.pxi
 create mode 100644 fem/PyNucleus_fem/distributed_operators_{SCALAR}.pxi
 create mode 100644 fem/PyNucleus_fem/femCy.pxd
 create mode 100644 fem/PyNucleus_fem/femCy.pyx
 create mode 100644 fem/PyNucleus_fem/functions.pxd
 create mode 100644 fem/PyNucleus_fem/functions.pyx
 create mode 100644 fem/PyNucleus_fem/mass_1D_P0.pxi
 create mode 100644 fem/PyNucleus_fem/mass_1D_P0_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_1D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_1D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/mass_1D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/mass_2D_P0.pxi
 create mode 100644 fem/PyNucleus_fem/mass_2D_P0_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_2D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_2D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/mass_2D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/mass_3D_P0.pxi
 create mode 100644 fem/PyNucleus_fem/mass_3D_P0_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_3D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/mass_3D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/mass_3D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/mesh.py
 create mode 100644 fem/PyNucleus_fem/meshConstruction.py
 create mode 100644 fem/PyNucleus_fem/meshCy.pxd
 create mode 100644 fem/PyNucleus_fem/meshCy.pyx
 create mode 100644 fem/PyNucleus_fem/meshOverlaps.pxd
 create mode 100644 fem/PyNucleus_fem/meshOverlaps.pyx
 create mode 100644 fem/PyNucleus_fem/meshPartitioning.pyx
 create mode 100644 fem/PyNucleus_fem/pdeProblems.py
 create mode 100644 fem/PyNucleus_fem/quadrature.pxd
 create mode 100644 fem/PyNucleus_fem/quadrature.pyx
 create mode 100644 fem/PyNucleus_fem/repartitioner.pyx
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/simplexMapper.pxd
 create mode 100644 fem/PyNucleus_fem/simplexMapper.pyx
 create mode 100644 fem/PyNucleus_fem/splitting.py
 create mode 100644 fem/PyNucleus_fem/stiffness_1D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_1D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_1D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_2D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_2D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_2D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_3D_P1.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_3D_P2.pxi
 create mode 100644 fem/PyNucleus_fem/stiffness_3D_P3.pxi
 create mode 100644 fem/PyNucleus_fem/vector_decl_{SCALAR}.pxi
 create mode 100644 fem/PyNucleus_fem/vector_{SCALAR}.pxi
 create mode 100644 fem/setup.cfg
 create mode 100644 fem/setup.py
 create mode 100644 fem/versioneer.py
 create mode 100644 metisCy/.gitattributes
 create mode 100644 metisCy/MANIFEST.in
 create mode 100644 metisCy/PyNucleus_metisCy/__init__.py
 create mode 100644 metisCy/PyNucleus_metisCy/_version.py
 create mode 100644 metisCy/PyNucleus_metisCy/metisCy.pxd
 create mode 100644 metisCy/PyNucleus_metisCy/metisCy.pyx
 create mode 100644 metisCy/PyNucleus_metisCy/parmetisCy.pyx
 create mode 100644 metisCy/setup.cfg
 create mode 100644 metisCy/setup.py
 create mode 100644 metisCy/versioneer.py
 create mode 100644 multilevelSolver/.gitattributes
 create mode 100644 multilevelSolver/MANIFEST.in
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/__init__.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/_version.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pxd
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pyx
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_decl_{SCALAR}.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_{SCALAR}.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/connectors.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/geometricMG.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/hierarchies.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/levels.py
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/multigrid.pxd
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/multigrid.pyx
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/multigrid_decl_{SCALAR}.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/multigrid_{SCALAR}.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P0.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P0.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P0.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P3.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/smoothers.pxd
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/smoothers.pyx
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/smoothers_decl_{SCALAR}.pxi
 create mode 100644 multilevelSolver/PyNucleus_multilevelSolver/smoothers_{SCALAR}.pxi
 create mode 100644 multilevelSolver/setup.cfg
 create mode 100644 multilevelSolver/setup.py
 create mode 100644 multilevelSolver/versioneer.py
 create mode 100644 nl/.gitattributes
 create mode 100644 nl/MANIFEST.in
 create mode 100644 nl/PyNucleus_nl/__init__.py
 create mode 100644 nl/PyNucleus_nl/_version.py
 create mode 100644 nl/PyNucleus_nl/adaptiveQuad.pxd
 create mode 100644 nl/PyNucleus_nl/adaptiveQuad.pyx
 create mode 100644 nl/PyNucleus_nl/clusterMethodCy.pxd
 create mode 100644 nl/PyNucleus_nl/clusterMethodCy.pyx
 create mode 100644 nl/PyNucleus_nl/fractionalLaplacian1D.pxd
 create mode 100644 nl/PyNucleus_nl/fractionalLaplacian1D.pyx
 create mode 100644 nl/PyNucleus_nl/fractionalLaplacian2D.pxd
 create mode 100644 nl/PyNucleus_nl/fractionalLaplacian2D.pyx
 create mode 100644 nl/PyNucleus_nl/fractionalOrders.pxd
 create mode 100644 nl/PyNucleus_nl/fractionalOrders.pyx
 create mode 100644 nl/PyNucleus_nl/interactionDomains.pxd
 create mode 100644 nl/PyNucleus_nl/interactionDomains.pyx
 create mode 100644 nl/PyNucleus_nl/kernel_params.pxi
 create mode 100644 nl/PyNucleus_nl/kernel_params_decl.pxi
 create mode 100644 nl/PyNucleus_nl/kernels.cpp
 create mode 100644 nl/PyNucleus_nl/kernels.hpp
 create mode 100644 nl/PyNucleus_nl/kernels.py
 create mode 100644 nl/PyNucleus_nl/kernels2.pxd
 create mode 100644 nl/PyNucleus_nl/kernels2.pyx
 create mode 100644 nl/PyNucleus_nl/kernelsCy.pxd
 create mode 100644 nl/PyNucleus_nl/kernelsCy.pyx
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacian.pxd
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacian.pyx
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacianBase.pxd
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacianBase.pyx
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacianND.pxd
 create mode 100644 nl/PyNucleus_nl/nonlocalLaplacianND.pyx
 create mode 100644 nl/PyNucleus_nl/nonlocalProblems.py
 create mode 100644 nl/PyNucleus_nl/panelTypes.pxi
 create mode 100644 nl/PyNucleus_nl/twoPointFunctions.pxd
 create mode 100644 nl/PyNucleus_nl/twoPointFunctions.pyx
 create mode 100644 nl/setup.cfg
 create mode 100644 nl/setup.py
 create mode 100644 nl/versioneer.py
 create mode 100644 packageTools/.gitattributes
 create mode 100644 packageTools/MANIFEST.in
 create mode 100644 packageTools/PyNucleus_packageTools/__init__.py
 create mode 100644 packageTools/PyNucleus_packageTools/_version.py
 create mode 100644 packageTools/PyNucleus_packageTools/sphinxTools.py
 create mode 100644 packageTools/setup.cfg
 create mode 100644 packageTools/setup.py
 create mode 100644 packageTools/versioneer.py
 create mode 100644 setup.cfg
 create mode 100644 setup.py
 create mode 100644 tests/bitArray.py
 create mode 100644 tests/cache_interfaceProblem.py--domaindoubleInterval--noRef101
 create mode 100644 tests/cache_interfaceProblem.py--domaindoubleSquare--noRef51
 create mode 100644 tests/cache_runHelmholtz.py--domaincube1
 create mode 100644 tests/cache_runHelmholtz.py--domaincube4
 create mode 100644 tests/cache_runHelmholtz.py--domaininterval1
 create mode 100644 tests/cache_runHelmholtz.py--domaininterval4
 create mode 100644 tests/cache_runHelmholtz.py--domainsquare1
 create mode 100644 tests/cache_runHelmholtz.py--domainsquare4
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Neumann--dense
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Neumann--dense
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Neumann--dense
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelfractional--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelindicator--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runNonlocal.py--domainsquare--kernelperidynamic--problempoly-Dirichlet--dense
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP11
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP14
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP21
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP24
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP31
 create mode 100644 tests/cache_runParallelGMG.py--domaincube--elementP34
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP11
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP14
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP21
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP24
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP31
 create mode 100644 tests/cache_runParallelGMG.py--domaininterval--elementP34
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP11
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP14
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP21
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP24
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric1
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric4
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP31
 create mode 100644 tests/cache_runParallelGMG.py--domainsquare--elementP34
 create mode 100644 tests/cache_runSerialGMG.py
 create mode 100644 tests/cache_variableOrder.py
 create mode 100644 tests/drivers_base.py
 create mode 100644 tests/test.py
 create mode 100644 tests/test_drivers_intFracLapl.py
 create mode 100644 tests/test_fracLapl.py
 create mode 100644 tests/test_h2finiteHorizon.py
 create mode 100644 tests/test_nearField.py
 create mode 100644 tests/tupleDict.py
 create mode 100644 versioneer.py

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..0d7c5a2
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,2 @@
+
+PyNucleus/_version.py export-subst
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..fecc384
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,89 @@
+
+# VERSION: 0.1
+# DESCRIPTION: Dockerized PyNucleus build
+# AUTHOR: Christian Glusa
+
+# Base docker image
+FROM debian:testing
+LABEL maintainer Christian Glusa
+
+ENV LANG C.UTF-8
+
+# based on recommendations from
+# https://docs.nersc.gov/development/shifter/how-to-use/
+
+# add contrib and non-free debian repos
+RUN sed -i "s#deb http://deb.debian.org/debian testing main#deb http://deb.debian.org/debian testing main contrib non-free#g" /etc/apt/sources.list
+
+# install packages needed for build
+RUN apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    autoconf automake gcc g++ make gfortran wget zlib1g-dev libffi-dev \
+    tk-dev \
+    libssl-dev ca-certificates cmake \
+    git less \
+    libboost-dev \
+    hdf5-tools \
+    libsuitesparse-dev \
+    libarpack2-dev \
+    libmkl-avx2 libmkl-dev \
+    --no-install-recommends \
+    && rm -rf /var/lib/apt/lists/*
+
+ENV LD_LIBRARY_PATH /usr/local/lib
+
+RUN echo "alias ls='ls --color=auto -FN'" >> /root/.bashrc
+
+RUN mkdir /build/
+
+# install python
+# Consider adding configure flags:
+# --enable-optimizations
+# --with-lto
+# --build="$gnuArch"
+# --enable-shared
+# --with-system-expat
+# --with-system-ffi
+ARG pythonVersion=3.8.2
+RUN cd /build && wget --no-check-certificate https://www.python.org/ftp/python/${pythonVersion}/Python-${pythonVersion}.tgz \
+    && tar xvzf Python-${pythonVersion}.tgz && cd /build/Python-${pythonVersion} \
+    && ./configure --enable-optimizations --with-pymalloc --enable-shared && make -j4 && make install && make clean && rm /build/Python-${pythonVersion}.tgz && rm -rf /build/Python-${pythonVersion}
+
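+# mpi4py further below is compiled against this MPICH installation.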
+# install mpich
+ARG mpichVersion=3.2
+RUN cd /build && wget --no-check-certificate https://www.mpich.org/static/downloads/${mpichVersion}/mpich-${mpichVersion}.tar.gz \
+    && tar xvzf mpich-${mpichVersion}.tar.gz && cd /build/mpich-${mpichVersion} \
+    && ./configure && make -j4 && make install && make clean && rm /build/mpich-${mpichVersion}.tar.gz && rm -rf /build/mpich-${mpichVersion}
+
+# install mpi4py
+ARG mpi4pyVersion=3.0.3
+RUN cd /build && wget --no-check-certificate https://bitbucket.org/mpi4py/mpi4py/downloads/mpi4py-${mpi4pyVersion}.tar.gz \
+    && tar xvzf mpi4py-${mpi4pyVersion}.tar.gz && cd /build/mpi4py-${mpi4pyVersion} \
+    && python3 setup.py build && python3 setup.py install && rm -rf /build/mpi4py-${mpi4pyVersion}
+
+# install parmetis
+ARG parmetisVersion=4.0.3
+RUN cd /build && wget --no-check-certificate http://glaros.dtc.umn.edu/gkhome/fetch/sw/parmetis/parmetis-${parmetisVersion}.tar.gz \
+    && tar xvzf parmetis-${parmetisVersion}.tar.gz && cd /build/parmetis-${parmetisVersion} \
+    && make config shared=1 && make -j4 && make install && make clean && rm /build/parmetis-${parmetisVersion}.tar.gz && rm -rf /build/parmetis-${parmetisVersion}
+
+# install metis
+ARG metisVersion=5.1.0
+RUN cd /build && wget --no-check-certificate http://glaros.dtc.umn.edu/gkhome/fetch/sw/metis/metis-${metisVersion}.tar.gz \
+    && tar xvzf metis-${metisVersion}.tar.gz && cd /build/metis-${metisVersion} \
+    && make config shared=1 && make -j4 && make install && make clean && rm /build/metis-${metisVersion}.tar.gz && rm -rf /build/metis-${metisVersion}
+
+# delete build directory
+RUN rm -rf /build/
+
+RUN /sbin/ldconfig
+
+
+
+# copy code to container and build
+# we copy only the packages over, not any run scripts
+
+COPY PyNucleus_* /home/pynucleus-build/
+COPY setup.py /home/pynucleus-build/
+COPY Makefile /home/pynucleus-build/
+RUN cd /home/pynucleus-build && python -m pip install .
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..c1076da
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,26 @@
+Copyright 2021 National Technology & Engineering Solutions of Sandia,
+LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the
+U.S. Government retains certain rights in this software.
+
+MIT License
+
+Copyright (c) 2017-2021 Christian Glusa
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file
diff --git a/MANIFEST.in b/MANIFEST.in
new file mode 100644
index 0000000..5c8cc95
--- /dev/null
+++ b/MANIFEST.in
@@ -0,0 +1,3 @@
+
+include versioneer.py
+include PyNucleus/_version.py
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..659c938
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,135 @@
+PYTHON ?= python3
+VIRTUAL_ENV ?=
+ifeq ($(VIRTUAL_ENV),)
+  FLAGS ?= --no-use-pep517 -e
+  PIP_FLAGS ?= --user
+else
+  PYTHON = python
+  FLAGS ?= -e
+  PIP_FLAGS ?=
+endif
+TEST_RESULTS ?= index.html
+
+
+install :
+	$(PYTHON) -m pip install packageTools/. && \
+	$(PYTHON) -m pip install base/. && \
+	$(PYTHON) -m pip install metisCy/. && \
+	$(PYTHON) -m pip install fem/. && \
+	$(PYTHON) -m pip install multilevelSolver/. && \
+	$(PYTHON) -m pip install nl/. && \
+	$(PYTHON) -m pip install .
+
+
+clean :
+	$(PYTHON) -m pip uninstall PyNucleus_packageTools PyNucleus_base PyNucleus_metisCy PyNucleus_fem PyNucleus_multilevelSolver PyNucleus_nl
+
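+# The dev_* targets perform editable installs; each one retries from a
+# clean state if the freshly built extension modules fail to import.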
+dev : dev_packageTools dev_base dev_metisCy dev_fem dev_multilevelSolver dev_nl dev_package
+
+dev_packageTools :
+	@ echo "Entering directory \`packageTools/'"
+	cd packageTools; $(PYTHON) -m pip install $(FLAGS) .
+dev_base_build :
+	@ echo "Entering directory \`base'"
+	cd base; $(PYTHON) -m pip install $(FLAGS) .
+dev_base :
+	make dev_base_build
+	$(PYTHON) -c "import PyNucleus_base" || (make clean_base && make dev_base_build)
+dev_metisCy_build :
+	@ echo "Entering directory \`metisCy'"
+	cd metisCy; $(PYTHON) -m pip install $(FLAGS) .
+dev_metisCy :
+	make dev_metisCy_build
+	$(PYTHON) -c "import PyNucleus_metisCy" || (make clean_metisCy && make dev_metisCy_build)
+dev_fem_build :
+	@ echo "Entering directory \`fem'"
+	cd fem; $(PYTHON) -m pip install $(FLAGS) .
+dev_fem :
+	make dev_fem_build
+	$(PYTHON) -c "import PyNucleus_fem" || (make clean_fem && make dev_fem_build)
+dev_multilevelSolver_build :
+	@ echo "Entering directory \`multilevelSolver'"
+	cd multilevelSolver; $(PYTHON) -m pip install $(FLAGS) .
+dev_multilevelSolver :
+	make dev_multilevelSolver_build
+	$(PYTHON) -c "import PyNucleus_multilevelSolver" || (make clean_multilevelSolver && make dev_multilevelSolver_build)
+dev_nl_build :
+	@ echo "Entering directory \`nl'"
+	cd nl; $(PYTHON) -m pip install $(FLAGS) .
+dev_nl :
+	make dev_nl_build
+	$(PYTHON) -c "import PyNucleus_nl" || (make clean_nl && make dev_nl_build)
+dev_package :
+	$(PYTHON) -m pip install $(FLAGS) .
+
+
+
+clean_dev: clean_package clean_packageTools clean_base clean_metisCy clean_fem clean_multilevelSolver clean_nl
+clean_packageTools :
+	$(PYTHON) -m pip uninstall PyNucleus_packageTools -y
+clean_base :
+	$(PYTHON) -m pip uninstall PyNucleus_base -y
+	cd base/PyNucleus_base && \
+	rm -f *.so *.c *.pyc && \
+	rm -rf __pycache__
+	cd base && rm -rf build __pycache__ *.egg-info
+clean_metisCy :
+	$(PYTHON) -m pip uninstall PyNucleus_metisCy -y
+	cd metisCy/PyNucleus_metisCy && \
+	rm -f *.so *.c *.pyc && \
+	rm -rf __pycache__
+	cd metisCy && rm -rf build __pycache__ *.egg-info
+clean_fem :
+	$(PYTHON) -m pip uninstall PyNucleus_fem -y
+	cd fem/PyNucleus_fem && \
+	rm -f *.so *.c *.pyc && \
+	rm -rf __pycache__
+	cd fem && rm -rf build __pycache__ *.egg-info
+clean_multilevelSolver :
+	$(PYTHON) -m pip uninstall PyNucleus_multilevelSolver -y
+	cd multilevelSolver/PyNucleus_multilevelSolver && \
+	rm -f *.so *.c *.pyc && \
+	rm -rf __pycache__
+	cd multilevelSolver && rm -rf build __pycache__ *.egg-info
+clean_nl :
+	$(PYTHON) -m pip uninstall PyNucleus_nl -y
+	cd nl/PyNucleus_nl && \
+	rm -rf *.so *.c *.pyc *.html __pycache__ kernelsCy.cpp adaptiveQuad.cpp
+	cd nl && rm -rf build __pycache__ *.egg-info
+clean_package :
+	$(PYTHON) -m pip uninstall PyNucleus -y
+
+.PHONY: docs
+docs :
+	cd docs && make
+	sphinx-build -b html docs docs/build
+
+clean_docs :
+	cd docs; rm -rf build
+
+createVirtualEnv:
+	$(PYTHON) -m virtualenv --python=$(PYTHON) venv --system-site-packages
+
+
+list-tests:
+	$(PYTHON) -m pytest --collect-only tests/ tests/test.py
+
+.PHONY: tests
+tests:
+	$(PYTHON) -m pytest -rA --html=$(TEST_RESULTS) --self-contained-html tests/ tests/test.py
+
+docker:
+	./build-docker.sh
+
+docker-linux:
+	./run-docker-linux.sh
+
+docker-mac:
+	./run-docker-mac.sh
+
+
+prereq:
+	$(PYTHON) -m pip install $(PIP_FLAGS) Cython cython numpy scipy matplotlib pyyaml h5py pybind11 MeshPy tabulate modepy mpi4py scikit-sparse pyamg
+
+prereq-extra:
+	$(PYTHON) -m pip install $(PIP_FLAGS) pytest pytest-html pytest-xdist Sphinx sphinxcontrib-programoutput
diff --git a/PyNucleus/__init__.py b/PyNucleus/__init__.py
new file mode 100644
index 0000000..a5ddb31
--- /dev/null
+++ b/PyNucleus/__init__.py
@@ -0,0 +1,27 @@
+#####################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,            #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the            #
+# U.S. Government retains certain rights in this software.                         #
+# If you want to use this code, please refer to the README.rst and LICENSE files.  #
+#####################################################################################
+
+
+import importlib
+import pkgutil
+import sys
+
+subpackages = {}
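+# Discover all installed PyNucleus_* packages and register each one as a
+# virtual subpackage, e.g. PyNucleus_fem also becomes importable as
+# PyNucleus.fem; their public names are re-exported at the top level.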
+for finder, name, ispkg in pkgutil.iter_modules():
+    if ispkg and name.find('PyNucleus_') == 0:
+        importName = name[len('PyNucleus_'):]
+        module = importlib.import_module(name, 'PyNucleus')
+        sys.modules['PyNucleus.'+importName] = module
+        subpackages[importName] = module
+        if "__all__" in module.__dict__:
+            names = module.__dict__["__all__"]
+        else:
+            names = [name for name in module.__dict__ if not name.startswith('_')]
+        locals().update({name: getattr(module, name) for name in names})
+
+from . import _version
+__version__ = _version.get_versions()['version']
diff --git a/PyNucleus/_version.py b/PyNucleus/_version.py
new file mode 100644
index 0000000..fdff107
--- /dev/null
+++ b/PyNucleus/_version.py
@@ -0,0 +1,652 @@
+#####################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,            #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the            #
+# U.S. Government retains certain rights in this software.                         #
+# If you want to use this code, please refer to the README.rst and LICENSE files.  #
+#####################################################################################
+
+
+
+# This file helps to compute a version number in source trees obtained from
+# git-archive tarball (such as those provided by github's download-from-tag
+# feature). Distribution tarballs (built by setup.py sdist) and build
+# directories (produced by setup.py build) will contain a much shorter file
+# that just contains the computed version number.
+
+# This file is released into the public domain. Generated by
+# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer)
+
+"""Git implementation of _version.py."""
+
+import errno
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+def get_keywords():
+    """Get the keywords needed to look up the version information."""
+    # these strings will be replaced by git during git-archive.
+    # setup.py/versioneer.py will grep for the variable names, so they must
+    # each be defined on a line of their own. _version.py will just call
+    # get_keywords().
+    git_refnames = "$Format:%d$"
+    git_full = "$Format:%H$"
+    git_date = "$Format:%ci$"
+    keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
+    return keywords
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_config():
+    """Create, populate and return the VersioneerConfig() object."""
+    # these strings are filled in when 'setup.py versioneer' creates
+    # _version.py
+    cfg = VersioneerConfig()
+    cfg.VCS = "git"
+    cfg.style = "pep440"
+    cfg.tag_prefix = ""
+    cfg.parentdir_prefix = ""
+    cfg.versionfile_source = "PyNucleus/_version.py"
+    cfg.verbose = False
+    return cfg
+
+
+class NotThisMethod(Exception):
+    """Exception raised if a method is not valid for the current scenario."""
+
+
+LONG_VERSION_PY: Dict[str, str] = {}
+HANDLERS: Dict[str, Dict[str, Callable]] = {}
+
+
+def register_vcs_handler(vcs, method):  # decorator
+    """Create decorator to mark a method as the handler of a VCS."""
+    def decorate(f):
+        """Store f in HANDLERS[vcs][method]."""
+        if vcs not in HANDLERS:
+            HANDLERS[vcs] = {}
+        HANDLERS[vcs][method] = f
+        return f
+    return decorate
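+
+
+# e.g. @register_vcs_handler("git", "keywords") stores the decorated
+# function as HANDLERS["git"]["keywords"].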
+def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
+                env=None):
+    """Call the given command(s)."""
+    assert isinstance(commands, list)
+    process = None
+    for command in commands:
+        try:
+            dispcmd = str([command] + args)
+            # remember shell=False, so use git.cmd on windows, not just git
+            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
+                                       stdout=subprocess.PIPE,
+                                       stderr=(subprocess.PIPE if hide_stderr
+                                               else None))
+            break
+        except OSError:
+            e = sys.exc_info()[1]
+            if e.errno == errno.ENOENT:
+                continue
+            if verbose:
+                print("unable to run %s" % dispcmd)
+                print(e)
+            return None, None
+    else:
+        if verbose:
+            print("unable to find command, tried %s" % (commands,))
+        return None, None
+    stdout = process.communicate()[0].strip().decode()
+    if process.returncode != 0:
+        if verbose:
+            print("unable to run %s (error)" % dispcmd)
+            print("stdout was %s" % stdout)
+        return None, process.returncode
+    return stdout, process.returncode
+
+
+def versions_from_parentdir(parentdir_prefix, root, verbose):
+    """Try to determine the version from the parent directory name.
+
+    Source tarballs conventionally unpack into a directory that includes both
+    the project name and a version string. We will also support searching up
+    two directory levels for an appropriately named parent directory
+    """
+    rootdirs = []
+
+    for _ in range(3):
+        dirname = os.path.basename(root)
+        if dirname.startswith(parentdir_prefix):
+            return {"version": dirname[len(parentdir_prefix):],
+                    "full-revisionid": None,
+                    "dirty": False, "error": None, "date": None}
+        rootdirs.append(root)
+        root = os.path.dirname(root)  # up a level
+
+    if verbose:
+        print("Tried directories %s but none started with prefix %s" %
+              (str(rootdirs), parentdir_prefix))
+    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
+
+
+@register_vcs_handler("git", "get_keywords")
+def git_get_keywords(versionfile_abs):
+    """Extract version information from the given file."""
+    # the code embedded in _version.py can just fetch the value of these
+    # keywords. When used from setup.py, we don't want to import _version.py,
+    # so we do it with a regexp instead. This function is not used from
+    # _version.py.
+    keywords = {}
+    try:
+        with open(versionfile_abs, "r") as fobj:
+            for line in fobj:
+                if line.strip().startswith("git_refnames ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["refnames"] = mo.group(1)
+                if line.strip().startswith("git_full ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["full"] = mo.group(1)
+                if line.strip().startswith("git_date ="):
+                    mo = re.search(r'=\s*"(.*)"', line)
+                    if mo:
+                        keywords["date"] = mo.group(1)
+    except OSError:
+        pass
+    return keywords
+
+
+@register_vcs_handler("git", "keywords")
+def git_versions_from_keywords(keywords, tag_prefix, verbose):
+    """Get version information from git keywords."""
+    if "refnames" not in keywords:
+        raise NotThisMethod("Short version file found")
+    date = keywords.get("date")
+    if date is not None:
+        # Use only the last line. Previous lines may contain GPG signature
+        # information.
+        date = date.splitlines()[-1]
+
+        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
+        # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
+        # -like" string, which we must then edit to make compliant), because
+        # it's been around since git-1.5.3, and it's too difficult to
+        # discover which version we're using, or to work around using an
+        # older one.
+        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+    refnames = keywords["refnames"].strip()
+    if refnames.startswith("$Format"):
+        if verbose:
+            print("keywords are unexpanded, not using")
+        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
+    refs = {r.strip() for r in refnames.strip("()").split(",")}
+    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
+    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
+    TAG = "tag: "
+    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
+    if not tags:
+        # Either we're using git < 1.8.3, or there really are no tags. We use
+        # a heuristic: assume all version tags have a digit. The old git %d
+        # expansion behaves like git log --decorate=short and strips out the
+        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
+        # between branches and tags. By ignoring refnames without digits, we
+        # filter out many common branch names like "release" and
+        # "stabilization", as well as "HEAD" and "master".
+        tags = {r for r in refs if re.search(r'\d', r)}
+        if verbose:
+            print("discarding '%s', no digits" % ",".join(refs - tags))
+    if verbose:
+        print("likely tags: %s" % ",".join(sorted(tags)))
+    for ref in sorted(tags):
+        # sorting will prefer e.g. "2.0" over "2.0rc1"
+        if ref.startswith(tag_prefix):
+            r = ref[len(tag_prefix):]
+            # Filter out refs that exactly match prefix or that don't start
+            # with a number once the prefix is stripped (mostly a concern
+            # when prefix is '')
+            if not re.match(r'\d', r):
+                continue
+            if verbose:
+                print("picking %s" % r)
+            return {"version": r,
+                    "full-revisionid": keywords["full"].strip(),
+                    "dirty": False, "error": None,
+                    "date": date}
+    # no suitable tags, so version is "0+unknown", but full hex is still there
+    if verbose:
+        print("no suitable tags, using unknown + full revision id")
+    return {"version": "0+unknown",
+            "full-revisionid": keywords["full"].strip(),
+            "dirty": False, "error": "no suitable tags", "date": None}
+
+
+@register_vcs_handler("git", "pieces_from_vcs")
+def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
+    """Get version from 'git describe' in the root of the source tree.
+
+    This only gets called if the git-archive 'subst' keywords were *not*
+    expanded, and _version.py hasn't already been rewritten with a short
+    version string, meaning we're inside a checked out source tree.
+    """
+    GITS = ["git"]
+    TAG_PREFIX_REGEX = "*"
+    if sys.platform == "win32":
+        GITS = ["git.cmd", "git.exe"]
+        TAG_PREFIX_REGEX = r"\*"
+
+    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
+                   hide_stderr=True)
+    if rc != 0:
+        if verbose:
+            print("Directory %s not under git control" % root)
+        raise NotThisMethod("'git rev-parse --git-dir' returned error")
+
+    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
+    # if there isn't one, this yields HEX[-dirty] (no NUM)
+    describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty",
+                                     "--always", "--long",
+                                     "--match",
+                                     "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)],
+                              cwd=root)
+    # --long was added in git-1.5.5
+    if describe_out is None:
+        raise NotThisMethod("'git describe' failed")
+    describe_out = describe_out.strip()
+    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
+    if full_out is None:
+        raise NotThisMethod("'git rev-parse' failed")
+    full_out = full_out.strip()
+
+    pieces = {}
+    pieces["long"] = full_out
+    pieces["short"] = full_out[:7]  # maybe improved later
+    pieces["error"] = None
+
+    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
+                             cwd=root)
+    # --abbrev-ref was added in git-1.6.3
+    if rc != 0 or branch_name is None:
+        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
+    branch_name = branch_name.strip()
+
+    if branch_name == "HEAD":
+        # If we aren't exactly on a branch, pick a branch which represents
+        # the current commit. If all else fails, we are on a branchless
+        # commit.
+        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
+        # --contains was added in git-1.5.4
+        if rc != 0 or branches is None:
+            raise NotThisMethod("'git branch --contains' returned error")
+        branches = branches.split("\n")
+
+        # Remove the first line if we're running detached
+        if "(" in branches[0]:
+            branches.pop(0)
+
+        # Strip off the leading "* " from the list of branches.
+        branches = [branch[2:] for branch in branches]
+        if "master" in branches:
+            branch_name = "master"
+        elif not branches:
+            branch_name = None
+        else:
+            # Pick the first branch that is returned. Good or bad.
+            branch_name = branches[0]
+
+    pieces["branch"] = branch_name
+
+    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
+    # TAG might have hyphens.
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparsable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits
+
+    # commit date: see ISO-8601 comment in git_versions_from_keywords()
+    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
+    # Use only the last line. Previous lines may contain GPG signature
+    # information.
+    date = date.splitlines()[-1]
+    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+    return pieces
+
+
+def plus_or_dot(pieces):
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
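+    # e.g. closest-tag "1.2", distance 3, short "abc1234", dirty True
+    # renders as "1.2+3.gabc1234.dirty"; a clean build at the tag gives "1.2".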
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"],
+                "date": None}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None,
+            "date": pieces.get("date")}
+
+
+def get_versions():
+    """Get version information or return default if unable to do so."""
+    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
+    # __file__, we can work backwards from there to the root. Some
+    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
+    # case we can only use expanded keywords.
+
+    cfg = get_config()
+    verbose = cfg.verbose
+
+    try:
+        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
+                                          verbose)
+    except NotThisMethod:
+        pass
+
+    try:
+        root = os.path.realpath(__file__)
+        # versionfile_source is the relative path from the top of the source
+        # tree (where the .git directory might live) to this file. Invert
+        # this to find the root from __file__.
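+        # Illustrative: with versionfile_source = "PyNucleus_base/_version.py"
+        # (two path components), the loop below calls os.path.dirname twice,
+        # so __file__ = ROOT/PyNucleus_base/_version.py yields root = ROOT.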
+        for _ in cfg.versionfile_source.split('/'):
+            root = os.path.dirname(root)
+    except NameError:
+        return {"version": "0+unknown", "full-revisionid": None,
+                "dirty": None,
+                "error": "unable to find root of source tree",
+                "date": None}
+
+    try:
+        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
+        return render(pieces, cfg.style)
+    except NotThisMethod:
+        pass
+
+    try:
+        if cfg.parentdir_prefix:
+            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+    except NotThisMethod:
+        pass
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None,
+            "error": "unable to compute version", "date": None}
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..aa57e1c
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,160 @@
+
+
+Welcome to PyNucleus!
+=====================================
+
+PyNucleus is a finite element code that specifically targets nonlocal operators of the form
+
+.. math::
+
+   \int_{\mathbb{R}^d} [u(x)-u(y)] \gamma(x, y) dy
+
+for nonlocal kernels :math:`\gamma` with finite or infinite horizon and of integrable or fractional type.
+Specific examples of such operators include the integral and regional fractional Laplacians, their truncated and tempered variants, and operators arising from peridynamics.
+
+The package aims to provide efficient discretization and assembly routines with :math:`O(N \log N)` quasi-optimal complexity.
+The resulting sets of equations can be solved using optimal linear solvers.
+The code is fully NumPy/SciPy compatible, allowing easy integration into application codes.
+
+
+Features
+========
+
+* Simplicial meshes in 1D, 2D, 3D
+
+* Finite Elements:
+
+  * continuous P1, P2, P3 spaces,
+  * discontinuous P0 space
+
+* Assembly of local operators
+
+* Nonlocal kernels:
+
+  * Finite and infinite horizon
+  * Singularities: fractional, peridynamic, constant kernel
+  * Spatially variable kernels: variable fractional order and variable coefficients
+
+* Nonlocal assembly (1D and 2D) into dense, sparse and hierarchical matrices
+
+* Solvers/preconditioners:
+
+  * LU,
+  * Cholesky,
+  * incomplete LU & Cholesky,
+  * Jacobi,
+  * CG,
+  * BiCGStab,
+  * GMRES,
+  * geometric multigrid
+
+* Distributed computing using MPI
+
+* Computationally expensive parts of the code are compiled via Cython.
+
+* Partitioning using METIS / ParMETIS
+
+
+Documentation
+=============
+
+The documentation is available at XXXXXXXXXXXXX
+
+To generate the Sphinx documentation locally, run
+
+.. code-block:: shell
+
+   make docs
+
+and open ``docs/index.html`` in your browser.
+
+
+
+
+Prerequisites
+=============
+
+In order to install PyNucleus, you will need
+
+* Python 3,
+* MPI,
+* METIS,
+* ParMETIS,
+* SuiteSparse,
+* make (optional, only for editable installs).
+
+On Debian, Ubuntu, etc., the required dependencies can be installed with
+
+.. code-block:: shell
+
+   sudo apt-get install python3 mpi-default-bin mpi-default-dev libmetis-dev libparmetis-dev libsuitesparse-dev
+
+Installation
+============
+
+PyNucleus is installed via
+
+.. code-block:: shell
+
+   python3 -m pip install .
+
+or via
+
+.. code-block:: shell
+
+   make
+
+If you want to easily modify the source code without re-installing the package every time, an editable install is available as
+
+.. code-block:: shell
+
+   make dev
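+
+As a quick check of the installation, PyNucleus operators can be mixed with
+NumPy arrays and SciPy sparse matrices. The following is a minimal sketch,
+not taken from the package documentation; the import path
+``PyNucleus_base.linear_operators`` and a double precision build with 32-bit
+indices are assumptions:
+
+.. code-block:: python
+
+   import numpy as np
+   import scipy.sparse as sp
+   # assumed import location of the CSR wrapper class
+   from PyNucleus_base.linear_operators import CSR_LinearOperator
+
+   S = sp.identity(10, format='csr')    # any SciPy CSR matrix
+   A = CSR_LinearOperator.from_csr(S)   # wrap it as a PyNucleus operator
+   x = np.ones((10, ))
+   y = A*x                              # matrix-vector product, equals S.dot(x)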
+PyNucleus depends on other Python packages that will be installed automatically:
+
+* NumPy
+* SciPy
+* Matplotlib
+* Cython
+* mpi4py
+* tabulate
+* PyYAML
+* H5py
+* modepy
+* meshpy
+* scikit-sparse
+
+
+Docker container
+================
+
+A Docker container that contains all the required dependencies can be built as well:
+
+.. code-block:: shell
+
+   ./build-docker.sh
+
+Once the build is done, it can be launched as
+
+.. code-block:: shell
+
+   ./run-docker-linux.sh
+
+or
+
+.. code-block:: shell
+
+   ./run-docker-mac.sh
+
+
+Funding
+=======
+
+PyNucleus' development is funded through the MATNIP project (PI: Marta D'Elia) of the LDRD program at Sandia National Laboratories.
+
+.. image:: data/matnip.png
+   :height: 100px
+
+*The MATNIP project develops for the first time a rigorous nonlocal interface theory based on physical principles that is consistent with the classical theory of partial differential equations when the nonlocality vanishes and is mathematically well-posed.
+This will improve the predictive capability of nonlocal models and increase their usability at Sandia and, more generally, in the computational-science and engineering community.
+Furthermore, this theory will provide the groundwork for the development of nonlocal solvers, reducing the burden of prohibitively expensive computations.*
diff --git a/base/.gitattributes b/base/.gitattributes
new file mode 100644
index 0000000..814e582
--- /dev/null
+++ b/base/.gitattributes
@@ -0,0 +1,2 @@
+
+PyNucleus_base/_version.py export-subst
diff --git a/base/MANIFEST.in b/base/MANIFEST.in
new file mode 100644
index 0000000..680614b
--- /dev/null
+++ b/base/MANIFEST.in
@@ -0,0 +1,3 @@
+
+include versioneer.py
+include PyNucleus_base/_version.py
diff --git a/base/PyNucleus_base/CSR_LinearOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/CSR_LinearOperator_decl_{SCALAR}.pxi
new file mode 100644
index 0000000..6e778dd
--- /dev/null
+++ b/base/PyNucleus_base/CSR_LinearOperator_decl_{SCALAR}.pxi
@@ -0,0 +1,29 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from . blas cimport spmv, spres
+from .
blas import uninitialized + +cdef class {SCALAR_label}CSR_LinearOperator({SCALAR_label}LinearOperator): + cdef: + public INDEX_t[::1] indptr, indices + public {SCALAR}_t[::1] data + public int NoThreads + public BOOL_t indices_sorted + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_multi(self, + {SCALAR}_t[:, ::1] x, + {SCALAR}_t[:, ::1] y) except -1 + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + cpdef {SCALAR_label}CSR_LinearOperator getBlockDiagonal(self, sparseGraph blocks) diff --git a/base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi new file mode 100644 index 0000000..7421a00 --- /dev/null +++ b/base/PyNucleus_base/CSR_LinearOperator_{SCALAR}.pxi @@ -0,0 +1,503 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class {SCALAR_label}CSR_LinearOperator({SCALAR_label}LinearOperator): + def __init__(self, + INDEX_t[::1] indices, + INDEX_t[::1] indptr, + {SCALAR}_t[::1] data, + int NoThreads=1): + {SCALAR_label}LinearOperator.__init__(self, + indptr.shape[0]-1, + indptr.shape[0]-1) + self.indices = indices + self.indptr = indptr + self.data = data + self.NoThreads = NoThreads + self.indices_sorted = False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec({SCALAR_label}CSR_LinearOperator self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + spmv(self.indptr, self.indices, self.data, x, y) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void _residual({SCALAR_label}CSR_LinearOperator self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + if simpleResidual: + assign(result, rhs) + else: + spres(self.indptr, self.indices, self.data, x, rhs, result) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite({SCALAR_label}CSR_LinearOperator self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + spmv(self.indptr, self.indices, self.data, x, y, overwrite=False) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_multi({SCALAR_label}CSR_LinearOperator self, + {SCALAR}_t[:, ::1] x, + {SCALAR}_t[:, ::1] y) except -1: + cdef: + INDEX_t i, jj, j, k, numVecs = x.shape[1] + {SCALAR}_t[::1] temp = uninitialized((numVecs), dtype={SCALAR}) + for i in range(self.num_rows): + for k in range(numVecs): + temp[k] = 0.0 + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + for k in range(numVecs): + temp[k] += self.data[jj]*x[j, k] + for k in range(numVecs): + y[i, k] = temp[k] + return 0 + + def isSparse(self): + return True + + def to_csr(self): + from scipy.sparse import csr_matrix + return csr_matrix((np.array(self.data, copy=False), + np.array(self.indices, 
copy=False), + np.array(self.indptr, copy=False)), + shape=self.shape) + + @staticmethod + def from_csr(matrix): + A = {SCALAR_label}CSR_LinearOperator(matrix.indices, matrix.indptr, matrix.data) + A.num_rows = matrix.shape[0] + A.num_columns = matrix.shape[1] + return A + + @staticmethod + def from_dense(matrix): + cdef: + INDEX_t i, j, nnz + {SCALAR}_t[:, ::1] data + INDEX_t[::1] indptr, indices + {SCALAR}_t[::1] values + if isinstance(matrix, {SCALAR_label}Dense_LinearOperator): + data = matrix.data + else: + data = matrix + indptr = np.zeros((data.shape[0]+1), dtype=INDEX) + for i in range(data.shape[0]): + for j in range(data.shape[1]): + if data[i, j] != 0.: + indptr[i+1] += 1 + for i in range(data.shape[0]): + indptr[i+1] += indptr[i] + nnz = indptr[data.shape[0]] + indices = np.empty((nnz), dtype=INDEX) + values = np.empty((nnz), dtype={SCALAR}) + for i in range(data.shape[0]): + for j in range(data.shape[1]): + if data[i, j] != 0.: + indices[indptr[i]] = j + values[indptr[i]] = data[i, j] + indptr[i] += 1 + for i in range(data.shape[0], 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + A = {SCALAR_label}CSR_LinearOperator(indices, indptr, values) + A.num_rows = data.shape[0] + A.num_columns = data.shape[1] + return A + + def to_csr_linear_operator(self): + return self + + def toarray(self): + return self.to_csr().toarray() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry({SCALAR_label}CSR_LinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, low, mid, high + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + self.data[i] = val + break + else: + while self.indices[low] != J: + if high-low <= 1: + raise IndexError() + mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + self.data[low] = val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry({SCALAR_label}CSR_LinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, low, mid, high + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + self.data[i] += val + break + else: + while self.indices[low] != J: + if high-low <= 1: + # raise IndexError() + return + mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + self.data[low] += val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry({SCALAR_label}CSR_LinearOperator self, INDEX_t I, INDEX_t J): + cdef: + INDEX_t i, low, mid, high + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + return self.data[i] + else: + while self.indices[low] != J: + if high-low <= 1: + return 0. 
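+                # bisection step on the sorted column indices of row I;
+                # sortedness is assumed (cf. sort_indices / indices_sorted)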
+ mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + return self.data[low] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def to_sss(self): + cdef: + np.ndarray[INDEX_t, ndim=1] indptr_mem = np.zeros((self.num_rows+1), + dtype=INDEX) + INDEX_t[::1] indptr = indptr_mem + np.ndarray[INDEX_t, ndim=1] indices_mem + INDEX_t[::1] indices + np.ndarray[{SCALAR}_t, ndim=1] data_mem, diagonal_mem + {SCALAR}_t[::1] data, diagonal + INDEX_t i, jj, j, nnz + for i in range(self.num_rows): + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + if j < i: + indptr[i+1] += 1 + for i in range(self.num_rows): + indptr[i+1] += indptr[i] + nnz = indptr[indptr.shape[0]-1] + indices_mem = uninitialized((nnz), dtype=INDEX) + indices = indices_mem + data_mem = uninitialized((nnz), dtype={SCALAR}) + data = data_mem + diagonal_mem = uninitialized((self.num_rows), dtype={SCALAR}) + diagonal = diagonal_mem + for i in range(self.num_rows): + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + if j < i: + indices[indptr[i]] = j + data[indptr[i]] = self.data[jj] + indptr[i] += 1 + elif j == i: + diagonal[i] = self.data[jj] + for i in range(self.num_rows, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + return SSS_LinearOperator(indices, indptr, data, diagonal) + + def get_diagonal(self): + cdef: + INDEX_t i, jj + np.ndarray[{SCALAR}_t, ndim=1] diag_mem = np.zeros((self.num_rows), + dtype={SCALAR}) + {SCALAR}_t[::1] d = diag_mem + + for i in range(self.num_rows): + for jj in range(self.indptr[i], self.indptr[i+1]): + if self.indices[jj] == i: + d[i] = self.data[jj] + break + return diag_mem + + diagonal = property(fget=get_diagonal) + + def getnnz(self): + return self.indptr[-1] + + nnz = property(fget=getnnz) + + def getMemorySize(self): + return ((self.indptr.shape[0]+self.indices.shape[0])*sizeof(INDEX_t) + + self.data.shape[0]*sizeof({SCALAR}_t)) + + def __repr__(self): + sizeInMB = self.getMemorySize() >> 20 + if sizeInMB > 100: + return '<%dx%d %s with %d stored elements, %d MB>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nnz, + sizeInMB) + else: + return '<%dx%d %s with %d stored elements>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nnz) + + def HDF5write(self, node): + node.create_dataset('indices', data=np.array(self.indices, + copy=False), + compression=COMPRESSION) + node.create_dataset('indptr', data=np.array(self.indptr, + copy=False), + compression=COMPRESSION) + node.create_dataset('data', data=np.array(self.data, + copy=False), + compression=COMPRESSION) + node.attrs['type'] = 'csr' + node.attrs['num_rows'] = self.num_rows + node.attrs['num_columns'] = self.num_columns + + @staticmethod + def HDF5read(node): + B = {SCALAR_label}CSR_LinearOperator(np.array(node['indices'], dtype=INDEX), + np.array(node['indptr'], dtype=INDEX), + np.array(node['data'], dtype={SCALAR})) + B.num_rows = node.attrs['num_rows'] + B.num_columns = node.attrs['num_columns'] + assert B.indptr.shape[0]-1 == B.num_rows + return B + + def __getstate__(self): + return (np.array(self.indices, dtype=INDEX), + np.array(self.indptr, dtype=INDEX), + np.array(self.data, dtype={SCALAR}), + self.num_rows, + self.num_columns) + + def __setstate__(self, state): + self.indices = state[0] + self.indptr = state[1] + self.data = state[2] + self.num_rows = state[3] + self.num_columns = state[4] + + def copy(self): + data = np.array(self.data, copy=True) + other = 
{SCALAR_label}CSR_LinearOperator(self.indices, self.indptr, data) + return other + + def sort_indices(self): + sort_indices{SCALAR_label}(self.indptr, self.indices, self.data) + self.indices_sorted = True + + def isSorted(self): + """ + Check if column indices are sorted. + """ + cdef: + INDEX_t i, nnz, s, p, q + nnz = self.indptr[self.indptr.shape[0]-1] + for i in range(self.indptr.shape[0]-1): + s = self.indptr[i] + if s == nnz: + continue + p = self.indices[s] + for q in self.indices[self.indptr[i]+1:self.indptr[i+1]]: + if q <= p: + return False + else: + p = q + return True + + def restrictMatrix(self, {SCALAR_label}LinearOperator A, {SCALAR_label}LinearOperator Ac): + if self.num_rows == Ac.num_rows: + multiply(self, A, Ac) + if self.num_columns == Ac.num_rows: + multiply2(self, A, Ac) + + def scale(self, {SCALAR}_t scaling): + scaleScalar(self.data, scaling) + + def scaleLeft(self, {SCALAR}_t[::1] scaling): + cdef: + INDEX_t i, jj + {SCALAR}_t d + assert self.num_rows == scaling.shape[0] + for i in range(self.num_rows): + d = scaling[i] + for jj in range(self.indptr[i], self.indptr[i+1]): + self.data[jj] *= d + + def scaleRight(self, {SCALAR}_t[::1] scaling): + cdef: + INDEX_t i, jj, j + assert self.num_columns == scaling.shape[0] + for i in range(self.num_rows): + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + self.data[jj] *= scaling[j] + + def setZero(self): + cdef: + INDEX_t i + for i in range(self.data.shape[0]): + self.data[i] = 0. + + def eliminate_zeros(self): + cdef: + INDEX_t[::1] indptrNew = np.zeros((self.num_rows+1), dtype=INDEX) + INDEX_t[::1] indicesNew + {SCALAR}_t[::1] dataNew + INDEX_t i, jj, j, k + {SCALAR}_t v + + for i in range(self.num_rows): + indptrNew[i+1] = indptrNew[i] + for jj in range(self.indptr[i], self.indptr[i+1]): + v = self.data[jj] + if v != 0.: + indptrNew[i+1] += 1 + indicesNew = uninitialized((indptrNew[self.num_rows]), dtype=INDEX) + dataNew = uninitialized((indptrNew[self.num_rows]), dtype={SCALAR}) + k = 0 + for i in range(self.num_rows): + for jj in range(self.indptr[i], self.indptr[i+1]): + v = self.data[jj] + if v != 0.: + j = self.indices[jj] + indicesNew[k] = j + dataNew[k] = v + k += 1 + self.indptr = indptrNew + self.indices = indicesNew + self.data = dataNew + + def sliceRows(self, INDEX_t[::1] rowIndices): + temp = self.to_csr() + temp = temp[rowIndices, :] + return {SCALAR_label}CSR_LinearOperator(temp.indices, temp.indptr, temp.data) + + def sliceColumns(self, INDEX_t[::1] columnIndices): + temp = self.to_csr() + temp = temp[:, columnIndices] + return {SCALAR_label}CSR_LinearOperator(temp.indices, temp.indptr, temp.data) + + def transpose(self): + return transpose(self, inplace=False) + + cpdef {SCALAR_label}CSR_LinearOperator getBlockDiagonal(self, sparseGraph blocks): + cdef: + INDEX_t[::1] indptr = np.zeros((self.num_rows+1), dtype=INDEX) + INDEX_t[::1] indices + {SCALAR}_t[::1] data + INDEX_t blkIdx, ii, i, jj, j, kk, nnz, temp + for blkIdx in range(blocks.num_rows): + for ii in range(blocks.indptr[blkIdx], blocks.indptr[blkIdx+1]): + i = blocks.indices[ii] + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + for kk in range(blocks.indptr[blkIdx], blocks.indptr[blkIdx+1]): + if blocks.indices[kk] == j: + indptr[i] += 1 + break + nnz = 0 + for i in range(self.num_rows): + temp = indptr[i] + indptr[i] = nnz + nnz += temp + indptr[self.num_rows] = nnz + nnz = indptr[self.num_rows] + indices = uninitialized((nnz), dtype=INDEX) + data = uninitialized((nnz), dtype={SCALAR}) + for 
blkIdx in range(blocks.num_rows): + for ii in range(blocks.indptr[blkIdx], blocks.indptr[blkIdx+1]): + i = blocks.indices[ii] + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + for kk in range(blocks.indptr[blkIdx], blocks.indptr[blkIdx+1]): + if blocks.indices[kk] == j: + indices[indptr[i]] = j + data[indptr[i]] = self.data[jj] + indptr[i] += 1 + break + for i in range(self.num_rows, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + blockA = {SCALAR_label}CSR_LinearOperator(indices, indptr, data) + return blockA + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef BOOL_t sort_indices{SCALAR_label}(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + {SCALAR}_t[::1] data): + cdef: + INDEX_t n, i, jj, j, kk + {SCALAR}_t d + BOOL_t wasSorted = True + n = indptr.shape[0]-1 + if indices.shape[0] == data.shape[0]: + for i in range(n): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + d = data[jj] + kk = jj + while indptr[i] < kk and j < indices[kk-1]: + wasSorted = False + indices[kk] = indices[kk-1] + data[kk] = data[kk-1] + kk -= 1 + indices[kk] = j + data[kk] = d + else: + for i in range(n): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + kk = jj + while indptr[i] < kk and j < indices[kk-1]: + wasSorted = False + indices[kk] = indices[kk-1] + kk -= 1 + indices[kk] = j + return wasSorted diff --git a/base/PyNucleus_base/DenseLinearOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/DenseLinearOperator_decl_{SCALAR}.pxi new file mode 100644 index 0000000..d793888 --- /dev/null +++ b/base/PyNucleus_base/DenseLinearOperator_decl_{SCALAR}.pxi @@ -0,0 +1,29 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . blas import uninitialized + + +cdef class {SCALAR_label}Dense_LinearOperator({SCALAR_label}LinearOperator): + cdef: + public {SCALAR}_t[:, ::1] data + + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_multi(self, + {SCALAR}_t[:, ::1] x, + {SCALAR}_t[:, ::1] y) except -1 + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + + +cdef class {SCALAR_label}Dense_SubBlock_LinearOperator({SCALAR_label}LinearOperator): + cdef: + dict lookupI, lookupJ + public {SCALAR}_t[:, :] data diff --git a/base/PyNucleus_base/DenseLinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/DenseLinearOperator_{SCALAR}.pxi new file mode 100644 index 0000000..e7cbb90 --- /dev/null +++ b/base/PyNucleus_base/DenseLinearOperator_{SCALAR}.pxi @@ -0,0 +1,186 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class {SCALAR_label}Dense_LinearOperator({SCALAR_label}LinearOperator): + def __init__(self, + {SCALAR}_t[:, ::1] data): + {SCALAR_label}LinearOperator.__init__(self, + data.shape[0], + data.shape[1]) + self.data = data + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + gemv(self.data, x, y) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + gemv(self.data, x, y, 1.) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_multi(self, + {SCALAR}_t[:, ::1] x, + {SCALAR}_t[:, ::1] y) except -1: + cdef: + INDEX_t i, j, k + INDEX_t numVecs = y.shape[1] + {SCALAR}_t[::1] temp = uninitialized((numVecs), dtype={SCALAR}) + y[:, :] = 0. + for i in range(self.num_rows): + temp[:] = 0. + for j in range(self.num_columns): + for k in range(numVecs): + temp[k] += self.data[i, j]*x[j, k] + for k in range(numVecs): + y[i, k] = temp[k] + return 0 + + property diagonal: + def __get__(self): + cdef INDEX_t i + diag = uninitialized((min(self.num_rows, self.num_columns)), + dtype={SCALAR}) + for i in range(min(self.num_rows, self.num_columns)): + diag[i] = self.data[i, i] + return diag + + def scale(self, {SCALAR}_t scaling): + cdef: + INDEX_t i, j + for i in range(self.num_rows): + for j in range(self.num_columns): + self.data[i, j] *= scaling + + def isSparse(self): + return False + + def toarray(self): + return np.array(self.data, copy=False, dtype={SCALAR}) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J): + return self.data[I, J] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + self.data[I, J] = val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + self.data[I, J] += val + + def HDF5write(self, node): + node.create_dataset('data', data=np.array(self.data, + copy=False), + compression=COMPRESSION) + node.attrs['type'] = 'dense{SCALAR_label}' + + @staticmethod + def HDF5read(node): + return Dense_LinearOperator(np.array(node['data'], dtype={SCALAR})) + + @staticmethod + def zeros(INDEX_t num_rows, INDEX_t num_columns): + return Dense_LinearOperator(np.zeros((num_rows, num_columns), dtype={SCALAR})) + + @staticmethod + def ones(INDEX_t num_rows, INDEX_t num_columns): + return Dense_LinearOperator(np.ones((num_rows, num_columns), dtype={SCALAR})) + + @staticmethod + def empty(INDEX_t num_rows, INDEX_t num_columns): + return Dense_LinearOperator(uninitialized((num_rows, num_columns), dtype={SCALAR})) + + def transpose(self): + return {SCALAR_label}Dense_LinearOperator(np.ascontiguousarray(self.toarray().T)) + + def getMemorySize(self): + return self.data.shape[0]*self.data.shape[1]*sizeof({SCALAR}_t) + + def __repr__(self): + sizeInMB = self.getMemorySize() >> 20 + if sizeInMB > 100: + return '<%dx%d %s, %d MB>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + sizeInMB) + else: + return '<%dx%d %s>' % (self.num_rows, + self.num_columns, + 
self.__class__.__name__) + + +cdef class {SCALAR_label}Dense_SubBlock_LinearOperator({SCALAR_label}LinearOperator): + def __init__(self, INDEX_t[::1] I, INDEX_t[::1] J, INDEX_t num_rows, INDEX_t num_columns, {SCALAR}_t[:, :] mem=None): + cdef: + INDEX_t i + if mem is None: + mem = np.zeros((I.shape[0], J.shape[0]), dtype={SCALAR}) + self.data = mem + {SCALAR_label}LinearOperator.__init__(self, + num_rows, + num_columns) + self.lookupI = {} + self.lookupJ = {} + for i in range(I.shape[0]): + self.lookupI[I[i]] = i + for i in range(J.shape[0]): + self.lookupJ[J[i]] = i + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J): + cdef: + INDEX_t i, j + i = self.lookupI.get(I, -1) + j = self.lookupJ.get(J, -1) + if i >= 0 and j >= 0: + return self.data[i, j] + else: + return 0. + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, j + i = self.lookupI.get(I, -1) + j = self.lookupJ.get(J, -1) + if i >= 0 and j >= 0: + self.data[i, j] = val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, j + i = self.lookupI.get(I, -1) + j = self.lookupJ.get(J, -1) + if i >= 0 and j >= 0: + self.data[i, j] += val diff --git a/base/PyNucleus_base/DiagonalLinearOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/DiagonalLinearOperator_decl_{SCALAR}.pxi new file mode 100644 index 0000000..f3b88ee --- /dev/null +++ b/base/PyNucleus_base/DiagonalLinearOperator_decl_{SCALAR}.pxi @@ -0,0 +1,21 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class {SCALAR_label}diagonalOperator({SCALAR_label}LinearOperator): + cdef: + public {SCALAR}_t[::1] data + + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + + +cdef class {SCALAR_label}invDiagonal({SCALAR_label}diagonalOperator): + pass diff --git a/base/PyNucleus_base/DiagonalLinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/DiagonalLinearOperator_{SCALAR}.pxi new file mode 100644 index 0000000..7ac0822 --- /dev/null +++ b/base/PyNucleus_base/DiagonalLinearOperator_{SCALAR}.pxi @@ -0,0 +1,95 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class {SCALAR_label}diagonalOperator({SCALAR_label}LinearOperator): + def __init__(self, {SCALAR}_t[::1] diagonal): + {SCALAR_label}LinearOperator.__init__(self, + diagonal.shape[0], + diagonal.shape[0]) + self.data = diagonal + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + cdef: + INDEX_t i + y[:] = 0. + for i in range(self.num_rows): + y[i] = self.data[i]*x[i] + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J): + if I == J: + return self.data[I] + else: + return 0. + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + if I == J: + self.data[I] = val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + if I == J: + self.data[I] += val + else: + raise NotImplementedError() + + def get_diagonal(self): + return np.array(self.data, copy=False) + + diagonal = property(fget=get_diagonal) + + def isSparse(self): + return True + + def to_csr(self): + from scipy.sparse import csr_matrix + return csr_matrix((self.data, + np.arange(self.num_columns, dtype=INDEX), + np.arange(self.num_rows+1, dtype=INDEX)), + shape=(self.num_rows, self.num_columns)) + + def to_csr_linear_operator(self): + B = self.to_csr() + return CSR_LinearOperator(B.indices, B.indptr, B.data) + + def __getstate__(self): + return (np.array(self.data, dtype={SCALAR}),) + + def __setstate__(self, state): + self.data = state[0] + self.num_rows = self.data.shape[0] + self.num_columns = self.data.shape[0] + + def HDF5write(self, node): + node.create_dataset('data', data=np.array(self.data, + copy=False), + compression=COMPRESSION) + node.attrs['type'] = 'diagonal' + + @staticmethod + def HDF5read(node): + return diagonalOperator(np.array(node['data'], dtype={SCALAR})) + + +cdef class {SCALAR_label}invDiagonal({SCALAR_label}diagonalOperator): + def __init__(self, {SCALAR_label}LinearOperator A): + {SCALAR_label}diagonalOperator.__init__(self, + 1./np.array(A.diagonal)) diff --git a/base/PyNucleus_base/IJOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/IJOperator_decl_{SCALAR}.pxi new file mode 100644 index 0000000..0e90ab0 --- /dev/null +++ b/base/PyNucleus_base/IJOperator_decl_{SCALAR}.pxi @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . 
intTuple cimport intTuple


+cdef class {SCALAR_label}IJOperator({SCALAR_label}LinearOperator):
+    cdef:
+        dict entries
+    cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J)
+    cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val)
diff --git a/base/PyNucleus_base/IJOperator_{SCALAR}.pxi b/base/PyNucleus_base/IJOperator_{SCALAR}.pxi
new file mode 100644
index 0000000..c04dc01
--- /dev/null
+++ b/base/PyNucleus_base/IJOperator_{SCALAR}.pxi
@@ -0,0 +1,90 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cdef class {SCALAR_label}IJOperator({SCALAR_label}LinearOperator):
+    def __init__(self, INDEX_t numRows, INDEX_t numCols):
+        super({SCALAR_label}IJOperator, self).__init__(numRows, numCols)
+        self.entries = {}
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void setEntry({SCALAR_label}IJOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val):
+        cdef:
+            intTuple hv = intTuple.create2(I, J)
+        self.entries[hv] = val
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void addToEntry({SCALAR_label}IJOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val):
+        cdef:
+            intTuple hv = intTuple.create2(I, J)
+            {SCALAR}_t oldVal
+        oldVal = self.entries.pop(hv, 0.)
+        self.entries[hv] = oldVal+val
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef {SCALAR}_t getEntry({SCALAR_label}IJOperator self, INDEX_t I, INDEX_t J):
+        cdef:
+            intTuple hv = intTuple.create2(I, J)
+        return self.entries.get(hv, 0.)
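+
+    # The methods below export the {intTuple(I, J): value} dict: getData
+    # returns COO-style (I, J, value) arrays, while to_csr_linear_operator
+    # counts entries per row to build indptr and then fills indices and
+    # data in place.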
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def getData(self):
+        cdef:
+            INDEX_t[::1] I, J
+            {SCALAR}_t[::1] data
+            INDEX_t numEntries = len(self.entries)
+            intTuple hv
+            INDEX_t[::1] pair
+            INDEX_t k = 0
+        I = np.empty((numEntries), dtype=INDEX)
+        J = np.empty((numEntries), dtype=INDEX)
+        data = np.empty((numEntries), dtype={SCALAR})
+        pair = np.empty((2), dtype=INDEX)
+        for hv in self.entries:
+            hv.get(&pair[0])
+            I[k] = pair[0]
+            J[k] = pair[1]
+            data[k] = self.entries[hv]
+            k += 1
+        return (np.array(I, copy=False),
+                np.array(J, copy=False),
+                np.array(data, copy=False))
+
+    def to_csr_linear_operator(self):
+        cdef:
+            INDEX_t[::1] indptr = np.zeros((self.num_rows+1), dtype=INDEX)
+            INDEX_t[::1] indices = uninitialized((len(self.entries)), dtype=INDEX)
+            {SCALAR}_t[::1] data = uninitialized((len(self.entries)), dtype={SCALAR})
+            INDEX_t[::1] pair = np.empty((2), dtype=INDEX)
+            intTuple hv
+            INDEX_t I
+        for hv in self.entries:
+            hv.get(&pair[0])
+            I = pair[0]
+            indptr[I+1] += 1
+        for I in range(self.num_rows):
+            indptr[I+1] += indptr[I]
+        for hv in self.entries:
+            hv.get(&pair[0])
+            I = pair[0]
+            indices[indptr[I]] = pair[1]
+            data[indptr[I]] = self.entries[hv]
+            indptr[I] += 1
+        for I in range(self.num_rows-1, 0, -1):
+            indptr[I] = indptr[I-1]
+        indptr[0] = 0
+        A = {SCALAR_label}CSR_LinearOperator(indices, indptr, data)
+        A.num_columns = self.num_columns
+        return A
diff --git a/base/PyNucleus_base/LinearOperatorWrapper_decl_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperatorWrapper_decl_{SCALAR}.pxi
new file mode 100644
index 0000000..a76f601
--- /dev/null
+++ b/base/PyNucleus_base/LinearOperatorWrapper_decl_{SCALAR}.pxi
@@ -0,0 +1,11 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cdef class {SCALAR_label}LinearOperator_wrapper({SCALAR_label}LinearOperator):
+    cdef:
+        object _matvec
diff --git a/base/PyNucleus_base/LinearOperatorWrapper_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperatorWrapper_{SCALAR}.pxi
new file mode 100644
index 0000000..ba5f014
--- /dev/null
+++ b/base/PyNucleus_base/LinearOperatorWrapper_{SCALAR}.pxi
@@ -0,0 +1,17 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +cdef class {SCALAR_label}LinearOperator_wrapper({SCALAR_label}LinearOperator): + def __init__(self, INDEX_t num_rows, INDEX_t num_columns, matvec, {SCALAR}_t[::1] diagonal=None): + super({SCALAR_label}LinearOperator_wrapper, self).__init__(num_rows, num_columns) + self._matvec = matvec + self._diagonal = diagonal + + cdef INDEX_t matvec(self, {SCALAR}_t[::1] x, {SCALAR}_t[::1] y) except -1: + self._matvec(x, y) + return 0 diff --git a/base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi new file mode 100644 index 0000000..f4dc16d --- /dev/null +++ b/base/PyNucleus_base/LinearOperator_decl_{SCALAR}.pxi @@ -0,0 +1,95 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . blas cimport scaleScalar, assign, assign3 +from . blas import uninitialized + + +cdef class {SCALAR_label}LinearOperator: + cdef: + public INDEX_t num_rows, num_columns + {SCALAR}_t[::1] _diagonal + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_multi(self, + {SCALAR}_t[:, ::1] x, + {SCALAR}_t[:, ::1] y) except -1 + cdef void residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + cdef void preconditionedResidual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + cdef void _residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + cdef void _preconditionedResidual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + + +cdef class {SCALAR_label}TimeStepperLinearOperator({SCALAR_label}LinearOperator): + cdef: + public {SCALAR_label}LinearOperator M, S + public {SCALAR}_t facM, facS + {SCALAR}_t[::1] z + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + + +cdef class {SCALAR_label}Multiply_Linear_Operator({SCALAR_label}LinearOperator): + cdef: + public {SCALAR_label}LinearOperator A + public {SCALAR}_t factor + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef void _residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + + +cdef class {SCALAR_label}Product_Linear_Operator({SCALAR_label}LinearOperator): + cdef: + public {SCALAR_label}LinearOperator A, B + public {SCALAR}_t[::1] temporaryMemory + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef void _residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=*) + + cdef void 
_preconditionedResidual(self,
+                                 {SCALAR}_t[::1] x,
+                                 {SCALAR}_t[::1] rhs,
+                                 {SCALAR}_t[::1] result,
+                                 BOOL_t simpleResidual=*)
diff --git a/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
new file mode 100644
index 0000000..c1a3d82
--- /dev/null
+++ b/base/PyNucleus_base/LinearOperator_{SCALAR}.pxi
@@ -0,0 +1,502 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cdef class {SCALAR_label}LinearOperator:
+    def __init__(self, int num_rows, int num_columns):
+        self.num_rows = num_rows
+        self.num_columns = num_columns
+
+    cdef INDEX_t matvec(self,
+                        {SCALAR}_t[::1] x,
+                        {SCALAR}_t[::1] y) except -1:
+        return -1
+
+    cdef INDEX_t matvec_no_overwrite(self,
+                                     {SCALAR}_t[::1] x,
+                                     {SCALAR}_t[::1] y) except -1:
+        return -1
+
+    cdef INDEX_t matvec_multi(self,
+                              {SCALAR}_t[:, ::1] x,
+                              {SCALAR}_t[:, ::1] y) except -1:
+        return -1
+
+    def __call__(self,
+                 {SCALAR}_t[::1] x,
+                 {SCALAR}_t[::1] y,
+                 BOOL_t no_overwrite=False):
+        if no_overwrite:
+            self.matvec_no_overwrite(x, y)
+        else:
+            self.matvec(x, y)
+
+    def dot(self, {SCALAR}_t[::1] x):
+        cdef:
+            np.ndarray[{SCALAR}_t, ndim=1] y = np.zeros(self.num_rows,
+                                                        dtype={SCALAR})
+        self(x, y)
+        return y
+
+    def dotMV(self, {SCALAR}_t[:, ::1] x):
+        cdef:
+            np.ndarray[{SCALAR}_t, ndim=2] yMV
+        if self.num_columns == x.shape[0]:
+            yMV = np.zeros((self.num_rows, x.shape[1]), dtype={SCALAR})
+            self.matvec_multi(x, yMV)
+        elif self.num_columns == x.shape[1]:
+            yMV = np.zeros((self.num_rows, x.shape[0]), dtype={SCALAR})
+            self.matvec_multi(np.ascontiguousarray(np.array(x, copy=False).T), yMV)
+            return np.ascontiguousarray(yMV.T)
+        else:
+            raise ValueError('Dimension mismatch: operator shape {}, input shape ({}, {})'.format(self.shape, x.shape[0], x.shape[1]))
+        return yMV
+
+    def __add__(self, x):
+        if isinstance(x, {SCALAR_label}LinearOperator):
+            if isinstance(self, {SCALAR_label}Multiply_Linear_Operator):
+                if isinstance(x, {SCALAR_label}Multiply_Linear_Operator):
+                    return {SCALAR_label}TimeStepperLinearOperator(self.A, x.A, x.factor, self.factor)
+                else:
+                    return {SCALAR_label}TimeStepperLinearOperator(self.A, x, 1.0, self.factor)
+            else:
+                if isinstance(x, {SCALAR_label}Multiply_Linear_Operator):
+                    return {SCALAR_label}TimeStepperLinearOperator(self, x.A, x.factor)
+                else:
+                    return {SCALAR_label}TimeStepperLinearOperator(self, x, 1.0)
+        elif isinstance(x, ComplexLinearOperator):
+            return wrapRealToComplex(self)+x
+        elif isinstance(self, ComplexLinearOperator):
+            return self+wrapRealToComplex(x)
+        else:
+            raise NotImplementedError('Cannot add with {}'.format(x))
+
+    def __sub__(self, x):
+        return self + (-1.*x)
+
+    def __mul__(self, x):
+        cdef:
+            np.ndarray[{SCALAR}_t, ndim=1] y
+            {SCALAR}_t[::1] x_mv
+        try:
+            x_mv = x
+            y = np.zeros((self.num_rows), dtype={SCALAR})
+            self(x, y)
+            return y
+        except Exception as e:
+            if isinstance(self, {SCALAR_label}LinearOperator) and isinstance(x, {SCALAR_label}LinearOperator):
+                return {SCALAR_label}Product_Linear_Operator(self, x)
+            elif isinstance(self, {SCALAR_label}LinearOperator) and isinstance(x, np.ndarray) and x.ndim == 2:
+                return self.dotMV(x)
+            elif isinstance(self, {SCALAR_label}LinearOperator) and isinstance(x, (float, int, {SCALAR})):
+                return
{SCALAR_label}Multiply_Linear_Operator(self, x) + elif isinstance(x, {SCALAR_label}LinearOperator) and isinstance(self, (float, int, {SCALAR})): + return {SCALAR_label}Multiply_Linear_Operator(x, self) + elif isinstance(x, complex): + if isinstance(self, ComplexLinearOperator): + return {SCALAR_label}Multiply_Linear_Operator(self, COMPLEX(x)) + else: + return ComplexMultiply_Linear_Operator(wrapRealToComplex(self), COMPLEX(x)) + elif isinstance(x, COMPLEX): + return ComplexMultiply_Linear_Operator(wrapRealToComplex(self), x) + else: + raise NotImplementedError('Cannot multiply {} with {}:\n{}'.format(self, x, e)) + + def __rmul__(self, x): + if isinstance(x, {SCALAR}): + return {SCALAR_label}Multiply_Linear_Operator(self, x) + else: + raise NotImplementedError('Cannot multiply with {}'.format(x)) + + def __neg__(self): + return {SCALAR_label}Multiply_Linear_Operator(self, -1.0) + + property shape: + def __get__(self): + return (self.num_rows, self.num_columns) + + cdef void residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + self._residual(x, rhs, result, simpleResidual) + + cdef void preconditionedResidual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + self._preconditionedResidual(x, rhs, result, simpleResidual) + + cdef void _residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + cdef: + INDEX_t i + if not simpleResidual: + self.matvec(x, result) + assign3(result, result, -1.0, rhs, 1.0) + else: + assign(result, rhs) + + cdef void _preconditionedResidual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + raise NotImplementedError() + + def residual_py(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + self.residual(x, rhs, result, simpleResidual) + + def isSparse(self): + raise NotImplementedError() + + def to_csr(self): + raise NotImplementedError() + + def to_dense(self): + return Dense_LinearOperator(self.toarray()) + + def toarray(self): + return self.to_csr().toarray() + + def toLinearOperator(self): + def matvec(x): + if x.ndim == 1: + return self*x + elif x.ndim == 2 and x.shape[1] == 1: + if x.flags.c_contiguous: + return self*x[:, 0] + else: + y = np.zeros((x.shape[0]), dtype=x.dtype) + y[:] = x[:, 0] + return self*y + else: + raise NotImplementedError() + + from scipy.sparse.linalg import LinearOperator as ScipyLinearOperator + return ScipyLinearOperator(shape=self.shape, matvec=matvec) + + def getDenseOpFromApply(self): + cdef: + INDEX_t i + {SCALAR}_t[::1] x = np.zeros((self.shape[1]), dtype={SCALAR}) + {SCALAR}_t[::1, :] B = np.zeros(self.shape, dtype={SCALAR}, order='F') + for i in range(self.shape[1]): + if i > 0: + x[i-1] = 0. + x[i] = 1. 
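+            # x now holds the i-th unit vector, so the matvec below writes
+            # the i-th column of the operator's dense representation.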
+ self.matvec(x, B[:, i]) + return np.ascontiguousarray(B) + + @staticmethod + def HDF5read(node): + if node.attrs['type'] == 'csr': + return CSR_LinearOperator.HDF5read(node) + elif node.attrs['type'] == 'sss': + return SSS_LinearOperator.HDF5read(node) + elif node.attrs['type'] == 'split_csr': + return split_CSR_LinearOperator.HDF5read(node) + elif node.attrs['type'] == 'sparseGraph': + return sparseGraph.HDF5read(node) + elif node.attrs['type'] == 'restriction': + return restrictionOp.HDF5read(node) + elif node.attrs['type'] == 'prolongation': + return prolongationOp.HDF5read(node) + elif node.attrs['type'] == 'dense': + return Dense_LinearOperator.HDF5read(node) + elif node.attrs['type'] == 'diagonal': + return diagonalOperator.HDF5read(node) + else: + raise NotImplementedError() + + def __getstate__(self): + return + + def __setstate__(self, state): + pass + + def __repr__(self): + return '<%dx%d %s>' % (self.num_rows, + self.num_columns, + self.__class__.__name__) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + raise NotImplementedError() + + def setEntry_py(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + self.setEntry(I, J, val) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + raise NotImplementedError() + + def addToEntry_py(self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + self.addToEntry(I, J, val) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry(self, INDEX_t I, INDEX_t J): + raise NotImplementedError() + + def getEntry_py(self, INDEX_t I, INDEX_t J): + return self.getEntry(I, J) + + def get_diagonal(self): + if self._diagonal is not None: + return self._diagonal + else: + raise NotImplementedError() + + def set_diagonal(self, {SCALAR}_t[::1] diagonal): + assert self.num_rows == diagonal.shape[0] + self._diagonal = diagonal + + diagonal = property(fget=get_diagonal, fset=set_diagonal) + + +cdef class {SCALAR_label}TimeStepperLinearOperator({SCALAR_label}LinearOperator): + def __init__(self, + {SCALAR_label}LinearOperator M, + {SCALAR_label}LinearOperator S, + {SCALAR}_t facS, + {SCALAR}_t facM=1.0): + assert M.num_columns == S.num_columns + assert M.num_rows == S.num_rows + super({SCALAR_label}TimeStepperLinearOperator, self).__init__(M.num_rows, + M.num_columns) + self.M = M + self.S = S + self.facM = facM + self.facS = facS + self.z = uninitialized((self.M.shape[0]), dtype={SCALAR}) + + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + if self.facS != 0.: + self.S.matvec(x, y) + if self.facS != 1.0: + scaleScalar(y, self.facS) + if self.facM == 1.0: + self.M.matvec_no_overwrite(x, y) + else: + self.M.matvec(x, self.z) + assign3(y, y, 1.0, self.z, self.facM) + return 0 + + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + if self.facS == 1.0: + self.S.matvec_no_overwrite(x, y) + elif self.facS != 0.: + self.S.matvec(x, self.z) + assign3(y, y, 1.0, self.z, self.facS) + if self.facM == 1.0: + self.M.matvec_no_overwrite(x, y) + elif self.facM != 0.: + self.M.matvec(x, self.z) + assign3(y, y, 1.0, self.z, self.facM) + return 0 + + def get_diagonal(self): + return (self.facM*np.array(self.M.diagonal, copy=False) + + self.facS*np.array(self.S.diagonal, copy=False)) + + diagonal = property(fget=get_diagonal) + + def 
__repr__(self): + if np.real(self.facS) >= 0: + return '{}*{} + {}*{}'.format(self.facM, self.M, self.facS, self.S) + else: + return '{}*{} - {}*{}'.format(self.facM, self.M, -self.facS, self.S) + + def to_csr_linear_operator(self): + if isinstance(self.S, {SCALAR_label}Dense_LinearOperator): + return {SCALAR_label}Dense_LinearOperator(self.facM*self.M.toarray() + self.facS*self.S.toarray()) + else: + B = self.facM*self.M.to_csr() + self.facS*self.S.to_csr() + B.eliminate_zeros() + return {SCALAR_label}CSR_LinearOperator(B.indices, B.indptr, B.data) + + def isSparse(self): + return self.M.isSparse() and self.S.isSparse() + + def to_csr(self): + cdef {SCALAR_label}CSR_LinearOperator csr + csr = self.to_csr_linear_operator() + return csr.to_csr() + + def toarray(self): + return self.facM*self.M.toarray() + self.facS*self.S.toarray() + + def getnnz(self): + return self.M.nnz+self.S.nnz + + nnz = property(fget=getnnz) + + def __mul__(self, x): + if isinstance(self, {SCALAR_label}TimeStepperLinearOperator) and isinstance(x, ({SCALAR}, float, int)): + return {SCALAR_label}TimeStepperLinearOperator(self.M, self.S, self.facS*x, self.facM*x) + elif isinstance(x, {SCALAR_label}TimeStepperLinearOperator) and isinstance(self, ({SCALAR}, float, int)): + return {SCALAR_label}TimeStepperLinearOperator(x.M, x.S, x.facS*self, x.facM*self) + else: + return super({SCALAR_label}TimeStepperLinearOperator, self).__mul__(x) + + +cdef class {SCALAR_label}Multiply_Linear_Operator({SCALAR_label}LinearOperator): + def __init__(self, + {SCALAR_label}LinearOperator A, + {SCALAR}_t factor): + super({SCALAR_label}Multiply_Linear_Operator, self).__init__(A.num_rows, + A.num_columns) + self.A = A + self.factor = factor + + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + self.A(x, y) + scaleScalar(y, self.factor) + return 0 + + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + if self.factor != 0.: + scaleScalar(y, 1./self.factor) + self.A.matvec_no_overwrite(x, y) + scaleScalar(y, self.factor) + return 0 + + def isSparse(self): + return self.A.isSparse() + + def to_csr(self): + return self.factor*self.A.to_csr() + + def to_csr_linear_operator(self): + if isinstance(self.A, {SCALAR_label}Dense_LinearOperator): + return {SCALAR_label}Dense_LinearOperator(self.factor*self.A.toarray()) + else: + B = self.factor*self.A.to_csr() + Bcsr = {SCALAR_label}CSR_LinearOperator(B.indices, B.indptr, B.data) + Bcsr.num_rows = B.shape[0] + Bcsr.num_columns = B.shape[1] + return Bcsr + + def toarray(self): + return self.factor*self.A.toarray() + + def __mul__(self, x): + if isinstance(self, {SCALAR_label}Multiply_Linear_Operator) and isinstance(x, ({SCALAR}, float)): + return {SCALAR_label}Multiply_Linear_Operator(self.A, self.factor*x) + elif isinstance(x, {SCALAR_label}Multiply_Linear_Operator) and isinstance(self, ({SCALAR}, float)): + return {SCALAR_label}Multiply_Linear_Operator(x.A, x.factor*self) + elif isinstance(x, COMPLEX): + return ComplexMultiply_Linear_Operator(wrapRealToComplex(self.A), self.factor*x) + else: + return super({SCALAR_label}Multiply_Linear_Operator, self).__mul__(x) + + def get_diagonal(self): + return self.factor*np.array(self.A.diagonal, copy=False) + + diagonal = property(fget=get_diagonal) + + def __repr__(self): + return '{}*{}'.format(self.factor, self.A) + + +cdef class {SCALAR_label}Product_Linear_Operator({SCALAR_label}LinearOperator): + def __init__(self, + {SCALAR_label}LinearOperator A, + {SCALAR_label}LinearOperator B, + 
{SCALAR}_t[::1] temporaryMemory=None): + assert A.num_columns == B.num_rows, '{} and {} are not compatible'.format(A.num_columns, B.num_rows) + super({SCALAR_label}Product_Linear_Operator, self).__init__(A.num_rows, + B.num_columns) + self.A = A + self.B = B + if temporaryMemory is not None: + assert temporaryMemory.shape[0] == self.A.num_columns + self.temporaryMemory = temporaryMemory + else: + self.temporaryMemory = uninitialized((self.A.num_columns), dtype={SCALAR}) + + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + self.B(x, self.temporaryMemory) + self.A(self.temporaryMemory, y) + return 0 + + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + self.B(x, self.temporaryMemory) + self.A.matvec_no_overwrite(self.temporaryMemory, y) + return 0 + + cdef void _residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + self.B(x, self.temporaryMemory) + self.A.residual(self.temporaryMemory, rhs, result, simpleResidual) + + cdef void preconditionedResidual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] result, + BOOL_t simpleResidual=False): + self.B.residual(x, rhs, self.temporaryMemory, simpleResidual) + self.A(self.temporaryMemory, result) + + def isSparse(self): + return self.A.isSparse() and self.B.isSparse() + + def to_csr(self): + return self.A.to_csr().dot(self.B.to_csr()) + + def toarray(self): + if self.isSparse(): + return self.to_csr().toarray() + elif self.A.isSparse(): + return self.A.to_csr() * self.B.toarray() + elif self.B.isSparse(): + return self.A.toarray() * self.B.to_csr() + return self.A.toarray().dot(self.B.toarray()) + + def to_csr_linear_operator(self): + if isinstance(self.A, {SCALAR_label}Dense_LinearOperator): + return {SCALAR_label}Dense_LinearOperator(self.A.toarray().dot(self.B.toarray())) + else: + B = self.A.to_csr().dot(self.B.to_csr()) + B.eliminate_zeros() + Bcsr = {SCALAR_label}CSR_LinearOperator(B.indices, B.indptr, B.data) + Bcsr.num_rows = B.shape[0] + Bcsr.num_columns = B.shape[1] + return Bcsr + + def __repr__(self): + return '{}*{}'.format(self.A, self.B) diff --git a/base/PyNucleus_base/SSS_LinearOperator_decl_{SCALAR}.pxi b/base/PyNucleus_base/SSS_LinearOperator_decl_{SCALAR}.pxi new file mode 100644 index 0000000..c2722f6 --- /dev/null +++ b/base/PyNucleus_base/SSS_LinearOperator_decl_{SCALAR}.pxi @@ -0,0 +1,25 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . 
blas import uninitialized + + +cdef class {SCALAR_label}SSS_LinearOperator({SCALAR_label}LinearOperator): + cdef: + public INDEX_t[::1] indptr, indices + public {SCALAR}_t[::1] data, diagonal + public BOOL_t indices_sorted + public int NoThreads + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef INDEX_t matvec_no_overwrite(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1 + cdef void setEntry(self, INDEX_t I, INDEX_t J, {SCALAR}_t val) + cdef {SCALAR}_t getEntry({SCALAR_label}SSS_LinearOperator self, INDEX_t I, INDEX_t J) diff --git a/base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi b/base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi new file mode 100644 index 0000000..6d2738b --- /dev/null +++ b/base/PyNucleus_base/SSS_LinearOperator_{SCALAR}.pxi @@ -0,0 +1,309 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class {SCALAR_label}SSS_LinearOperator({SCALAR_label}LinearOperator): + """ + Sparse symmetric matrix that saves the lower triangular part. + """ + def __init__(self, + INDEX_t[::1] indices, + INDEX_t[::1] indptr, + {SCALAR}_t[::1] data, + {SCALAR}_t[::1] diagonal, + int NoThreads=1): + {SCALAR_label}LinearOperator.__init__(self, + indptr.shape[0]-1, + indptr.shape[0]-1) + self.indices = indices + self.indptr = indptr + self.data = data + self.diagonal = diagonal + self.indices_sorted = False + self.NoThreads = NoThreads + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec({SCALAR_label}SSS_LinearOperator self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + cdef: + INDEX_t i, j, k + {SCALAR}_t temp + y[:] = 0. 
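+ # Editor's illustration (not part of the original patch): only the strict
+ # lower triangle L and the diagonal D are stored, so the symmetric matvec
+ # below is assembled as y = D*x + L*x + L^T*x. A quick NumPy check of that
+ # identity for a symmetric A:
+ # >>> import numpy as np
+ # >>> A = np.array([[2., 1.], [1., 3.]])
+ # >>> D, L = np.diag(np.diag(A)), np.tril(A, -1)
+ # >>> x = np.array([1., 2.])
+ # >>> np.allclose(A.dot(x), D.dot(x) + L.dot(x) + L.T.dot(x))
+ # True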
+ if self.NoThreads > 1: + with nogil, parallel(num_threads=self.NoThreads): + for i in prange(self.num_rows, schedule='static'): + temp = self.diagonal[i]*x[i] + for j in range(self.indptr[i], self.indptr[i+1]): + temp = temp + self.data[j]*x[self.indices[j]] + y[i] = temp + for i in prange(self.num_rows, schedule='static'): + for j in range(self.indptr[i], self.indptr[i+1]): + y[self.indices[j]] += self.data[j]*x[i] + else: + for i in range(self.num_rows): + temp = self.diagonal[i]*x[i] + for j in range(self.indptr[i], self.indptr[i+1]): + k = self.indices[j] + temp += self.data[j]*x[k] + y[k] += self.data[j]*x[i] + y[i] += temp + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite({SCALAR_label}SSS_LinearOperator self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + cdef: + INDEX_t i, j, k + {SCALAR}_t temp + for i in range(self.num_rows): + temp = self.diagonal[i]*x[i] + for j in range(self.indptr[i], self.indptr[i+1]): + k = self.indices[j] + temp += self.data[j]*x[k] + y[k] += self.data[j]*x[i] + y[i] += temp + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setEntry({SCALAR_label}SSS_LinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, low, mid, high + if I == J: + self.diagonal[I] = val + elif I > J: + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + self.data[i] = val + break + else: + # This should scale better than previous implementation, + # if we have a high number of non-zeros per row. + while self.indices[low] != J: + if high-low <= 1: + raise IndexError() + mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + self.data[low] = val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry({SCALAR_label}SSS_LinearOperator self, INDEX_t I, INDEX_t J, {SCALAR}_t val): + cdef: + INDEX_t i, low, mid, high + if I == J: + self.diagonal[I] += val + elif I > J: + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + self.data[i] += val + break + else: + # This should scale better than previous implementation, + # if we have a high number of non-zeros per row. + while self.indices[low] != J: + if high-low <= 1: + # raise IndexError() + return + mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + self.data[low] += val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef {SCALAR}_t getEntry({SCALAR_label}SSS_LinearOperator self, INDEX_t I, INDEX_t J): + cdef: + INDEX_t low, high, i + if I == J: + return self.diagonal[I] + if I < J: + I, J = J, I + low = self.indptr[I] + high = self.indptr[I+1] + if high-low < 20: + for i in range(low, high): + if self.indices[i] == J: + return self.data[i] + else: + # This should scale better than previous implementation, + # if we have a high number of non-zeros per row. + while self.indices[low] != J: + if high-low <= 1: + return 0. 
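+ # Editor's note (not part of the original patch): this is a standard
+ # bisection on the (sorted) column indices of row I; it requires
+ # sort_indices() to have been called, and narrows [low, high) until
+ # indices[low] == J or the interval is empty.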
+ mid = (low+high) >> 1 + if self.indices[mid] <= J: + low = mid + else: + high = mid + return self.data[low] + + def isSparse(self): + return True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def to_csr(self): + cdef: + np.ndarray[INDEX_t, ndim=1] indptr_mem = np.zeros((self.num_rows+1), + dtype=INDEX) + INDEX_t[::1] indptr = indptr_mem + np.ndarray[INDEX_t, ndim=1] indices_mem + INDEX_t[::1] indices + np.ndarray[{SCALAR}_t, ndim=1] data_mem + {SCALAR}_t[::1] data + INDEX_t i, jj, j, nnz + for i in range(self.num_rows): + indptr[i+1] += 1 + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + indptr[i+1] += 1 + indptr[j+1] += 1 + for i in range(self.num_rows): + indptr[i+1] += indptr[i] + nnz = indptr[indptr.shape[0]-1] + indices_mem = uninitialized((nnz), dtype=INDEX) + indices = indices_mem + data_mem = uninitialized((nnz), dtype={SCALAR}) + data = data_mem + for i in range(self.num_rows): + indices[indptr[i]] = i + data[indptr[i]] = self.diagonal[i] + indptr[i] += 1 + for jj in range(self.indptr[i], self.indptr[i+1]): + j = self.indices[jj] + indices[indptr[i]] = j + data[indptr[i]] = self.data[jj] + indptr[i] += 1 + indices[indptr[j]] = i + data[indptr[j]] = self.data[jj] + indptr[j] += 1 + for i in range(self.num_rows, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + from scipy.sparse import csr_matrix + return csr_matrix((data_mem, indices_mem, indptr_mem), + shape=self.shape) + + def to_csr_linear_operator(self): + B = self.to_csr() + return {SCALAR_label}CSR_LinearOperator(B.indices, B.indptr, B.data) + + def to_csc(self): + A = self.to_csr() + from scipy.sparse import csc_matrix + return csc_matrix((A.data, A.indices, A.indptr), + shape=self.shape) + + def getnnz(self): + return self.indptr[-1]+self.num_rows + + nnz = property(fget=getnnz) + + def getMemorySize(self): + return ((self.indptr.shape[0]+self.indices.shape[0])*sizeof(INDEX_t) + + (self.data.shape[0]+self.diagonal.shape[0])*sizeof({SCALAR}_t)) + + def __repr__(self): + sizeInMB = self.getMemorySize() >> 20 + if sizeInMB > 100: + return '<%dx%d %s with %d stored elements, %d MB>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nnz, + sizeInMB) + else: + return '<%dx%d %s with %d stored elements>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nnz) + + def HDF5write(self, node): + node.create_dataset('indices', data=np.array(self.indices, + copy=False), + compression=COMPRESSION) + node.create_dataset('indptr', data=np.array(self.indptr, + copy=False), + compression=COMPRESSION) + node.create_dataset('data', data=np.array(self.data, + copy=False), + compression=COMPRESSION) + node.create_dataset('diagonal', data=np.array(self.diagonal, + copy=False), + compression=COMPRESSION) + node.attrs['type'] = 'sss' + + @staticmethod + def HDF5read(node): + return {SCALAR_label}SSS_LinearOperator(np.array(node['indices'], dtype=INDEX), + np.array(node['indptr'], dtype=INDEX), + np.array(node['data'], dtype={SCALAR}), + np.array(node['diagonal'], dtype={SCALAR})) + + def __getstate__(self): + return (np.array(self.indices, dtype=INDEX), + np.array(self.indptr, dtype=INDEX), + np.array(self.data, dtype={SCALAR}), + np.array(self.diagonal, dtype={SCALAR}), + self.num_rows, + self.num_columns) + + def __setstate__(self, state): + self.indices = state[0] + self.indptr = state[1] + self.data = state[2] + self.diagonal = state[3] + self.num_rows = state[4] + self.num_columns = state[5] + + def sort_indices(self): + 
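+ # Editor's note (not part of the original patch): the bisection branches in
+ # setEntry/addToEntry/getEntry above assume ascending column indices per
+ # row; call this once after assembly, e.g. (illustrative):
+ # >>> A.sort_indices()   # A: an assembled SSS_LinearOperator
+ # >>> A.indices_sorted
+ # True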
sort_indices{SCALAR_label}(self.indptr, self.indices, self.data) + self.indices_sorted = True + + def setZero(self): + cdef: + INDEX_t i + + for i in range(self.data.shape[0]): + self.data[i] = 0. + for i in range(self.diagonal.shape[0]): + self.diagonal[i] = 0. + + def copy(self): + data = np.array(self.data, copy=True) + diagonal = np.array(self.diagonal, copy=True) + other = {SCALAR_label}SSS_LinearOperator(self.indices, self.indptr, data, diagonal) + return other + + def scale(self, {SCALAR}_t scaling): + cdef: + INDEX_t i + for i in range(self.num_rows): + self.diagonal[i] *= scaling + for i in range(self.data.shape[0]): + self.data[i] *= scaling diff --git a/base/PyNucleus_base/__init__.py b/base/PyNucleus_base/__init__.py new file mode 100644 index 0000000..ea16d20 --- /dev/null +++ b/base/PyNucleus_base/__init__.py @@ -0,0 +1,55 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from warnings import filterwarnings +filterwarnings("error", category=RuntimeWarning, module="importlib._bootstrap") + +from . utilsFem import (display_available, getLoggingTimer, TimerManager, roc, + exitHandler, + saveDictToHDF5, loadDictFromHDF5, + getSystemInfo, + columns, driver, problem, updateFromDefaults) +from . myTypes import REAL, INDEX, COMPLEX +from . blas import uninitialized, uninitialized_like + + +def get_include(): + import os + return os.path.dirname(os.path.realpath(__file__)) + + +from . solver_factory import solverFactory as solverFactoryClass + +solverFactory = solverFactoryClass() +from . solvers import (noop_solver, + lu_solver, chol_solver, + cg_solver, gmres_solver, bicgstab_solver, + ichol_solver, ilu_solver, + jacobi_solver, + krylov_solver) +solverFactory.register(None, noop_solver) +solverFactory.register('lu', lu_solver) +solverFactory.register('chol', chol_solver, aliases=['cholesky', 'cholmod']) +solverFactory.register('cg', cg_solver) +solverFactory.register('gmres', gmres_solver) +solverFactory.register('bicgstab', bicgstab_solver) +solverFactory.register('ichol', ichol_solver) +solverFactory.register('ilu', ilu_solver) +solverFactory.register('jacobi', jacobi_solver, aliases=['diagonal']) + +from . config import use_pyamg +if use_pyamg: + from . solvers import pyamg_solver + solverFactory.register('pyamg', pyamg_solver) + +from . solvers import complex_lu_solver, complex_gmres_solver +solverFactory.register('complex_lu', complex_lu_solver) +solverFactory.register('complex_gmres', complex_gmres_solver) + +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/base/PyNucleus_base/_version.py b/base/PyNucleus_base/_version.py new file mode 100644 index 0000000..4ce4b52 --- /dev/null +++ b/base/PyNucleus_base/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_base/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
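+ (Editor-added illustration: with a hypothetical parentdir_prefix of
+ "myproject-", a tarball unpacked into "myproject-1.0/" yields version
+ "1.0".)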
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). 
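+
+ Editor-added examples matching the implementation below:
+ "1.2.post3" -> ("1.2", 3); "1.2.post" -> ("1.2", 0); "1.2" -> ("1.2", None)
+ (i.e. None, rather than -1, is returned when there is no post segment).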
+ """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/base/PyNucleus_base/blas.pxd b/base/PyNucleus_base/blas.pxd new file mode 100644 index 0000000..d551ba6 --- /dev/null +++ b/base/PyNucleus_base/blas.pxd @@ -0,0 +1,45 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . 
myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from libc.math cimport sqrt +from cython.view cimport array as carray + +cpdef carray uninitializedINDEX(tuple shape) +cpdef carray uninitializedREAL(tuple shape) + +ctypedef fused SCALAR_t: + REAL_t + COMPLEX_t + +cdef void assign(SCALAR_t[::1] y, SCALAR_t[::1] x) +cdef void assignScaled(SCALAR_t[::1] y, SCALAR_t[::1] x, SCALAR_t alpha) +cdef void assign3(SCALAR_t[::1] z, SCALAR_t[::1] x, SCALAR_t alpha, SCALAR_t[::1] y, SCALAR_t beta) +cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y) +cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha) +cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha) +cdef void updateScaledVector(REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] alpha) +cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1) +cdef REAL_t norm(SCALAR_t[::1] x) +cdef void gemv(SCALAR_t[:, ::1] A, + SCALAR_t[::1] x, + SCALAR_t[::1] y, + SCALAR_t beta=*) +cdef void gemvF(SCALAR_t[::1, :] A, + SCALAR_t[::1] x, + SCALAR_t[::1] y, + SCALAR_t beta=*) +cdef void gemvT(SCALAR_t[:, ::1] A, + SCALAR_t[::1] x, + SCALAR_t[::1] y, + SCALAR_t beta=*) +cdef void matmat(SCALAR_t[:, ::1] A, + SCALAR_t[:, ::1] B, + SCALAR_t[:, ::1] C) +cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=*) +cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result) diff --git a/base/PyNucleus_base/blas.pyx b/base/PyNucleus_base/blas.pyx new file mode 100644 index 0000000..7c32de9 --- /dev/null +++ b/base/PyNucleus_base/blas.pyx @@ -0,0 +1,654 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from . 
myTypes import INDEX + +include "config.pxi" + +cdef: + INDEX_t MAX_INT = np.iinfo(INDEX).max + REAL_t NAN = np.nan + + +def uninitialized(*args, **kwargs): + IF FILL_UNINITIALIZED: + kwargs['fill_value'] = NAN + return np.full(*args, **kwargs) + ELSE: + return np.empty(*args, **kwargs) + + +def uninitialized_like(*args, **kwargs): + IF FILL_UNINITIALIZED: + kwargs['fill_value'] = NAN + return np.full_like(*args, **kwargs) + ELSE: + return np.empty_like(*args, **kwargs) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef carray uninitializedINDEX(tuple shape): + cdef: + carray a = carray(shape, 4, 'i') + size_t s, i + IF FILL_UNINITIALIZED: + s = 1 + for i in range(len(shape)): + s *= shape[i] + for i in range(s): + (a.data)[i] = MAX_INT + return a + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef carray uninitializedREAL(tuple shape): + cdef: + carray a = carray(shape, 8, 'd') + size_t s, i + IF FILL_UNINITIALIZED: + s = 1 + for i in range(len(shape)): + s *= shape[i] + for i in range(s): + (a.data)[i] = NAN + return a + + +IF USE_BLAS: + + from scipy.linalg.cython_blas cimport dcopy, dscal, daxpy, ddot, dnrm2 + from scipy.linalg.cython_blas cimport zcopy, zscal, zaxpy, zdotc + from scipy.linalg.cython_blas cimport dgemv, zgemv + from scipy.linalg.cython_blas cimport dgemm, zgemm + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assign(SCALAR_t[::1] y, SCALAR_t[::1] x): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + dcopy(&n, x_ptr, &inc, y_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assignScaled(SCALAR_t[::1] y, SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zcopy(&n, x_ptr_c, &inc, y_ptr_c, &inc) + zscal(&n, &alpha, y_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + dcopy(&n, x_ptr, &inc, y_ptr, &inc) + dscal(&n, &alpha, y_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assign3(SCALAR_t[::1] z, SCALAR_t[::1] x, SCALAR_t alpha, SCALAR_t[::1] y, SCALAR_t beta): + cdef: + double* x_ptr + double* y_ptr + double* z_ptr + double complex* x_ptr_c + double complex* y_ptr_c + double complex* z_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + z_ptr_c = &z[0] + zcopy(&n, x_ptr_c, &inc, z_ptr_c, &inc) + if alpha != 1.0: + zscal(&n, &alpha, z_ptr_c, &inc) + zaxpy(&n, &beta, y_ptr_c, &inc, z_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + z_ptr = &z[0] + dcopy(&n, x_ptr, &inc, z_ptr, &inc) + if alpha != 1.0: + dscal(&n, &alpha, z_ptr, &inc) + daxpy(&n, &beta, y_ptr, &inc, z_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + SCALAR_t alpha = 1.0 + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + 
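+ # Editor's note (not part of the original patch): update() is axpy with
+ # alpha fixed to 1, i.e. x <- x + y; updateScaled() below passes a general
+ # alpha (NumPy equivalent: x += alpha*y).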
zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): + cdef: + double* x_ptr + double* y_ptr + double complex* x_ptr_c + double complex* y_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zaxpy(&n, &alpha, y_ptr_c, &inc, x_ptr_c, &inc) + else: + x_ptr = &x[0] + y_ptr = &y[0] + daxpy(&n, &alpha, y_ptr, &inc, x_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + double* x_ptr + double complex* x_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + zscal(&n, &alpha, x_ptr_c, &inc) + else: + x_ptr = &x[0] + dscal(&n, &alpha, x_ptr, &inc) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1): + cdef: + SCALAR_t s = 0.0 + double* v0_ptr + double* v1_ptr + double complex* v0_ptr_c + double complex* v1_ptr_c + int n = v0.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + v0_ptr_c = &v0[0] + v1_ptr_c = &v1[0] + s = zdotc(&n, v0_ptr_c, &inc, v1_ptr_c, &inc) + else: + v0_ptr = &v0[0] + v1_ptr = &v1[0] + s = ddot(&n, v0_ptr, &inc, v1_ptr, &inc) + return s + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t norm(SCALAR_t[::1] x): + cdef: + REAL_t s = 0.0 + double* x_ptr + double complex* x_ptr_c + int n = x.shape[0] + int inc = 1 + if SCALAR_t is COMPLEX_t: + x_ptr_c = &x[0] + s = zdotc(&n, x_ptr_c, &inc, x_ptr_c, &inc).real + return sqrt(s) + else: + x_ptr = &x[0] + return dnrm2(&n, x_ptr, &inc) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[1] + int n = A.shape[0] + SCALAR_t alpha = 1. + int lda = A.shape[1] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('t', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('t', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void gemvF(SCALAR_t[::1, :] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[0] + int n = A.shape[1] + SCALAR_t alpha = 1. 
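+ # Editor's note (not part of the original patch): A is Fortran-ordered
+ # ([::1, :]), which is BLAS's native layout, so gemv('n', ...) computes
+ # y = alpha*A@x + beta*y directly, with lda = A.shape[0] rows.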
+ int lda = A.shape[0] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + # y = A^T*x: BLAS reads the C-contiguous A as its transpose, so 'n' with + # m = A.shape[1], n = A.shape[0] and lda = A.shape[1] applies A^T + cdef: + double* A_ptr + double* x_ptr + double* y_ptr + double complex* A_ptr_c + double complex* x_ptr_c + double complex* y_ptr_c + int m = A.shape[1] + int n = A.shape[0] + SCALAR_t alpha = 1. + int lda = A.shape[1] + int incx = 1 + int incy = 1 + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + x_ptr_c = &x[0] + y_ptr_c = &y[0] + zgemv('n', &m, &n, &alpha, A_ptr_c, &lda, x_ptr_c, &incx, &beta, y_ptr_c, &incy) + else: + A_ptr = &A[0, 0] + x_ptr = &x[0] + y_ptr = &y[0] + dgemv('n', &m, &n, &alpha, A_ptr, &lda, x_ptr, &incx, &beta, y_ptr, &incy) + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): + cdef: + double* A_ptr + double* B_ptr + double* C_ptr + double complex* A_ptr_c + double complex* B_ptr_c + double complex* C_ptr_c + int m = B.shape[1] + int k = B.shape[0] + int n = A.shape[0] + SCALAR_t alpha = 1. + SCALAR_t beta = 0. + int lda = A.shape[1] + int ldb = B.shape[1] + int ldc = C.shape[1] + if SCALAR_t is COMPLEX_t: + A_ptr_c = &A[0, 0] + B_ptr_c = &B[0, 0] + C_ptr_c = &C[0, 0] + zgemm('n', 'n', &m, &n, &k, &alpha, B_ptr_c, &ldb, A_ptr_c, &lda, &beta, C_ptr_c, &ldc) + else: + A_ptr = &A[0, 0] + B_ptr = &B[0, 0] + C_ptr = &C[0, 0] + dgemm('n', 'n', &m, &n, &k, &alpha, B_ptr, &ldb, A_ptr, &lda, &beta, C_ptr, &ldc) + +ELSE: + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assign(SCALAR_t[::1] y, SCALAR_t[::1] x): + cdef: + INDEX_t i + for i in range(x.shape[0]): + y[i] = x[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assignScaled(SCALAR_t[::1] y, SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + INDEX_t i + for i in range(x.shape[0]): + y[i] = alpha*x[i] + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void assign3(SCALAR_t[::1] z, SCALAR_t[::1] x, SCALAR_t alpha, SCALAR_t[::1] y, SCALAR_t beta): + cdef: + INDEX_t i + for i in range(x.shape[0]): + z[i] = alpha*x[i] + beta*y[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void update(SCALAR_t[::1] x, SCALAR_t[::1] y): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] = x[i] + y[i] + else: + for i in range(x.shape[0]): + x[i] += y[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void updateScaled(SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t alpha): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] = x[i]+alpha*y[i] + else: + for i in range(x.shape[0]): + x[i] += alpha*y[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void scaleScalar(SCALAR_t[::1] x, SCALAR_t alpha): + cdef: + INDEX_t i + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + x[i] *= alpha + 
else: + for i in range(x.shape[0]): + x[i] = x[i]*alpha + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef SCALAR_t mydot(SCALAR_t[::1] v0, SCALAR_t[::1] v1): + cdef: + int i + SCALAR_t s = 0.0 + if SCALAR_t is COMPLEX_t: + for i in range(v0.shape[0]): + s += v0[i].conjugate()*v1[i] + else: + for i in range(v0.shape[0]): + s += v0[i]*v1[i] + return s + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t norm(SCALAR_t[::1] x): + cdef: + int i + REAL_t s = 0.0 + if SCALAR_t is COMPLEX_t: + for i in range(x.shape[0]): + s += (x[i].conjugate()*x[i]).real + else: + for i in range(x.shape[0]): + s += x[i]*x[i] + return sqrt(s) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void gemv(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + cdef: + INDEX_t i, j + SCALAR_t s + if SCALAR_t is COMPLEX_t: + if beta != 0.: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s = s + A[i, j]*x[j] + y[i] = beta*y[i]+s + else: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s = s + A[i, j]*x[j] + y[i] = s + else: + if beta != 0.: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s += A[i, j]*x[j] + y[i] = beta*y[i]+s + else: + for i in range(A.shape[0]): + s = 0. + for j in range(A.shape[1]): + s += A[i, j]*x[j] + y[i] = s + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void gemvT(SCALAR_t[:, ::1] A, SCALAR_t[::1] x, SCALAR_t[::1] y, SCALAR_t beta=0.): + # y = A^T*x, so y has length A.shape[1] and j runs over the rows of A + cdef: + INDEX_t i, j + if SCALAR_t is COMPLEX_t: + if beta != 0.: + for i in range(A.shape[1]): + y[i] = y[i]*beta + else: + y[:] = 0. + for j in range(A.shape[0]): + for i in range(A.shape[1]): + y[i] = y[i]+A[j, i]*x[j] + else: + if beta != 0.: + for i in range(A.shape[1]): + y[i] *= beta + else: + y[:] = 0. + for j in range(A.shape[0]): + for i in range(A.shape[1]): + y[i] += A[j, i]*x[j] + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void matmat(SCALAR_t[:, ::1] A, SCALAR_t[:, ::1] B, SCALAR_t[:, ::1] C): + cdef: + INDEX_t i, j, k + C[:, :] = 0. 
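+ # Editor's note (not part of the original patch): naive triple-loop
+ # fallback for C = A@B; with k innermost, the updates stream through the
+ # C-contiguous rows B[j, :] and C[i, :], e.g. C[0, 0] accumulates
+ # A[0, 0]*B[0, 0] + A[0, 1]*B[1, 0] + ...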
+ if SCALAR_t is COMPLEX_t: + for i in range(A.shape[0]): + for j in range(B.shape[0]): + for k in range(B.shape[1]): + C[i, k] = C[i, k] + A[i, j]*B[j, k] + else: + for i in range(A.shape[0]): + for j in range(B.shape[0]): + for k in range(B.shape[1]): + C[i, k] += A[i, j]*B[j, k] + + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void updateScaledVector(REAL_t[::1] x, REAL_t[::1] y, REAL_t[::1] alpha): + cdef: + INDEX_t i + for i in range(x.shape[0]): + x[i] += alpha[i]*y[i] + + +IF USE_MKL: + ctypedef INDEX_t MKL_INT + + cdef extern from "mkl/mkl_spblas.h": + void mkl_cspblas_dcsrgemv (const char *transa , const MKL_INT *m , const REAL_t *a , const MKL_INT *ia , const MKL_INT *ja , const REAL_t *x , REAL_t *y ); + void mkl_dcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const REAL_t *alpha , const char *matdescra , + const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + const REAL_t *x , const REAL_t *beta , REAL_t *y ); + # void mkl_zcsrmv (const char *transa , const MKL_INT *m , const MKL_INT *k , const COMPLEX_t *alpha , const char *matdescra , + # const COMPLEX_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + # const COMPLEX_t *x , const COMPLEX_t *beta , COMPLEX_t *y ); + + cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): + cdef: + char transA = 78 + INDEX_t num_rows = indptr.shape[0]-1 + + assert overwrite + + if SCALAR_t is COMPLEX_t: + # mkl_cspblas_zcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) + pass + else: + mkl_cspblas_dcsrgemv(&transA, &num_rows, &data[0], &indptr[0], &indices[0], &x[0], &y[0]) + + + cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): + cdef: + char transA = 78 + SCALAR_t alpha = -1. + SCALAR_t beta = 1. + char matdscr[6] + INDEX_t inc = 1 + INDEX_t num_rows = indptr.shape[0]-1 + INDEX_t num_columns = x.shape[0] + + matdscr[0] = 71 + matdscr[2] = 78 + matdscr[3] = 67 + + assign(result, rhs) + if SCALAR_t is COMPLEX_t: + pass + # mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], + # &data[0], &indices[0], &indptr[0], &indptr[1], + # &x[0], &beta, &result[0]) + else: + mkl_dcsrmv(&transA, &num_rows, &num_columns, &alpha, &matdscr[0], + &data[0], &indices[0], &indptr[0], &indptr[1], + &x[0], &beta, &result[0]) + +ELSE: + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef void spmv(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] y, BOOL_t overwrite=True): + cdef: + INDEX_t i, jj, j + SCALAR_t temp + if SCALAR_t is COMPLEX_t: + for i in range(indptr.shape[0]-1): + temp = 0. + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp = temp + data[jj]*x[j] + if overwrite: + y[i] = temp + else: + y[i] = y[i]+temp + else: + for i in range(indptr.shape[0]-1): + temp = 0. 
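+ # Editor's note (not part of the original patch): standard CSR row kernel;
+ # row i contributes y[i] = sum(data[jj]*x[indices[jj]] for jj in
+ # range(indptr[i], indptr[i+1])).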
+ for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp += data[jj]*x[j] + if overwrite: + y[i] = temp + else: + y[i] += temp + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef void spres(INDEX_t[::1] indptr, INDEX_t[::1] indices, SCALAR_t[::1] data, SCALAR_t[::1] x, SCALAR_t[::1] rhs, SCALAR_t[::1] result): + cdef: + INDEX_t i, jj, j + SCALAR_t temp + INDEX_t num_rows = indptr.shape[0]-1 + if SCALAR_t is COMPLEX_t: + for i in range(num_rows): + temp = rhs[i] + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp = temp-data[jj]*x[j] + result[i] = temp + else: + for i in range(num_rows): + temp = rhs[i] + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + temp -= data[jj]*x[j] + result[i] = temp diff --git a/base/PyNucleus_base/convergence.pxd b/base/PyNucleus_base/convergence.pxd new file mode 100644 index 0000000..091ad22 --- /dev/null +++ b/base/PyNucleus_base/convergence.pxd @@ -0,0 +1,92 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py cimport MPI +from . myTypes cimport INDEX_t, REAL_t, BOOL_t +cimport numpy as np +from . ip_norm cimport normBase, norm_distributed + +###################################################################### +# convergence criteria + +cdef class convergenceCriterion: + cdef: + REAL_t tol, localResidual, globalResidual + INDEX_t maxiter, iter + BOOL_t accumulated + normBase norm + MPI.Comm comm, clientComm + INDEX_t clientRank + BOOL_t hasClient + INDEX_t ov_level + + cdef BOOL_t eval(self, REAL_t[::1] localResidual, BOOL_t asynchronous=*) + cdef REAL_t getGlobalResidual(self) + cdef void registerClient(self, MPI.Comm comm, INDEX_t rank) + cdef void updateClients(self, BOOL_t converged) + cdef void cleanup(self) + + +cdef class noOpConvergenceCriterion(convergenceCriterion): + cdef BOOL_t eval(self, REAL_t[::1] localResidual, BOOL_t asynchronous=*) + + +cdef class synchronousConvergenceCriterion(convergenceCriterion): + cdef BOOL_t eval(self, REAL_t[::1] residualVec, BOOL_t asynchronous=*) + cdef REAL_t getGlobalResidual(self) + cdef void updateClients(self, BOOL_t converged) + + + +###################################################################### +# convergence masters (needed if coarse grid is on separate communicator) + +cdef class convergenceMaster: + cdef: + MPI.Comm masterComm + INDEX_t masterRank, clientRank + cdef void setStatus(self, BOOL_t converged) + + +cdef class noOpConvergenceMaster(convergenceMaster): + pass + + +cdef class synchronousConvergenceMaster(convergenceMaster): + cdef void setStatus(self, BOOL_t converged) + +###################################################################### +# convergence clients (needed if coarse grid is on separate communicator) + +cdef class convergenceClient: + cdef: + MPI.Comm masterComm + INDEX_t masterRank + cdef BOOL_t getStatus(self) + cdef void cleanup(self) + + +cdef class noOpConvergenceClient(convergenceClient): + cdef BOOL_t getStatus(self) + + +cdef class synchronousConvergenceClient(convergenceClient): + cdef: + INDEX_t tag + 
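+        # MPI message tag on which the convergence flag is received
+        # (defaults to 200 in the implementation below)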
cdef BOOL_t getStatus(self) + + +cdef class synchronousConvergenceClientSubcomm(convergenceClient): + cdef: + INDEX_t tag + MPI.Comm comm + cdef BOOL_t getStatus(self) + + diff --git a/base/PyNucleus_base/convergence.pyx b/base/PyNucleus_base/convergence.pyx new file mode 100644 index 0000000..8003821 --- /dev/null +++ b/base/PyNucleus_base/convergence.pyx @@ -0,0 +1,156 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes import INDEX, REAL +from . ip_norm cimport ip_serial +from libc.math cimport sqrt +import numpy as np +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from . blas import uninitialized + +cdef: + INDEX_t TAG_CONVERGED = 32012 + + +###################################################################### +# convergence criteria + +cdef class convergenceCriterion: + def __init__(self, REAL_t tol, INDEX_t maxiter, overlaps, BOOL_t accumulated): + self.tol = tol + self.maxiter = maxiter + self.iter = 0 + self.ov_level = len(overlaps.levels)-1 + self.norm = norm_distributed(overlaps, level=self.ov_level) + self.comm = overlaps.comm + self.accumulated = accumulated + self.globalResidual = 1. + self.hasClient = False + + cdef BOOL_t eval(self, REAL_t[::1] localResidual, BOOL_t asynchronous=False): + pass + + cdef REAL_t getGlobalResidual(self): + pass + + cdef void registerClient(self, MPI.Comm comm, INDEX_t rank): + self.clientComm = comm + self.clientRank = rank + self.hasClient = True + + cdef void updateClients(self, BOOL_t converged): + pass + + cdef void cleanup(self): + pass + + +cdef class noOpConvergenceCriterion(convergenceCriterion): + def __init__(self, REAL_t tol, INDEX_t maxiter, overlaps, BOOL_t accumulated): + convergenceCriterion.__init__(self, tol, maxiter, overlaps, accumulated) + + cdef BOOL_t eval(self, REAL_t[::1] localResidual, BOOL_t asynchronous=False): + return False + + +cdef class synchronousConvergenceCriterion(convergenceCriterion): + def __init__(self, REAL_t tol, INDEX_t maxiter, overlaps, BOOL_t accumulated): + convergenceCriterion.__init__(self, tol, maxiter, overlaps, accumulated) + + cdef BOOL_t eval(self, REAL_t[::1] residualVec, BOOL_t asynchronous=False): + cdef: + BOOL_t converged + self.globalResidual = self.norm.eval(residualVec, self.accumulated, asynchronous=False) + converged = (self.globalResidual < self.tol) or self.iter >= self.maxiter + self.iter += 1 + return converged + + cdef REAL_t getGlobalResidual(self): + return self.globalResidual + + cdef void updateClients(self, BOOL_t converged): + if self.hasClient and self.comm.rank == 0: + self.clientComm.isend(converged, dest=self.clientRank, tag=200) + + +###################################################################### +# convergence master (needed if coarse grid is on separate communicator) + +cdef class convergenceMaster: + def __init__(self, MPI.Comm masterComm, INDEX_t masterRank, INDEX_t clientRank=0): + self.masterComm = masterComm + self.masterRank = masterRank + self.clientRank = clientRank + + cdef void setStatus(self, BOOL_t converged): + pass + + +cdef class noOpConvergenceMaster(convergenceMaster): + def 
__init__(self, MPI.Comm masterComm, INDEX_t masterRank, INDEX_t clientRank=0): + super(noOpConvergenceMaster, self).__init__(masterComm, masterRank, clientRank) + + +cdef class synchronousConvergenceMaster(convergenceMaster): + def __init__(self, MPI.Comm masterComm, INDEX_t masterRank, INDEX_t clientRank=0): + super(synchronousConvergenceMaster, self).__init__(masterComm, masterRank, clientRank) + + cdef void setStatus(self, BOOL_t converged): + if self.masterComm.rank == self.masterRank: + self.masterComm.send(converged, dest=self.clientRank, tag=201) + + +###################################################################### +# convergence clients (needed if coarse grid is on separate communicator) + +cdef class convergenceClient: + def __init__(self, MPI.Comm masterComm, INDEX_t masterRank): + self.masterComm = masterComm + self.masterRank = masterRank + + cdef BOOL_t getStatus(self): + pass + + cdef void cleanup(self): + pass + + +cdef class noOpConvergenceClient(convergenceClient): + cdef BOOL_t getStatus(self): + return False + + +cdef class synchronousConvergenceClient(convergenceClient): + def __init__(self, MPI.Comm masterComm, INDEX_t masterRank, tag=200): + super(synchronousConvergenceClient, self).__init__(masterComm, masterRank) + self.tag = tag + + cdef BOOL_t getStatus(self): + cdef: + BOOL_t converged + converged = self.masterComm.recv(source=self.masterRank, tag=self.tag) + return converged + + +cdef class synchronousConvergenceClientSubcomm(convergenceClient): + def __init__(self, MPI.Comm masterComm, INDEX_t masterRank, MPI.Comm comm, tag=200): + super(synchronousConvergenceClientSubcomm, self).__init__(masterComm, masterRank) + self.comm = comm + self.tag = tag + + cdef BOOL_t getStatus(self): + cdef: + BOOL_t converged = False + if self.comm.rank == 0: + converged = self.masterComm.recv(source=self.masterRank, tag=self.tag) + converged = self.comm.bcast(converged, root=0) + return converged + + diff --git a/base/PyNucleus_base/factory.py b/base/PyNucleus_base/factory.py new file mode 100644 index 0000000..8e9dabc --- /dev/null +++ b/base/PyNucleus_base/factory.py @@ -0,0 +1,83 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from copy import deepcopy + + +class factory: + def __init__(self): + self.classes = {} + self.aliases = {} + + def getCanonicalName(self, name): + if isinstance(name, str): + return name.lower() + else: + return name + + def register(self, name, classType, params={}, aliases=[]): + canonical_name = self.getCanonicalName(name) + self.classes[canonical_name] = (name, classType, params) + for alias in aliases: + canonical_alias = self.getCanonicalName(alias) + self.aliases[canonical_alias] = (alias, canonical_name) + + def isRegistered(self, name): + return self.getCanonicalName(name) in self.classes or name in self.aliases + + def __call__(self, name, *args, **kwargs): + return self.build(name, *args, **kwargs) + + def build(self, name, *args, **kwargs): + canonical_name = self.getCanonicalName(name) + if canonical_name in self.aliases: + canonical_name = self.aliases[canonical_name][1] + if canonical_name not in self.classes: + raise KeyError('\'{}\' not in factory. {}'.format(name, repr(self))) + _, classType, params = self.classes[canonical_name] + p = deepcopy(params) + p.update(kwargs) + obj = classType(*args, **p) + return obj + + def numRegistered(self, countAliases=False): + if not countAliases: + return len(self.classes) + else: + return len(self.classes) + len(self.aliases) + + def __str__(self): + s = '' + for canonical_name in self.classes: + a = [self.aliases[canonical_alias][0] for canonical_alias in self.aliases if self.aliases[canonical_alias][1] == canonical_name] + s += '{} {} {}\n'.format(canonical_name, a, self.classes[canonical_name]) + return s + + def __repr__(self): + s = 'Available:\n' + for canonical_name in self.classes: + name = self.classes[canonical_name][0] + c = self.classes[canonical_name][1] + a = [self.aliases[canonical_alias][0] for canonical_alias in self.aliases if self.aliases[canonical_alias][1] == canonical_name] + sig = c.__doc__ + if sig is None: + from inspect import signature + try: + sig = signature(c) + except ValueError: + pass + if isinstance(sig, str) and sig.find('\n'): + sig = sig.split('\n')[0] + if len(a) > 0: + s += '\'{}\' with aliases {}, signature: \'{}\'\n'.format(name, a, sig) + else: + s += '\'{}\', signature: \'{}\'\n'.format(name, sig) + return s + + def print(self): + print(repr(self)) diff --git a/base/PyNucleus_base/intTuple.pxd b/base/PyNucleus_base/intTuple.pxd new file mode 100644 index 0000000..25a040b --- /dev/null +++ b/base/PyNucleus_base/intTuple.pxd @@ -0,0 +1,29 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . 
myTypes cimport INDEX_t
+
+
+cdef class intTuple:
+    cdef:
+        INDEX_t * entries
+        int size
+
+    cdef void set(self, INDEX_t * t, int size)
+    cdef void assign(self, INDEX_t * t)
+    cdef void assignNonOwning(self, INDEX_t * t)
+    cdef void get(self, INDEX_t * t)
+    @staticmethod
+    cdef intTuple create(INDEX_t[::1] t)
+    @staticmethod
+    cdef intTuple createNonOwning(INDEX_t[::1] t)
+    @staticmethod
+    cdef intTuple create2(INDEX_t a, INDEX_t b)
+    @staticmethod
+    cdef intTuple create3(INDEX_t a, INDEX_t b, INDEX_t c)
diff --git a/base/PyNucleus_base/intTuple.pyx b/base/PyNucleus_base/intTuple.pyx
new file mode 100644
index 0000000..dee1b0a
--- /dev/null
+++ b/base/PyNucleus_base/intTuple.pyx
@@ -0,0 +1,129 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+import numpy as np
+cimport numpy as np
+cimport cython
+from libc.stdlib cimport malloc, realloc, free
+from libc.string cimport memcpy
+
+
+cdef enum:
+    INDEX_SIZE = sizeof(INDEX_t)
+
+
+cdef class intTuple:
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void set(self, INDEX_t * t, int size):
+        self.size = size
+        self.entries = <INDEX_t *> malloc(size*INDEX_SIZE)
+        memcpy(&self.entries[0], &t[0], size*INDEX_SIZE)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void assign(self, INDEX_t * t):
+        memcpy(&self.entries[0], &t[0], self.size*INDEX_SIZE)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void assignNonOwning(self, INDEX_t * t):
+        self.entries = t
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void get(self, INDEX_t * t):
+        memcpy(&t[0], &self.entries[0], self.size*INDEX_SIZE)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @staticmethod
+    cdef intTuple create(INDEX_t[::1] t):
+        cdef:
+            intTuple tt = intTuple()
+        tt.set(&t[0], t.shape[0])
+        return tt
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @staticmethod
+    cdef intTuple createNonOwning(INDEX_t[::1] t):
+        cdef:
+            intTuple tt = intTuple()
+        tt.size = t.shape[0]
+        tt.entries = &t[0]
+        return tt
+
+    @staticmethod
+    def createPy(INDEX_t[::1] t):
+        return intTuple.create(t)
+
+    @staticmethod
+    cdef intTuple create2(INDEX_t a, INDEX_t b):
+        cdef:
+            intTuple t = intTuple()
+        t.size = 2
+        t.entries = <INDEX_t *> malloc(2*INDEX_SIZE)
+        t.entries[0] = a
+        t.entries[1] = b
+        return t
+
+    @staticmethod
+    def create2Py(INDEX_t a, INDEX_t b):
+        return intTuple.create2(a, b)
+
+    @staticmethod
+    cdef intTuple create3(INDEX_t a, INDEX_t b, INDEX_t c):
+        cdef:
+            intTuple t = intTuple()
+        t.size = 3
+        t.entries = <INDEX_t *> malloc(3*INDEX_SIZE)
+        t.entries[0] = a
+        t.entries[1] = b
+        t.entries[2] = c
+        return t
+
+    @staticmethod
+    def create3Py(INDEX_t a, INDEX_t b, INDEX_t c):
+        return intTuple.create3(a, b, c)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def __hash__(self):
+        cdef:
+            INDEX_t hash_val = 2166136261
+            INDEX_t i
+            char * entries = <char *> self.entries
+        for
i in range(self.size*INDEX_SIZE): + hash_val = hash_val ^ entries[i] + hash_val = hash_val * 16777619 + return hash_val + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __eq__(self, intTuple other): + cdef: + INDEX_t i + for i in range(self.size): + if self.entries[i] != other.entries[i]: + return False + return True + + def __repr__(self): + s = '<' + s += ','.join([str(self.entries[i]) for i in range(self.size)]) + s += '>' + return s diff --git a/base/PyNucleus_base/ip_norm.pxd b/base/PyNucleus_base/ip_norm.pxd new file mode 100644 index 0000000..e28e7c0 --- /dev/null +++ b/base/PyNucleus_base/ip_norm.pxd @@ -0,0 +1,89 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from libc.math cimport sqrt +from . blas cimport mydot, norm +cimport numpy as np +from mpi4py cimport MPI + +ctypedef fused SCALAR_t: + REAL_t + COMPLEX_t + +ctypedef REAL_t[::1] vector_t +ctypedef COMPLEX_t[::1] complex_vector_t + + +###################################################################### +# INNER products and norms + +cdef class ipBase: + cdef REAL_t eval(self, + vector_t v1, vector_t v2, + BOOL_t acc1=*, BOOL_t acc2=*, + BOOL_t asynchronous=*) + + +cdef class normBase: + cdef REAL_t eval(self, + vector_t v, + BOOL_t acc=*, + BOOL_t asynchronous=*) + + +cdef class norm_serial(normBase): + pass + + +cdef class ip_serial(ipBase): + pass + + +cdef class ip_distributed(ipBase): + cdef: + object overlap + MPI.Comm comm + INDEX_t level + public vector_t temporaryMemory + ip_serial localIP + + +cdef class norm_distributed(normBase): + cdef: + object overlap + MPI.Comm comm + INDEX_t level + public vector_t temporaryMemory + ip_serial localIP + + +cdef class complexipBase: + cdef COMPLEX_t eval(self, + complex_vector_t v1, complex_vector_t v2, + BOOL_t acc1=*, BOOL_t acc2=*, + BOOL_t asynchronous=*) + + +cdef class complexNormBase: + cdef REAL_t eval(self, + complex_vector_t v, + BOOL_t acc=*, + BOOL_t asynchronous=*) + + +cdef class wrapRealNormToComplex(complexNormBase): + cdef: + normBase norm + vector_t temporaryMemory + + +cdef class wrapRealInnerToComplex(complexipBase): + cdef: + ipBase inner + vector_t temporaryMemory, temporaryMemory2 diff --git a/base/PyNucleus_base/ip_norm.pyx b/base/PyNucleus_base/ip_norm.pyx new file mode 100644 index 0000000..80f4560 --- /dev/null +++ b/base/PyNucleus_base/ip_norm.pyx @@ -0,0 +1,302 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from . myTypes import INDEX, REAL, COMPLEX +from . 
blas import uninitialized +cimport cython + +include "config.pxi" + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI + + +###################################################################### +# Inner products and norms + +cdef class ipBase: + def __init__(self): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __call__(self, + vector_t v1, vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + return self.eval(v1, v2, acc1, acc2, asynchronous) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v1, vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + raise NotImplementedError() + + +cdef class normBase: + def __init__(self): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __call__(self, + vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + return self.eval(v, acc, asynchronous) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + raise NotImplementedError() + + +cdef class ip_noop(ipBase): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v1, vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + return 10. + + +cdef class norm_noop(normBase): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + return 10. 
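+
+# A minimal usage sketch for the serial inner product and norm (illustrative
+# only; assumes NumPy arrays whose dtype matches REAL, i.e. float64 in the
+# default 64-bit build):
+#
+#     from PyNucleus_base.ip_norm import ip_serial, norm_serial
+#     import numpy as np
+#     ip, nrm = ip_serial(), norm_serial()
+#     x = np.ones(10)
+#     ip(x, x)   # -> 10.0
+#     nrm(x)     # -> sqrt(10.) ~ 3.1622776601683795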
+ + +cdef class ip_serial(ipBase): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v1, vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + return mydot(v1, v2) + + +cdef class norm_serial(normBase): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + return norm(v) + + +cdef class ip_distributed(ipBase): + def __init__(self, overlap, INDEX_t level=-1): + self.overlap = overlap + self.comm = overlap.comm + self.level = level + self.temporaryMemory = uninitialized((0), dtype=REAL) + self.localIP = ip_serial() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v1, vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + cdef: + REAL_t n + vector_t u = self.temporaryMemory + assert v1.shape[0] == v2.shape[0] + if v1.shape[0] > u.shape[0]: + self.temporaryMemory = uninitialized((v1.shape[0]), dtype=REAL) + u = self.temporaryMemory + if acc1 == acc2: + if acc1: + self.overlap.distribute(v1, u) + n = self.overlap.reduce(self.localIP(v2, u), asynchronous) + else: + self.overlap.accumulate(v1, u, level=self.level, asynchronous=asynchronous) + n = self.overlap.reduce(self.localIP(v2, u), asynchronous) + else: + if not acc1: + # self.overlap.accumulate(v1, u, level=self.level, asynchronous=asynchronous) + # self.overlap.distribute(u) + n = self.localIP.eval(v2, v1) + n = self.overlap.reduce(n, asynchronous) + else: + # self.overlap.accumulate(v2, u, level=self.level, asynchronous=asynchronous) + # self.overlap.distribute(u) + n = self.localIP.eval(v1, v2) + n = self.overlap.reduce(n, asynchronous) + return n + + +cdef class norm_distributed(normBase): + def __init__(self, overlap, INDEX_t level=-1): + self.overlap = overlap + self.comm = overlap.comm + self.level = level + self.temporaryMemory = uninitialized((0), dtype=REAL) + self.localIP = ip_serial() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + cdef: + vector_t u = self.temporaryMemory + REAL_t n, nb + if v.shape[0] > u.shape[0]: + self.temporaryMemory = uninitialized((v.shape[0]), dtype=REAL) + u = self.temporaryMemory + if acc: + self.overlap.distribute(v, u, level=self.level) + else: + self.overlap.accumulate(v, u, level=self.level, asynchronous=asynchronous) + nb = self.localIP.eval(v, u) + n = self.overlap.reduce(nb, asynchronous) + n = sqrt(n) + return n + + +cdef class complexipBase: + def __init__(self): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __call__(self, + complex_vector_t v1, complex_vector_t v2, + BOOL_t acc1=False, BOOL_t acc2=False, + BOOL_t asynchronous=False): + return self.eval(v1, v2, acc1, acc2, asynchronous) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef COMPLEX_t eval(self, + complex_vector_t v1, + complex_vector_t v2, + BOOL_t acc1=False, + BOOL_t acc2=False, + BOOL_t asynchronous=False): + return 10. 
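+
+# Convention note: the complex inner products in this module conjugate their
+# first argument, i.e. eval(x, y) ~ sum_i conj(x_i) * y_i, matching mydot()
+# from the blas module. An illustrative consistency check (hypothetical
+# usage, not part of the test suite):
+#
+#     from PyNucleus_base.ip_norm import wrapRealInnerToComplex, ip_serial
+#     import numpy as np
+#     inner = wrapRealInnerToComplex(ip_serial())
+#     x = np.array([1+2j]); y = np.array([3+4j])
+#     inner(x, y)    # -> (11-2j)
+#     np.vdot(x, y)  # -> (11-2j); vdot also conjugates its first argument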
+ + +cdef class complexNormBase: + def __init__(self): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __call__(self, + complex_vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + return self.eval(v, acc, asynchronous) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + complex_vector_t v, + BOOL_t acc=False, + BOOL_t asynchronous=False): + return 10. + + +cdef class wrapRealNormToComplex(complexNormBase): + def __init__(self, normBase norm): + self.norm = norm + self.temporaryMemory = uninitialized((0), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, + complex_vector_t x, + BOOL_t acc=False, + BOOL_t asynchronous=False): + cdef: + INDEX_t i + REAL_t s = 0.0 + if x.shape[0] != self.temporaryMemory.shape[0]: + self.temporaryMemory = uninitialized((x.shape[0]), dtype=REAL) + for i in range(x.shape[0]): + self.temporaryMemory[i] = x[i].real + s += self.norm.eval(self.temporaryMemory, acc)**2 + for i in range(x.shape[0]): + self.temporaryMemory[i] = x[i].imag + s += self.norm.eval(self.temporaryMemory, acc)**2 + return sqrt(s) + + +cdef class wrapRealInnerToComplex(complexipBase): + def __init__(self, ipBase inner): + self.inner = inner + self.temporaryMemory = uninitialized((0), dtype=REAL) + self.temporaryMemory2 = uninitialized((0), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef COMPLEX_t eval(self, + complex_vector_t x, + complex_vector_t y, + BOOL_t acc1=False, + BOOL_t acc2=False, + BOOL_t asynchronous=False): + cdef: + INDEX_t i + COMPLEX_t s = 0.0 + COMPLEX_t I = 1j + if x.shape[0] != self.temporaryMemory.shape[0]: + self.temporaryMemory = uninitialized((x.shape[0]), dtype=REAL) + self.temporaryMemory2 = uninitialized((x.shape[0]), dtype=REAL) + for i in range(x.shape[0]): + self.temporaryMemory[i] = x[i].real + for i in range(y.shape[0]): + self.temporaryMemory2[i] = y[i].real + # Re * Re + s = self.inner.eval(self.temporaryMemory, self.temporaryMemory2, acc1, acc2) + for i in range(y.shape[0]): + self.temporaryMemory2[i] = y[i].imag + # Re * Im + s = s + I * self.inner.eval(self.temporaryMemory, self.temporaryMemory2, acc1, acc2) + for i in range(x.shape[0]): + self.temporaryMemory[i] = x[i].imag + # Im * Im + s = s + self.inner.eval(self.temporaryMemory, self.temporaryMemory2, acc1, acc2) + for i in range(y.shape[0]): + self.temporaryMemory2[i] = y[i].real + # Im * Re + s = s - I * self.inner.eval(self.temporaryMemory, self.temporaryMemory2, acc1, acc2) + return s diff --git a/base/PyNucleus_base/linalg.pxd b/base/PyNucleus_base/linalg.pxd new file mode 100644 index 0000000..c80be53 --- /dev/null +++ b/base/PyNucleus_base/linalg.pxd @@ -0,0 +1,41 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . 
myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t + +ctypedef fused SCALAR_t: + REAL_t + COMPLEX_t + + +cdef void forward_solve_csc(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + SCALAR_t[::1] data, + SCALAR_t[::1] b, + SCALAR_t[::1] y, + BOOL_t unitDiagonal) + +cdef void backward_solve_csc(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + SCALAR_t[::1] data, + SCALAR_t[::1] b, + SCALAR_t[::1] y) + +cdef void forward_solve_sss_noInverse(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] invDiagonal, + const REAL_t[::1] b, + REAL_t[::1] y, + BOOL_t unitDiagonal=*) +cdef void backward_solve_sss_noInverse(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] invDiagonal, + const REAL_t[::1] b, + REAL_t[::1] y) diff --git a/base/PyNucleus_base/linalg.pyx b/base/PyNucleus_base/linalg.pyx new file mode 100644 index 0000000..da17670 --- /dev/null +++ b/base/PyNucleus_base/linalg.pyx @@ -0,0 +1,1147 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from libc.math cimport sqrt +from . myTypes import INDEX, REAL, COMPLEX +from . myTypes cimport INDEX_t, REAL_t, COMPLEX_t +from . linear_operators cimport (sort_indices, + LinearOperator, + CSR_LinearOperator, + SSS_LinearOperator, + LinearOperator_wrapper, + TimeStepperLinearOperator, + ComplexLinearOperator, + wrapRealToComplex) +from . solvers cimport cg_solver, gmres_solver, complex_gmres_solver +from . ip_norm import wrapRealInnerToComplex, wrapRealNormToComplex, ip_serial, norm_serial +from . ip_norm cimport ipBase, normBase, complexipBase, complexNormBase +from . utilsCy import UniformOnUnitSphere +from . blas cimport assign, assignScaled, assign3, update, updateScaled, mydot, gemvF +from . blas import uninitialized +from . 
convergence cimport (convergenceMaster, noOpConvergenceMaster, + convergenceClient, noOpConvergenceClient) + +include "config.pxi" + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef REAL_t accumulate_serial(REAL_t[::1] x): + pass + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def ichol_csr(A): + cdef: + np.ndarray[INDEX_t, ndim=1] indptr_mem = np.zeros_like(A.indptr, + dtype=INDEX) + np.ndarray[INDEX_t, ndim=1] indices_mem + np.ndarray[REAL_t, ndim=1] data_mem, diagonal_mem = uninitialized((A.num_rows), dtype=REAL) + INDEX_t[::1] Aindptr = A.indptr, Aindices = A.indices + REAL_t[::1] Adata = A.data + INDEX_t[::1] indptr = indptr_mem, indices + REAL_t[::1] data, diagonal = diagonal_mem + INDEX_t i, ii, jj, j, nnz, kk, k, hh + INDEX_t num_rows = A.num_rows + # step 1: build indptr + for i in range(num_rows): + for jj in range(Aindptr[i], Aindptr[i+1]): + j = Aindices[jj] + if j < i: + indptr[j+1] += 1 + for i in range(num_rows): + indptr[i+1] += indptr[i] + # step 2: build indices and initial data + nnz = indptr[num_rows] + indices_mem = uninitialized((nnz), dtype=INDEX) + indices = indices_mem + data_mem = np.zeros((nnz), dtype=REAL) + data = data_mem + for i in range(num_rows): + for ii in range(Aindptr[i], Aindptr[i+1]): + if Aindices[ii] == i: + diagonal[i] = Adata[ii] + break + for jj in range(Aindptr[i], Aindptr[i+1]): + j = Aindices[jj] + if j < i: + indices[indptr[j]] = i + data[indptr[j]] = Adata[jj] + indptr[j] += 1 + for i in range(num_rows, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + sort_indices(indptr, indices, data) + # step 3: perform Cholesky + for i in range(num_rows): + diagonal[i] = sqrt(diagonal[i]) + for jj in range(indptr[i], indptr[i+1]): + data[jj] /= diagonal[i] + j = indices[jj] + diagonal[j] -= data[jj]*data[jj] + for kk in range(indptr[j], indptr[j+1]): + k = indices[kk] + for hh in range(jj, indptr[i+1]): + if indices[hh] == k: + data[kk] -= data[hh]*data[jj] + break + return indices_mem, indptr_mem, data_mem, diagonal_mem + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def ichol_sss(SSS_LinearOperator A): + cdef: + np.ndarray[INDEX_t, ndim=1] indptr_mem = np.zeros_like(A.indptr, + dtype=INDEX) + np.ndarray[INDEX_t, ndim=1] indices_mem + np.ndarray[REAL_t, ndim=1] data_mem, diagonal_mem = uninitialized((A.num_rows), dtype=REAL) + INDEX_t[::1] Aindptr = A.indptr, Aindices = A.indices + REAL_t[::1] Adata = A.data, Adiagonal = A.diagonal + INDEX_t[::1] indptr = indptr_mem, indices + REAL_t[::1] data, diagonal = diagonal_mem + INDEX_t i, ii, jj, j, nnz, kk, k, hh + INDEX_t num_rows = A.num_rows + # step 1: build indptr + for i in range(num_rows): + for jj in range(Aindptr[i], Aindptr[i+1]): + j = Aindices[jj] + if j < i: + indptr[j+1] += 1 + for i in range(num_rows): + indptr[i+1] += indptr[i] + # step 2: build indices and initial data + nnz = indptr[num_rows] + indices_mem = uninitialized((nnz), dtype=INDEX) + indices = indices_mem + data_mem = np.zeros((nnz), dtype=REAL) + data = data_mem + for i in range(num_rows): + diagonal[i] = Adiagonal[i] + for jj in range(Aindptr[i], Aindptr[i+1]): + j = Aindices[jj] + if j < i: + indices[indptr[j]] = i + data[indptr[j]] = Adata[jj] + indptr[j] += 1 + for i in range(num_rows, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + sort_indices(indptr, indices, data) + # step 3: perform Cholesky + for i in range(num_rows): + diagonal[i] = sqrt(diagonal[i]) + for jj in range(indptr[i], 
indptr[i+1]): + data[jj] /= diagonal[i] + j = indices[jj] + diagonal[j] -= data[jj]*data[jj] + for kk in range(indptr[j], indptr[j+1]): + k = indices[kk] + for hh in range(jj, indptr[i+1]): + if indices[hh] == k: + data[kk] -= data[hh]*data[jj] + break + return indices_mem, indptr_mem, data_mem, diagonal_mem + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void forward_solve_csc(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + SCALAR_t[::1] data, + SCALAR_t[::1] b, + SCALAR_t[::1] y, + BOOL_t unitDiagonal): + cdef: + INDEX_t n = b.shape[0], i, j, i1, i2 + if SCALAR_t is REAL_t: + if unitDiagonal: + for j in range(n): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = b[j]-y[j] + # FIX: Should I start with i1 here, not i1+1? + # Maybe SuperLU saves the ones on the diagonal? + for i in range(i1+1, i2): + y[indices[i]] += data[i]*y[j] + else: + for j in range(n): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = (b[j]-y[j])/data[i1] + for i in range(i1+1, i2): + y[indices[i]] += data[i]*y[j] + else: + if unitDiagonal: + for j in range(n): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = b[j]-y[j] + # FIX: Should I start with i1 here, not i1+1? + # Maybe SuperLU saves the ones on the diagonal? + for i in range(i1+1, i2): + y[indices[i]] = y[indices[i]] + data[i]*y[j] + else: + for j in range(n): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = (b[j]-y[j])/data[i1] + for i in range(i1+1, i2): + y[indices[i]] = y[indices[i]] + data[i]*y[j] + + +IF USE_MKL_TRISOLVE: + + ctypedef INDEX_t MKL_INT + + cdef extern from "mkl/mkl_spblas.h": + void mkl_dcsrsm (const char *transa , const MKL_INT *m , const MKL_INT *n , const REAL_t *alpha , const char *matdescra , + const REAL_t *val , const MKL_INT *indx , const MKL_INT *pntrb , const MKL_INT *pntre , + const REAL_t *b , const MKL_INT *ldb , REAL_t *c , const MKL_INT *ldc ); + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef inline void trisolve_mkl(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + REAL_t[::1] data, + REAL_t[::1] b, + REAL_t[::1] y, + BOOL_t forward=True, + BOOL_t unitDiagonal=False): + cdef: + char transA + REAL_t alpha = 1. 
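+            # matdscr holds MKL's matrix descriptor as ASCII codes:
+            # 84='T' (triangular), 85='U' (upper / unit diagonal),
+            # 78='N' (non-unit diagonal / no transpose), 67='C' (zero-based
+            # indexing); transA uses the same encoding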
+ char matdscr[6] + INDEX_t inc = 1 + INDEX_t n = indptr.shape[0]-1 + INDEX_t one = 1 + matdscr[0] = 84 + if forward: + transA = 84 + else: + transA = 78 + matdscr[1] = 85 + if unitDiagonal: + matdscr[2] = 85 + else: + matdscr[2] = 78 + matdscr[3] = 67 + mkl_dcsrsm(&transA, &n, &one, &alpha, &matdscr[0], &data[0], &indices[0], &indptr[0], &indptr[1], &b[0], &one, &y[0], &one) + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void forward_solve_sss(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] diagonal, + const REAL_t[::1] b, + REAL_t[::1] y, + BOOL_t unitDiagonal=False): + cdef: + INDEX_t n = b.shape[0], i, j + if unitDiagonal: + for j in range(n): + y[j] = b[j]-y[j] + for i in range(indptr[j], indptr[j+1]): + y[indices[i]] += data[i]*y[j] + else: + for j in range(n): + y[j] = (b[j]-y[j])/diagonal[j] + for i in range(indptr[j], indptr[j+1]): + y[indices[i]] += data[i]*y[j] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void forward_solve_sss_noInverse(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] invDiagonal, + const REAL_t[::1] b, + REAL_t[::1] y, + BOOL_t unitDiagonal=False): + cdef: + INDEX_t n = b.shape[0], i, j + if unitDiagonal: + for j in range(n): + y[j] = b[j]-y[j] + for i in range(indptr[j], indptr[j+1]): + y[indices[i]] += data[i]*y[j] + else: + for j in range(n): + y[j] = (b[j]-y[j])*invDiagonal[j] + for i in range(indptr[j], indptr[j+1]): + y[indices[i]] += data[i]*y[j] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline void backward_solve_csc(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + SCALAR_t[::1] data, + SCALAR_t[::1] b, + SCALAR_t[::1] y): + cdef: + INDEX_t n = b.shape[0], i, j, i1, i2 + if SCALAR_t is REAL_t: + for j in range(n-1, -1, -1): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = (b[j]-y[j])/data[i2-1] + for i in range(i1, i2-1): + y[indices[i]] += data[i]*y[j] + else: + for j in range(n-1, -1, -1): + i1 = indptr[j] + i2 = indptr[j+1] + y[j] = (b[j]-y[j])/data[i2-1] + for i in range(i1, i2-1): + y[indices[i]] = y[indices[i]]+data[i]*y[j] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline void backward_solve_csr(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + REAL_t[::1] data, + REAL_t[::1] b, + REAL_t[::1] y): + cdef: + INDEX_t n = b.shape[0], i, j, jj + REAL_t temp + for i in range(n-1, -1, -1): + temp = b[i] + jj = indptr[i+1]-1 + j = indices[jj] + while j > i: + temp -= data[jj]*y[j] + jj -= 1 + j = indices[jj] + y[i] = temp/data[jj] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline void backward_solve_sss(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] diagonal, + const REAL_t[::1] b, + REAL_t[::1] y): + cdef: + INDEX_t n = b.shape[0], i, jj + REAL_t temp + for i in range(n-1, -1, -1): + temp = b[i] + for jj in range(indptr[i], indptr[i+1]): + temp -= data[jj]*y[indices[jj]] + y[i] = temp/diagonal[i] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) 
+@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void backward_solve_sss_noInverse(const INDEX_t[::1] indptr, + const INDEX_t[::1] indices, + const REAL_t[::1] data, + const REAL_t[::1] invDiagonal, + const REAL_t[::1] b, + REAL_t[::1] y): + cdef: + INDEX_t n = b.shape[0], i, jj + REAL_t temp + for i in range(n-1, -1, -1): + temp = b[i] + for jj in range(indptr[i], indptr[i+1]): + temp -= data[jj]*y[indices[jj]] + y[i] = temp*invDiagonal[i] + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef solve_LU(INDEX_t[::1] Lindptr, INDEX_t[::1] Lindices, REAL_t[::1] Ldata, + INDEX_t[::1] Uindptr, INDEX_t[::1] Uindices, REAL_t[::1] Udata, + INDEX_t[::1] perm_r, INDEX_t[::1] perm_c, + REAL_t[::1] b): + cdef: + INDEX_t n = b.shape[0], i, j + np.ndarray[REAL_t, ndim=1] temp1_mem = np.zeros((n), dtype=REAL) + np.ndarray[REAL_t, ndim=1] temp2_mem = uninitialized((n), dtype=REAL) + REAL_t[::1] temp1 = temp1_mem + REAL_t[::1] temp2 = temp2_mem + for i in range(n): + temp2[perm_r[i]] = b[i] + forward_solve_csc(Lindptr, Lindices, Ldata, temp2, temp1, + unitDiagonal=True) + temp2[:] = 0. + backward_solve_csc(Uindptr, Uindices, Udata, temp1, temp2) + for i in range(n): + temp1[i] = temp2[perm_c[i]] + return temp1_mem + + + +cdef class ILU_solver: + cdef: + public INDEX_t[::1] Lindptr, Lindices + public REAL_t[::1] Ldata + public INDEX_t[::1] Uindptr, Uindices + public REAL_t[::1] Udata + public INDEX_t[::1] perm_c, perm_r + REAL_t[::1] temp1 + REAL_t[::1] temp2 + + def __init__(self, num_rows): + self.temp1 = uninitialized((num_rows), dtype=REAL) + self.temp2 = uninitialized((num_rows), dtype=REAL) + + def setup(self, A, fill_factor=1.): + from scipy.sparse.linalg import spilu + Clu = spilu(A.to_csr().tocsc(), fill_factor=fill_factor) + self.Lindices = Clu.L.indices + self.Lindptr = Clu.L.indptr + self.Ldata = Clu.L.data + self.Uindices = Clu.U.indices + self.Uindptr = Clu.U.indptr + self.Udata = Clu.U.data + self.perm_r = Clu.perm_r + self.perm_c = Clu.perm_c + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef solve(self, REAL_t[::1] b, REAL_t[::1] x): + cdef INDEX_t i + self.temp1[:] = 0. + for i in range(x.shape[0]): + self.temp2[self.perm_r[i]] = b[i] + forward_solve_csc(self.Lindptr, self.Lindices, self.Ldata, + self.temp2, self.temp1, + unitDiagonal=True) + self.temp2[:] = 0. 
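+        # back-substitution with the upper triangular factor, then undo
+        # SuperLU's column permutation below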
+ backward_solve_csc(self.Uindptr, self.Uindices, self.Udata, + self.temp1, self.temp2) + for i in range(x.shape[0]): + x[i] = self.temp2[self.perm_c[i]] + + def asPreconditioner(self): + return LinearOperator_wrapper(self.temp1.shape[0], + self.temp1.shape[0], + self.solve) + + +# Assumes that indices are ordered +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef solve_cholesky(INDEX_t[::1] Lindptr, + INDEX_t[::1] Lindices, + REAL_t[::1] Ldata, + REAL_t[::1] b): + cdef: + INDEX_t n = b.shape[0], i, j + np.ndarray[REAL_t, ndim=1] temp_mem = np.zeros((n), dtype=REAL) + REAL_t[::1] temp = temp_mem + forward_solve_csc(Lindptr, Lindices, Ldata, b, temp, + unitDiagonal=False) + backward_solve_csr(Lindptr, Lindices, Ldata, temp, temp) + return temp_mem + + +cdef class cholesky_solver: + cdef: + public INDEX_t[::1] indptr, indices + public REAL_t[::1] data, diagonal, temp + CSR_LinearOperator L + + def __init__(self, num_rows): + self.temp = uninitialized((num_rows), dtype=REAL) + + def setup(self, A): + cdef: + INDEX_t i + if isinstance(A, CSR_LinearOperator): + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(A) + elif isinstance(A, SSS_LinearOperator): + # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(A.to_csr_linear_operator()) + elif isinstance(A, TimeStepperLinearOperator): + B = A.to_csr_linear_operator() + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) + else: + raise NotImplementedError() + + IF USE_MKL_TRISOLVE: + from . linear_operators import diagonalOperator + T = CSR_LinearOperator(self.indices, self.indptr, self.data).to_csr()+diagonalOperator(self.diagonal).to_csr() + self.L = CSR_LinearOperator.from_csr(T) + ELSE: + for i in range(self.diagonal.shape[0]): + self.diagonal[i] = 1./self.diagonal[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef solve(self, REAL_t[::1] b, REAL_t[::1] x): + self.temp[:] = 0.0 + IF USE_MKL_TRISOLVE: + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, b, self.temp, forward=True, unitDiagonal=False) + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, self.temp, x, forward=False, unitDiagonal=False) + ELSE: + forward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + b, self.temp, unitDiagonal=False) + backward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + self.temp, x) + + def asPreconditioner(self): + return LinearOperator_wrapper(self.diagonal.shape[0], + self.diagonal.shape[0], + self.solve) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cpdef void bicgstab(LinearOperator A, + const REAL_t[::1] b, + REAL_t[::1] x, + REAL_t[::1] x0=None, + REAL_t tol=1e-10, + int maxiter=20, + list residuals=None, + LinearOperator precond=None, + ipBase inner=ip_serial(), + normBase norm=norm_serial(), + accumulate=accumulate_serial, + BOOL_t use2norm=True, + REAL_t[::1] temporaryMemory=None): + """ + Stabilized Biconjugate Gradient iteration. + + In a distributed solve, we want: + A: accumulated to distributed + precond: distributed to accumulated + b: distributed + x: accumulated + x0: accumulated + + In the unpreconditioned distributed case, set precond to accumulate. + + If use2norm is False, use Preconditioner norm of residual as + stopping criterion, otherwise use 2-norm of residual. 
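+
+    The solution is written into x; if a residuals list is passed in, the
+    norm of the residual at each step is appended to it.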
+
+    Memory requirement:
+    8*dim with preconditioner,
+    6*dim without.
+    """
+
+    cdef:
+        INDEX_t i, k, dim = A.shape[0]
+        REAL_t[::1] r0, r, p, p2, s, s2, temp, temp2
+        REAL_t kappa, kappaNew, alpha, omega, beta
+
+    if temporaryMemory is None:
+        temporaryMemory = uninitialized((8*dim), dtype=REAL)
+    else:
+        if precond is not None:
+            assert temporaryMemory.shape[0] >= 8*dim
+        else:
+            assert temporaryMemory.shape[0] >= 6*dim
+
+    r0 = temporaryMemory[:dim]
+    r = temporaryMemory[dim:2*dim]
+    p = temporaryMemory[2*dim:3*dim]
+    s = temporaryMemory[3*dim:4*dim]
+    temp = temporaryMemory[4*dim:5*dim]
+    temp2 = temporaryMemory[5*dim:6*dim]
+    if precond is not None:
+        p2 = temporaryMemory[6*dim:7*dim]
+        s2 = temporaryMemory[7*dim:]
+    else:
+        p2 = p
+        s2 = s
+
+    if residuals is None:
+        residuals = []
+    else:
+        assert len(residuals) == 0
+
+    if x0 is None:
+        for i in range(dim):
+            x[i] = 0.
+            p[i] = r[i] = r0[i] = b[i]
+    else:
+        for i in range(dim):
+            x[i] = x0[i]
+        A(x, temp)
+        for i in range(dim):
+            p[i] = r0[i] = r[i] = b[i] - temp[i]
+    accumulate(r0)
+
+    kappa = inner(r, r0, False, True)
+    residuals.append(sqrt(kappa))
+    for k in range(maxiter):
+        if precond is not None:
+            precond(p, p2)
+        A(p2, temp)
+        alpha = kappa / inner(temp, r0, False, True)
+        for i in range(dim):
+            s[i] = r[i]-alpha*temp[i]
+        if precond is not None:
+            precond(s, s2)
+        A(s2, temp2)
+        omega = inner(temp2, s, False, True) / norm(temp2, False)**2
+        for i in range(dim):
+            x[i] += alpha*p2[i] + omega*s2[i]
+        for i in range(dim):
+            r[i] = s[i] - omega*temp2[i]
+        if use2norm:
+            residuals.append(norm(r, False))
+        else:
+            raise NotImplementedError()
+        if residuals[k+1] < tol:
+            return
+        kappaNew = inner(r, r0, False, True)
+        beta = kappaNew/kappa * alpha/omega
+        kappa = kappaNew
+        for i in range(dim):
+            p[i] = r[i] + beta*(p[i] - omega*temp[i])
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+cpdef int cg(LinearOperator A,
+             REAL_t[::1] b,
+             REAL_t[::1] x,
+             REAL_t[::1] x0=None,
+             REAL_t tol=1e-10,
+             int maxiter=20,
+             list residuals=None,
+             LinearOperator precond=None,
+             ipBase inner=ip_serial(),
+             normBase norm=norm_serial(),
+             BOOL_t use2norm=False,
+             BOOL_t relativeTolerance=False):
+    cdef:
+        cg_solver solver = cg_solver(A)
+        int numIter
+    if precond is not None:
+        solver.setPreconditioner(precond)
+    solver.tolerance = tol
+    solver.maxIter = maxiter
+    solver.use2norm = use2norm
+    solver.relativeTolerance = relativeTolerance
+    if x0 is not None:
+        solver.setInitialGuess(x0)
+    solver.inner = inner
+    solver.norm = norm
+    solver.setup()
+    numIter = solver.solve(b, x)
+    if residuals is not None:
+        residuals += solver.residuals
+    return numIter
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+cpdef flexible_cg(A,
+                  REAL_t[::1] b,
+                  x0=None,
+                  REAL_t tol=1e-10,
+                  int maxiter=20,
+                  residuals=None,
+                  precond=None,
+                  inner=ip_serial()):
+    cdef:
+        np.ndarray[REAL_t, ndim=1] rold_mem = uninitialized(b.shape[0],
+                                                            dtype=REAL)
+        REAL_t beta, beta2, alpha
+        REAL_t[::1] r, rold = rold_mem, p, Ap, Br, x
+        int dim = b.shape[0]
+        int i, j
+    if x0 is None:
+        x = b.copy()
+    else:
+        x = x0
+
+    if residuals is None:
+        residuals = []
+    else:
+        assert len(residuals) == 0
+
+    # Krylov space spans whole solution space after dim-1 iterations
+    maxiter = min(maxiter, dim)
+
+    if precond is None:
+        r = b - A.dot(x)
+        p = r.copy()
+        beta = sqrt(inner(r, r))
+        residuals.append(beta)
+        if not beta <= tol:
+            beta2 = beta**2
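+            # beta is computed from inner(r, r - r_old), a Polak-Ribiere-type
+            # update, rather than inner(r, r); rold carries the previous
+            # residual for this purpose. This is the usual rationale for the
+            # "flexible" variant: it tolerates a preconditioner that changes
+            # between iterations.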
for i in range(maxiter): + Ap = A.dot(np.array(p, copy=False, dtype=REAL)) + alpha = beta2/inner(p, Ap) + for j in range(dim): + x[j] += alpha*p[j] + r[j] -= alpha*Ap[j] + for j in range(dim): + rold[j] = r[j] - rold[j] + beta = sqrt(inner(r, rold)) + residuals.append(beta) + if beta <= tol: + break + beta = beta**2 + for j in range(dim): + p[j] = r[j] + beta/beta2*p[j] + beta2 = inner(r, r) + for j in range(dim): + rold[j] = r[j] + return np.array(x, copy=False, dtype=REAL) + else: + r = b - A*x + p = precond*r + beta2 = inner(r, p, False, True) + residuals.append(sqrt(inner(r, r, False, False))) + if not residuals[0] <= tol: + for i in range(maxiter): + Ap = A*np.array(p, copy=False, dtype=REAL) + alpha = beta2/inner(p, Ap, True, False) + for j in range(dim): + x[j] += alpha*p[j] + r[j] -= alpha*Ap[j] + for j in range(dim): + rold[j] = r[j] - rold[j] + Br = precond*np.array(r, copy=False, dtype=REAL) + beta = inner(rold, Br, False, True) + residuals.append(sqrt(inner(r, r, False, False))) + if residuals[i+1] <= tol: + break + for j in range(dim): + p[j] = Br[j] + beta/beta2*p[j] + beta2 = inner(r, Br, False, True) + for j in range(dim): + rold[j] = r[j] + return np.array(x, copy=False, dtype=REAL) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cpdef int gmres(LinearOperator A, + REAL_t[::1] b, + REAL_t[::1] x, + REAL_t[::1] x0=None, + int maxiter=20, + int restarts=1, + REAL_t tol=1e-5, + list residuals=None, + LinearOperator Lprecond=None, + LinearOperator Rprecond=None, + ipBase inner=ip_serial(), + normBase norm=norm_serial(), + convergenceMaster convMaster=None, + convergenceClient convClient=None, + BOOL_t flexible=False, + BOOL_t relativeTolerance=False): + cdef: + gmres_solver solver = gmres_solver(A) + int numIter + if Rprecond is not None: + solver.setPreconditioner(Rprecond, False) + elif Lprecond is not None: + solver.setPreconditioner(Lprecond, True) + solver.tolerance = tol + solver.maxIter = maxiter + solver.restarts = restarts + solver.relativeTolerance = relativeTolerance + solver.flexible = flexible + if x0 is not None: + solver.setInitialGuess(x0) + solver.inner = inner + solver.norm = norm + solver.convMaster = convMaster + solver.convClient = convClient + solver.setup() + numIter = solver.solve(b, x) + if residuals is not None: + residuals += solver.residuals + return numIter + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cpdef int gmresComplex(ComplexLinearOperator A, + COMPLEX_t[::1] b, + COMPLEX_t[::1] x, + COMPLEX_t[::1] x0=None, + int maxiter=20, + int restarts=1, + REAL_t tol=1e-5, + list residuals=None, + ComplexLinearOperator Lprecond=None, + ComplexLinearOperator Rprecond=None, + complexipBase inner=wrapRealInnerToComplex(ip_serial()), + complexNormBase norm=wrapRealNormToComplex(norm_serial()), + convergenceMaster convMaster=None, + convergenceClient convClient=None, + BOOL_t flexible=False, + BOOL_t relativeTolerance=False): + cdef: + complex_gmres_solver solver = complex_gmres_solver(A) + int numIter + if Rprecond is not None: + solver.setPreconditioner(Rprecond, False) + elif Lprecond is not None: + solver.setPreconditioner(Lprecond, True) + solver.tolerance = tol + solver.maxIter = maxiter + solver.restarts = restarts + solver.relativeTolerance = relativeTolerance + solver.flexible = flexible + if x0 is not None: + solver.setInitialGuess(x0) + solver.setNormInner(norm, inner) + solver.convMaster = convMaster + 
solver.convClient = convClient
+    solver.setup()
+    numIter = solver.solve(b, x)
+    if residuals is not None:
+        residuals += solver.residuals
+    return numIter
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+cpdef void bicgstabComplex(ComplexLinearOperator A,
+                           const COMPLEX_t[::1] b,
+                           COMPLEX_t[::1] x,
+                           COMPLEX_t[::1] x0=None,
+                           REAL_t tol=1e-10,
+                           int maxiter=20,
+                           list residuals=None,
+                           ComplexLinearOperator precond=None,
+                           complexipBase inner=wrapRealInnerToComplex(ip_serial()),
+                           complexNormBase norm=wrapRealNormToComplex(norm_serial()),
+                           BOOL_t use2norm=True,
+                           COMPLEX_t[::1] temporaryMemory=None):
+    """
+    Stabilized Biconjugate Gradient iteration.
+
+    In a distributed solve, we want:
+    A: accumulated to distributed
+    precond: distributed to accumulated
+    b: distributed
+    x: accumulated
+    x0: accumulated
+
+    In the unpreconditioned distributed case, set precond to accumulate.
+
+    If use2norm is False, use the preconditioner norm of the residual as
+    stopping criterion, otherwise use the 2-norm of the residual.
+
+    Memory requirement:
+    8*dim with preconditioner,
+    6*dim without.
+    """
+
+    cdef:
+        INDEX_t i, k, dim = A.shape[0]
+        COMPLEX_t[::1] r0, r, p, p2, s, s2, temp, temp2
+        COMPLEX_t kappa, kappaNew, alpha, omega, beta
+
+    if temporaryMemory is None:
+        temporaryMemory = uninitialized((8*dim), dtype=COMPLEX)
+    else:
+        if precond is not None:
+            assert temporaryMemory.shape[0] >= 8*dim
+        else:
+            assert temporaryMemory.shape[0] >= 6*dim
+
+    r0 = temporaryMemory[:dim]
+    r = temporaryMemory[dim:2*dim]
+    p = temporaryMemory[2*dim:3*dim]
+    s = temporaryMemory[3*dim:4*dim]
+    temp = temporaryMemory[4*dim:5*dim]
+    temp2 = temporaryMemory[5*dim:6*dim]
+    if precond is not None:
+        p2 = temporaryMemory[6*dim:7*dim]
+        s2 = temporaryMemory[7*dim:]
+    else:
+        p2 = p
+        s2 = s
+
+    if residuals is None:
+        residuals = []
+
+    if x0 is None:
+        for i in range(dim):
+            x[i] = 0.
+            p[i] = r[i] = r0[i] = b[i]
+    else:
+        for i in range(dim):
+            x[i] = x0[i]
+        A(x, temp)
+        for i in range(dim):
+            p[i] = r0[i] = r[i] = b[i] - temp[i]
+    # accumulate(r0)
+
+    kappa = inner(r0, r, False, False)
+    residuals.append(sqrt(abs(kappa)))
+    for k in range(maxiter):
+        if precond is not None:
+            precond(p, p2)
+        A(p2, temp)
+        alpha = kappa / inner(r0, temp, False, False)
+        for i in range(dim):
+            s[i] = r[i]-alpha*temp[i]
+        if precond is not None:
+            precond(s, s2)
+        A(s2, temp2)
+        omega = inner(temp2, s, False, True) / norm(temp2, False)**2
+        for i in range(dim):
+            x[i] = x[i] + alpha*p2[i] + omega*s2[i]
+        for i in range(dim):
+            r[i] = s[i] - omega*temp2[i]
+        if use2norm:
+            residuals.append(norm(r, False))
+        else:
+            raise NotImplementedError()
+        if residuals[k+1] < tol:
+            return
+        kappaNew = inner(r0, r, False, False)
+        beta = kappaNew/kappa * alpha/omega
+        kappa = kappaNew
+        for i in range(dim):
+            p[i] = r[i] + beta*(p[i] - omega*temp[i])
+
+
+def estimateSpectralRadius(LinearOperator A,
+                           normBase norm=norm_serial(),
+                           REAL_t eps=1e-3,
+                           INDEX_t kMax=100):
+    """
+    Estimate the absolute value of the largest eigenvalue
+    using the power method.
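+
+    Repeatedly applies A and renormalizes, stopping once successive
+    estimates differ by less than eps or after kMax iterations.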
+    """
+    x = UniformOnUnitSphere(A.shape[0])
+    lold = 0
+    l = 1
+    k = 0
+    while np.absolute(l-lold) > eps and k <= kMax:
+        x = A.dot(x)
+        lold = l
+        l = norm(x, False)
+        x /= l
+        k += 1
+    return l
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+cpdef arnoldi(LinearOperator A,
+              REAL_t[::1] x0=None,
+              int maxiter=20,
+              REAL_t tol=1e-10,
+              LinearOperator Lprecond=None,
+              LinearOperator Rprecond=None,
+              ipBase inner=ip_serial(),
+              normBase norm=norm_serial(),
+              REAL_t[::1] temporaryMemory=None,
+              REAL_t[::1, :] temporaryMemoryQ=None,
+              REAL_t[::1, :] temporaryMemoryH=None):
+    """
+    Arnoldi iteration (the orthogonalization process underlying GMRES).
+
+    In a distributed solve, we want:
+    A: accumulated to distributed
+    Lprecond: distributed to accumulated
+    x0: accumulated
+
+    In the unpreconditioned distributed case, set Lprecond to accumulate.
+
+    Memory requirement:
+    dim * (maxiter+1) for Q
+    (maxiter+1) * maxiter for H
+    2*dim for r, Ar
+    """
+    cdef:
+        int i = -1, j, dim = A.shape[0], l
+        REAL_t[::1, :] Q, H
+        REAL_t[::1] r, Ar
+
+    if temporaryMemory is not None and temporaryMemory.shape[0] >= 2*dim:
+        r = temporaryMemory[:dim]
+        Ar = temporaryMemory[dim:2*dim]
+    else:
+        r = uninitialized((dim), dtype=REAL)
+        Ar = uninitialized((dim), dtype=REAL)
+    if (temporaryMemoryQ is not None and
+            (temporaryMemoryQ.shape[0] == dim) and
+            (temporaryMemoryQ.shape[1] >= maxiter+1)):
+        Q = temporaryMemoryQ
+    else:
+        Q = uninitialized((dim, maxiter+1), dtype=REAL, order='F')
+    if (temporaryMemoryH is not None and
+            (temporaryMemoryH.shape[0] >= maxiter+1) and
+            (temporaryMemoryH.shape[1] >= maxiter)):
+        H = temporaryMemoryH
+    else:
+        H = np.zeros((maxiter+1, maxiter), dtype=REAL, order='F')
+
+    if x0 is None:
+        x0 = np.random.rand(dim)
+    for j in range(dim):
+        r[j] = x0[j]
+
+    for l in range(dim):
+        Q[l, 0] = r[l]/norm(r, True)  # acc
+    for i in range(maxiter):
+        ##############################
+        # Arnoldi iteration
+        for l in range(dim):
+            r[l] = Q[l, i]  # acc
+        if Rprecond:
+            # FIX: Use inplace multiplication
+            r = Rprecond*np.array(r, copy=False, dtype=REAL)
+        A(r, Ar)  # dist
+        if Lprecond:
+            Lprecond(Ar, r)  # acc
+        else:
+            for j in range(dim):
+                r[j] = Ar[j]
+        for j in range(i+1):
+            H[j, i] = inner(Q[:, j], r, True, True)
+            for l in range(dim):
+                r[l] -= H[j, i]*Q[l, j]  # acc
+        H[i+1, i] = norm(r, True)
+        if abs(H[i+1, i]) > tol:
+            for l in range(dim):
+                Q[l, i+1] = r[l]/H[i+1, i]  # acc
+        else:
+            return np.array(H, dtype=REAL)[:i+1, :i]
+    return np.array(H, dtype=REAL)
+
+
+def lanczos(A, x=None, numIter=5):
+    norm = np.linalg.norm
+    inner = np.vdot
+    if x is None:
+        x = np.ones((A.shape[0]))/np.sqrt(A.shape[0])
+    else:
+        x = x/norm(x)
+    H = uninitialized((2, numIter))
+    w = A*x
+    H[1, 0] = inner(w, x)
+    w -= H[1, 0]*x
+    # alpha[0] = inner(w, x)
+    # w -= alpha[0]*x
+    for m in range(1, numIter):
+        # beta[m-1] = norm(w)
+        # if abs(beta[m-1]) < 1e-10:
+        #     break
+        # xold = x
+        # x = w/beta[m-1]
+        H[0, m] = norm(w)
+        if abs(H[0, m]) < 1e-10:
+            H = H[:, :m]
+            break
+        xold = x
+        x = w/H[0, m]
+
+        # w = A*x-beta[m-1]*xold
+        # alpha[m] = inner(w, x)
+        # w -= alpha[m]*x
+        w = A*x-H[0, m]*xold
+        H[1, m] = inner(w, x)
+        w -= H[1, m]*x
+    return H
+
+def lanczos2(A, M, Minv, x=None, numIter=5):
+    z = uninitialized((A.shape[0]))
+    inner = np.vdot
+    if x is None:
+        x = np.ones((A.shape[0]))
+    x /= np.sqrt(inner(x, M*x))
+    H = uninitialized((2, numIter))
+    w = A*x
+    H[1, 0] = inner(w, x)
+    w -= H[1, 0]*(M*x)
+    # alpha[0] = inner(w, x)
+    # w -= alpha[0]*x
+    for m in range(1, numIter):
+        # beta[m-1] = norm(w)
+        # if abs(beta[m-1]) <
1e-10: + # break + # xold = x + # x = w/beta[m-1] + # z = np.linalg.solve(M.toarray(), w) + Minv(w, z) + H[0, m] = np.sqrt(inner(w, z)) + if abs(H[0, m]) < 1e-10: + H = H[:, :m] + break + xold = x + x = z/H[0, m] + + # w = A*x-beta[m-1]*xold + # alpha[m] = inner(w, x) + # w -= alpha[m]*x + w = A*x-H[0, m]*(M*xold) + H[1, m] = inner(w, x) + w -= H[1, m]*(M*x) + return H diff --git a/base/PyNucleus_base/linear_operators.pxd b/base/PyNucleus_base/linear_operators.pxd new file mode 100644 index 0000000..2978905 --- /dev/null +++ b/base/PyNucleus_base/linear_operators.pxd @@ -0,0 +1,170 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t + +include "LinearOperator_decl_REAL.pxi" +include "LinearOperator_decl_COMPLEX.pxi" + +include "LinearOperatorWrapper_decl_REAL.pxi" +include "LinearOperatorWrapper_decl_COMPLEX.pxi" + +include "DenseLinearOperator_decl_REAL.pxi" +include "DenseLinearOperator_decl_COMPLEX.pxi" + +include "CSR_LinearOperator_decl_REAL.pxi" +include "CSR_LinearOperator_decl_COMPLEX.pxi" + +include "SSS_LinearOperator_decl_REAL.pxi" +include "SSS_LinearOperator_decl_COMPLEX.pxi" + +include "DiagonalLinearOperator_decl_REAL.pxi" +include "DiagonalLinearOperator_decl_COMPLEX.pxi" + +include "IJOperator_decl_REAL.pxi" +include "IJOperator_decl_COMPLEX.pxi" + + +cdef class Triple_Product_Linear_Operator(LinearOperator): + cdef: + public LinearOperator A, B, C + public REAL_t[::1] temporaryMemory + public REAL_t[::1] temporaryMemory2 + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + cdef void _residual(self, + REAL_t[::1] x, + REAL_t[::1] rhs, + REAL_t[::1] result, + BOOL_t simpleResidual=*) + + +cdef class split_CSR_LinearOperator(LinearOperator): + cdef: + public CSR_LinearOperator A1, A2 + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class sparseGraph(LinearOperator): + cdef: + public INDEX_t[::1] indices, indptr + public BOOL_t indices_sorted + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + cdef public INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class restrictionOp(sparseGraph): + cdef: + public int NoThreads + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class prolongationOp(sparseGraph): + cdef: + public int NoThreads + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef BOOL_t sort_indices(INDEX_t[::1] indptr, + INDEX_t[::1] indices, + REAL_t[::1] data) + + +cdef class blockOperator(LinearOperator): + cdef: + INDEX_t[::1] blockInptrLeft, blockInptrRight + REAL_t[::1] temp + public list subblocks + tuple blockShape + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class nullOperator(LinearOperator): + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + cdef INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class identityOperator(LinearOperator): + cdef: + REAL_t alpha + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) 
except -1 + cdef INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class blockLowerInverse(blockOperator): + cdef: + list diagonalInverses + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class blockUpperInverse(blockOperator): + cdef: + list diagonalInverses + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + +cdef class wrapRealToComplex(ComplexLinearOperator): + cdef: + LinearOperator realA + REAL_t[::1] temporaryMemory, temporaryMemory2 + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1 + + +cdef class wrapRealToComplexCSR(ComplexLinearOperator): + cdef: + CSR_LinearOperator realA + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1 + + +cdef class HelmholtzShiftOperator(ComplexLinearOperator): + cdef: + CSR_LinearOperator M, S + COMPLEX_t shift + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1 + + +cdef class delayedConstructionOperator(LinearOperator): + cdef: + public BOOL_t isConstructed + public dict params + public LinearOperator A + cpdef int assure_constructed(self) except -1 + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 diff --git a/base/PyNucleus_base/linear_operators.pyx b/base/PyNucleus_base/linear_operators.pyx new file mode 100644 index 0000000..b7138c6 --- /dev/null +++ b/base/PyNucleus_base/linear_operators.pyx @@ -0,0 +1,1555 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from . myTypes import INDEX, REAL, COMPLEX +from . blas cimport gemv +from . 
blas import uninitialized +from cython.parallel cimport prange, parallel + +include "config.pxi" + +COMPRESSION = 'gzip' + +include "LinearOperator_REAL.pxi" +include "LinearOperator_COMPLEX.pxi" + +include "LinearOperatorWrapper_REAL.pxi" +include "LinearOperatorWrapper_COMPLEX.pxi" + +include "DenseLinearOperator_REAL.pxi" +include "DenseLinearOperator_COMPLEX.pxi" + +include "CSR_LinearOperator_REAL.pxi" +include "CSR_LinearOperator_COMPLEX.pxi" + +include "SSS_LinearOperator_REAL.pxi" +include "SSS_LinearOperator_COMPLEX.pxi" + +include "DiagonalLinearOperator_REAL.pxi" +include "DiagonalLinearOperator_COMPLEX.pxi" + +include "IJOperator_REAL.pxi" +include "IJOperator_COMPLEX.pxi" + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def transpose(S, inplace=True): + cdef: + INDEX_t i, j, c, temp + INDEX_t nrow = S.shape[0], ncol = S.shape[1] + INDEX_t[::1] indices_mv = S.indices + INDEX_t[::1] indptr_mv = S.indptr + np.ndarray[INDEX_t, ndim=1] newindices = uninitialized((S.nnz), dtype=INDEX) + np.ndarray[INDEX_t, ndim=1] newindptr = np.zeros((ncol+1), dtype=INDEX) + INDEX_t[::1] newindices_mv = newindices + INDEX_t[::1] newindptr_mv = newindptr + REAL_t[::1] data_mv + np.ndarray[REAL_t, ndim=1] newdata + REAL_t[::1] newdata_mv + + if hasattr(S, 'data'): + data_mv = S.data + newdata = uninitialized((S.nnz), dtype=REAL) + newdata_mv = newdata + + # count up occurrences of columns + for i in range(nrow): + for j in range(indptr_mv[i], indptr_mv[i+1]): + c = indices_mv[j] + newindptr_mv[c+1] += 1 + # make it into indptr array by cumsum + for j in range(1, ncol+1): + newindptr_mv[j] += newindptr_mv[j-1] + # fill new indices and data, use new indptr to index position + if hasattr(S, 'data'): + for i in range(nrow): + for j in range(indptr_mv[i], indptr_mv[i+1]): + c = indices_mv[j] + newindices_mv[newindptr_mv[c]] = i + newdata_mv[newindptr_mv[c]] = data_mv[j] + newindptr_mv[c] += 1 + else: + for i in range(nrow): + for j in range(indptr_mv[i], indptr_mv[i+1]): + c = indices_mv[j] + newindices_mv[newindptr_mv[c]] = i + newindptr_mv[c] += 1 + # set new indptr back by one position + temp = 0 + for i in range(ncol+1): + newindptr_mv[i], temp = temp, newindptr_mv[i] + if inplace: + S.indices = newindices + S.indptr = newindptr + if hasattr(S, 'data'): + S.data = newdata + S._shape = (ncol, nrow) + else: + # if hasattr(S, 'data'): + # return csr_matrix((newdata, newindices, newindptr)) + if isinstance(S, restrictionOp): + return prolongationOp(newindices, newindptr, ncol, nrow) + elif isinstance(S, prolongationOp): + return restrictionOp(newindices, newindptr, ncol, nrow) + elif isinstance(S, CSR_LinearOperator): + A = CSR_LinearOperator(newindices, newindptr, newdata) + A.num_rows = ncol + A.num_columns = nrow + return A + else: + raise NotImplementedError() + + +# We are using this for cascade calculation of residuals. 
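
# --- Editor's sketch (not part of the patch): a rough NumPy equivalent of the
# Triple_Product_Linear_Operator defined below. It applies y = A*B*C*x through
# two preallocated scratch vectors, so no temporaries are created per matvec.
# The class name TripleProductSketch is hypothetical.
import numpy as np

class TripleProductSketch:
    def __init__(self, A, B, C):
        # shapes: A is (m, k), B is (k, l), C is (l, n)
        assert A.shape[1] == B.shape[0]
        assert B.shape[1] == C.shape[0]
        self.A, self.B, self.C = A, B, C
        self.tmp1 = np.empty(C.shape[0])   # holds C @ x
        self.tmp2 = np.empty(B.shape[0])   # holds B @ (C @ x)

    def matvec(self, x, y):
        np.matmul(self.C, x, out=self.tmp1)
        np.matmul(self.B, self.tmp1, out=self.tmp2)
        np.matmul(self.A, self.tmp2, out=y)
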
+cdef class Triple_Product_Linear_Operator(LinearOperator): + def __init__(self, + LinearOperator A, + LinearOperator B, + LinearOperator C, + REAL_t[::1] temporaryMemory=None, + REAL_t[::1] temporaryMemory2=None): + assert A.num_columns == B.num_rows + assert B.num_columns == C.num_rows + super(Triple_Product_Linear_Operator, self).__init__(A.num_rows, + C.num_columns) + self.A = A + self.B = B + self.C = C + if temporaryMemory is not None: + assert temporaryMemory.shape[0] == self.B.num_columns + self.temporaryMemory = temporaryMemory + else: + self.temporaryMemory = uninitialized((self.B.num_columns), dtype=REAL) + if temporaryMemory2 is not None: + assert temporaryMemory2.shape[0] == self.A.num_columns + self.temporaryMemory2 = temporaryMemory2 + else: + self.temporaryMemory2 = uninitialized((self.A.num_columns), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + self.C(x, self.temporaryMemory) + self.B(self.temporaryMemory, self.temporaryMemory2) + self.A(self.temporaryMemory2, y) + return 0 + + +cdef class split_CSR_LinearOperator(LinearOperator): + def __init__(self, + CSR_LinearOperator A1, + CSR_LinearOperator A2): + LinearOperator.__init__(self, + A1.indptr.shape[0]-1, + A1.indptr.shape[0]-1) + self.A1 = A1 + self.A2 = A2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + self.A1.matvec(x, y) + self.A2.matvec_no_overwrite(x, y) + return 0 + + def to_csr(self): + return self.A1.to_csr() + self.A2.to_csr() + + def getnnz(self): + return self.A1.nnz+self.A2.nnz + + nnz = property(fget=getnnz) + + def __repr__(self): + return '<%dx%d %s with %d stored elements>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nnz) + + def HDF5write(self, node): + grpA1 = node.create_group('A1') + self.A1.HDF5write(grpA1) + grpA2 = node.create_group('A2') + self.A2.HDF5write(grpA2) + node.attrs['type'] = 'split_csr' + + @staticmethod + def HDF5read(node): + A1 = CSR_LinearOperator.HDF5read(node['A1']) + A2 = CSR_LinearOperator.HDF5read(node['A2']) + return split_CSR_LinearOperator(A1, A2) + + def sort_indices(self): + self.A1.sort_indices() + self.A2.sort_indices() + + def get_indices_sorted(self): + return self.A1.indices_sorted and self.A2.indices_sorted + + indices_sorted = property(fget=get_indices_sorted) + + +cdef class sparseGraph(LinearOperator): + def __init__(self, INDEX_t[::1] indices, INDEX_t[::1] indptr, + INDEX_t num_rows, INDEX_t num_columns): + self.indices = indices + self.indptr = indptr + super(sparseGraph, self).__init__(num_rows, num_columns) + self.indices_sorted = False + + def copy(self): + return sparseGraph(self.indices.copy(), + self.indptr.copy(), + self.num_rows, + self.num_columns) + + def transpose(self): + newindices = uninitialized(self.nnz, INDEX) + newindptr = np.zeros(self.num_columns+1, INDEX) + cdef: + INDEX_t i, j, c, temp + INDEX_t[::1] indices_mv = self.indices + INDEX_t[::1] indptr_mv = self.indptr + INDEX_t[::1] newindices_mv = newindices + INDEX_t[::1] newindptr_mv = newindptr + for i in range(self.num_rows): + for j in range(indptr_mv[i], indptr_mv[i+1]): + c = indices_mv[j] + newindptr_mv[c+1] += 1 + for j in range(1, self.num_columns+1): + newindptr_mv[j] += newindptr_mv[j-1] + for i in range(self.num_rows): + for j in range(indptr_mv[i], indptr_mv[i+1]): + c = indices_mv[j] + 
newindices_mv[newindptr_mv[c]] = i + newindptr_mv[c] += 1 + temp = 0 + for i in range(self.num_columns+1): + newindptr_mv[i], temp = temp, newindptr_mv[i] + self.indices = newindices + self.indptr = newindptr + self.num_columns, self.num_rows = self.num_rows, self.num_columns + self.shape = (self.num_rows, self.num_columns) + + def getnnz(self): + return self.indptr[-1] + + nnz = property(fget=getnnz) + + def getshape(self): + return (self.num_rows, self.num_columns) + + def setshape(self, val): + self.num_rows, self.num_columns = val + + shape = property(fget=getshape, fset=setshape) + + def isSorted(self): + """ + Check if column indices are sorted. + """ + cdef: + INDEX_t i, nnz, s, p, q + nnz = self.indptr[-1] + for i in range(self.indptr.shape[0]-1): + s = self.indptr[i] + if s == nnz: + continue + p = self.indices[s] + for q in self.indices[self.indptr[i]+1:self.indptr[i+1]]: + if q <= p: + return False + else: + p = q + return True + + def sliceRow(self, slice): + cdef: + INDEX_t last = 0 + INDEX_t length + INDEX_t newRowIdx = 0 + INDEX_t i, j, k + INDEX_t[::1] indices_mv = self.indices + INDEX_t[::1] indptr_mv = self.indptr + INDEX_t[::1] ind + for i in slice: + length = indptr_mv[i+1]-indptr_mv[i] + ind = indices_mv[indptr_mv[i]:indptr_mv[i+1]] + indptr_mv[newRowIdx+1] = last+length + for k, j in enumerate(range(indptr_mv[newRowIdx], indptr_mv[newRowIdx+1])): + indices_mv[j] = ind[k] + newRowIdx += 1 + last += length + self.indices = self.indices[:last] + self.indptr = self.indptr[:newRowIdx+1] + self.num_rows = newRowIdx + self.shape = (self.num_rows, self.num_columns) + + def sliceColumn(self, slice): + self.transpose() + self.sliceRow(slice) + self.transpose() + + def to_csr(self): + from scipy.sparse import csr_matrix + return csr_matrix((np.ones(len(self.indices)), self.indices, + self.indptr), + shape=(self.num_rows, self.num_columns)) + + def todense(self): + return self.to_csr().todense() + + def __getstate__(self): + return {'indices': self.indices, + 'indptr': self.indptr, + 'num_rows': self.num_rows, + 'num_columns': self.num_columns} + + def __setstate__(self, value): + self.__init__(value['indices'], value['indptr'], value['num_rows'], value['num_columns']) + + def __repr__(self): + return '<%dx%d %s with %d stored elements>' % (self.num_rows, self.num_columns, self.__class__.__name__, self.nnz) + + cdef INDEX_t matvec(self, REAL_t[::1] x, REAL_t[::1] y) except -1: + cdef: + INDEX_t[::1] indices_mv = self.indices + INDEX_t[::1] indptr_mv = self.indptr + INDEX_t i, j + REAL_t sum + for i in range(self.num_rows): + sum = 0.0 + for j in range(indptr_mv[i], indptr_mv[i+1]): + sum += x[indices_mv[j]] + y[i] = sum + return 0 + + cdef public INDEX_t matvec_no_overwrite(self, REAL_t[::1] x, REAL_t[::1] y) except -1: + cdef: + INDEX_t[::1] indices_mv = self.indices + INDEX_t[::1] indptr_mv = self.indptr + INDEX_t i, j + REAL_t sum + for i in range(self.num_rows): + sum = 0.0 + for j in range(indptr_mv[i], indptr_mv[i+1]): + sum += x[indices_mv[j]] + y[i] += sum + return 0 + + def sort_indices(self): + cdef REAL_t[::1] temp = uninitialized((0), dtype=REAL) + sort_indices(self.indptr, self.indices, temp) + self.indices_sorted = True + + def HDF5write(self, node): + node.create_dataset('indices', data=np.array(self.indices, + copy=False), + compression=COMPRESSION) + node.create_dataset('indptr', data=np.array(self.indptr, + copy=False), + compression=COMPRESSION) + node.attrs['num_rows'] = self.num_rows + node.attrs['num_columns'] = self.num_columns + node.attrs['type'] = 
'sparseGraph'
+
+    @staticmethod
+    def HDF5read(node):
+        return sparseGraph(np.array(node['indices'], dtype=INDEX),
+                           np.array(node['indptr'], dtype=INDEX),
+                           node.attrs['num_rows'], node.attrs['num_columns'])
+
+
+cdef class restrictionOp(sparseGraph):
+    def __init__(self, INDEX_t[::1] indices, INDEX_t[::1] indptr,
+                 INDEX_t num_rows, INDEX_t num_columns, int NoThreads=1):
+        super(restrictionOp, self).__init__(indices, indptr,
+                                            num_rows, num_columns)
+        self.NoThreads = NoThreads
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef INDEX_t matvec(self, REAL_t[::1] x, REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i, j
+            REAL_t sum
+        if self.NoThreads > 1:
+            with nogil, parallel(num_threads=self.NoThreads):
+                for i in prange(self.num_rows, schedule='static'):
+                    sum = 0.0
+                    for j in range(self.indptr[i], self.indptr[i+1]):
+                        sum = sum + x[self.indices[j]]
+                    y[i] = x[i] + 0.5*sum
+        else:
+            for i in range(self.num_rows):
+                sum = 0.0
+                for j in range(self.indptr[i], self.indptr[i+1]):
+                    sum += x[self.indices[j]]
+                y[i] = x[i] + 0.5*sum
+        return 0
+
+    def to_csr(self):
+        from scipy.sparse import eye
+        return (eye(self.num_rows, self.num_columns, dtype=REAL, format='csr') +
+                0.5*super(restrictionOp, self).to_csr())
+
+    def to_csr_linear_operator(self):
+        B = self.to_csr()
+        C = CSR_LinearOperator(B.indices, B.indptr, B.data)
+        C.num_rows = B.shape[0]
+        C.num_columns = B.shape[1]
+        return C
+
+    def HDF5write(self, node):
+        node.create_dataset('indices', data=np.array(self.indices,
+                                                     copy=False),
+                            compression=COMPRESSION)
+        node.create_dataset('indptr', data=np.array(self.indptr,
+                                                    copy=False),
+                            compression=COMPRESSION)
+        node.attrs['num_rows'] = self.num_rows
+        node.attrs['num_columns'] = self.num_columns
+        node.attrs['type'] = 'restriction'
+
+    @staticmethod
+    def HDF5read(node):
+        return restrictionOp(np.array(node['indices'], dtype=INDEX),
+                             np.array(node['indptr'], dtype=INDEX),
+                             node.attrs['num_rows'], node.attrs['num_columns'])
+
+    def restrictMatrix(self, LinearOperator A, LinearOperator Ac):
+        multiply_restr(self, A, Ac)
+
+
+cdef class prolongationOp(sparseGraph):
+    def __init__(self, INDEX_t[::1] indices, INDEX_t[::1] indptr,
+                 INDEX_t num_rows, INDEX_t num_columns, int NoThreads=1):
+        super(prolongationOp, self).__init__(indices, indptr, num_rows, num_columns)
+        self.NoThreads = NoThreads
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef INDEX_t matvec(self, REAL_t[::1] x, REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i, j
+            REAL_t sum
+        if self.NoThreads > 1:
+            with nogil, parallel(num_threads=self.NoThreads):
+                for i in prange(self.num_rows, schedule='static'):
+                    sum = 0.0
+                    for j in range(self.indptr[i], self.indptr[i+1]):
+                        sum = sum + x[self.indices[j]]
+                    y[i] = 0.5*sum
+                for i in prange(self.num_columns, schedule='static'):
+                    y[i] = y[i] + x[i]
+        else:
+            for i in range(self.num_rows):
+                sum = 0.0
+                for j in range(self.indptr[i], self.indptr[i+1]):
+                    sum += x[self.indices[j]]
+                y[i] = 0.5*sum
+            for i in range(self.num_columns):
+                y[i] += x[i]
+        return 0
+
+    def to_csr(self):
+        from scipy.sparse import eye
+        return (eye(self.num_rows, self.num_columns, dtype=REAL, format='csr') +
+                0.5*super(prolongationOp, self).to_csr())
+
+    def to_csr_linear_operator(self):
+        B = self.to_csr()
+        C = CSR_LinearOperator(B.indices, B.indptr, B.data)
+        C.num_rows = B.shape[0]
+        C.num_columns = B.shape[1]
+        return C
+
+    def HDF5write(self, node):
+        node.create_dataset('indices',
data=np.array(self.indices, + copy=False), + compression=COMPRESSION) + node.create_dataset('indptr', data=np.array(self.indptr, + copy=False), + compression=COMPRESSION) + node.attrs['num_rows'] = self.num_rows + node.attrs['num_columns'] = self.num_columns + node.attrs['type'] = 'prolongation' + + @staticmethod + def HDF5read(node): + return prolongationOp(np.array(node['indices'], dtype=INDEX), + np.array(node['indptr'], dtype=INDEX), + node.attrs['num_rows'], node.attrs['num_columns']) + + +###################################################################### +# Matrix restriction R*A*R.T for restrictionOp R + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t getEntry_restr(INDEX_t i, + INDEX_t j, + INDEX_t[::1] R_indptr, + INDEX_t[::1] R_indices, + INDEX_t[::1] A_indptr, + INDEX_t[::1] A_indices, + REAL_t[::1] A_data): + cdef: + INDEX_t kk, k, mm1, m1, mm2, m2 + REAL_t sum = 0., sumI + # calculate entry at (i, j) as combination (i, k), (k, m), (m, j) + # but the last one is transposed, so we have (i, k), (k, m), (j, m) + # J*A*R^t + for kk in range(R_indptr[i], R_indptr[i+1]): + k = R_indices[kk] + sumI = 0. + mm1 = A_indptr[k] + mm2 = R_indptr[j] + # Find matches between (k, m1) and (j, m2) + # A*J^t + while (mm1 < A_indptr[k+1]) and (mm2 < R_indptr[j+1]): + m1 = A_indices[mm1] + m2 = R_indices[mm2] + if m1 < m2: + mm1 += 1 + elif m1 > m2: + mm2 += 1 + else: + sumI += A_data[mm1]*0.5 + mm1 += 1 + mm2 += 1 + # A*I^t + mm1 = A_indptr[k] + while (mm1 < A_indptr[k+1]) and (A_indices[mm1] < j): + mm1 += 1 + if (mm1 < A_indptr[k+1]) and (A_indices[mm1] == j): + sumI += A_data[mm1] + sum += 0.5*sumI + # I*A*R^t + mm1 = A_indptr[i] + mm2 = R_indptr[j] + # Find matches between (i, m1) and (j, m2) + while (mm1 < A_indptr[i+1]) and (mm2 < R_indptr[j+1]): + m1 = A_indices[mm1] + m2 = R_indices[mm2] + if m1 < m2: + mm1 += 1 + elif m1 > m2: + mm2 += 1 + else: + sum += A_data[mm1]*0.5 + mm1 += 1 + mm2 += 1 + mm1 = A_indptr[i] + while (mm1 < A_indptr[i+1]) and (A_indices[mm1] < j): + mm1 += 1 + if (mm1 < A_indptr[i+1]) and (A_indices[mm1] == j): + sum += A_data[mm1] + return sum + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t getEntryFromD_restr(INDEX_t i, + INDEX_t j, + INDEX_t[::1] R_indptr, + INDEX_t[::1] R_indices, + INDEX_t[::1] A_indptr, + INDEX_t[::1] A_indices, + REAL_t[::1] A_diagonal): + cdef: + INDEX_t kk, k, mm1, m1, mm2, m2 + REAL_t sum + # J*D*J.t + # Find matches between (k, m1) and (j, m2) + mm1 = R_indptr[i] + mm2 = R_indptr[j] + sum = 0. 
+ while (mm1 < R_indptr[i+1]) and (mm2 < R_indptr[j+1]): + m1 = R_indices[mm1] + m2 = R_indices[mm2] + if m1 < m2: + mm1 += 1 + elif m1 > m2: + mm2 += 1 + else: + sum += 0.25*A_diagonal[m1] + mm1 += 1 + mm2 += 1 + # J*D*I.t + mm1 = R_indptr[i] + while (mm1 < R_indptr[i+1]) and (R_indices[mm1] < j): + mm1 += 1 + if (mm1 < R_indptr[i+1]) and (R_indices[mm1] == j): + sum += 0.5*A_diagonal[j] + # I*D*J.t + mm2 = R_indptr[j] + while (mm2 < R_indptr[j+1]) and (R_indices[mm2] < i): + mm2 += 1 + if (mm2 < R_indptr[j+1]) and (R_indices[mm2] == i): + sum += 0.5*A_diagonal[i] + return sum + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def multiply_restr(restrictionOp R, LinearOperator A, LinearOperator Ac): + cdef: + INDEX_t i, jj, j, kk, k, mm1, m1, mm2, m2 + REAL_t sum + INDEX_t[::1] indices = Ac.indices, indptr = Ac.indptr + REAL_t[::1] data = Ac.data, diagonal + INDEX_t[::1] R_indptr = R.indptr, R_indices = R.indices + INDEX_t[::1] A_indptr = A.indptr, A_indices = A.indices + REAL_t[::1] A_data = A.data, A_diagonal + # R can be written as I + J, entries of I are 1.0, entries of J are 0.5. + # If A is in SSS format, it can be written as D + L + L.t, + # the L part is handled as the A of a CSR matrix. + for i in range(indptr.shape[0]-1): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntry_restr(i, j, + R_indptr, + R_indices, + A_indptr, + A_indices, + A_data) + if isinstance(A, SSS_LinearOperator): + A_diagonal = A.diagonal + diagonal = Ac.diagonal + # R*L.t*R.t + for i in range(indptr.shape[0]-1): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntry_restr(j, i, + R_indptr, + R_indices, + A_indptr, + A_indices, + A_data) + for i in range(indptr.shape[0]-1): + # I*D*I.t + diagonal[i] += A_diagonal[i] + # R*L*R.t + R*L.t*R.t + diagonal[i] += 2*getEntry_restr(i, i, + R_indptr, + R_indices, + A_indptr, + A_indices, + A_data) + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntryFromD_restr(i, j, + R_indptr, + R_indices, + A_indptr, + A_indices, + A_diagonal) + diagonal[i] += getEntryFromD_restr(i, i, + R_indptr, + R_indices, + A_indptr, + A_indices, + A_diagonal) + + +###################################################################### +# Matrix restriction R*A*R.T for CSR matrix R + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t getEntry(const INDEX_t i, + const INDEX_t j, + const INDEX_t[::1] R_indptr, + const INDEX_t[::1] R_indices, + const REAL_t[::1] R_data, + const INDEX_t[::1] A_indptr, + const INDEX_t[::1] A_indices, + const REAL_t[::1] A_data): + cdef: + INDEX_t k, kk, mm1, m1, mm2, m2 + REAL_t sum = 0., sumI + # calculate entry at (i, j) as combination (i, k), (k, m), (m, j) + # but the last one is transposed, so we have (i, k), (k, m), (j, m) + # R*A*R^t + for kk in range(R_indptr[i], R_indptr[i+1]): + k = R_indices[kk] + sumI = 0. 
+ mm1 = A_indptr[k] + mm2 = R_indptr[j] + # Find matches between (k, m1) and (j, m2) + # A*R^t + while (mm1 < A_indptr[k+1]) and (mm2 < R_indptr[j+1]): + m1 = A_indices[mm1] + m2 = R_indices[mm2] + if m1 < m2: + mm1 += 1 + elif m1 > m2: + mm2 += 1 + else: + sumI += A_data[mm1]*R_data[mm2] + mm1 += 1 + mm2 += 1 + sum += R_data[kk]*sumI + return sum + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t getEntryFromD(const INDEX_t i, + const INDEX_t j, + const INDEX_t[::1] R_indptr, + const INDEX_t[::1] R_indices, + const REAL_t[::1] R_data, + const INDEX_t[::1] A_indptr, + const INDEX_t[::1] A_indices, + const REAL_t[::1] A_diagonal): + cdef: + INDEX_t mm1, m1, mm2, m2 + REAL_t sum + # R*D*R.t + # Find matches between (k, m1) and (j, m2) + mm1 = R_indptr[i] + mm2 = R_indptr[j] + sum = 0. + while (mm1 < R_indptr[i+1]) and (mm2 < R_indptr[j+1]): + m1 = R_indices[mm1] + m2 = R_indices[mm2] + if m1 < m2: + mm1 += 1 + elif m1 > m2: + mm2 += 1 + else: + sum += R_data[mm1]*A_diagonal[m1]*R_data[mm2] + mm1 += 1 + mm2 += 1 + return sum + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def multiply2(CSR_LinearOperator P, LinearOperator A, LinearOperator Ac): + cdef: + INDEX_t k, ll, l, ii, i, jj, j + REAL_t akl, pki, plj + INDEX_t[::1] P_indptr = P.indptr, P_indices = P.indices + INDEX_t[::1] A_indptr = A.indptr, A_indices = A.indices + REAL_t[::1] P_data = P.data, A_data = A.data + REAL_t[::1] A_diagonal + + for k in range(A_indptr.shape[0]-1): + for ll in range(A_indptr[k], A_indptr[k+1]): + l = A_indices[ll] + akl = A_data[ll] + for ii in range(P_indptr[k], P_indptr[k+1]): + i = P_indices[ii] + pki = P_data[ii] + for jj in range(P_indptr[l], P_indptr[l+1]): + j = P_indices[jj] + plj = P_data[jj] + Ac.addToEntry(i, j, pki*akl*plj) + if isinstance(A, SSS_LinearOperator): + for k in range(A_indptr.shape[0]-1): + for ll in range(A_indptr[k], A_indptr[k+1]): + l = A_indices[ll] + akl = A_data[ll] + for ii in range(P_indptr[k], P_indptr[k+1]): + i = P_indices[ii] + pki = P_data[ii] + for jj in range(P_indptr[l], P_indptr[l+1]): + j = P_indices[jj] + plj = P_data[jj] + Ac.addToEntry(j, i, pki*akl*plj) + + A_diagonal = A.diagonal + for k in range(A_indptr.shape[0]-1): + akl = A_diagonal[k] + for ii in range(P_indptr[k], P_indptr[k+1]): + i = P_indices[ii] + pki = P_data[ii] + for jj in range(P_indptr[k], P_indptr[k+1]): + j = P_indices[jj] + plj = P_data[jj] + Ac.addToEntry(i, j, pki*akl*plj) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def multiply(CSR_LinearOperator R, LinearOperator A, LinearOperator Ac): + cdef: + INDEX_t i, jj, j + INDEX_t[::1] indices = Ac.indices, indptr = Ac.indptr + REAL_t[::1] data = Ac.data, diagonal + INDEX_t[::1] R_indptr = R.indptr, R_indices = R.indices + INDEX_t[::1] A_indptr = A.indptr, A_indices = A.indices + REAL_t[::1] R_data = R.data, A_data = A.data, A_diagonal + # If A is in SSS format, it can be written as D + L + L.t, + # the L part is handled as the A of a CSR matrix. 
+ for i in range(indptr.shape[0]-1): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntry(i, j, + R_indptr, + R_indices, + R_data, + A_indptr, + A_indices, + A_data) + if isinstance(A, SSS_LinearOperator): + A_diagonal = A.diagonal + diagonal = Ac.diagonal + # R*L.t*R.t + for i in range(indptr.shape[0]-1): + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntry(j, i, + R_indptr, + R_indices, + R_data, + A_indptr, + A_indices, + A_data) + for i in range(indptr.shape[0]-1): + # R*L*R.t + R*L.t*R.t + diagonal[i] += 2*getEntry(i, i, + R_indptr, + R_indices, + R_data, + A_indptr, + A_indices, + A_data) + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + data[jj] += getEntryFromD(i, j, + R_indptr, + R_indices, + R_data, + A_indptr, + A_indices, + A_diagonal) + diagonal[i] += getEntryFromD(i, i, + R_indptr, + R_indices, + R_data, + A_indptr, + A_indices, + A_diagonal) + + +cdef class blockOperator(LinearOperator): + def __init__(self, list subblocks): + cdef: + INDEX_t n, m, i, j, M + self.blockShape = (len(subblocks), len(subblocks[0])) + self.blockInptrLeft = np.zeros((self.blockShape[0]+1), dtype=INDEX) + self.blockInptrRight = np.zeros((self.blockShape[1]+1), dtype=INDEX) + n = 0 + m = 0 + M = -1 + for i in range(self.blockShape[0]): + assert len(subblocks[i]) == self.blockShape[1] + n += subblocks[i][0].shape[0] + self.blockInptrLeft[i+1] = self.blockInptrLeft[i]+subblocks[i][0].shape[0] + self.blockInptrRight[0] = 0 + m = 0 + for j in range(self.blockShape[1]): + m += subblocks[i][j].shape[1] + self.blockInptrRight[j+1] = self.blockInptrRight[j]+subblocks[i][j].shape[1] + if M >= 0: + assert m == M + else: + M = m + super(blockOperator, self).__init__(n, m) + self.subblocks = subblocks + self.temp = uninitialized((self.num_rows), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t i, j, k + LinearOperator lo + y[:] = 0. 
+        for i in range(self.blockShape[0]):
+            lo = self.subblocks[i][0]
+            lo.matvec(x[self.blockInptrRight[0]:self.blockInptrRight[1]],
+                      y[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]])
+            for j in range(1, self.blockShape[1]):
+                lo = self.subblocks[i][j]
+                lo.matvec(x[self.blockInptrRight[j]:self.blockInptrRight[j+1]],
+                          self.temp[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]])
+                for k in range(self.blockInptrLeft[i], self.blockInptrLeft[i+1]):
+                    y[k] += self.temp[k]
+                # lo.matvec_no_overwrite(x[self.blockInptrRight[j]:self.blockInptrRight[j+1]],
+                #                        y[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]])
+        return 0
+
+    def toarray(self):
+        cdef:
+            INDEX_t i, j
+        B = uninitialized((self.num_rows, self.num_columns), dtype=REAL)
+        for i in range(self.blockShape[0]):
+            for j in range(self.blockShape[1]):
+                lo = self.subblocks[i][j]
+                B[self.blockInptrLeft[i]:self.blockInptrLeft[i+1],
+                  self.blockInptrRight[j]:self.blockInptrRight[j+1]] = lo.toarray()
+        return B
+
+
+cdef class blockDiagonalOperator(blockOperator):
+    def __init__(self, list diagonalBlocks):
+        subblocks = []
+        numBlocks = len(diagonalBlocks)
+        for i in range(numBlocks):
+            d = diagonalBlocks[i]
+            row = []
+            for j in range(i):
+                row.append(nullOperator(d.shape[0], diagonalBlocks[j].shape[1]))
+            row.append(d)
+            for j in range(i+1, numBlocks):
+                row.append(nullOperator(d.shape[0], diagonalBlocks[j].shape[1]))
+            subblocks.append(row)
+        super(blockDiagonalOperator, self).__init__(subblocks)
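
# --- Editor's sketch (not part of the patch): a dense NumPy analogue of
# blockOperator.matvec. x and y are sliced by the right/left block offsets
# (blockInptrRight/blockInptrLeft above) and each subblock contributes to its
# block row of the result. block_matvec is a hypothetical helper name.
import numpy as np

def block_matvec(blocks, offsetsLeft, offsetsRight, x):
    y = np.zeros(offsetsLeft[-1])
    for i, row in enumerate(blocks):
        yi = slice(offsetsLeft[i], offsetsLeft[i+1])
        for j, Aij in enumerate(row):
            y[yi] += Aij @ x[offsetsRight[j]:offsetsRight[j+1]]
    return y

# e.g. a 2x2 block system [[A, B], [0, C]] with A (2x2), B (2x3), C (2x3):
# y = block_matvec([[A, B], [np.zeros((2, 2)), C]], [0, 2, 4], [0, 2, 5], x)
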
+
+
+cdef class nullOperator(LinearOperator):
+    def __init__(self, INDEX_t num_rows, INDEX_t num_columns):
+        super(nullOperator, self).__init__(num_rows, num_columns)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i
+        for i in range(self.num_rows):
+            y[i] = 0.
+        return 0
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec_no_overwrite(self,
+                                     REAL_t[::1] x,
+                                     REAL_t[::1] y) except -1:
+        return 0
+
+    def toarray(self):
+        return np.zeros((self.num_rows, self.num_columns), dtype=REAL)
+
+    def get_diagonal(self):
+        return np.zeros((min(self.num_rows, self.num_columns)), dtype=REAL)
+
+    diagonal = property(fget=get_diagonal)
+
+
+cdef class identityOperator(LinearOperator):
+    def __init__(self, INDEX_t num_rows, REAL_t alpha=1.0):
+        super(identityOperator, self).__init__(num_rows, num_rows)
+        self.alpha = alpha
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i
+        for i in range(self.num_rows):
+            y[i] = self.alpha*x[i]
+        return 0
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec_no_overwrite(self,
+                                     REAL_t[::1] x,
+                                     REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i
+        for i in range(self.num_rows):
+            y[i] += self.alpha*x[i]
+        return 0
+
+    def isSparse(self):
+        return True
+
+    def to_csr(self):
+        from scipy.sparse import csr_matrix
+        indptr = np.arange((self.num_rows+1), dtype=INDEX)
+        indices = np.arange((self.num_rows), dtype=INDEX)
+        data = self.alpha*np.ones((self.num_rows), dtype=REAL)
+        return csr_matrix((data,
+                           indices,
+                           indptr),
+                          shape=self.shape)
+
+    def toarray(self):
+        return self.alpha*np.eye(self.num_rows, dtype=REAL)
+
+    def get_diagonal(self):
+        return self.alpha*np.ones((self.num_rows), dtype=REAL)
+
+    diagonal = property(fget=get_diagonal)
+
+
+cdef class blockLowerInverse(blockOperator):
+    def __init__(self, subblocks, diagonalInverses):
+        if isinstance(subblocks, blockOperator):
+            super(blockLowerInverse, self).__init__(subblocks.subblocks)
+        else:
+            super(blockLowerInverse, self).__init__(subblocks)
+        for i in range(self.blockShape[0]):
+            for j in range(i+1, self.blockShape[1]):
+                assert isinstance(self.subblocks[i][j], nullOperator)
+        self.diagonalInverses = diagonalInverses
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i, j, k
+            LinearOperator lo
+        y[:] = 0.
+ for i in range(self.blockShape[0]): + for j in range(i): + lo = self.subblocks[i][j] + lo.matvec(y[self.blockInptrRight[j]:self.blockInptrRight[j+1]], + self.temp[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]]) + for k in range(self.blockInptrLeft[i], self.blockInptrLeft[i+1]): + y[k] += self.temp[k] + for k in range(self.blockInptrLeft[i], self.blockInptrLeft[i+1]): + self.temp[k] = x[k] - y[k] + lo = self.diagonalInverses[i] + lo.matvec(self.temp[self.blockInptrRight[i]:self.blockInptrRight[i+1]], + y[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]]) + return 0 + + +cdef class blockUpperInverse(blockOperator): + def __init__(self, subblocks, diagonalInverses): + if isinstance(subblocks, blockOperator): + super(blockUpperInverse, self).__init__(subblocks.subblocks) + else: + super(blockUpperInverse, self).__init__(subblocks) + for i in range(self.blockShape[0]): + for j in range(i): + assert isinstance(self.subblocks[i][j], nullOperator) + self.diagonalInverses = diagonalInverses + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t i, j, k + LinearOperator lo + y[:] = 0. + for i in range(self.blockShape[0]-1, -1, -1): + for j in range(i+1, self.blockShape[0]): + lo = self.subblocks[i][j] + lo.matvec(y[self.blockInptrRight[j]:self.blockInptrRight[j+1]], + self.temp[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]]) + for k in range(self.blockInptrLeft[i], self.blockInptrLeft[i+1]): + y[k] += self.temp[k] + for k in range(self.blockInptrLeft[i], self.blockInptrLeft[i+1]): + self.temp[k] = x[k] - y[k] + lo = self.diagonalInverses[i] + lo.matvec(self.temp[self.blockInptrRight[i]:self.blockInptrRight[i+1]], + y[self.blockInptrLeft[i]:self.blockInptrLeft[i+1]]) + return 0 + + +cdef class wrapRealToComplex(ComplexLinearOperator): + def __init__(self, LinearOperator A): + super(wrapRealToComplex, self).__init__(A.num_rows, A.num_columns) + self.realA = A + self.temporaryMemory = uninitialized((A.num_columns), dtype=REAL) + self.temporaryMemory2 = uninitialized((A.num_rows), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1: + cdef: + INDEX_t i + COMPLEX_t I = 1j + for i in range(self.num_columns): + self.temporaryMemory[i] = x[i].real + self.realA(self.temporaryMemory, self.temporaryMemory2) + for i in range(self.num_rows): + y[i] = self.temporaryMemory2[i] + for i in range(self.num_columns): + self.temporaryMemory[i] = x[i].imag + self.realA(self.temporaryMemory, self.temporaryMemory2) + for i in range(self.num_rows): + y[i] = y[i]+I*self.temporaryMemory2[i] + + return 0 + + def to_csr_linear_operator(self): + B = self.realA.to_csr() + return ComplexCSR_LinearOperator(B.indices, B.indptr, np.array(B.data).astype(COMPLEX)) + + def to_csr(self): + return self.to_csr_linear_operator().to_csr() + + +cdef class wrapRealToComplexCSR(ComplexLinearOperator): + def __init__(self, CSR_LinearOperator A): + super(wrapRealToComplexCSR, self).__init__(A.num_rows, A.num_columns) + self.realA = A + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1: + cdef: + INDEX_t i, j + COMPLEX_t temp + INDEX_t[::1] indptr = self.realA.indptr, indices = self.realA.indices + REAL_t[::1] data = self.realA.data + for i in range(self.num_rows): + 
temp = 0.0 + for j in range(indptr[i], indptr[i+1]): + temp += data[j]*x[indices[j]] + y[i] = temp + return 0 + + def to_csr_linear_operator(self): + return ComplexCSR_LinearOperator(self.realA.indices, self.realA.indptr, np.array(self.realA.data).astype(COMPLEX)) + + def to_csr(self): + return self.to_csr_linear_operator().to_csr() + + +cdef class HelmholtzShiftOperator(ComplexLinearOperator): + def __init__(self, CSR_LinearOperator S, CSR_LinearOperator M, COMPLEX_t shift): + super(HelmholtzShiftOperator, self).__init__(S.num_rows, S.num_columns) + self.M = M + self.S = S + self._diagonal = uninitialized((self.num_rows), dtype=COMPLEX) + self.setShift(shift) + + def setShift(self, COMPLEX_t shift): + cdef: + REAL_t[::1] d1, d2 + INDEX_t i + self.shift = shift + d1 = self.S.diagonal + d2 = self.M.diagonal + for i in range(self.num_rows): + self._diagonal[i] = d1[i] + shift*d2[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + COMPLEX_t[::1] x, + COMPLEX_t[::1] y) except -1: + cdef: + INDEX_t i, j + COMPLEX_t temp + INDEX_t[::1] Sindptr = self.S.indptr, Sindices = self.S.indices + REAL_t[::1] Sdata = self.S.data + INDEX_t[::1] Mindptr = self.M.indptr, Mindices = self.M.indices + REAL_t[::1] Mdata = self.M.data + for i in range(self.num_rows): + temp = 0.0 + for j in range(Sindptr[i], Sindptr[i+1]): + temp += Sdata[j]*x[Sindices[j]] + y[i] = temp + for i in range(self.num_rows): + temp = 0.0 + for j in range(Mindptr[i], Mindptr[i+1]): + temp += Mdata[j]*x[Mindices[j]] + y[i] = y[i]+self.shift*temp + return 0 + + def get_diagonal(self): + return self._diagonal + + def set_diagonal(self, COMPLEX_t[::1] diagonal): + self._diagonal = diagonal + + diagonal = property(fget=get_diagonal, fset=set_diagonal) + + def to_csr(self): + return self.S.to_csr()+self.shift*self.M.to_csr() + + def to_csr_linear_operator(self): + A = self.to_csr() + return ComplexCSR_LinearOperator(A.indices, A.indptr, A.data) + + def getnnz(self): + return self.M.nnz+self.S.nnz + + nnz = property(fget=getnnz) + + +cdef class debugOperator(LinearOperator): + cdef: + LinearOperator A + str name + + def __init__(self, LinearOperator A, str name=""): + super(debugOperator, self).__init__(A.num_rows, A.num_columns) + self.A = A + self.name = name + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + if self.name != "": + print(self.name) + print(self.shape, y.shape[0], x.shape[0]) + print('x', np.linalg.norm(x), np.array(x)) + self.A(x, y) + print('y', np.linalg.norm(y), np.array(y)) + + +cdef class sumMultiplyOperator(LinearOperator): + cdef: + public REAL_t[::1] coeffs + REAL_t[::1] z + public list ops + + def __init__(self, list ops, REAL_t[::1] coeffs): + assert len(ops) > 0 + assert len(ops) == coeffs.shape[0] + shape = ops[0].shape + super(sumMultiplyOperator, self).__init__(shape[0], shape[1]) + for i in range(1, len(ops)): + shape2 = ops[i].shape + assert (shape[0] == shape2[0]) and (shape[1] == shape2[1]) + self.ops = ops + self.coeffs = coeffs + self.z = uninitialized((self.ops[0].shape[0]), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t i + LinearOperator op + op = self.ops[0] + op.matvec(x, y) + scaleScalar(y, self.coeffs[0]) + for i in range(1, len(self.ops)): + op = self.ops[i] + 
op.matvec(x, self.z)
+            assign3(y, y, 1.0, self.z, self.coeffs[i])
+        return 0
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec_no_overwrite(self,
+                                     REAL_t[::1] x,
+                                     REAL_t[::1] y) except -1:
+        cdef:
+            INDEX_t i
+            LinearOperator op
+        for i in range(len(self.ops)):
+            op = self.ops[i]
+            op.matvec(x, self.z)
+            assign3(y, y, 1.0, self.z, self.coeffs[i])
+        return 0
+
+    def toarray(self):
+        return sum([c*op.toarray() for c, op in zip(self.coeffs, self.ops)])
+
+    def to_csr(self):
+        return sum([c*op.to_csr() for c, op in zip(self.coeffs, self.ops)])
+
+    def get_diagonal(self):
+        return sum([c*np.array(op.diagonal, copy=False) for c, op in zip(self.coeffs, self.ops)])
+
+    diagonal = property(fget=get_diagonal)
+
+
+cdef class interpolationOperator(sumMultiplyOperator):
+    cdef:
+        public REAL_t[::1] nodes
+        public REAL_t[:, ::1] W, W_prime, W_2prime
+        public REAL_t left, right
+
+    def __init__(self, list ops, REAL_t[::1] nodes, REAL_t left, REAL_t right):
+        cdef:
+            INDEX_t i
+            INDEX_t numNodes = nodes.shape[0]
+        coeffs = np.nan*np.ones((numNodes), dtype=REAL)
+        super(interpolationOperator, self).__init__(ops, coeffs)
+        self.nodes = nodes
+        self.left = left
+        self.right = right
+
+        for i in range(self.nodes.shape[0]-1):
+            assert self.nodes[i] < self.nodes[i+1]
+
+        self.W = np.zeros((numNodes, numNodes), dtype=REAL)
+        self.W_prime = np.zeros((numNodes, numNodes), dtype=REAL)
+        self.W_2prime = np.zeros((numNodes, numNodes), dtype=REAL)
+
+    def set(self, REAL_t val, int derivative=0):
+        cdef:
+            INDEX_t k, j, i
+            REAL_t[::1] nodes = self.nodes
+            INDEX_t numNodes = nodes.shape[0]
+            REAL_t[:, ::1] W = self.W
+            REAL_t[:, ::1] W_prime, W_2prime
+        assert self.left <= val
+        assert val <= self.right
+
+        if derivative == 0:
+            for i in range(numNodes):
+                for j in range(numNodes):
+                    if i == j:
+                        W[i, j] = 1.
+                    else:
+                        W[i, j] = 0.
+
+            for k in range(1, numNodes):
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W[j, i] = (W[j, i]*(val-nodes[k+j]) - W[1+j, i]*(val-nodes[j])) / (nodes[j] - nodes[k+j])
+            for i in range(numNodes):
+                self.coeffs[i] = W[0, i]
+        elif derivative == 1:
+            W_prime = self.W_prime
+            for i in range(numNodes):
+                for j in range(numNodes):
+                    if i == j:
+                        W[i, j] = 1.
+                    else:
+                        W[i, j] = 0.
+                    W_prime[i, j] = 0.
+
+            for k in range(1, numNodes):
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W_prime[j, i] = (W_prime[j, i]*(val-nodes[k+j]) + W[j, i] - W_prime[1+j, i]*(val-nodes[j]) - W[1+j, i]) / (nodes[j] - nodes[k+j])
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W[j, i] = (W[j, i]*(val-nodes[k+j]) - W[1+j, i]*(val-nodes[j])) / (nodes[j] - nodes[k+j])
+            for i in range(numNodes):
+                self.coeffs[i] = W_prime[0, i]
+        elif derivative == 2:
+            W_prime = self.W_prime
+            W_2prime = self.W_2prime
+            for i in range(numNodes):
+                for j in range(numNodes):
+                    if i == j:
+                        W[i, j] = 1.
+                    else:
+                        W[i, j] = 0.
+                    W_prime[i, j] = 0.
+                    W_2prime[i, j] = 0.
+
+            for k in range(1, numNodes):
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W_2prime[j, i] = (W_2prime[j, i]*(val-nodes[k+j]) + 2*W_prime[j, i] - W_2prime[1+j, i]*(val-nodes[j]) - 2*W_prime[1+j, i]) / (nodes[j] - nodes[k+j])
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W_prime[j, i] = (W_prime[j, i]*(val-nodes[k+j]) + W[j, i] - W_prime[1+j, i]*(val-nodes[j]) - W[1+j, i]) / (nodes[j] - nodes[k+j])
+                for j in range(numNodes-k):
+                    for i in range(numNodes):
+                        W[j, i] = (W[j, i]*(val-nodes[k+j]) - W[1+j, i]*(val-nodes[j])) / (nodes[j] - nodes[k+j])
+            for i in range(numNodes):
+                self.coeffs[i] = W_2prime[0, i]
+        else:
+            raise NotImplementedError('derivative {} not implemented'.format(derivative))
+
+    def getNumInterpolationNodes(self):
+        return self.nodes.shape[0]
+
+    numInterpolationNodes = property(fget=getNumInterpolationNodes)
+
+    def __repr__(self):
+        return '<%dx%d %s with %d interpolation nodes>' % (self.num_rows, self.num_columns, self.__class__.__name__, self.numInterpolationNodes)
+
+
+cdef class multiIntervalInterpolationOperator(LinearOperator):
+    cdef:
+        public list ops
+        INDEX_t selected
+        REAL_t left, right
+
+    def __init__(self, list intervals, list nodes, list ops):
+        shape = ops[0][0].shape
+        super(multiIntervalInterpolationOperator, self).__init__(shape[0], shape[1])
+        self.ops = []
+        self.left = np.inf
+        self.right = -np.inf
+        for k in range(len(intervals)):
+            left, right = intervals[k]
+            self.left = min(self.left, left)
+            self.right = max(self.right, right)
+            self.ops.append(interpolationOperator(ops[k], nodes[k], left, right))
+        self.selected = -1
+
+    def set(self, REAL_t val, BOOL_t derivative=False):
+        cdef:
+            interpolationOperator op
+            INDEX_t k
+            REAL_t left, right
+        assert self.left <= val, (val, self.left)
+        assert val <= self.right, (val, self.right)
+        for k in range(len(self.ops)):
+            op = self.ops[k]
+            left, right = op.left, op.right
+            if (left <= val) and (val <= right):
+                op.set(val, derivative)
+                self.selected = k
+                break
+        else:
+            assert False
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] y) except -1:
+        cdef:
+            interpolationOperator op
+        assert self.selected != -1
+        op = self.ops[self.selected]
+        op.matvec(x, y)
+        return 0
+
+    def toarray(self):
+        assert self.selected != -1
+        return self.ops[self.selected].toarray()
+
+    def to_csr(self):
+        assert self.selected != -1
+        return self.ops[self.selected].to_csr()
+
+    def get_diagonal(self):
+        assert self.selected != -1
+        return self.ops[self.selected].diagonal
+
+    diagonal = property(fget=get_diagonal)
+
+    def getNumInterpolationNodes(self):
+        cdef:
+            INDEX_t numInterpolationNodes = 0
+            INDEX_t k
+            interpolationOperator op
+        for k in range(len(self.ops)):
+            op = self.ops[k]
+            numInterpolationNodes += op.numInterpolationNodes
+        return numInterpolationNodes
+
+    numInterpolationNodes = property(fget=getNumInterpolationNodes)
+
+    def getSelectedOp(self):
+        assert self.selected != -1
+        return self.ops[self.selected]
+
+    def __repr__(self):
+        return '<%dx%d %s with %d intervals and %d interpolation nodes>' % (self.num_rows, self.num_columns, self.__class__.__name__, len(self.ops), self.numInterpolationNodes)
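
# --- Editor's sketch (not part of the patch): the Neville-type recurrence that
# interpolationOperator.set(val) uses, written as a small NumPy helper. It
# computes the Lagrange basis weights l_i(val), so that sum_i l_i(val)*ops[i]
# interpolates the operator family at val. lagrange_weights is a hypothetical
# name for illustration.
import numpy as np

def lagrange_weights(nodes, val):
    n = len(nodes)
    W = np.eye(n)                       # row j tracks the order-k table entries
    for k in range(1, n):
        for j in range(n-k):
            W[j] = (W[j]*(val-nodes[k+j]) - W[j+1]*(val-nodes[j])) / (nodes[j] - nodes[k+j])
    return W[0].copy()

# Sanity check: the weights reproduce constants exactly, so they sum to 1.
w = lagrange_weights(np.array([0., 0.5, 1.]), 0.25)
assert abs(w.sum() - 1.) < 1e-12
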
+
+
+cdef class delayedConstructionOperator(LinearOperator):
+    def __init__(self, INDEX_t numRows, INDEX_t numCols):
+        super(delayedConstructionOperator, self).__init__(numRows, numCols)
+        self.isConstructed = False
+        self.params = {}
+
+    def construct(self):
+        raise NotImplementedError()
+
+    cpdef int assure_constructed(self) except -1:
+        if not self.isConstructed:
+            self.A = self.construct()
+            assert self.A.num_rows == self.num_rows, "A.num_rows = {} != self.num_rows = {}".format(self.A.num_rows, self.num_rows)
+            assert self.A.num_columns == self.num_columns, "A.num_columns = {} != self.num_columns = {}".format(self.A.num_columns, self.num_columns)
+            self.isConstructed = True
+        return 0
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] y) except -1:
+        self.assure_constructed()
+        self.A.matvec(x, y)
+        return 0
+
+    def toarray(self):
+        self.assure_constructed()
+        return self.A.toarray()
+
+    def to_csr(self):
+        self.assure_constructed()
+        return self.A.to_csr()
+
+    def get_diagonal(self):
+        self.assure_constructed()
+        return self.A.diagonal
+
+    diagonal = property(fget=get_diagonal)
+
+    def getnnz(self):
+        if hasattr(self.A, 'nnz'):
+            return self.A.nnz
+        else:
+            return None
+
+    nnz = property(fget=getnnz)
+
+    def setParams(self, **kwargs):
+        for key in kwargs:
+            if key not in self.params or self.params[key] != kwargs[key]:
+                self.isConstructed = False
+            self.params[key] = kwargs[key]
diff --git a/base/PyNucleus_base/memProfile.py b/base/PyNucleus_base/memProfile.py
new file mode 100644
index 0000000..0d86f1e
--- /dev/null
+++ b/base/PyNucleus_base/memProfile.py
@@ -0,0 +1,18 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.#
+###################################################################################
+
+
+# requires memory_profiler package
+# run with mprof run ./driver.py
+# plot with mprof plot [logfile]
+
+try:
+    profile
+    memRegionsAreEnabled = True
+except NameError:
+    memRegionsAreEnabled = False
+    profile = None
diff --git a/base/PyNucleus_base/myTypes32.h b/base/PyNucleus_base/myTypes32.h
new file mode 100644
index 0000000..8333820
--- /dev/null
+++ b/base/PyNucleus_base/myTypes32.h
@@ -0,0 +1,10 @@
+/////////////////////////////////////////////////////////////////////////////////////
+// Copyright 2021 National Technology & Engineering Solutions of Sandia,            //
+// LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the            //
+// U.S. Government retains certain rights in this software.                         //
+// If you want to use this code, please refer to the README.rst and LICENSE files.  //
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+typedef int32_t INDEX_t;
+typedef float REAL_t;
diff --git a/base/PyNucleus_base/myTypes32.pxd b/base/PyNucleus_base/myTypes32.pxd
new file mode 100644
index 0000000..0d8a5d7
--- /dev/null
+++ b/base/PyNucleus_base/myTypes32.pxd
@@ -0,0 +1,16 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +from numpy cimport int8_t, int32_t, int64_t, float32_t, complex64_t, npy_bool + +ctypedef int32_t INDEX_t +ctypedef int8_t TAG_t +ctypedef int64_t ENCODE_t +ctypedef float32_t REAL_t +ctypedef complex64_t COMPLEX_t +ctypedef npy_bool BOOL_t diff --git a/base/PyNucleus_base/myTypes32.pyx b/base/PyNucleus_base/myTypes32.pyx new file mode 100644 index 0000000..14f113e --- /dev/null +++ b/base/PyNucleus_base/myTypes32.pyx @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np + +INDEX = np.int32 +TAG = np.int8 +ENCODE = np.int64 +REAL = np.float32 +COMPLEX = np.complex64 +BOOL = np.bool diff --git a/base/PyNucleus_base/myTypes64.h b/base/PyNucleus_base/myTypes64.h new file mode 100644 index 0000000..6ca3398 --- /dev/null +++ b/base/PyNucleus_base/myTypes64.h @@ -0,0 +1,10 @@ +///////////////////////////////////////////////////////////////////////////////////// +// Copyright 2021 National Technology & Engineering Solutions of Sandia, // +// LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the // +// U.S. Government retains certain rights in this software. // +// If you want to use this code, please refer to the README.rst and LICENSE files. // +///////////////////////////////////////////////////////////////////////////////////// + + +typedef int32_t INDEX_t; +typedef double REAL_t; diff --git a/base/PyNucleus_base/myTypes64.pxd b/base/PyNucleus_base/myTypes64.pxd new file mode 100644 index 0000000..4375ad6 --- /dev/null +++ b/base/PyNucleus_base/myTypes64.pxd @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from numpy cimport int8_t, int32_t, int64_t, float64_t, complex128_t, npy_bool + +ctypedef int32_t INDEX_t +ctypedef int8_t TAG_t +ctypedef int64_t ENCODE_t +ctypedef float64_t REAL_t +ctypedef complex128_t COMPLEX_t +ctypedef npy_bool BOOL_t diff --git a/base/PyNucleus_base/myTypes64.pyx b/base/PyNucleus_base/myTypes64.pyx new file mode 100644 index 0000000..21efb00 --- /dev/null +++ b/base/PyNucleus_base/myTypes64.pyx @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import numpy as np + +INDEX = np.int32 +TAG = np.int8 +ENCODE = np.int64 +REAL = np.float64 +COMPLEX = np.complex128 +BOOL = np.bool diff --git a/base/PyNucleus_base/performanceLogger.pxd b/base/PyNucleus_base/performanceLogger.pxd new file mode 100644 index 0000000..772813c --- /dev/null +++ b/base/PyNucleus_base/performanceLogger.pxd @@ -0,0 +1,62 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes cimport BOOL_t, REAL_t +ctypedef object OrderedDict_t +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py cimport MPI + + +cdef class FakeTimer: + cdef void start(self) + cdef void end(self) + cpdef void enterData(self) + + +cdef class Timer(FakeTimer): + cdef: + double startTime + double startTime_unsynced + double elapsed + double elapsed_unsynced + double startMem + double endMem + str key + FakePLogger parent + BOOL_t manualDataEntry + BOOL_t sync + BOOL_t memoryProfiling + BOOL_t memoryRegionsAreEnabled + MPI.Comm comm + cdef void start(self) + cdef void end(self) + cpdef void enterData(self) + + +cdef class FakePLogger: + cdef: + BOOL_t memoryProfiling + object process + cpdef void empty(self) + cpdef void addValue(self, str key, value) + cpdef FakeTimer Timer(self, str key, BOOL_t manualDataEntry=*) + + +cdef class PLogger(FakePLogger): + cdef: + public OrderedDict_t values + cpdef void empty(self) + cpdef void addValue(self, str key, value) + + +cdef class LoggingPLogger(PLogger): + cdef: + object logger + object loggerLevel + cpdef FakeTimer Timer(self, str key, BOOL_t manualDataEntry=*) diff --git a/base/PyNucleus_base/performanceLogger.pyx b/base/PyNucleus_base/performanceLogger.pyx new file mode 100644 index 0000000..eaa594e --- /dev/null +++ b/base/PyNucleus_base/performanceLogger.pyx @@ -0,0 +1,193 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from timeit import default_timer as time +from collections import OrderedDict +from . 
memProfile import memRegionsAreEnabled as memRegionsAreEnabledPy + +cdef REAL_t MB = 1./2**20 +cdef BOOL_t memRegionsAreEnabled = memRegionsAreEnabledPy +cdef dict memRegions = {} + + +cpdef void startMemRegion(str key): + # use memory profiler if available + global profile + key = key.replace(' ', '_').replace('.', '') + memRegions[key] = profile.timestamp(key) + memRegions[key].__enter__() + + +cpdef void endMemRegion(str key): + key = key.replace(' ', '_').replace('.', '') + memRegions[key].__exit__() + memRegions[key].timestamps[-1][0] + del memRegions[key] + + +cdef class FakeTimer: + def __init__(self): + pass + + cdef void start(self): + pass + + cdef void end(self): + pass + + def __enter__(self): + pass + + def __exit__(self, type, value, traceback): + pass + + cpdef void enterData(self): + pass + + +cdef class Timer(FakeTimer): + def __init__(self, str key, FakePLogger parent, BOOL_t manualDataEntry=False, MPI.Comm comm=None, BOOL_t sync=False, BOOL_t forceMemRegionOff=False): + self.key = key + self.elapsed = 0. + self.parent = parent + self.manualDataEntry = manualDataEntry + self.comm = comm + self.sync = sync + if forceMemRegionOff: + self.memoryRegionsAreEnabled = False + else: + self.memoryRegionsAreEnabled = memRegionsAreEnabled + self.memoryProfiling = self.parent.memoryProfiling + if self.sync: + assert self.comm is not None + + cdef void start(self): + if self.sync: + self.startTime_unsynced = time() + self.comm.Barrier() + if self.memoryProfiling: + self.startMem = self.parent.process.memory_info()[0]*MB + if self.memoryRegionsAreEnabled: + startMemRegion(self.key) + self.startTime = time() + + cdef void end(self): + if self.sync: + self.elapsed_unsynced += time()-self.startTime_unsynced + self.comm.Barrier() + self.elapsed += time()-self.startTime + if self.memoryProfiling: + self.endMem = self.parent.process.memory_info()[0]*MB + if self.memoryRegionsAreEnabled: + endMemRegion(self.key) + if not self.manualDataEntry: + self.parent.addValue(self.key, self.elapsed) + + def __enter__(self): + self.start() + + def __exit__(self, type, value, traceback): + self.end() + + cpdef void enterData(self): + self.parent.addValue(self.key, self.elapsed) + + def getInterval(self): + return self.elapsed + + def getIntervalUnsynced(self): + return self.elapsed_unsynced + + interval = property(fget=getInterval) + interval_unsynced = property(fget=getIntervalUnsynced) + + +cdef class LoggingTimer(Timer): + cdef: + object logger + object loggerLevel + str StartMessage + + def __init__(self, logger, loggerLevel, str key, FakePLogger parent, BOOL_t manualDataEntry=False, MPI.Comm comm=None, BOOL_t sync=False, str StartMessage=''): + super(LoggingTimer, self).__init__(key, parent, manualDataEntry, comm, sync) + self.logger = logger + self.loggerLevel = loggerLevel + self.StartMessage = StartMessage + + def __enter__(self): + if self.StartMessage != '': + self.logger.log(self.loggerLevel, self.StartMessage) + super(LoggingTimer, self).__enter__() + + def __exit__(self, type, value, traceback): + super(LoggingTimer, self).__exit__(type, value, traceback) + if not self.memoryProfiling: + self.logger.log(self.loggerLevel, self.key + ' in {:.3} s'.format(self.elapsed)) + else: + self.logger.log(self.loggerLevel, self.key + ' in {:.3} s, {} MB (Alloc: {} MB)'.format(self.elapsed, self.endMem, self.endMem-self.startMem)) + + +cdef class FakePLogger: + def __init__(self): + self.memoryProfiling = False + + cpdef void empty(self): + pass + + cpdef void addValue(self, str key, value): + pass + + def 
__getitem__(self, str key): + return None + + cpdef FakeTimer Timer(self, str key, BOOL_t manualDataEntry=False): + return FakeTimer() + + +cdef class PLogger(FakePLogger): + def __init__(self, process=None): + self.values = OrderedDict() + self.process = process + self.memoryProfiling = self.process is not None + + cpdef void empty(self): + self.values = OrderedDict() + + cpdef void addValue(self, str key, value): + try: + self.values[key].append(value) + except KeyError: + self.values[key] = [value] + + def __getitem__(self, str key): + return self.values[key] + + cpdef FakeTimer Timer(self, str key, BOOL_t manualDataEntry=False): + return Timer(key, self, manualDataEntry) + + def __repr__(self): + return self.report() + + def report(self, totalsOnly=True): + s = '' + for key in sorted(self.values.keys()): + if totalsOnly: + s += '{}: {} ({} calls)\n'.format(str(key), sum(self.values[key]), len(self.values[key])) + else: + s += str(key) +': ' + self.values[key].__repr__() + '\n' + return s + + +cdef class LoggingPLogger(PLogger): + def __init__(self, logger, loggerLevel): + PLogger.__init__(self) + self.logger = logger + self.loggerLevel = loggerLevel + + cpdef FakeTimer Timer(self, str key, BOOL_t manualDataEntry=False): + return LoggingTimer(self.logger, self.loggerLevel, key, self, manualDataEntry) diff --git a/base/PyNucleus_base/plot_utils.py b/base/PyNucleus_base/plot_utils.py new file mode 100644 index 0000000..620f1e6 --- /dev/null +++ b/base/PyNucleus_base/plot_utils.py @@ -0,0 +1,222 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
#
+###################################################################################
+
+
+def latexOptions(fig_width=None, fig_height=None, ratio=None,
+                 fontsize=10, otherMPL=None):
+    import numpy as np
+    from cycler import cycler
+    if fig_width is None:
+        # fig_width = 6.33
+        fig_width = 4.9
+    if fig_height is None:
+        if ratio is None:
+            golden_mean = (np.sqrt(5)-1.0)/2.0    # Aesthetic ratio
+            fig_height = fig_width*golden_mean    # height in inches
+        else:
+            fig_height = fig_width*ratio
+    MPLconf = {
+        'text.usetex': True,
+        'axes.titlesize': fontsize,
+        'axes.labelsize': fontsize,
+        'legend.fontsize': fontsize,
+        'xtick.labelsize': fontsize,
+        'ytick.labelsize': fontsize,
+        'lines.linewidth': 1,
+        'lines.markersize': 4,
+        'text.latex.preamble': r'\usepackage{amsmath,amsfonts,amssymb,mathrsfs}',
+        'font.family': 'serif',
+        'mathtext.fontset': 'cm',
+        # 'font.family': 'STIXGeneral',
+        # 'mathtext.rm': 'Bitstream Vera Sans',
+        # 'mathtext.it': 'Bitstream Vera Sans:italic',
+        # 'mathtext.bf': 'Bitstream Vera Sans:bold',
+        # 'font.serif': 'cm',
+        'font.size': fontsize,
+        'figure.figsize': [fig_width, fig_height],
+        'axes.prop_cycle': cycler('color', ['#006BA4', '#FF800E', '#ABABAB', '#595959', '#5F9ED1', '#C85200', '#898989', '#A2C8EC', '#FFBC79', '#CFCFCF'])
+    }
+    if otherMPL is not None:
+        MPLconf.update(otherMPL)
+    return MPLconf
+
+
+def latexContext(fig_width=None, fig_height=None, ratio=None,
+                 fontsize=10, otherMPL=None):
+    from matplotlib import rc_context
+    MPLconf = latexOptions(fig_width, fig_height, ratio, fontsize, otherMPL)
+    return rc_context(MPLconf)
+
+
+def beamerContext(fig_width=None, fig_height=None, ratio=None,
+                  fontsize=8, otherMPL=None):
+    MPLconf = {'lines.markersize': 1,
+               'savefig.dpi': 100*4,
+               'font.family': 'sans-serif',
+               'font.serif': ['DejaVu Serif',
+                              'Bitstream Vera Serif',
+                              'Computer Modern Roman',
+                              'New Century Schoolbook',
+                              'Century Schoolbook L',
+                              'Utopia',
+                              'ITC Bookman',
+                              'Bookman',
+                              'Nimbus Roman No9 L',
+                              'Times New Roman',
+                              'Times',
+                              'Palatino',
+                              'Charter',
+                              'serif'],
+               'patch.linewidth': 0.5}
+    if otherMPL is not None:
+        MPLconf.update(otherMPL)
+    return latexContext(fig_width, fig_height, ratio, fontsize, MPLconf)
+
+
+def posterContext(fig_width=None, fig_height=None, ratio=None,
+                  fontsize=25, otherMPL=None):
+    MPLconf = {'lines.markersize': 10,
+               'savefig.dpi': 100*4,
+               'font.family': 'serif',
+               'font.serif': 'cm',
+               'patch.linewidth': 0.5}
+    if otherMPL is not None:
+        MPLconf.update(otherMPL)
+    return latexContext(fig_width, fig_height, ratio, fontsize, MPLconf)
+
+
+def plot_with_latex(fun, fig_width=None, fig_height=None, ratio=None,
+                    fontsize=10, otherMPL=None):
+    from inspect import getfullargspec
+
+    argspec = getfullargspec(fun)
+
+    def new_fun(*args, **kwargs):
+        kwargs_new = {}
+        for i in range(len(args)):
+            kwargs_new[argspec[0][i]] = args[i]
+        for key in kwargs:
+            if key in argspec[0][:]:
+                kwargs_new[key] = kwargs[key]
+        with latexContext(fig_width, fig_height, ratio, fontsize, otherMPL):
+            r = fun(**kwargs_new)
+        return r
+    return new_fun
+
+
+def plot_with_beamer(fun, fig_width=None, fig_height=None, ratio=None,
+                     fontsize=8, otherMPL=None):
+    from inspect import getfullargspec
+
+    argspec = getfullargspec(fun)
+
+    def new_fun(*args, **kwargs):
+        kwargs_new = {}
+        for i in range(len(args)):
+            kwargs_new[argspec[0][i]] = args[i]
+        for key in kwargs:
+            if key in argspec[0][:]:
+                kwargs_new[key] = kwargs[key]
+        with beamerContext(fig_width, fig_height, ratio, fontsize, otherMPL):
+            r = fun(**kwargs_new)
+        return r
+    return new_fun
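+
+
+# A minimal usage sketch for the wrappers above (illustrative only; the plotting
+# routine `convergencePlot` and its data are hypothetical, not part of this
+# module's API): `plot_with_latex` re-binds positional arguments by name and
+# evaluates the wrapped function inside `latexContext`, so all matplotlib rc
+# settings are restored afterwards.
+def _exampleLatexPlot():
+    import matplotlib.pyplot as plt
+
+    @plot_with_latex
+    def convergencePlot(h, err):
+        # drawn with the LaTeX rc settings active
+        plt.loglog(h, err, '-o')
+        plt.xlabel('$h$')
+        plt.ylabel('error')
+
+    convergencePlot([0.1, 0.05, 0.025], [1e-2, 2.5e-3, 6.25e-4])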
+
+
+def plot_for_poster(fun, fig_width=None, fig_height=None, ratio=None,
+                    fontsize=25, otherMPL=None):
+    from inspect import getfullargspec
+
+    argspec = getfullargspec(fun)
+
+    def new_fun(*args, **kwargs):
+        kwargs_new = {}
+        for i in range(len(args)):
+            kwargs_new[argspec[0][i]] = args[i]
+        for key in kwargs:
+            if key in argspec[0][:]:
+                kwargs_new[key] = kwargs[key]
+        with posterContext(fig_width, fig_height, ratio, fontsize, otherMPL):
+            r = fun(**kwargs_new)
+        return r
+    return new_fun
+
+
+def plotTriangle(x, y, fac, ax=None):
+    import matplotlib.pyplot as plt
+    if ax is None:
+        ax = plt.gca()
+    dx = 0.8*(x[-2]-x[-1])
+    x1 = x[-1]+dx
+    y2 = y[-2]*(x[-1]/x1)**fac
+
+    ax.plot([x[-1], x[-1], x1, x[-1]],
+            [y[-2], y2, y[-2], y[-2]])
+    ax.text(0.5*x[-1]+0.5*x1, y[-2], str(1), horizontalalignment='right', verticalalignment='bottom')
+    ax.text(x[-1], 0.5*y[-2]+0.5*y2, str(fac), horizontalalignment='right', verticalalignment='top')
+
+
+def tabulate(x, results, floatfmt=None, groups=False, **kwargs):
+    import numpy as np
+    from . import roc
+    endl = '\n'
+    ltx_endl = ' \\\\'+endl
+    hline = '\\hline'+endl
+
+    def myFmt(a, fmt):
+        if isinstance(a, str):
+            return a
+        elif a is None:
+            return ''
+        else:
+            return fmt.format(a)
+
+    d = []
+    expected = ['theoretical']
+    grpheaders = ['']
+    columnfmt = 'r'
+    if groups:
+        for key, vals in results:
+            columnfmt += '|'
+            grpheaders.append('\\multicolumn{'+str(2*len(vals))+'}{c}{'+key+'}')
+            for result, expectedOrder in vals:
+                r = np.concatenate((np.array([[None]]),
+                                    roc(x, result))).flatten()
+                d.append(np.vstack((result.flatten(), r)))
+                expected += [None, expectedOrder]
+                columnfmt += 'rr'
+    else:
+        columnfmt += '|'
+        for result, expectedOrder in results:
+            r = np.concatenate((np.array([[None]]),
+                                roc(x, result))).flatten()
+            d.append(np.vstack((result.flatten(), r)))
+            expected += [None, expectedOrder]
+            columnfmt += 'rr'
+    d = np.vstack((x.flatten(), *d)).T
+
+    s = ''
+    s += '\\begin{tabular}{'+columnfmt+'}' + endl
+    if len(grpheaders) > 1:
+        s += ' & '.join(grpheaders) + ltx_endl
+    s += ' & '.join(kwargs['headers']) + ltx_endl
+    s += hline
+    for i in range(d.shape[0]):
+        s += ' & '.join([floatfmt[j].format(d[i, j]) if d[i, j] is not None else '' for j in range(d.shape[1])]) + ltx_endl
+    s += hline
+    s += ' & '.join([myFmt(expected[j], floatfmt[j]) for j in range(len(expected))]) + ltx_endl
+    s += '\\end{tabular}'+endl
+    return s
+
+
+def latexFormatRate(r, digits=2):
+    import numpy as np
+    if abs(r-1.0) < 1e-9:
+        return ''
+    elif abs(r-np.around(r)) < 1e-9:
+        return '^{{{}}}'.format(int(np.around(r)))
+    else:
+        return ('^{{{:.' + str(digits) + '}}}').format(r)
diff --git a/base/PyNucleus_base/setupUtils.py b/base/PyNucleus_base/setupUtils.py
new file mode 100644
index 0000000..6dd9c46
--- /dev/null
+++ b/base/PyNucleus_base/setupUtils.py
@@ -0,0 +1,16 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +from PyNucleus_packageTools import package as packageBase +import os + + +class package(packageBase): + def setInclude(self): + super(package, self).setInclude() + self.config['includeDirs'].append(os.path.dirname(os.path.realpath(__file__))) diff --git a/base/PyNucleus_base/solver_factory.py b/base/PyNucleus_base/solver_factory.py new file mode 100644 index 0000000..5ba37ec --- /dev/null +++ b/base/PyNucleus_base/solver_factory.py @@ -0,0 +1,100 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import warnings +from . linear_operators import ComplexLinearOperator +from . factory import factory + + +class solverFactory(factory): + def __init__(self): + self.singleLevelSolverFactory = factory() + self.multiLevelSolverFactory = factory() + + def isRegistered(self, name): + return (self.singleLevelSolverFactory.isRegistered(name) or + self.multiLevelSolverFactory.isRegistered(name) or + self.isRegisteredComboSolver(name)) + + def isRegisteredComboSolver(self, name): + names = name.split('-') + if len(names) <= 1: + return False + for name in names: + if not self.isRegistered(name): + return False + return True + + def register(self, name, classType, isMultilevelSolver=False, aliases=[]): + if not isMultilevelSolver: + self.singleLevelSolverFactory.register(name, classType, aliases=aliases) + else: + self.multiLevelSolverFactory.register(name, classType, aliases=aliases) + + def build(self, name, **kwargs): + setup = kwargs.pop('setup', False) + if len(name.split('-')) == 1: + name = self.getCanonicalName(name) + + if self.singleLevelSolverFactory.isRegistered(name): + A = kwargs.pop('A', None) + kwargs.pop('hierarchy', None) + num_rows = kwargs.pop('num_rows', -1) + if isinstance(A, ComplexLinearOperator) and self.singleLevelSolverFactory.isRegistered('complex_'+name): + name = 'complex_'+name + solver = self.singleLevelSolverFactory.build(name, A, num_rows) + elif self.multiLevelSolverFactory.isRegistered(name): + kwargs.pop('A', None) + hierarchy = kwargs.pop('hierarchy') + smoother = kwargs.pop('smoother', 'jacobi') + if not isinstance(hierarchy, list) and isinstance(hierarchy.builtHierarchies[-1].algebraicLevels[-1].A, ComplexLinearOperator) and self.multiLevelSolverFactory.isRegistered('complex_'+name): + name = 'complex_'+name + solver = self.multiLevelSolverFactory.build(name, hierarchy, smoother, **kwargs) + else: + raise KeyError(name) + for key in kwargs: + if hasattr(solver, key): + solver.__setattr__(key, kwargs[key]) + elif key in ('tolerance', 'maxIter'): + pass + else: + msg = '{} does not have attr \"{}\"'.format(solver, key) + warnings.warn(msg) + # raise NotImplementedError(msg) + if setup: + solver.setup() + return solver + else: + names = name.split('-') + solvers = [] + for name in names: + params = kwargs.get(name, {}) + if 'A' in kwargs: + params['A'] = kwargs['A'] + if 'num_rows' in kwargs: + params['num_rows'] = kwargs['num_rows'] + if 'hierarchy' in kwargs: + params['hierarchy'] = kwargs['hierarchy'] + solvers.append(self.build(name, **params)) + if setup: + 
for k in range(len(solvers)): + if not solvers[k].initialized: + solvers[k].setup() + for k in range(len(solvers)-1): + solvers[k].setPreconditioner(solvers[k+1].asPreconditioner()) + return solvers[0] + + def __repr__(self): + s = '' + if self.singleLevelSolverFactory.numRegistered() > 0: + s += 'Single level solvers:\n' + s += repr(self.singleLevelSolverFactory) + if self.multiLevelSolverFactory.numRegistered() > 0: + s += 'Multi level solvers:\n' + s += repr(self.multiLevelSolverFactory) + return s diff --git a/base/PyNucleus_base/solvers.pxd b/base/PyNucleus_base/solvers.pxd new file mode 100644 index 0000000..5b349a4 --- /dev/null +++ b/base/PyNucleus_base/solvers.pxd @@ -0,0 +1,220 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from . linear_operators cimport (LinearOperator, + ComplexLinearOperator, + CSR_LinearOperator, + SSS_LinearOperator, + Dense_LinearOperator, + invDiagonal, + ComplexCSR_LinearOperator, + ComplexSSS_LinearOperator, + ComplexDense_LinearOperator, + HelmholtzShiftOperator) +from . ip_norm cimport ipBase, normBase, complexipBase, complexNormBase +from . convergence cimport (convergenceMaster, noOpConvergenceMaster, + convergenceClient, noOpConvergenceClient) +from . performanceLogger cimport PLogger, FakePLogger + +include "config.pxi" + +cdef class solver: + cdef: + public BOOL_t initialized + LinearOperator A + public INDEX_t num_rows + public FakePLogger PLogger + cpdef void setup(self, LinearOperator A=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +cdef class preconditioner(LinearOperator): + cdef: + public solver solOp + public dict ctxAttrs + + +cdef class lu_solver(solver): + cdef: + INDEX_t[::1] perm_r, perm_c, perm + CSR_LinearOperator L, U + REAL_t[::1] temp_mem + object Ainv, lu + + +cdef class chol_solver(solver): + cdef: + object Ainv + + +cdef class ichol_solver(solver): + cdef: + public INDEX_t[::1] indptr, indices + public REAL_t[::1] data, diagonal, temp + CSR_LinearOperator L + + +cdef class ilu_solver(solver): + cdef: + public INDEX_t[::1] Lindptr, Lindices + public REAL_t[::1] Ldata + public INDEX_t[::1] Uindptr, Uindices + public REAL_t[::1] Udata + public INDEX_t[::1] perm_c, perm_r + REAL_t[::1] temp1 + REAL_t[::1] temp2 + public REAL_t fill_factor + + +cdef class jacobi_solver(solver): + cdef: + invDiagonal invD + + +cdef class iterative_solver(solver): + cdef: + public INDEX_t maxIter + public REAL_t tolerance + REAL_t tol + public list residuals + REAL_t[::1] x0 + REAL_t[::1] r + public ipBase inner + public normBase norm + public BOOL_t relativeTolerance + cpdef void setInitialGuess(self, REAL_t[::1] x0=*) + cpdef void setNormInner(self, normBase norm, ipBase inner) + cpdef void setOverlapNormInner(self, object overlaps, level=*) + cpdef void setup(self, LinearOperator A=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +cdef class krylov_solver(iterative_solver): + cdef: + public LinearOperator prec + BOOL_t isLeftPrec + public convergenceMaster convMaster + public convergenceClient convClient + cpdef void 
setup(self, LinearOperator A=*) + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +cdef class cg_solver(krylov_solver): + cdef: + REAL_t[::1] temporaryMemory + REAL_t[::1] precTemporaryMemory + public BOOL_t use2norm + cpdef void setup(self, LinearOperator A=*) + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +cdef class gmres_solver(krylov_solver): + cdef: + REAL_t[::1] Ar + public BOOL_t use2norm + public BOOL_t flexible + public INDEX_t restarts + REAL_t[::1, :] Q + REAL_t[::1, :] Z + REAL_t[::1, :] H + REAL_t[::1] c, s, gamma, y + cpdef void setup(self, LinearOperator A=*) + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +cdef class bicgstab_solver(krylov_solver): + cdef: + REAL_t[::1] r0 + REAL_t[::1] p + REAL_t[::1] p2 + REAL_t[::1] s + REAL_t[::1] s2 + REAL_t[::1] temp + REAL_t[::1] temp2 + public BOOL_t use2norm + cpdef void setup(self, LinearOperator A=*) + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=*) + cdef int solve(self, REAL_t[::1] b, REAL_t[::1] x) except -1 + + +IF USE_PYAMG: + cdef class pyamg_solver(iterative_solver): + cdef: + object ml + + +cdef class complex_solver: + cdef: + public BOOL_t initialized + ComplexLinearOperator A + public INDEX_t num_rows + public FakePLogger PLogger + cpdef void setup(self, ComplexLinearOperator A=*) + cdef int solve(self, COMPLEX_t[::1] b, COMPLEX_t[::1] x) except -1 + + +cdef class complex_preconditioner(ComplexLinearOperator): + cdef: + public complex_solver solOp + public dict ctxAttrs + + +cdef class complex_lu_solver(complex_solver): + cdef: + INDEX_t[::1] perm_r, perm_c, perm + ComplexCSR_LinearOperator L, U + COMPLEX_t[::1] temp_mem + object Ainv, lu + + +cdef class complex_iterative_solver(complex_solver): + cdef: + public INDEX_t maxIter + public REAL_t tolerance + REAL_t tol + public list residuals + COMPLEX_t[::1] x0 + COMPLEX_t[::1] r + public complexipBase inner + public complexNormBase norm + public BOOL_t relativeTolerance + cpdef void setInitialGuess(self, COMPLEX_t[::1] x0=*) + cpdef void setNormInner(self, complexNormBase norm, complexipBase inner) + cpdef void setOverlapNormInner(self, object overlaps, level=*) + cpdef void setup(self, ComplexLinearOperator A=*) + cdef int solve(self, COMPLEX_t[::1] b, COMPLEX_t[::1] x) except -1 + + +cdef class complex_krylov_solver(complex_iterative_solver): + cdef: + public ComplexLinearOperator prec + BOOL_t isLeftPrec + public convergenceMaster convMaster + public convergenceClient convClient + cpdef void setup(self, ComplexLinearOperator A=*) + cpdef void setPreconditioner(self, ComplexLinearOperator prec, BOOL_t left=*) + cdef int solve(self, COMPLEX_t[::1] b, COMPLEX_t[::1] x) except -1 + + +cdef class complex_gmres_solver(complex_krylov_solver): + cdef: + COMPLEX_t[::1] Ar + public BOOL_t use2norm + public BOOL_t flexible + public INDEX_t restarts + COMPLEX_t[::1, :] Q + COMPLEX_t[::1, :] Z + COMPLEX_t[::1, :] H + COMPLEX_t[::1] c, s, gamma, y + cpdef void setup(self, ComplexLinearOperator A=*) + cpdef void setPreconditioner(self, ComplexLinearOperator prec, BOOL_t left=*) + cdef int solve(self, COMPLEX_t[::1] b, COMPLEX_t[::1] x) except -1 diff --git a/base/PyNucleus_base/solvers.pyx b/base/PyNucleus_base/solvers.pyx new file mode 100644 index 0000000..1ad5b02 --- /dev/null +++ 
b/base/PyNucleus_base/solvers.pyx @@ -0,0 +1,1411 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from libc.math cimport sqrt +from . myTypes import INDEX, REAL, COMPLEX +from . blas cimport assign, assignScaled, assign3, update, updateScaled, mydot, gemvF +from . blas import uninitialized +from . ip_norm cimport vector_t, complex_vector_t, ip_serial, norm_serial, ip_distributed, norm_distributed, wrapRealInnerToComplex, wrapRealNormToComplex +from . linalg import ichol_csr, ichol_sss +from . linalg cimport (forward_solve_csc, backward_solve_csc, + forward_solve_sss_noInverse, + backward_solve_sss_noInverse) + +include "config.pxi" + + +cdef class solver: + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + self.initialized = False + self.PLogger = FakePLogger() + if A is not None: + self.A = A + self.num_rows = A.num_rows + else: + self.A = None + assert num_rows >= 0, 'num_rows < 0' + self.num_rows = num_rows + + cpdef void setup(self, LinearOperator A=None): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __call__(self, vector_t b, vector_t x): + return self.solve(b, x) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + assert self.initialized, 'Solver not initialized, need to call \'solver.setup\' first.' 
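+        # shared dimension checks; every concrete solver calls
+        # solver.solve(self, b, x) before running its own algorithm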
+ assert b.shape[0] == self.num_rows, \ + 'RHS vector has size {}, solver expects {}'.format(b.shape[0], + self.num_rows) + assert x.shape[0] == self.num_rows, \ + 'x vector has size {}, solver expects {}'.format(x.shape[0], + self.num_rows) + + def asPreconditioner(self): + return preconditioner(self) + + def __repr__(self): + return str(self) + + +cdef class preconditioner(LinearOperator): + def __init__(self, solver solOp, dict ctxAttrs={}): + LinearOperator.__init__(self, solOp.num_rows, solOp.num_rows) + self.solOp = solOp + self.ctxAttrs = ctxAttrs + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + vector_t x, + vector_t y) except -1: + assert self.solOp.initialized, 'solOp not initialized' + self.solOp.solve(x, y) + return 0 + + def __str__(self): + return str(self.solOp) + + +cdef class noop_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + self.initialized = True + + +cdef class lu_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + cdef: + INDEX_t i, j, explicitZeros, explicitZerosRow + REAL_t[:, ::1] data + + if A is not None: + self.A = A + + if not isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator, + Dense_LinearOperator)): + if self.A.isSparse(): + self.A = self.A.to_csr_linear_operator() + else: + self.A = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) + try_sparsification = False + sparsificationThreshold = 0.9 + if isinstance(self.A, Dense_LinearOperator) and try_sparsification: + explicitZeros = 0 + data = self.A.data + for i in range(self.A.num_rows): + explicitZerosRow = 0 + for j in range(self.A.num_columns): + if data[i, j] == 0.: + explicitZerosRow += 1 + explicitZeros += explicitZerosRow + if not (explicitZerosRow > sparsificationThreshold*self.A.num_columns): + break + if explicitZeros > sparsificationThreshold*self.A.num_rows*self.A.num_columns: + print('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*explicitZeros/REAL(self.A.num_rows*self.A.num_columns))) + self.A = CSR_LinearOperator.from_dense(self.A) + if isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator)): + from scipy.sparse.linalg import splu + try: + if isinstance(self.A, SSS_LinearOperator): + Ainv = splu(self.A.to_csc()) + else: + Ainv = splu(self.A.to_csr().tocsc()) + except RuntimeError: + print(self.A, np.array(self.A.data)) + raise + try: + self.L = CSR_LinearOperator.from_csr(Ainv.L) + self.U = CSR_LinearOperator.from_csr(Ainv.U) + self.perm_r = Ainv.perm_r + self.perm_c = Ainv.perm_c + n = self.perm_c.shape[0] + self.temp_mem = uninitialized((n), dtype=REAL) + except AttributeError: + self.Ainv = Ainv + elif isinstance(self.A, Dense_LinearOperator): + from scipy.linalg import lu_factor + self.lu, self.perm = lu_factor(self.A.data) + else: + raise NotImplementedError('Cannot use operator of type "{}"'.format(type(self.A))) + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + cdef: + INDEX_t i, n + INDEX_t[::1] perm_r, perm_c + vector_t temp + solver.solve(self, b, x) + if isinstance(self.A, (SSS_LinearOperator, CSR_LinearOperator)): + perm_r = self.perm_r + perm_c = self.perm_c + try: + temp = self.temp_mem + n 
= perm_c.shape[0] + for i in range(n): + temp[perm_r[i]] = b[i] + x[:] = 0. + forward_solve_csc(self.L.indptr, self.L.indices, self.L.data, + temp, x, + unitDiagonal=True) + temp[:] = 0. + backward_solve_csc(self.U.indptr, self.U.indices, self.U.data, x, temp) + for i in range(n): + x[i] = temp[perm_c[i]] + except AttributeError: + x[:] = self.Ainv.solve(np.array(b, copy=False, dtype=REAL)) + else: + from scipy.linalg import lu_solve + assign(x, b) + lu_solve((self.lu, self.perm), + np.array(x, copy=False, dtype=REAL), + overwrite_b=True) + return 1 + + def __str__(self): + return 'LU' + + +cdef class chol_solver(solver): + def __init__(self, LinearOperator A, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + if A is not None: + self.A = A + + if not isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator, + Dense_LinearOperator)): + if self.A.isSparse(): + self.A = self.A.to_csr_linear_operator() + else: + self.A = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray())) + + IF USE_CHOLMOD: + from sksparse.cholmod import cholesky + + if isinstance(self.A, (SSS_LinearOperator, + CSR_LinearOperator)): + try: + self.Ainv = cholesky(self.A.to_csc()) + except AttributeError: + self.Ainv = cholesky(self.A.to_csr().tocsc()) + elif isinstance(self.A, Dense_LinearOperator): + self.Ainv = cholesky(self.A.data) + else: + try: + self.A = self.A.to_csr_linear_operator() + self.Ainv = cholesky(self.A) + except AttributeError: + raise NotImplementedError() + ELSE: + raise NotImplementedError("Cholmod not available, install \"scikit-sparse\".") + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + np.array(x, copy=False, dtype=REAL)[:] = self.Ainv(np.array(b, copy=False, dtype=REAL)) + return 1 + + def __str__(self): + return 'Cholesky' + + +cdef class ichol_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + self.temp = uninitialized((self.num_rows), dtype=REAL) + + cpdef void setup(self, LinearOperator A=None): + cdef: + INDEX_t i + if A is not None: + self.A = A + if isinstance(self.A, CSR_LinearOperator): + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A) + elif isinstance(self.A, SSS_LinearOperator): + # self.indices, self.indptr, self.data, self.diagonal = ichol_sss(A) + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(self.A.to_csr_linear_operator()) + else: + try: + B = self.A.to_csr_linear_operator() + self.indices, self.indptr, self.data, self.diagonal = ichol_csr(B) + except: + raise NotImplementedError() + + IF USE_MKL_TRISOLVE: + from . 
linear_operators import diagonalOperator + T = CSR_LinearOperator(self.indices, self.indptr, self.data).to_csr()+diagonalOperator(self.diagonal).to_csr() + self.L = CSR_LinearOperator.from_csr(T) + ELSE: + for i in range(self.diagonal.shape[0]): + self.diagonal[i] = 1./self.diagonal[i] + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + self.temp[:] = 0.0 + IF USE_MKL_TRISOLVE: + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, b, self.temp, forward=True, unitDiagonal=False) + trisolve_mkl(self.L.indptr, self.L.indices, self.L.data, self.temp, x, forward=False, unitDiagonal=False) + ELSE: + forward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + b, self.temp, unitDiagonal=False) + backward_solve_sss_noInverse(self.indptr, self.indices, + self.data, self.diagonal, + self.temp, x) + return 1 + + def __str__(self): + return 'Incomplete Cholesky' + + +cdef class ilu_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + self.temp1 = uninitialized((self.num_rows), dtype=REAL) + self.temp2 = uninitialized((self.num_rows), dtype=REAL) + self.fill_factor = 1. + + cpdef void setup(self, LinearOperator A=None): + from scipy.sparse.linalg import spilu + Clu = spilu(A.to_csr().tocsc(), fill_factor=self.fill_factor) + self.Lindices = Clu.L.indices + self.Lindptr = Clu.L.indptr + self.Ldata = Clu.L.data + self.Uindices = Clu.U.indices + self.Uindptr = Clu.U.indptr + self.Udata = Clu.U.data + self.perm_r = Clu.perm_r + self.perm_c = Clu.perm_c + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + cdef: + INDEX_t i + self.temp1[:] = 0. + for i in range(x.shape[0]): + self.temp2[self.perm_r[i]] = b[i] + forward_solve_csc(self.Lindptr, self.Lindices, self.Ldata, + self.temp2, self.temp1, + unitDiagonal=True) + self.temp2[:] = 0. 
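+        # back-substitute through U, then undo the column permutation below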
+ backward_solve_csc(self.Uindptr, self.Uindices, self.Udata, + self.temp1, self.temp2) + for i in range(x.shape[0]): + x[i] = self.temp2[self.perm_c[i]] + return 1 + + def __str__(self): + return 'Incomplete LU' + + +cdef class jacobi_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + + cpdef void setup(self, LinearOperator A=None): + if A is not None: + self.A = A + self.invD = invDiagonal(self.A) + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + solver.solve(self, b, x) + self.invD.matvec(b, x) + return 1 + + def __str__(self): + return 'Jacobi' + + +cdef class iterative_solver(solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + solver.__init__(self, A, num_rows) + self.residuals = [] + self.setNormInner(norm_serial(), ip_serial()) + self.maxIter = -1 + self.tolerance = 1e-5 + self.relativeTolerance = False + self.x0 = None + + cpdef void setInitialGuess(self, vector_t x0=None): + if x0 is not None: + assert self.num_rows == x0.shape[0], \ + 'x0 vector has size {}, solver expects {}'.format(x0.shape[0], + self.num_rows) + self.x0 = x0 + else: + self.x0 = None + + cpdef void setNormInner(self, normBase norm, ipBase inner): + self.norm = norm + self.inner = inner + + cpdef void setOverlapNormInner(self, object overlaps, level=-1): + self.setNormInner(norm_distributed(overlaps, level), + ip_distributed(overlaps, level)) + + cpdef void setup(self, LinearOperator A=None): + if A is not None: + assert A.num_rows == self.num_rows, \ + 'A has {} rows, but solver expects {}.'.format(A.num_rows, self.num_rows) + self.A = A + else: + assert self.A is not None, 'A not set' + self.r = uninitialized((self.num_rows), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + cdef: + REAL_t res + vector_t r = self.r + LinearOperator A = self.A + normBase norm = self.norm + + solver.solve(self, b, x) + + if self.x0 is None: + for i in range(self.num_rows): + x[i] = 0. + elif &x[0] != &self.x0[0]: + assign(x, self.x0) + + if self.relativeTolerance: + A.residual(x, b, r, simpleResidual=self.x0 is None) # dist + res = norm.eval(r, False) # ip(dist, dist) + self.tol = self.tolerance*res + else: + self.tol = self.tolerance + + +cdef class krylov_solver(iterative_solver): + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + iterative_solver.__init__(self, A, num_rows) + self.prec = None + self.convMaster = None + self.convClient = None + + cpdef void setup(self, LinearOperator A=None): + iterative_solver.setup(self, A) + if self.prec is not None and isinstance(self.prec, preconditioner) and (not self.prec.solOp.initialized or A is not None): + self.prec.solOp.setup(self.A) + + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=True): + self.prec = prec + self.isLeftPrec = left + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, vector_t b, vector_t x) except -1: + iterative_solver.solve(self, b, x) + if not self.relativeTolerance: + self.A.residual(x, b, self.r, simpleResidual=self.x0 is None) # dist + + +cdef class cg_solver(krylov_solver): + """ + Conjugate Gradient iteration. 
+ + In a distributed solve, we want: + A: accumulated to distributed + precond: distributed to accumulated + b: distributed + x: accumulated + x0: accumulated + + In the unpreconditioned distributed case, set precond to accumulate. + + If use2norm is False, use Preconditioner norm of residual as + stopping criterion, otherwise use 2-norm of residual. + + Memory requirement: + 4*dim with preconditioner, + 3*dim without. + """ + + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + krylov_solver.__init__(self, A) + self.use2norm = False + self.maxIter = 50 + + cpdef void setup(self, LinearOperator A=None): + krylov_solver.setup(self, A) + self.temporaryMemory = uninitialized((2*self.num_rows), dtype=REAL) + self.initialized = True + + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=True): + krylov_solver.setPreconditioner(self, prec, left) + self.precTemporaryMemory = uninitialized(self.num_rows, dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef int solve(self, vector_t b, vector_t x) except -1: + krylov_solver.solve(self, b, x) + + cdef: + REAL_t beta, betaOld, alpha, temp + INDEX_t dim = self.num_rows + vector_t r = self.r + vector_t p = self.temporaryMemory[:dim] + vector_t Ap = self.temporaryMemory[dim:2*dim] + vector_t Br + INDEX_t i, k = 0 + list residuals = [] + ipBase inner = self.inner + normBase norm = self.norm + LinearOperator A = self.A + LinearOperator precond = None + REAL_t tol = self.tol + INDEX_t maxiter = self.maxIter + BOOL_t use2norm = self.use2norm + REAL_t convCrit = 0. + + if self.prec is not None: + precond = self.prec + Br = self.precTemporaryMemory + else: + Br = None + + # Krylov space spans whole solution space after dim-1 iterations + # Don't do this, doesn't work for distributed problem + # maxiter = min(maxiter, dim) + + if precond is None: + assign(p, r) + betaOld = inner.eval(r, p, True, True) # ip(dist, acc) + convCrit = sqrt(betaOld) + else: + precond.matvec(r, p) # acc + betaOld = inner.eval(r, p, False, True) # ip(dist, acc) + if use2norm: + convCrit = norm.eval(r, False) # ip(dist, dist) + else: + convCrit = sqrt(betaOld) + residuals.append(convCrit) + if convCrit <= tol: + self.residuals = residuals + return 0 + for i in range(maxiter): + A.matvec(p, Ap) # dist + alpha = betaOld/inner.eval(p, Ap, True, False) # ip(acc, dist) + updateScaled(x, p, alpha) # acc + updateScaled(r, Ap, -alpha) # dist + if k == 50: + # recalculate residual to avoid rounding errors + A.residual(x, b, r) # dist + k = 0 + if precond is None: + beta = norm.eval(r, True) # ip(dist, dist) + convCrit = beta + residuals.append(convCrit) + if convCrit <= tol: + self.residuals = residuals + return i + beta = beta**2 + temp = beta/betaOld + assign3(p, p, temp, r, 1.0) # acc + else: + precond.matvec(r, Br) # acc + beta = inner.eval(r, Br, False, True) # ip(dist, acc) + if use2norm: + convCrit = norm.eval(r, False) # ip(dist, dist) + else: + convCrit = sqrt(beta) + residuals.append(convCrit) + if convCrit <= tol: + self.residuals = residuals + return i + temp = beta/betaOld + assign3(p, p, temp, Br, 1.0) # acc + betaOld = beta + k += 1 + self.residuals = residuals + return maxiter + + def __str__(self): + s = 'CG(tolerance={},relTol={},maxIter={},2-norm={})'.format(self.tolerance, self.relativeTolerance, self.maxIter, self.use2norm) + if self.prec is not None: + if self.isLeftPrec: + return s+', left preconditioned by '+str(self.prec) + else: + return s+', right 
preconditioned by '+str(self.prec) + else: + return s + + +cdef class gmres_solver(krylov_solver): + """ + GMRES iteration. + + In a distributed solve, we want: + A: accumulated to distributed + Lprecond: distributed to accumulated + Rprecond: distributed to accumulated + b: distributed + x0: accumulated + x: accumulated + + In the unpreconditioned distributed case, set Lprecond to accumulate. + + Memory requirement: + dim * (maxiter+1) for Q + (maxiter+1) * maxiter for H + (4*maxiter + 2) + 2*dim for c,s,gamma,y and r, Ar + dim * (maxiter+1) for Z if flexible + """ + + def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1): + krylov_solver.__init__(self, A) + self.use2norm = False + self.flexible = False + self.restarts = 1 + + cpdef void setup(self, LinearOperator A=None): + krylov_solver.setup(self, A) + self.Ar = uninitialized((self.num_rows), dtype=REAL) + assert self.maxIter > 0, 'Need maxiter > 0' + self.H = np.ones((self.maxIter+1, self.maxIter), dtype=REAL, order='F') + # set first dim to 1, not 0, so that things work + d = max(self.num_rows, 1) + self.Q = uninitialized((d, self.maxIter+1), dtype=REAL, order='F') + if self.flexible: + self.Z = uninitialized((d, self.maxIter+1), dtype=REAL, order='F') + self.c = uninitialized((self.maxIter), dtype=REAL) + self.s = uninitialized((self.maxIter), dtype=REAL) + self.gamma = uninitialized((self.maxIter+1), dtype=REAL) + self.y = uninitialized((self.maxIter+1), dtype=REAL) + self.initialized = True + + cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=True): + krylov_solver.setPreconditioner(self, prec, left) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef int solve(self, vector_t b, vector_t x) except -1: + krylov_solver.solve(self, b, x) + + cdef: + int k, i = -1, j, dim = self.num_rows, l + REAL_t eps = 1e-15, beta, temp, rho, sigma + REAL_t[::1, :] Q = self.Q + REAL_t[::1, :] H = self.H + REAL_t[::1, :] Z = None + REAL_t[::1] c = self.c, s = self.s, gamma = self.gamma + vector_t y = self.y + vector_t r = self.r + vector_t Ar = self.Ar + BOOL_t breakout = False + BOOL_t converged + BOOL_t doLprecond = self.isLeftPrec and self.prec is not None + BOOL_t doRprecond = not self.isLeftPrec and self.prec is not None + LinearOperator A = self.A + LinearOperator Lprecond = None, Rprecond = None + list residuals = [] + ipBase inner = self.inner + normBase norm = self.norm + REAL_t tol = self.tol + INDEX_t maxiter = self.maxIter + INDEX_t restarts = self.restarts + BOOL_t flexible = self.flexible + int allIter = 0 + convergenceMaster convMaster=self.convMaster + convergenceClient convClient=self.convClient + + if doLprecond: + Lprecond = self.prec + if doRprecond: + Rprecond = self.prec + if flexible: + Z = self.Z + + for k in range(restarts): + if breakout: + self.residuals = residuals + return allIter + A.matvec(x, Ar) # dist + if doLprecond: + assign3(Ar, Ar, -1.0, b, 1.0) # dist + Lprecond.matvec(Ar, r) # acc + gamma[0] = norm.eval(r, True) + else: + assign3(r, b, 1.0, Ar, -1.0) # dist + gamma[0] = norm.eval(r, False) + if len(residuals) == 0: + residuals.append(abs(gamma[0])) + converged = abs(gamma[0]) < tol + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + self.residuals = residuals + return allIter + assignScaled(Q[:, 0], r, 1./gamma[0]) # acc for Lprecond, dist for Rprecond + for i in range(maxiter): + ############################## + # 
Arnoldi iteration + assign(r, Q[:, i]) # acc for Lprecond, dist for Rprecond + if not flexible: + if doLprecond: + A.matvec(r, Ar) # dist + Lprecond.matvec(Ar, r) # acc + elif doRprecond: + Rprecond.matvec(r, Ar) # acc + A.matvec(Ar, r) # dist + else: + A.matvec(r, Ar) + assign(r, Ar) + else: + if doLprecond: + A.matvec(r, Z[:, i]) # dist + Lprecond.matvec(Z[:, i], r) # acc + elif doRprecond: + Rprecond.matvec(r, Z[:, i]) # acc + A.matvec(Z[:, i], r) # dist + else: + A.matvec(r, Z[:, i]) + assign(r, Z[:, i]) + if doRprecond: + if dim > 0: + for j in range(i+1): + H[j, i] = inner.eval(Q[:, j], r, False, False) + updateScaled(r, Q[:, j], -H[j, i]) # dist + H[i+1, i] = norm.eval(r, False) + else: + if dim > 0: + for j in range(i+1): + H[j, i] = inner.eval(Q[:, j], r, True, True) + updateScaled(r, Q[:, j], -H[j, i]) # acc + H[i+1, i] = norm.eval(r, True) + converged = abs(H[i+1, i]) > eps + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + assignScaled(Q[:, i+1], r, 1./H[i+1, i]) # acc for Lprecond, dist for Rprecond + else: + breakout = True + break + ############################## + # Apply previous Givens rotations to last column of H + for j in range(i): + rho = H[j, i] + sigma = H[j+1, i] + H[j, i] = c[j]*rho + s[j]*sigma + H[j+1, i] = -s[j]*rho + c[j]*sigma + ############################## + # determine new Givens rotation + beta = sqrt(H[i, i]**2 + H[i+1, i]**2) + c[i] = H[i, i]/beta + s[i] = H[i+1, i]/beta + ############################## + # Apply new Givens rotation to H + H[i, i] = beta + # H[i+1, i] = 0.0 + ############################## + # Apply new Givens rotation to rhs + gamma[i+1] = -s[i]*gamma[i] + gamma[i] = c[i]*gamma[i] + residuals.append(abs(gamma[i+1])) + converged = abs(gamma[i+1]) < tol + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + breakout = True + break + allIter += i + ############################## + # perform back-solve for y + for j in range(i, -1, -1): + temp = gamma[j] + for l in range(j+1, i+1): + temp -= H[j, l]*y[l] + y[j] = temp/H[j, j] + ############################## + # update x + if not flexible: + gemvF(Q[:, :i+1], y[:i+1], r) # acc for Lprecond, dist for Rprecond + if doRprecond: + Rprecond.matvec(r, Ar) # acc + update(x, Ar) # acc + else: + update(x, r) # acc + else: + gemvF(Z[:, :i+1], y[:i+1], r) # dist for Lprecond, acc for Rprecond + assert not doLprecond # TODO: figure out what to do here + update(x, r) # acc + self.residuals = residuals + return allIter + + def __str__(self): + s = 'GMRES(tolerance={},relTol={},maxIter={},restarts={},2-norm={},flexible={})'.format(self.tolerance, self.relativeTolerance, self.maxIter, self.restarts, self.use2norm, self.flexible) + if self.prec is not None: + if self.isLeftPrec: + return s+', left preconditioned by '+str(self.prec) + else: + return s+', right preconditioned by '+str(self.prec) + else: + return s + + + +cdef class bicgstab_solver(krylov_solver): + """ + Stabilized Biconjugate Gradient iteration. + + In a distributed solve, we want: + A: accumulated to distributed + precond: distributed to accumulated + b: distributed + x: accumulated + x0: accumulated + + In the unpreconditioned distributed case, set precond to accumulate. + + If use2norm is False, use Preconditioner norm of residual as + stopping criterion, otherwise use 2-norm of residual. 
+
+    Memory requirement:
+    8*dim with preconditioner,
+    6*dim without.
+    """
+
+    def __init__(self, LinearOperator A=None, INDEX_t num_rows=-1):
+        krylov_solver.__init__(self, A)
+        self.use2norm = True
+        self.maxIter = 50
+
+    cpdef void setup(self, LinearOperator A=None):
+        krylov_solver.setup(self, A)
+        self.r0 = uninitialized((self.num_rows), dtype=REAL)
+        self.p = uninitialized((self.num_rows), dtype=REAL)
+        self.s = uninitialized((self.num_rows), dtype=REAL)
+        self.temp = uninitialized((self.num_rows), dtype=REAL)
+        self.temp2 = uninitialized((self.num_rows), dtype=REAL)
+        self.initialized = True
+
+    cpdef void setPreconditioner(self, LinearOperator prec, BOOL_t left=True):
+        krylov_solver.setPreconditioner(self, prec, left)
+        self.p2 = uninitialized(self.num_rows, dtype=REAL)
+        self.s2 = uninitialized(self.num_rows, dtype=REAL)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef int solve(self, vector_t b, vector_t x) except -1:
+        krylov_solver.solve(self, b, x)
+
+        cdef:
+            INDEX_t i, k, dim = self.num_rows
+            vector_t r0 = self.r0
+            vector_t r = self.r
+            vector_t p = self.p
+            vector_t p2
+            vector_t s = self.s
+            vector_t s2
+            vector_t temp = self.temp
+            vector_t temp2 = self.temp2
+            REAL_t kappa, kappaNew, alpha, omega, beta, tt
+            list residuals = []
+            ipBase inner = self.inner
+            normBase norm = self.norm
+            REAL_t tol = self.tol
+            INDEX_t maxiter = self.maxIter
+            BOOL_t use2norm = self.use2norm
+            LinearOperator A = self.A
+            LinearOperator precond
+
+        if self.prec is not None:
+            precond = self.prec
+            p2 = self.p2
+            s2 = self.s2
+        else:
+            precond = None
+            p2 = p
+            s2 = s
+
+        if precond is not None:
+            # need an accumulated vector for r0
+            assign(p, r)
+            precond.matvec(r, r0)
+        else:
+            for i in range(dim):
+                p[i] = r0[i] = r[i]
+
+        kappa = inner.eval(r, r0, False, True)
+        residuals.append(sqrt(kappa))
+        for k in range(maxiter):
+            if precond is not None:
+                precond.matvec(p, p2)
+            A.matvec(p2, temp)
+            tt = inner.eval(temp, r0, False, True)
+            alpha = kappa / tt
+            assign3(s, r, 1.0, temp, -alpha)
+            if precond is not None:
+                precond.matvec(s, s2)
+            A.matvec(s2, temp2)
+            omega = inner.eval(temp2, s, False, False) / norm.eval(temp2, False)**2
+            for i in range(dim):
+                x[i] += alpha*p2[i] + omega*s2[i]
+            assign3(r, s, 1.0, temp2, -omega)
+            if use2norm:
+                residuals.append(norm.eval(r, False))
+            else:
+                raise NotImplementedError()
+            if residuals[k+1] < tol:
+                self.residuals = residuals
+                return k
+            kappaNew = inner.eval(r, r0, False, True)
+            beta = kappaNew/kappa * alpha/omega
+            kappa = kappaNew
+            for i in range(dim):
+                p[i] = r[i] + beta*(p[i] - omega*temp[i])
+        self.residuals = residuals
+        return maxiter
+
+    def __str__(self):
+        s = 'BiCGStab(tolerance={},relTol={},maxIter={},2-norm={})'.format(self.tolerance, self.relativeTolerance, self.maxIter, self.use2norm)
+        if self.prec is not None:
+            if self.isLeftPrec:
+                return s+', left preconditioned by '+str(self.prec)
+            else:
+                return s+', right preconditioned by '+str(self.prec)
+        else:
+            return s
+
+
+IF USE_PYAMG:
+    from pyamg import smoothed_aggregation_solver
+
+    cdef class pyamg_solver(iterative_solver):
+        def __init__(self, LinearOperator A=None, num_rows=-1):
+            iterative_solver.__init__(self, A, num_rows)
+
+        cpdef void setup(self, LinearOperator A=None):
+            iterative_solver.setup(self, A)
+            # self.ml = ruge_stuben_solver(self.A.to_csr(),
+            #                              coarse_solver='splu',
+            #                              max_coarse=2500,
+            #                              presmoother=('gauss_seidel', {'sweep': 'forward'}),
+            #                              postsmoother=('gauss_seidel', {'sweep': 'backward'}))
+
+            self.ml = smoothed_aggregation_solver(self.A.to_csr(),
+                                                  np.ones((self.num_rows)),
+                                                  smooth=None,
+                                                  coarse_solver='splu',
+                                                  max_coarse=2500,
+                                                  presmoother=('gauss_seidel', {'sweep': 'forward'}),
+                                                  postsmoother=('gauss_seidel', {'sweep': 'backward'}))
+            self.initialized = True
+
+        cdef int solve(self, vector_t b, vector_t x) except -1:
+            residuals = []
+            x_np = np.array(x, copy=False)
+            if self.x0 is not None:
+                x_np[:] = self.ml.solve(np.array(b, copy=False),
+                                        x0=np.array(self.x0, copy=False),
+                                        tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg')
+            else:
+                x_np[:] = self.ml.solve(np.array(b, copy=False),
+                                        tol=self.tol, maxiter=self.maxIter, residuals=residuals, accel='cg')
+            self.residuals = residuals
+            return len(residuals)
+
+        def __str__(self):
+            return str(self.ml)
+
+
+######################################################################
+
+
+cdef class complex_solver:
+    def __init__(self, ComplexLinearOperator A=None, INDEX_t num_rows=-1):
+        self.initialized = False
+        self.PLogger = FakePLogger()
+        if A is not None:
+            self.A = A
+            self.num_rows = A.num_rows
+        else:
+            self.A = None
+            assert num_rows >= 0, 'num_rows < 0'
+            self.num_rows = num_rows
+
+    cpdef void setup(self, ComplexLinearOperator A=None):
+        raise NotImplementedError()
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def __call__(self, complex_vector_t b, complex_vector_t x):
+        return self.solve(b, x)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef int solve(self, complex_vector_t b, complex_vector_t x) except -1:
+        assert self.initialized, 'Solver not initialized, need to call \'solver.setup\' first.'
+        assert b.shape[0] == self.num_rows, \
+            'RHS vector has size {}, solver expects {}'.format(b.shape[0],
+                                                               self.num_rows)
+        assert x.shape[0] == self.num_rows, \
+            'x vector has size {}, solver expects {}'.format(x.shape[0],
+                                                             self.num_rows)
+
+    def asPreconditioner(self):
+        return complex_preconditioner(self)
+
+
+cdef class complex_preconditioner(ComplexLinearOperator):
+    def __init__(self, complex_solver solOp, dict ctxAttrs={}):
+        ComplexLinearOperator.__init__(self, solOp.num_rows, solOp.num_rows)
+        self.solOp = solOp
+        self.ctxAttrs = ctxAttrs
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self,
+                        complex_vector_t x,
+                        complex_vector_t y) except -1:
+        assert self.solOp.initialized, 'solOp not initialized'
+        self.solOp.solve(x, y)
+        return 0
+
+    def __str__(self):
+        return str(self.solOp)
+
+
+cdef class complex_lu_solver(complex_solver):
+    def __init__(self, ComplexLinearOperator A, INDEX_t num_rows=-1):
+        complex_solver.__init__(self, A, num_rows)
+
+    cpdef void setup(self, ComplexLinearOperator A=None):
+        if A is not None:
+            self.A = A
+
+        if isinstance(self.A, (ComplexSSS_LinearOperator, ComplexCSR_LinearOperator, HelmholtzShiftOperator)):
+            from scipy.sparse.linalg import splu
+            try:
+                if isinstance(self.A, ComplexSSS_LinearOperator):
+                    Ainv = splu(self.A.to_csc())
+                else:
+                    Ainv = splu(self.A.to_csr().tocsc())
+            except RuntimeError:
+                print(self.A, np.array(self.A.data))
+                raise
+            try:
+                self.L = ComplexCSR_LinearOperator.from_csr(Ainv.L)
+                self.U = ComplexCSR_LinearOperator.from_csr(Ainv.U)
+                self.perm_r = Ainv.perm_r
+                self.perm_c = Ainv.perm_c
+                n = self.perm_c.shape[0]
+                self.temp_mem = uninitialized((n), dtype=COMPLEX)
+            except AttributeError:
+                self.Ainv = Ainv
+        elif isinstance(self.A, ComplexDense_LinearOperator):
+            from 
scipy.linalg import lu_factor + self.lu, self.perm = lu_factor(self.A.data) + else: + raise NotImplementedError('Cannot use operator of type "{}"'.format(type(self.A))) + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, complex_vector_t b, complex_vector_t x) except -1: + cdef: + INDEX_t i, n + INDEX_t[::1] perm_r, perm_c + complex_vector_t temp + complex_solver.solve(self, b, x) + if isinstance(self.A, (ComplexSSS_LinearOperator, ComplexCSR_LinearOperator, HelmholtzShiftOperator)): + perm_r = self.perm_r + perm_c = self.perm_c + try: + temp = self.temp_mem + n = perm_c.shape[0] + for i in range(n): + temp[perm_r[i]] = b[i] + x[:] = 0. + forward_solve_csc(self.L.indptr, self.L.indices, self.L.data, + temp, x, + unitDiagonal=True) + temp[:] = 0. + backward_solve_csc(self.U.indptr, self.U.indices, self.U.data, x, temp) + for i in range(n): + x[i] = temp[perm_c[i]] + except AttributeError: + x[:] = self.Ainv.solve(np.array(b, copy=False, dtype=COMPLEX)) + elif isinstance(self.A, ComplexDense_LinearOperator): + from scipy.linalg import lu_solve + assign(x, b) + lu_solve((self.lu, self.perm), + np.array(x, copy=False, dtype=COMPLEX), + overwrite_b=True) + else: + raise NotImplementedError('Cannot use operator of type "{}"'.format(type(self.A))) + return 1 + + def __str__(self): + return 'LU' + + +cdef class complex_iterative_solver(complex_solver): + def __init__(self, ComplexLinearOperator A=None, INDEX_t num_rows=-1): + complex_solver.__init__(self, A, num_rows) + self.residuals = [] + self.setNormInner(wrapRealNormToComplex(norm_serial()), + wrapRealInnerToComplex(ip_serial())) + self.maxIter = -1 + self.tolerance = 1e-5 + self.relativeTolerance = False + self.x0 = None + + cpdef void setInitialGuess(self, complex_vector_t x0=None): + if x0 is not None: + assert self.num_rows == x0.shape[0], \ + 'x0 vector has size {}, solver expects {}'.format(x0.shape[0], + self.num_rows) + self.x0 = x0 + else: + self.x0 = None + + cpdef void setNormInner(self, complexNormBase norm, complexipBase inner): + self.norm = norm + self.inner = inner + + cpdef void setOverlapNormInner(self, object overlaps, level=-1): + self.setNormInner(wrapRealNormToComplex(norm_distributed(overlaps, level)), + wrapRealInnerToComplex(ip_distributed(overlaps, level))) + + cpdef void setup(self, ComplexLinearOperator A=None): + if A is not None: + assert A.num_rows == self.num_rows, \ + 'A has {} rows, but solver expects {}.'.format(A.num_rows, self.num_rows) + self.A = A + else: + assert self.A is not None, 'A not set' + self.r = uninitialized((self.num_rows), dtype=COMPLEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, complex_vector_t b, complex_vector_t x) except -1: + cdef: + REAL_t res + complex_vector_t r = self.r + ComplexLinearOperator A = self.A + complexNormBase norm = self.norm + + complex_solver.solve(self, b, x) + + if self.x0 is None: + for i in range(self.num_rows): + x[i] = 0. 
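+        # otherwise copy the initial guess into x, unless x0 already aliases x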
+ elif &x[0] != &self.x0[0]: + assign(x, self.x0) + + if self.relativeTolerance: + A.residual(x, b, r, simpleResidual=self.x0 is None) # dist + res = norm.eval(r, False) # ip(dist, dist) + self.tol = self.tolerance*res + else: + self.tol = self.tolerance + + +cdef class complex_krylov_solver(complex_iterative_solver): + def __init__(self, ComplexLinearOperator A=None, INDEX_t num_rows=-1): + complex_iterative_solver.__init__(self, A, num_rows) + self.prec = None + self.convMaster = None + self.convClient = None + + cpdef void setup(self, ComplexLinearOperator A=None): + complex_iterative_solver.setup(self, A) + if self.prec is not None and isinstance(self.prec, complex_preconditioner) and (not self.prec.solOp.initialized or A is not None): + self.prec.solOp.setup(self.A) + + cpdef void setPreconditioner(self, ComplexLinearOperator prec, BOOL_t left=True): + self.prec = prec + self.isLeftPrec = left + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, complex_vector_t b, complex_vector_t x) except -1: + complex_iterative_solver.solve(self, b, x) + if not self.relativeTolerance: + self.A.residual(x, b, self.r, simpleResidual=self.x0 is None) # dist + + +cdef class complex_gmres_solver(complex_krylov_solver): + """ + GMRES iteration. + + In a distributed solve, we want: + A: accumulated to distributed + Lprecond: distributed to accumulated + Rprecond: distributed to accumulated + b: distributed + x0: accumulated + x: accumulated + + In the unpreconditioned distributed case, set Lprecond to accumulate. + + Memory requirement: + dim * (maxiter+1) for Q + (maxiter+1) * maxiter for H + (4*maxiter + 2) + 2*dim for c,s,gamma,y and r, Ar + dim * (maxiter+1) for Z if flexible + """ + + def __init__(self, ComplexLinearOperator A=None, INDEX_t num_rows=-1): + complex_krylov_solver.__init__(self, A) + self.use2norm = False + self.flexible = False + self.restarts = 1 + + cpdef void setup(self, ComplexLinearOperator A=None): + complex_krylov_solver.setup(self, A) + self.Ar = uninitialized((self.num_rows), dtype=COMPLEX) + assert self.maxIter > 0, 'Need maxiter > 0' + self.H = np.ones((self.maxIter+1, self.maxIter), dtype=COMPLEX, order='F') + # set first dim to 1, not 0, so that things work + d = max(self.num_rows, 1) + self.Q = uninitialized((d, self.maxIter+1), dtype=COMPLEX, order='F') + if self.flexible: + self.Z = uninitialized((d, self.maxIter+1), dtype=COMPLEX, order='F') + self.c = uninitialized((self.maxIter), dtype=COMPLEX) + self.s = uninitialized((self.maxIter), dtype=COMPLEX) + self.gamma = uninitialized((self.maxIter+1), dtype=COMPLEX) + self.y = uninitialized((self.maxIter+1), dtype=COMPLEX) + self.initialized = True + + cpdef void setPreconditioner(self, ComplexLinearOperator prec, BOOL_t left=True): + complex_krylov_solver.setPreconditioner(self, prec, left) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef int solve(self, complex_vector_t b, complex_vector_t x) except -1: + complex_krylov_solver.solve(self, b, x) + + cdef: + int k, i = -1, j, dim = self.num_rows, l + REAL_t eps = 1e-15, beta + COMPLEX_t temp, rho, sigma + COMPLEX_t[::1, :] Q = self.Q + COMPLEX_t[::1, :] H = self.H + COMPLEX_t[::1, :] Z = None + COMPLEX_t[::1] c = self.c, s = self.s, gamma = self.gamma + complex_vector_t y = self.y + complex_vector_t r = self.r + complex_vector_t Ar = self.Ar + BOOL_t breakout = False + BOOL_t converged + BOOL_t doLprecond = self.isLeftPrec and 
self.prec is not None + BOOL_t doRprecond = not self.isLeftPrec and self.prec is not None + ComplexLinearOperator A = self.A + ComplexLinearOperator Lprecond = None, Rprecond = None + list residuals = [] + complexipBase inner = self.inner + complexNormBase norm = self.norm + REAL_t tol = self.tol + INDEX_t maxiter = self.maxIter + INDEX_t restarts = self.restarts + BOOL_t flexible = self.flexible + int allIter = 0 + convergenceMaster convMaster=self.convMaster + convergenceClient convClient=self.convClient + + if doLprecond: + Lprecond = self.prec + if doRprecond: + Rprecond = self.prec + if flexible: + Z = self.Z + + for k in range(restarts): + if breakout: + self.residuals = residuals + return allIter + A.matvec(x, Ar) # dist + if doLprecond: + assign3(Ar, Ar, -1.0, b, 1.0) # dist + Lprecond.matvec(Ar, r) # acc + gamma[0] = norm.eval(r, True) + else: + assign3(r, b, 1.0, Ar, -1.0) # dist + gamma[0] = norm.eval(r, False) + if len(residuals) == 0: + residuals.append(abs(gamma[0])) + converged = abs(gamma[0]) < tol + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + self.residuals = residuals + return allIter + assignScaled(Q[:, 0], r, 1./gamma[0]) # acc for Lprecond, dist for Rprecond + for i in range(maxiter): + ############################## + # Arnoldi iteration + assign(r, Q[:, i]) # acc for Lprecond, dist for Rprecond + if not flexible: + if doLprecond: + A.matvec(r, Ar) # dist + Lprecond.matvec(Ar, r) # acc + elif doRprecond: + Rprecond.matvec(r, Ar) # acc + A.matvec(Ar, r) # dist + else: + A.matvec(r, Ar) + assign(r, Ar) + else: + if doLprecond: + A.matvec(r, Z[:, i]) # dist + Lprecond.matvec(Z[:, i], r) # acc + elif doRprecond: + Rprecond.matvec(r, Z[:, i]) # acc + A.matvec(Z[:, i], r) # dist + else: + A.matvec(r, Z[:, i]) + assign(r, Z[:, i]) + if doRprecond: + if dim > 0: + for j in range(i+1): + H[j, i] = inner.eval(Q[:, j], r, False, False) + updateScaled(r, Q[:, j], -H[j, i]) # dist + H[i+1, i] = norm.eval(r, False) + else: + if dim > 0: + for j in range(i+1): + H[j, i] = inner.eval(Q[:, j], r, True, True) + updateScaled(r, Q[:, j], -H[j, i]) # acc + H[i+1, i] = norm.eval(r, True) + converged = abs(H[i+1, i]) > eps + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + assignScaled(Q[:, i+1], r, 1./H[i+1, i]) # acc for Lprecond, dist for Rprecond + else: + breakout = True + break + ############################## + # Apply previous Givens rotations to last column of H + for j in range(i): + rho = H[j, i] + sigma = H[j+1, i] + H[j, i] = c[j]*rho + s[j]*sigma + H[j+1, i] = -s[j].conjugate()*rho + c[j].conjugate()*sigma + ############################## + # determine new Givens rotation + beta = sqrt(abs(H[i, i])**2 + abs(H[i+1, i])**2) + c[i] = H[i, i].conjugate()/beta + s[i] = H[i+1, i].conjugate()/beta + ############################## + # Apply new Givens rotation to H + H[i, i] = beta + # H[i+1, i] = 0.0 + ############################## + # Apply new Givens rotation to rhs + gamma[i+1] = -s[i].conjugate()*gamma[i] + gamma[i] = c[i]*gamma[i] + residuals.append(abs(gamma[i+1])) + converged = abs(gamma[i+1]) < tol + if convMaster is not None: + convMaster.setStatus(converged) + if convClient is not None: + converged = convClient.getStatus() + if converged: + breakout = True + break + allIter += i + ############################## + # perform back-solve for y + for j in range(i, -1, -1): + temp = 
gamma[j] + for l in range(j+1, i+1): + temp -= H[j, l]*y[l] + y[j] = temp/H[j, j] + ############################## + # update x + if not flexible: + gemvF(Q[:, :i+1], y[:i+1], r) # acc for Lprecond, dist for Rprecond + if doRprecond: + Rprecond.matvec(r, Ar) # acc + update(x, Ar) # acc + else: + update(x, r) # acc + else: + gemvF(Z[:, :i+1], y[:i+1], r) # dist for Lprecond, acc for Rprecond + assert not doLprecond # TODO: figure out what to do here + update(x, r) # acc + self.residuals = residuals + return allIter + + def __str__(self): + s = 'GMRES(tolerance={},relTol={},maxIter={},restarts={},2-norm={},flexible={})'.format(self.tolerance, self.relativeTolerance, self.maxIter, self.restarts, self.use2norm, self.flexible) + if self.prec is not None: + if self.isLeftPrec: + return s+', left preconditioned by '+str(self.prec) + else: + return s+', right preconditioned by '+str(self.prec) + else: + return s diff --git a/base/PyNucleus_base/sparseGraph.pxd b/base/PyNucleus_base/sparseGraph.pxd new file mode 100644 index 0000000..0c138b2 --- /dev/null +++ b/base/PyNucleus_base/sparseGraph.pxd @@ -0,0 +1,15 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes cimport INDEX_t, BOOL_t +from . linear_operators cimport sparseGraph + + +cpdef void cuthill_mckee(sparseGraph graph, + INDEX_t[::1] order, + BOOL_t reverse=*) diff --git a/base/PyNucleus_base/sparseGraph.pyx b/base/PyNucleus_base/sparseGraph.pyx new file mode 100644 index 0000000..9a688f6 --- /dev/null +++ b/base/PyNucleus_base/sparseGraph.pyx @@ -0,0 +1,263 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . myTypes import INDEX, REAL +import numpy as np +from . myTypes cimport INDEX_t, REAL_t +from . blas import uninitialized +cimport numpy as np +cimport cython +from . 
linear_operators cimport (LinearOperator, + CSR_LinearOperator, + sparseGraph, + restrictionOp, + prolongationOp) + + +class combinedOperator(LinearOperator): + def __init__(self, operators, factors=None): + if factors is None: + factors = [1.0]*len(operators) + for i in range(len(operators)): + if isinstance(operators[i], tuple): + operators[i] = sparseGraph(*operators[i]) + self.operators = operators + self.factors = factors + self.ndim = 2 + super(combinedOperator, self).__init__(operators[0].shape[0], operators[0].shape[1]) + + def sliceRow(self, slice): + for i in range(len(self.operators)): + self.operators[i].sliceRow(slice) + self.shape = self.operators[0].shape + + def sliceColumn(self, slice): + for i in range(len(self.operators)): + self.operators[i].sliceColumn(slice) + self.shape = self.operators[0].shape + + def matvec(self, x): + y = self.factors[0]*(self.operators[0]*x) + for op, fac in zip(self.operators[1:], self.factors[1:]): + y += fac*(op*x) + return y + + def toCSR(self): + C = self.factors[0]*self.operators[0].toCSR() + for op, fac in zip(self.operators[1:], self.factors[1:]): + C = C + fac*op.toCSR() + return C + + def __add__(self, other): + return combinedOperator(self.operators + other.operators, + self.factors + other.factors) + + def __rmul__(self, other): + factors = self.factors[:] + for i in range(len(factors)): + factors[i] *= other + return combinedOperator(self.operators[:], factors) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef dropRowsInPlace(S, INDEX_t[::1] rowIndices): + cdef: + INDEX_t i, j = 0, k, ri, m = 0, l = 0 + INDEX_t nrows = S.shape[0] + INDEX_t[::1] indptr = S.indptr + INDEX_t[::1] indices = S.indices + REAL_t[::1] data + + if rowIndices.shape[0] == 0: + return + + if hasattr(S, 'data'): + data = S.data + + ri = rowIndices[m] # First row to be dropped + if hasattr(S, 'data'): + for i in range(nrows): + if (i == ri): + # don't do anything, just select next row that needs to be dropped + m += 1 + if m < rowIndices.shape[0]: + ri = rowIndices[m] + else: + for k in range(indptr[i], indptr[i+1]): + indices[j] = indices[k] + data[j] = data[k] + j += 1 + l += 1 + indptr[l] = j + else: + for i in range(nrows): + if (i == ri): + # don't do anything, just select next row that needs to be dropped + m += 1 + if m < rowIndices.shape[0]: + ri = rowIndices[m] + else: + for k in range(indptr[i], indptr[i+1]): + indices[j] = indices[k] + j += 1 + l += 1 + indptr[l] = j + S.indices = S.indices[:j] + S.indptr = S.indptr[:l+1] + if hasattr(S, 'data'): + S.data = S.data[:j] + # if isinstance(S, csr_matrix): + # S._shape = (nrows-rowIndices.shape[0], S.shape[1]) + # else: + S.shape = (nrows-rowIndices.shape[0], S.shape[1]) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef dropColsInPlace(S, INDEX_t[::1] col_idx): + cdef: + INDEX_t[::1] indptr = S.indptr + INDEX_t[::1] indices = S.indices + INDEX_t nrows = S.shape[0] + INDEX_t ncols = S.shape[1] + REAL_t[::1] data + INDEX_t p, i, k, jj, j, z = len(col_idx)-1 + + if hasattr(S, 'data'): + data = S.data + p = 0 + for i in range(len(indptr)-1): + k = 0 + for jj in range(indptr[i], indptr[i+1]): + j = indices[jj] + while j > col_idx[k] and k < z: + k += 1 + if j > col_idx[k]: + indices[p] = j-k-1 + data[p] = data[jj] + p += 1 + elif j < col_idx[k]: + indices[p] = j-k + data[p] = data[jj] + p += 1 + indptr[i] = p + + for i in range(len(indptr)-1, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + S.indices = 
S.indices[:p] + if hasattr(S, 'data'): + S.data = S.data[:p] + + # if isinstance(S, csr_matrix): + # S._shape = (nrows, ncols-len(col_idx)) + # else: + S.shape = (nrows, ncols-len(col_idx)) + + +# stolen from scipy +cdef _node_degrees(INDEX_t[::1] ind, + INDEX_t[::1] ptr, + INDEX_t num_rows): + """ + Find the degree of each node (matrix row) in a graph represented + by a sparse CSR or CSC matrix. + """ + cdef INDEX_t ii, jj + cdef INDEX_t[::1] degree = np.zeros(num_rows, dtype=INDEX) + + for ii in range(num_rows): + degree[ii] = ptr[ii + 1] - ptr[ii] + for jj in range(ptr[ii], ptr[ii + 1]): + if ind[jj] == ii: + # add one if the diagonal is in row ii + degree[ii] += 1 + break + return degree + + +# stolen from scipy +cpdef void cuthill_mckee(sparseGraph graph, + INDEX_t[::1] order, + BOOL_t reverse=False): + """ + Cuthill-McKee ordering of a sparse symmetric CSR or CSC matrix. + We follow the original Cuthill-McKee paper and always start the routine + at a node of lowest degree for each connected component. + """ + cdef: + INDEX_t[::1] ind = graph.indices + INDEX_t[::1] ptr = graph.indptr + INDEX_t num_rows = graph.num_rows + cdef INDEX_t N = 0, N_old, level_start, level_end, temp + cdef INDEX_t zz, ii, jj, kk, ll, level_len + cdef INDEX_t[::1] reverse_order + cdef INDEX_t[::1] degree = _node_degrees(ind, ptr, num_rows) + cdef INDEX_t[::1] inds = np.argsort(degree).astype(INDEX) + cdef INDEX_t[::1] rev_inds = np.argsort(inds).astype(INDEX) + cdef INDEX_t[::1] temp_degrees = np.zeros(np.max(degree), dtype=INDEX) + cdef INDEX_t i, j, seed, temp2 + + # loop over zz takes into account possible disconnected graph. + for zz in range(num_rows): + if inds[zz] != -1: # Do BFS with seed=inds[zz] + seed = inds[zz] + order[N] = seed + N += 1 + inds[rev_inds[seed]] = -1 + level_start = N - 1 + level_end = N + + while level_start < level_end: + for ii in range(level_start, level_end): + i = order[ii] + N_old = N + + # add unvisited neighbors + for jj in range(ptr[i], ptr[i + 1]): + # j is node number connected to i + j = ind[jj] + if inds[rev_inds[j]] != -1: + inds[rev_inds[j]] = -1 + order[N] = j + N += 1 + + # Add values to temp_degrees array for insertion sort + level_len = 0 + for kk in range(N_old, N): + temp_degrees[level_len] = degree[order[kk]] + level_len += 1 + + # Do insertion sort for nodes from lowest to highest degree + for kk in range(1,level_len): + temp = temp_degrees[kk] + temp2 = order[N_old+kk] + ll = kk + while (ll > 0) and (temp < temp_degrees[ll-1]): + temp_degrees[ll] = temp_degrees[ll-1] + order[N_old+ll] = order[N_old+ll-1] + ll -= 1 + temp_degrees[ll] = temp + order[N_old+ll] = temp2 + + # set next level start and end ranges + level_start = level_end + level_end = N + + if N == num_rows: + break + + if reverse: + reverse_order = uninitialized((num_rows), dtype=INDEX) + for i in range(num_rows): + reverse_order[num_rows-1-i] = order[i] + for i in range(num_rows): + order[i] = reverse_order[i] diff --git a/base/PyNucleus_base/sparsityPattern.pxd b/base/PyNucleus_base/sparsityPattern.pxd new file mode 100644 index 0000000..d3d3610 --- /dev/null +++ b/base/PyNucleus_base/sparsityPattern.pxd @@ -0,0 +1,24 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
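(Usage sketch, not part of the patch: cuthill_mckee above fills a caller-allocated order array with the Cuthill-McKee permutation of a sparseGraph, optionally reversed. The effect on matrix bandwidth can be previewed with SciPy's standalone implementation of the same algorithm; the random CSR matrix and the scipy.sparse.csgraph call below are illustrative only and are not used by this package.)

    import numpy as np
    from scipy.sparse import random as sparse_random
    from scipy.sparse.csgraph import reverse_cuthill_mckee

    B = sparse_random(200, 200, density=0.02, format='csr')
    A = (B + B.T).tocsr()                         # symmetric sparsity pattern
    perm = reverse_cuthill_mckee(A, symmetric_mode=True)
    A_rcm = A[perm, :][:, perm]                   # reordered copy with reduced bandwidth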
#
+###################################################################################
+
+
+from . myTypes cimport INDEX_t, REAL_t, BOOL_t
+from numpy cimport uint16_t
+
+
+cdef class sparsityPattern:
+    cdef:
+        INDEX_t ** indexL
+        INDEX_t[::1] counts
+        uint16_t initial_length
+        uint16_t[::1] lengths
+        INDEX_t num_dofs, nnz
+        uint16_t index
+
+    cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J)
+    cdef inline void add(self, INDEX_t I, INDEX_t J)
+    cdef freeze(self)
diff --git a/base/PyNucleus_base/sparsityPattern.pyx b/base/PyNucleus_base/sparsityPattern.pyx
new file mode 100644
index 0000000..61820eb
--- /dev/null
+++ b/base/PyNucleus_base/sparsityPattern.pyx
@@ -0,0 +1,126 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from libc.stdlib cimport malloc, realloc, free
+import numpy as np
+cimport numpy as np
+cimport cython
+from . myTypes import INDEX, REAL
+from . blas import uninitialized
+
+
+cdef class sparsityPattern:
+    def __init__(self, INDEX_t num_dofs, np.uint16_t initial_length=9):
+        cdef:
+            INDEX_t i
+        self.num_dofs = num_dofs
+        self.initial_length = initial_length
+        self.nnz = 0
+        self.counts = np.zeros((num_dofs), dtype=INDEX)
+        self.lengths = initial_length*np.ones((num_dofs), dtype=np.uint16)
+        self.indexL = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *))
+        # reserve initial memory for array of variable column size
+        for i in range(num_dofs):
+            self.indexL[i] = <INDEX_t *>malloc(self.initial_length*sizeof(INDEX_t))
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J):
+        cdef:
+            uint16_t m, low, high, mid
+            INDEX_t K
+
+        if self.counts[I] < 20:
+            for m in range(self.counts[I]):
+                K = self.indexL[I][m]
+                if K == J:
+                    self.index = m
+                    return True
+                elif K > J:
+                    self.index = m
+                    return False
+            else:
+                self.index = self.counts[I]
+                return False
+        else:
+            low = 0
+            high = self.counts[I]
+            while self.indexL[I][low] != J:
+                if high-low <= 1:
+                    if self.indexL[I][low] > J:
+                        self.index = low
+                    else:
+                        self.index = low+1
+                    return False
+                mid = (low+high) >> 1
+                if self.indexL[I][mid] <= J:
+                    low = mid
+                else:
+                    high = mid
+            self.index = low
+            return True
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef inline void add(self, INDEX_t I, INDEX_t J):
+        cdef:
+            INDEX_t m, n
+        if not self.findIndex(I, J):
+            # J was not present
+            # Do we need more space?
+            if self.counts[I] == self.lengths[I]:
+                self.indexL[I] = <INDEX_t *>realloc(self.indexL[I], (self.lengths[I]+self.initial_length)*sizeof(INDEX_t))
+                self.lengths[I] += self.initial_length
+            # where should we insert?
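+            # (findIndex cached the insertion position in self.index)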
+ m = self.index + # move previous indices out of the way + for n in range(self.counts[I], m, -1): + self.indexL[I][n] = self.indexL[I][n-1] + # insert in empty spot + self.indexL[I][m] = J + self.counts[I] += 1 + self.nnz += 1 + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef freeze(self): + cdef: + INDEX_t i, j, k, nnz + INDEX_t[::1] indptr, indices + np.ndarray[INDEX_t, ndim=1] indptr_mem, indices_mem + # del self.lengths + + # write indices list of lists into array + indices_mem = uninitialized((self.nnz), dtype=INDEX) + indices = indices_mem + k = 0 + for i in range(self.num_dofs): + for j in range(self.counts[i]): + indices[k] = self.indexL[i][j] + k += 1 + free(self.indexL[i]) + free(self.indexL) + + # fill indptr array + indptr_mem = uninitialized((self.num_dofs+1), dtype=INDEX) + indptr = indptr_mem + nnz = 0 + for i in range(self.num_dofs): + indptr[i] = nnz + nnz += self.counts[i] + indptr[self.num_dofs] = nnz + return indptr_mem, indices_mem + + def add_python(self, INDEX_t I, INDEX_t J): + self.add(I, J) + + def freeze_python(self): + return self.freeze() diff --git a/base/PyNucleus_base/tupleDict.pxd b/base/PyNucleus_base/tupleDict.pxd new file mode 100644 index 0000000..0c0dcda --- /dev/null +++ b/base/PyNucleus_base/tupleDict.pxd @@ -0,0 +1,63 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cimport numpy as np +from . myTypes cimport INDEX_t, BOOL_t + +include "tupleDict_decl_INDEX.pxi" +include "tupleDict_decl_MASK.pxi" + +ctypedef np.uint64_t MEM_t + + +cdef class indexSet: + cdef BOOL_t inSet(self, INDEX_t i) + cpdef void fromSet(self, set s) + cpdef set toSet(self) + cpdef INDEX_t[::1] toArray(self) + cdef indexSetIterator getIter(self) + cdef INDEX_t getNumEntries(self) + cpdef void empty(self) + cpdef indexSet union(self, indexSet other) + cpdef indexSet inter(self, indexSet other) + cpdef indexSet setminus(self, indexSet other) + cpdef BOOL_t isSorted(self) + + +cdef class indexSetIterator: + cdef: + indexSet iS + readonly INDEX_t i + cdef void setIndexSet(self, indexSet iS) + cdef void reset(self) + cdef BOOL_t step(self) + + +cdef class arrayIndexSet(indexSet): + cdef: + INDEX_t[::1] I + + +cdef class arrayIndexSetIterator(indexSetIterator): + cdef: + INDEX_t k + + +cdef class bitArray(indexSet): + cdef: + readonly INDEX_t length + MEM_t* a + bitArrayIterator it + + cdef void set(self, INDEX_t i) + + +cdef class bitArrayIterator(indexSetIterator): + cdef: + bitArray bA + INDEX_t k, n diff --git a/base/PyNucleus_base/tupleDict.pyx b/base/PyNucleus_base/tupleDict.pyx new file mode 100644 index 0000000..e3d739a --- /dev/null +++ b/base/PyNucleus_base/tupleDict.pyx @@ -0,0 +1,516 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
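(Usage sketch, not part of the patch: sparsityPattern accumulates a CSR sparsity structure one entry at a time and is driven here through the Python wrappers; the import path is assumed from the file layout, and inside the package the cdef methods are called directly during assembly.)

    import numpy as np
    from PyNucleus_base.sparsityPattern import sparsityPattern

    sP = sparsityPattern(4)                    # pattern for a 4x4 operator
    for I, J in [(0, 0), (0, 1), (2, 3), (2, 1), (2, 3)]:
        sP.add_python(I, J)                    # the duplicate (2, 3) is stored once
    indptr, indices = sP.freeze_python()       # CSR structure, each row sorted
    # indptr = [0, 2, 2, 4, 4], indices = [0, 1, 1, 3]
    data = np.zeros((indices.shape[0],))       # value array for the CSR matrix

(Note that freeze releases the per-row buffers, so a pattern cannot be reused after freezing.)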
#
+###################################################################################
+
+
+import numpy as np
+from libc.stdlib cimport malloc, realloc, free
+from libc.stdlib cimport qsort
+from . myTypes import INDEX
+cimport cython
+
+cdef extern from "malloc.h" nogil:
+    int malloc_trim(size_t pad)
+
+# def return_memory_to_OS():
+#     malloc_trim(0)
+
+include "tupleDict_INDEX.pxi"
+include "tupleDict_MASK.pxi"
+
+
+cdef class indexSet:
+    cdef BOOL_t inSet(self, INDEX_t i):
+        raise NotImplementedError()
+
+    def inSet_py(self, INDEX_t i):
+        return self.inSet(i)
+
+    cpdef void fromSet(self, set s):
+        raise NotImplementedError()
+
+    cpdef set toSet(self):
+        raise NotImplementedError()
+
+    cpdef INDEX_t[::1] toArray(self):
+        raise NotImplementedError()
+
+    cdef indexSetIterator getIter(self):
+        raise NotImplementedError()
+
+    def __iter__(self):
+        return self.getIter()
+
+    def getIter_py(self):
+        return self.getIter()
+
+    cdef INDEX_t getNumEntries(self):
+        raise NotImplementedError()
+
+    def __len__(self):
+        return self.getNumEntries()
+
+    cpdef void empty(self):
+        raise NotImplementedError()
+
+    cpdef indexSet union(self, indexSet other):
+        raise NotImplementedError()
+
+    cpdef indexSet inter(self, indexSet other):
+        raise NotImplementedError()
+
+    cpdef indexSet setminus(self, indexSet other):
+        raise NotImplementedError()
+
+    cpdef BOOL_t isSorted(self):
+        cdef:
+            indexSetIterator it = self.getIter()
+            INDEX_t i, j
+            BOOL_t sorted = True
+
+        if it.step():
+            i = it.i
+            while it.step():
+                j = it.i
+                sorted = sorted & (i < j)
+                i = j
+        return sorted
+
+
+cdef class indexSetIterator:
+    def __init__(self):
+        pass
+
+    cdef void setIndexSet(self, indexSet iS):
+        self.iS = iS
+        self.reset()
+
+    cdef void reset(self):
+        raise NotImplementedError()
+
+    cdef BOOL_t step(self):
+        raise NotImplementedError()
+
+    def __next__(self):
+        if self.step():
+            return self.i
+        else:
+            raise StopIteration
+
+
+cdef inline int compareIndices(const void *pa, const void *pb) nogil:
+    cdef:
+        INDEX_t a = (<INDEX_t *> pa)[0]
+        INDEX_t b = (<INDEX_t *> pb)[0]
+    return a > b
+
+
+cdef class arrayIndexSet(indexSet):
+    def __init__(self, INDEX_t[::1] I = None, BOOL_t sorted=False):
+        if I is not None:
+            if not sorted:
+                qsort(&I[0], I.shape[0], sizeof(INDEX_t), compareIndices)
+            self.I = I
+        else:
+            self.I = np.empty((0), dtype=INDEX)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef BOOL_t inSet(self, INDEX_t i):
+        cdef:
+            INDEX_t low = 0
+            INDEX_t high = self.I.shape[0]
+            INDEX_t mid
+        if high-low < 20:
+            for mid in range(low, high):
+                if self.I[mid] == i:
+                    return True
+            return False
+        else:
+            while self.I[low] != i:
+                if high-low <= 1:
+                    return False
+                mid = (low+high) >> 1
+                if self.I[mid] <= i:
+                    low = mid
+                else:
+                    high = mid
+            return True
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cpdef void fromSet(self, set s):
+        cdef:
+            INDEX_t i, k
+        self.I = np.empty((len(s)), dtype=INDEX)
+        k = 0
+        for i in s:
+            self.I[k] = i
+            k += 1
+        qsort(&self.I[0], self.I.shape[0], sizeof(INDEX_t), compareIndices)
+
+    cpdef set toSet(self):
+        cdef:
+            INDEX_t k
+            set s = set()
+        for k in range(self.I.shape[0]):
+            s.add(self.I[k])
+        return s
+
+    cpdef INDEX_t[::1] toArray(self):
+        return self.I
+
+    cdef indexSetIterator getIter(self):
+        return arrayIndexSetIterator(self)
+
+    cdef INDEX_t getNumEntries(self):
+        return self.I.shape[0]
+
+    cpdef void empty(self):
+        self.I = np.empty((0), dtype=INDEX)
+
+    @cython.initializedcheck(False)
+
@cython.boundscheck(False) + @cython.wraparound(False) + cpdef indexSet union(self, indexSet other_): + cdef: + arrayIndexSet other = other_ + INDEX_t l1, l2 + INDEX_t k1, k2, k, i1, i2 + arrayIndexSet newIS + + l1 = self.getNumEntries() + l2 = other.getNumEntries() + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + k += 1 + k1 += 1 + k2 += 1 + elif i1 < i2: + k += 1 + k1 += 1 + else: + k += 1 + k2 += 1 + if k1 == l1: + k += l2-k2 + else: + k += l1-k1 + + newIS = arrayIndexSet(np.empty((k), dtype=INDEX), True) + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + newIS.I[k] = i1 + k += 1 + k1 += 1 + k2 += 1 + elif i1 < i2: + newIS.I[k] = i1 + k += 1 + k1 += 1 + else: + newIS.I[k] = i2 + k += 1 + k2 += 1 + if k1 == l1: + for k1 in range(k2, l2): + newIS.I[k] = other.I[k1] + k += 1 + else: + for k2 in range(k1, l1): + newIS.I[k] = self.I[k2] + k += 1 + + return newIS + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef indexSet inter(self, indexSet other_): + cdef: + arrayIndexSet other = other_ + INDEX_t l1, l2 + INDEX_t k1, k2, k, i1 = -1, i2 = -1 + arrayIndexSet newIS + + l1 = self.getNumEntries() + l2 = other.getNumEntries() + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + k += 1 + k1 += 1 + k2 += 1 + elif i1 < i2: + k1 += 1 + else: + k2 += 1 + + newIS = arrayIndexSet(np.empty((k), dtype=INDEX), True) + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + newIS.I[k] = i1 + k += 1 + k1 += 1 + k2 += 1 + elif i1 < i2: + k1 += 1 + else: + k2 += 1 + + return newIS + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef indexSet setminus(self, indexSet other_): + cdef: + arrayIndexSet other = other_ + INDEX_t l1, l2 + INDEX_t k1, k2, k, i1 = -1, i2 = -1 + arrayIndexSet newIS + + l1 = self.getNumEntries() + l2 = other.getNumEntries() + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + k1 += 1 + k2 += 1 + elif i1 < i2: + k += 1 + k1 += 1 + else: + k2 += 1 + + while (k1 < l1): + k += 1 + k1 += 1 + + newIS = arrayIndexSet(np.empty((k), dtype=INDEX), True) + + k1 = 0 + k2 = 0 + k = 0 + while (k1 < l1) and (k2 < l2): + i1 = self.I[k1] + i2 = other.I[k2] + if i1 == i2: + k1 += 1 + k2 += 1 + elif i1 < i2: + newIS.I[k] = i1 + k += 1 + k1 += 1 + else: + k2 += 1 + + while (k1 < l1): + i1 = self.I[k1] + newIS.I[k] = i1 + k += 1 + k1 += 1 + + return newIS + + + +cdef class arrayIndexSetIterator(indexSetIterator): + def __init__(self, arrayIndexSet aIS=None): + if aIS is not None: + self.setIndexSet(aIS) + + cdef void reset(self): + self.k = -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t step(self): + cdef: + arrayIndexSet aIS = self.iS + self.k += 1 + if self.k < aIS.I.shape[0]: + self.i = aIS.I[self.k] + return True + else: + return False + + +cdef class bitArray(indexSet): + def __init__(self, INDEX_t hintMaxLength=1, INDEX_t maxElement=0): + self.length = max(hintMaxLength, maxElement/(sizeof(MEM_t)*8)+1) + self.a = malloc(self.length*sizeof(MEM_t)) + for j in range(self.length): + self.a[j] = 0 + + @cython.cdivision(True) + cdef void set(self, INDEX_t i): + cdef: + INDEX_t k = i/(sizeof(MEM_t)*8) + INDEX_t n = i-k*sizeof(MEM_t)*8 + INDEX_t 
j, l + MEM_t one = 1 + if k >= self.length: + l = self.length + self.length = k+1 + self.a = realloc(self.a, self.length * sizeof(MEM_t)) + for j in range(l, self.length): + self.a[j] = 0 + self.a[k] |= one << n + + def set_py(self, INDEX_t i): + self.set(i) + + @cython.cdivision(True) + cdef BOOL_t inSet(self, INDEX_t i): + cdef: + INDEX_t k = i/(sizeof(MEM_t)*8) + INDEX_t n = i-k*sizeof(MEM_t)*8 + if 0 <= k < self.length: + return (self.a[k] >> n) & 1 + else: + return False + + cpdef set toSet(self): + cdef: + set s = set() + indexSetIterator it = self.getIter() + while it.step(): + s.add(it.i) + return s + + cpdef void fromSet(self, set s): + cdef: + INDEX_t i + self.empty() + for i in s: + self.set(i) + + cdef INDEX_t getNumEntries(self): + cdef: + INDEX_t k, c = 0 + MEM_t v + for k in range(self.length): + v = self.a[k] + for _ in range(sizeof(MEM_t)*8): + if v & 1: + c += 1 + v = v >> 1 + return c + + cpdef void empty(self): + cdef: + INDEX_t j + for j in range(self.length): + self.a[j] = 0 + + def __dealloc__(self): + free(self.a) + + cdef indexSetIterator getIter(self): + return bitArrayIterator(self) + + cpdef indexSet union(self, indexSet other_): + cdef: + bitArray other = other_ + bitArray bA = bitArray(max(self.length, other.length)) + INDEX_t k + + for k in range(min(self.length, other.length)): + bA.a[k] = self.a[k] | other.a[k] + if self.length > other.length: + for k in range(other.length, self.length): + bA.a[k] = self.a[k] + else: + for k in range(self.length, other.length): + bA.a[k] = other.a[k] + return bA + + cpdef indexSet inter(self, indexSet other_): + cdef: + bitArray other = other_ + bitArray bA = bitArray(min(self.length, other.length)) + INDEX_t k + + for k in range(min(self.length, other.length)): + bA.a[k] = self.a[k] & other.a[k] + return bA + + +cdef class bitArrayIterator(indexSetIterator): + def __init__(self, bitArray bA=None): + if bA is not None: + self.setIndexSet(bA) + + cdef void reset(self): + self.k = -1 + self.n = sizeof(MEM_t)*8-1 + self.i = -1 + + cdef BOOL_t step(self): + cdef: + bitArray bA = self.iS + INDEX_t k0, n0, k, n + MEM_t v + + if self.n == sizeof(MEM_t)*8-1: + k0 = self.k+1 + n0 = 0 + else: + k0 = self.k + n0 = self.n+1 + v = bA.a[k0] + v = v >> n0 + for n in range(n0, sizeof(MEM_t)*8): + if v & 1: + self.k = k0 + self.n = n + self.i = self.k*sizeof(MEM_t)*8+self.n + return True + v = v >> 1 + + for k in range(k0+1, bA.length): + v = bA.a[k] + for n in range(sizeof(MEM_t)*8): + if v & 1: + self.k = k + self.n = n + self.i = self.k*sizeof(MEM_t)*8+self.n + return True + v = v >> 1 + return False diff --git a/base/PyNucleus_base/tupleDict_decl_{VALUE}.pxi b/base/PyNucleus_base/tupleDict_decl_{VALUE}.pxi new file mode 100644 index 0000000..ad2bb4e --- /dev/null +++ b/base/PyNucleus_base/tupleDict_decl_{VALUE}.pxi @@ -0,0 +1,36 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
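(Usage sketch, not part of the patch: arrayIndexSet and bitArray implement the same indexSet interface with different trade-offs, a sorted array with binary search versus one bit per candidate index; the import path and the 32-bit INDEX dtype are assumptions.)

    import numpy as np
    from PyNucleus_base.tupleDict import arrayIndexSet, bitArray

    aIS = arrayIndexSet(np.array([7, 2, 5], dtype=np.int32))  # sorted on construction
    bIS = arrayIndexSet(np.array([5, 7, 9], dtype=np.int32), sorted=True)
    assert aIS.inSet_py(5) and not aIS.inSet_py(3)
    assert aIS.inter(bIS).toSet() == {5, 7}
    assert aIS.union(bIS).toSet() == {2, 5, 7, 9}

    bA = bitArray(maxElement=7)    # one bit per candidate index
    bA.fromSet({2, 5, 64})         # storage grows automatically beyond maxElement
    assert bA.inSet_py(64) and len(bA) == 3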
# +################################################################################### + + +cdef class tupleDict{VALUE}: + cdef: + INDEX_t ** indexL + {VALUE_t} ** vals + {LENGTH_t}[::1] counts + {LENGTH_t} initial_length + {LENGTH_t} length_inc + {LENGTH_t}[::1] lengths + INDEX_t num_dofs + readonly INDEX_t nnz + BOOL_t deleteHits, logicalAndHits + INDEX_t i, jj + public {VALUE_t} invalid + {LENGTH_t} invalidIndex + {LENGTH_t} index + cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J) + cdef inline void increaseSize(self, INDEX_t I, {LENGTH_t} increment) + cdef {VALUE_t} enterValue(self, const INDEX_t[::1] e, {VALUE_t} val) + cdef {VALUE_t} removeValue(self, const INDEX_t[::1] e) + cpdef {VALUE_t} enterValue_py(self, const INDEX_t[::1] e, {VALUE_t} val) + cpdef {VALUE_t} removeValue_py(self, const INDEX_t[::1] e) + cdef {VALUE_t} getValue(self, const INDEX_t[::1] e) + cdef void startIter(self) + cdef BOOL_t next(self, INDEX_t[::1] e, {VALUE_t} * val) + cdef tuple getData(self) + cpdef void merge(self, tupleDict{VALUE} other) + cpdef void mergeData(self, {LENGTH_t}[::1] counts, INDEX_t[::1] indexL, {VALUE_t}[::1] vals) + cdef INDEX_t getSizeInBytes(self) diff --git a/base/PyNucleus_base/tupleDict_{VALUE}.pxi b/base/PyNucleus_base/tupleDict_{VALUE}.pxi new file mode 100644 index 0000000..b833e06 --- /dev/null +++ b/base/PyNucleus_base/tupleDict_{VALUE}.pxi @@ -0,0 +1,296 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
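(Note: the {VALUE} placeholders in these .pxi templates are expanded at build time into the tupleDict_INDEX.pxi / tupleDict_MASK.pxi variants that tupleDict.pyx includes above. A rough sketch of that expansion step follows; the actual type maps and driver live in the setup scripts, and the mapping shown for the INDEX variant is an assumption.)

    # illustrative template expansion for one value type
    subs = {'{VALUE}': 'INDEX',
            '{VALUE_t}': 'INDEX_t',
            '{VALUE_dtype}': 'INDEX',
            '{LENGTH_t}': 'uint16_t',
            '{LENGTH_dtype}': 'np.uint16'}
    with open('tupleDict_{VALUE}.pxi') as f:
        text = f.read()
    for placeholder, concrete in subs.items():
        text = text.replace(placeholder, concrete)
    with open('tupleDict_INDEX.pxi', 'w') as f:
        f.write(text)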
#
+###################################################################################
+
+
+cdef class tupleDict{VALUE}:
+    def __init__(self,
+                 INDEX_t num_dofs,
+                 {LENGTH_t} initial_length=0,
+                 {LENGTH_t} length_inc=3,
+                 BOOL_t deleteHits=True,
+                 BOOL_t logicalAndHits=False):
+        cdef:
+            INDEX_t i
+        self.num_dofs = num_dofs
+        self.initial_length = initial_length
+        self.length_inc = length_inc
+        self.nnz = 0
+        self.counts = np.zeros((num_dofs), dtype={LENGTH_dtype})
+        self.lengths = initial_length*np.ones((num_dofs), dtype={LENGTH_dtype})
+        self.indexL = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *))
+        self.vals = <{VALUE_t} **>malloc(num_dofs*sizeof({VALUE_t} *))
+        # reserve initial memory for array of variable column size
+        for i in range(num_dofs):
+            self.indexL[i] = <INDEX_t *>malloc(self.initial_length *
+                                               sizeof(INDEX_t))
+            self.vals[i] = <{VALUE_t} *>malloc(self.initial_length *
+                                               sizeof({VALUE_t}))
+        self.deleteHits = deleteHits
+        self.logicalAndHits = logicalAndHits
+        self.invalid = np.iinfo({VALUE_dtype}).max
+        self.invalidIndex = np.iinfo({LENGTH_dtype}).max
+
+    cdef INDEX_t getSizeInBytes(self):
+        cdef:
+            INDEX_t s, i, l
+        s = self.num_dofs * (2*sizeof({LENGTH_t}) + sizeof(INDEX_t*) + sizeof({VALUE_t}*))
+        l = 0
+        for i in range(self.num_dofs):
+            l += self.lengths[i]
+        s += l*(sizeof(INDEX_t)+sizeof({VALUE_t}))
+        return s
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef inline BOOL_t findIndex(self, INDEX_t I, INDEX_t J):
+        cdef:
+            {LENGTH_t} m, low, high, mid
+            INDEX_t K
+
+        if self.counts[I] < 20:
+            for m in range(self.counts[I]):
+                K = self.indexL[I][m]
+                if K == J:
+                    self.index = m
+                    return True
+                elif K > J:
+                    self.index = m
+                    return False
+            else:
+                self.index = self.counts[I]
+                return False
+        else:
+            low = 0
+            high = self.counts[I]
+            while self.indexL[I][low] != J:
+                if high-low <= 1:
+                    if self.indexL[I][low] > J:
+                        self.index = low
+                    else:
+                        self.index = low+1
+                    return False
+                mid = (low+high) >> 1
+                if self.indexL[I][mid] <= J:
+                    low = mid
+                else:
+                    high = mid
+            self.index = low
+            return True
+
+    cdef inline void increaseSize(self, INDEX_t I, {LENGTH_t} increment):
+        self.lengths[I] += increment
+        self.indexL[I] = <INDEX_t *>realloc(self.indexL[I],
+                                            (self.lengths[I]) *
+                                            sizeof(INDEX_t))
+        self.vals[I] = <{VALUE_t} *>realloc(self.vals[I],
+                                            (self.lengths[I]) *
+                                            sizeof({VALUE_t}))
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef {VALUE_t} enterValue(self, const INDEX_t[::1] e, {VALUE_t} val):
+        cdef:
+            INDEX_t m, n, I = e[0], J = e[1]
+
+        if self.findIndex(I, J):  # J is already present
+            m = self.index
+            if self.deleteHits:
+                val = self.vals[I][m]
+                for n in range(m+1, self.counts[I]):
+                    self.indexL[I][n-1] = self.indexL[I][n]
+                    self.vals[I][n-1] = self.vals[I][n]
+                self.counts[I] -= 1
+                self.nnz -= 1
+            elif self.logicalAndHits:
+                self.vals[I][m] |= val
+                val = self.vals[I][m]
+            else:
+                val = self.vals[I][m]
+            return val
+        else:
+            # J was not present
+            m = self.index
+            # Do we need more space?
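+            # (rows grow by length_inc entries at a time, amortizing reallocations)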
+            if self.counts[I] == self.lengths[I]:
+                self.increaseSize(I, self.length_inc)
+            # move previous indices out of the way
+            for n in range(self.counts[I], m, -1):
+                self.indexL[I][n] = self.indexL[I][n-1]
+                self.vals[I][n] = self.vals[I][n-1]
+            # insert in empty spot
+            self.indexL[I][m] = J
+            self.vals[I][m] = val
+            self.counts[I] += 1
+            self.nnz += 1
+            return val
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef {VALUE_t} removeValue(self, const INDEX_t[::1] e):
+        cdef:
+            INDEX_t m, n, I = e[0], J = e[1]
+            {VALUE_t} val
+
+        if self.findIndex(I, J):  # J is already present
+            m = self.index
+            val = self.vals[I][m]
+            for n in range(m+1, self.counts[I]):
+                self.indexL[I][n-1] = self.indexL[I][n]
+                self.vals[I][n-1] = self.vals[I][n]
+            self.counts[I] -= 1
+            self.nnz -= 1
+            return val
+        return self.invalid
+
+    cpdef {VALUE_t} enterValue_py(self, const INDEX_t[::1] e, {VALUE_t} val):
+        return self.enterValue(e, val)
+
+    cpdef {VALUE_t} removeValue_py(self, const INDEX_t[::1] e):
+        return self.removeValue(e)
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef {VALUE_t} getValue(self, const INDEX_t[::1] e):
+        cdef:
+            INDEX_t m
+        if self.findIndex(e[0], e[1]):  # J is already present
+            return self.vals[e[0]][self.index]
+        else:
+            return self.invalid
+
+    def __getitem__(self, INDEX_t[::1] edge):
+        return self.getValue(edge)
+
+    def __dealloc__(self):
+        cdef:
+            INDEX_t i
+        for i in range(self.num_dofs):
+            free(self.indexL[i])
+            free(self.vals[i])
+        free(self.indexL)
+        free(self.vals)
+        malloc_trim(0)
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef void startIter(self):
+        self.i = 0
+        while self.i < self.num_dofs and self.counts[self.i] == 0:
+            self.i += 1
+        self.jj = 0
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef BOOL_t next(self, INDEX_t[::1] e, {VALUE_t} * val):
+        cdef:
+            INDEX_t i = self.i, jj = self.jj, j
+        if i < self.num_dofs:
+            j = self.indexL[i][jj]
+            val[0] = self.vals[i][jj]
+        else:
+            return False
+        e[0] = i
+        e[1] = j
+        if jj < self.counts[i]-1:
+            self.jj += 1
+        else:
+            self.jj = 0
+            i += 1
+            while i < self.num_dofs and self.counts[i] == 0:
+                i += 1
+            self.i = i
+        return True
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    cdef tuple getData(self):
+        cdef:
+            INDEX_t[::1] indexL
+            {VALUE_t}[::1] vals
+            INDEX_t i, j, k
+        indexL = np.empty((self.nnz), dtype=INDEX)
+        vals = np.empty((self.nnz), dtype={VALUE_dtype})
+        k = 0
+        for i in range(self.num_dofs):
+            for j in range(self.counts[i]):
+                indexL[k] = self.indexL[i][j]
+                vals[k] = self.vals[i][j]
+                k += 1
+        return indexL, vals
+
+    def __getstate__(self):
+        indexL, vals = self.getData()
+        return (self.num_dofs, self.length_inc, self.deleteHits, self.logicalAndHits, np.array(self.counts), np.array(indexL), np.array(vals))
+
+    @cython.boundscheck(False)
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    def __setstate__(self, state):
+        cdef:
+            INDEX_t[::1] indexL = state[5]
+            {VALUE_t}[::1] vals = state[6]
+            INDEX_t i, j, k
+        self.__init__(state[0], 0, state[1], state[2], state[3])
+        self.lengths = state[4]
+        k = 0
+        for i in range(self.num_dofs):
+            self.counts[i] = self.lengths[i]
+            self.indexL[i] = <INDEX_t *>malloc(self.lengths[i] *
+                                               sizeof(INDEX_t))
+            self.vals[i] = <{VALUE_t} *>malloc(self.lengths[i] *
+                                               sizeof({VALUE_t}))
+            for j in range(self.counts[i]):
+                self.indexL[i][j] = indexL[k]
+                self.vals[i][j] = 
vals[k] + k += 1 + self.nnz = k + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cpdef void merge(self, tupleDict{VALUE} other): + cdef: + INDEX_t[::1] e = np.empty((2), dtype=INDEX) + {VALUE_t} val + INDEX_t i + assert self.num_dofs == other.num_dofs + other.startIter() + while other.next(e, &val): + self.enterValue(e, val) + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cpdef void mergeData(self, {LENGTH_t}[::1] counts, INDEX_t[::1] indexL, {VALUE_t}[::1] vals): + cdef: + INDEX_t[::1] e = np.empty((2), dtype=INDEX) + INDEX_t i, k + assert self.num_dofs == counts.shape[0] + k = 0 + for i in range(self.num_dofs): + e[0] = i + for j in range(counts[i]): + e[1] = indexL[k] + self.enterValue(e, vals[k]) + k += 1 + + def isCorrect(self): + cdef: + INDEX_t i, j + for i in range(self.num_dofs): + for j in range(self.counts[i]-1): + if self.indexL[i][j] > self.indexL[i][j+1]: + print(i, j, self.indexL[i][j], self.indexL[i][j+1]) + return False + return True diff --git a/base/PyNucleus_base/utilsCy.pyx b/base/PyNucleus_base/utilsCy.pyx new file mode 100644 index 0000000..679d740 --- /dev/null +++ b/base/PyNucleus_base/utilsCy.pyx @@ -0,0 +1,76 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import numpy as np +cimport numpy as np +from numpy.linalg import norm as normSeq +import logging +cimport cython +from libc.math cimport log + +from . myTypes import INDEX, REAL +from . myTypes cimport INDEX_t, REAL_t +from . blas import uninitialized_like + + +LOGGER = logging.getLogger(__name__) + + +def UniformOnUnitSphere(dim, samples=1, norm=normSeq): + "Uniform distribution on the unit sphere." + if samples > 1: + shape = (dim, samples) + vec = np.random.normal(size=shape) + for i in range(samples): + vec[:, i] = vec[:, i]/norm(vec[:, i]) + else: + shape = (dim) + vec = np.random.normal(size=shape) + vec = vec/norm(vec) + return vec + + + +import mpi4py +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI + + +cdef class FakeComm_class(MPI.Comm): + cdef: + int rank_, size_ + public dict values + + def setRankSize(self, rank, size): + self.rank_ = rank + self.size_ = size + + property size: + """number of processes in communicator""" + def __get__(self): + return self.size_ + + property rank: + """rank of this process in communicator""" + def __get__(self): + return self.rank_ + + def Barrier(self): + pass + + def allreduce(self, v, *args, **kwargs): + return v + + +def FakeComm(rank, size): + c = FakeComm_class() + c.setRankSize(rank, size) + c.values = {} + return c diff --git a/base/PyNucleus_base/utilsFem.py b/base/PyNucleus_base/utilsFem.py new file mode 100644 index 0000000..8f274bb --- /dev/null +++ b/base/PyNucleus_base/utilsFem.py @@ -0,0 +1,1437 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. 
Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from time import time +import numpy as np +import logging +import os +import atexit +import sys +import traceback +import re +import argparse +import mpi4py +mpi4py.rc.initialize = False +from mpi4py import MPI +from collections import OrderedDict +from copy import deepcopy +from . myTypes import INDEX, REAL +from . performanceLogger import PLogger, LoggingTimer, Timer +from . blas import uninitialized, uninitialized_like + +_syncDefault = False + + +def setSyncDefault(sync): + global _syncDefault + _syncDefault = sync + + +class TimerManager: + def __init__(self, logger, comm=None, print_rank=0, prefix='', myPLogger=None, memoryProfiling=False): + self.logger = logger + self.comm = comm + self.print_rank = print_rank + self.prefix = prefix + self.memoryProfiling = memoryProfiling + if self.memoryProfiling: + from psutil import Process + self.process = Process(os.getpid()) + else: + self.process = None + if myPLogger is None: + self.PLogger = PLogger(self.process) + self.isSubManager = False + self.totalTimer = Timer('total', self.PLogger, forceMemRegionOff=True) + self.totalTimer.__enter__() + else: + self.isSubManager = True + self.PLogger = myPLogger + + def getTimer(self, FinalMessage=None, StartMessage=None, + level=logging.INFO, overrideComm=None, sync=_syncDefault): + if overrideComm is None: + comm = self.comm + else: + comm = overrideComm + if comm is None or comm.rank == self.print_rank: + t = LoggingTimer(self.logger, level, FinalMessage, self.PLogger) + else: + t = Timer(FinalMessage, self.PLogger) + return t + + def __call__(self, *args, **kwargs): + return self.getTimer(*args, **kwargs) + + def setOutputGroup(self, rank, oG): + + def mergeOrdered(a_list, b_list): + keys = [] + while (len(a_list)>0) and (len(b_list)>0): + if a_list[0] in b_list: + if a_list[0] == b_list[0]: + keys.append(a_list[0]) + a_list.pop(0) + b_list.pop(0) + else: + keys.append(b_list[0]) + b_list.pop(0) + elif b_list[0] in a_list: + keys.append(a_list[0]) + a_list.pop(0) + else: + keys.append(a_list[0]) + a_list.pop(0) + return keys + + if not self.isSubManager: + self.totalTimer.__exit__(None, None, None) + data = self.PLogger.values + data2 = OrderedDict() + for key in data.keys(): + val = data[key] + data2[key] = (len(val), np.min(val), np.mean(val), np.median(val), np.max(val)) + data = data2 + if self.comm is not None: + data = self.comm.gather(data, root=rank) + else: + data = [data] + if self.comm is None or self.comm.rank == rank: + keys = list(data[0].keys()) + for i in range(1, len(data)): + keys = mergeOrdered(keys, list(data[i].keys())) + pData = {} + for i in range(len(data)): + for key in keys: + d = data[i].get(key, (0, np.inf, 0., 0., -np.inf)) + try: + pData[key].append(d) + except KeyError: + pData[key] = [d] + for key in pData: + oG.add(key, pData[key]) + + def getSubManager(self, logger): + return TimerManager(logger, self.comm, self.print_rank, self.prefix, self.PLogger) + + +def getLoggingTimer(logger, comm=None, print_rank=0, prefix='', rootOutput=False): + def getTimer(FinalMessage='', StartMessage='', + level=logging.INFO, overrideComm=None, sync=_syncDefault): + from . 
performanceLogger import FakePLogger + # return Timer(prefix=prefix, FinalMessage=FinalMessage, StartMessage=StartMessage, + # logger=logger, level=level, comm=comm, overrideComm=overrideComm, print_rank=print_rank, sync=sync) + if StartMessage != '': + StartMessage = prefix+StartMessage + return LoggingTimer(logger, level, prefix+FinalMessage, FakePLogger(), StartMessage=StartMessage, + comm=comm, sync=sync) + return getTimer + + +display_available = ("DISPLAY" in os.environ and + "SSH_CONNECTION" not in os.environ) + + +def computeErrors(levels, solutions, norm, timeVals=None): + assert len(levels) == len(solutions) + if timeVals is not None: + for lvlNo in range(len(levels)): + assert solutions[lvlNo].shape[0] == len(timeVals[lvlNo]) + for lvlNo in range(1, len(levels)): + assert solutions[lvlNo].shape[1] == levels[lvlNo]['P'].shape[0] + assert solutions[lvlNo-1].shape[1] == levels[lvlNo]['P'].shape[1] + + errors = [] + + uFine = solutions[-1] + for lvlNo in range(len(levels)-1): + u = solutions[lvlNo] + for j in range(lvlNo+1, len(levels)): + u2 = uninitialized((u.shape[0], levels[j]['P'].shape[0]), dtype=REAL) + for k in range(u.shape[0]): + u2[k, :] = levels[j]['P']*u[k, :] + u = u2 + if timeVals is not None: + # linear interpolation in time + uNew = uninitialized_like(uFine) + uNew[0, :] = u[0, :] + uNew[-1, :] = u[-1, :] + for k in range(1, uFine.shape[0]-1): + t = timeVals[-1][k] + j = 0 + while timeVals[lvlNo][j+1] < t: + j += 1 + t0 = timeVals[lvlNo][j] + t1 = timeVals[lvlNo][j+1] + assert t0 <= t <= t1 + uNew[k, :] = (t1-t)/(t1-t0)*u[j, :] + (t-t0)/(t1-t0)*u[j+1, :] + u = uNew + errors.append(norm(u-uFine)) + return errors + + +def roc(idx, val, FillUp=False, exp=False): + "Calculates the rate of convergence." + idx, val = np.atleast_2d(idx), np.atleast_2d(val) + if idx.shape[0] == 1: + idx = idx.transpose() + if idx.shape[0] != val.shape[0]: + val = val.transpose() + if idx.shape[0] != val.shape[0]: + raise Exception('Arrays of sizes {} and {} not compatible.'.format(idx.shape[0], val.shape[0])) + if exp: + rate = np.log(val[0:-1, :]/val[1:, :])/(idx[0:-1, :]-idx[1:, :]) + else: + rate = np.log(val[0:-1, :]/val[1:, :])/np.log(idx[0:-1, :]/idx[1:, :]) + if FillUp: + return np.vstack([rate, [np.nan]]) + else: + return rate + + +class exitHandler(object): + def __init__(self, comm): + self.comm = comm + self.exit_code = None + self.exception = None + self.exc_type = None + self._orig_exit = sys.exit + sys.exit = self.exit + sys.excepthook = self.exc_handler + atexit.register(self.atExitHandler) + + def exit(self, code=0): + self.exit_code = code + self._orig_exit(code) + + def exc_handler(self, exc_type, exc, *args): + self.exc_type = exc_type + self.exception = exc + + def atExitHandler(self): + if self.exit_code is not None and self.exit_code != 0: + logging.error("death by sys.exit(%d)" % self.exit_code) + self.comm.Abort(self.exit_code) + elif self.exception is not None: + lines = traceback.format_exception(self.exc_type, self.exception, + tb=self.exception.__traceback__) + msg = ''.join(['{}: {}'.format(self.comm.rank, l) for l in lines]) + logging.error('\n'+msg) + self.comm.Abort(1234) + + +def saveDictToHDF5(params, f, ignore=set()): + import h5py + for key, val in params.items(): + if key in ignore: + continue + if isinstance(val, dict): + g = f.create_group(key) + saveDictToHDF5(val, g) + elif isinstance(val, np.ndarray): + f.create_dataset(key, data=val) + elif isinstance(val, list): + try: + if isinstance(val[0], list) and isinstance(val[0][0], (int, float, INDEX, 
REAL)): + raise ValueError() + f.create_dataset(key, data=np.array(val)) + except: + if isinstance(val[0], list) and isinstance(val[0][0], (int, float, INDEX, REAL)): + g = f.create_group(key) + g.attrs['type'] = 'compressedList' + l = 0 + for i in range(len(val)): + l += len(val[i]) + indptr = uninitialized((len(val)+1), dtype=INDEX) + if isinstance(val[0][0], (int, INDEX)): + data = uninitialized((l), dtype=INDEX) + else: + data = uninitialized((l), dtype=REAL) + l = 0 + for i in range(len(val)): + indptr[i] = l + data[l:l+len(val[i])] = val[i] + l += len(val[i]) + indptr[-1] = l + g.create_dataset('indptr', data=indptr) + g.create_dataset('data', data=data) + elif isinstance(val[0], str): + f.create_dataset(key, data=np.array(val, dtype=np.string_)) + else: + g = f.create_group(key) + g.attrs['type'] = 'list' + for k in range(len(val)): + g.attrs[str(k)] = val[k] + elif val is None: + try: + f.attrs[key] = h5py.Empty(np.dtype("f")) + except AttributeError: + print('Failed to write \'{}\' because h5py is too old.'.format(key)) + elif hasattr(val, 'HDF5write') and callable(val.HDF5write): + g = f.create_group(key) + val.HDF5write(g) + elif hasattr(val, 'toarray') and callable(val.toarray): + f.create_dataset(key, data=val.toarray()) + else: + try: + f.attrs[key] = val + except: + print('Failed to write \'{}\''.format(key)) + f.attrs[key] = str(val) + + +def loadDictFromHDF5(f): + import h5py + from . linear_operators import LinearOperator + from PyNucleus.fem.DoFMaps import DoFMap + params = {} + for key in f.attrs: + if isinstance(f.attrs[key], h5py.Empty): + params[key] = None + else: + params[key] = f.attrs[key] + for key in f: + if isinstance(f[key], h5py.Group): + if 'type' in f[key].attrs: + if f[key].attrs['type'] == 'list': + l = [] + for k in range(len(f[key].attrs)-1): + l.append(f[key].attrs[str(k)]) + params[key] = l + elif f[key].attrs['type'] == 'compressedList': + l = [] + indptr = np.array(f[key]['indptr'], dtype=INDEX) + if isinstance(f[key]['data'], (int, INDEX)): + data = np.array(f[key]['data'], dtype=INDEX) + else: + data = np.array(f[key]['data'], dtype=REAL) + for i in range(len(indptr)-1): + l.append(data[indptr[i]:indptr[i+1]].tolist()) + params[key] = l + elif f[key].attrs['type'] == 'series': + d = loadDictFromHDF5(f[key]) + grp = seriesOutputGroup(key) + grp.fromDict(d) + params[key] = grp + elif f[key].attrs['type'] == 'DoFMap': + params[key] = DoFMap.HDF5read(f[key]) + else: + params[key] = LinearOperator.HDF5read(f[key]) + elif 'vertices' in f[key] and 'cells' in f[key]: + from PyNucleus.fem import meshNd + params[key] = meshNd.HDF5read(f[key]) + else: + params[key] = loadDictFromHDF5(f[key]) + else: + params[key] = np.array(f[key]) + try: + l = [] + for i in range(len(params[key])): + l.append(params[key][i].decode('utf-8')) + params[key] = l + except: + pass + return params + + +def processDictForYaml(params): + from PyNucleus.fem import function + paramsNew = {} + for key in params: + if isinstance(params[key], dict): + paramsNew[key] = processDictForYaml(params[key]) + elif isinstance(params[key], REAL): + paramsNew[key] = float(params[key]) + elif isinstance(params[key], np.ndarray): + if params[key].dtype == REAL: + if params[key].ndim == 1: + paramsNew[key] = params[key].tolist() + for i in range(len(paramsNew[key])): + paramsNew[key][i] = float(paramsNew[key][i]) + elif params[key].ndim == 2: + paramsNew[key] = params[key].tolist() + for i in range(len(paramsNew[key])): + for j in range(len(paramsNew[key][i])): + paramsNew[key][i][j] = 
float(paramsNew[key][i][j])
+                else:
+                    raise NotImplementedError()
+            else:
+                paramsNew[key] = params[key].tolist()
+        elif isinstance(params[key], list):
+            paramsNew[key] = params[key]
+            for i in range(len(paramsNew[key])):
+                if isinstance(paramsNew[key][i], REAL):
+                    paramsNew[key][i] = float(paramsNew[key][i])
+        elif isinstance(params[key], function):
+            paramsNew[key] = str(params[key])
+        else:
+            paramsNew[key] = params[key]
+    return paramsNew
+
+
+def updateFromDefaults(params, defaults):
+    for key in defaults:
+        if key not in params:
+            params[key] = defaults[key]
+        elif isinstance(defaults[key], dict):
+            updateFromDefaults(params[key], defaults[key])
+
+
+KEY_VAL_FORMAT = '{:<54}{}'
+
+
+def getMPIinfo():
+    from sys import modules
+    if 'mpi4py.MPI' in modules:
+        from textwrap import dedent
+        import mpi4py
+        mpi4py.rc.initialize = False
+        from mpi4py import MPI
+        if not MPI.Is_initialized():
+            return ''
+        t = {MPI.THREAD_SINGLE: 'single',
+             MPI.THREAD_FUNNELED: 'funneled',
+             MPI.THREAD_SERIALIZED: 'serialized',
+             MPI.THREAD_MULTIPLE: 'multiple'}
+        hosts = MPI.COMM_WORLD.gather(MPI.Get_processor_name())
+        if MPI.COMM_WORLD.rank == 0:
+            hosts = ','.join(set(hosts))
+        s = ['{}'.format(MPI.Get_library_version()[:-1])]
+        for label, value in [('MPI standard supported:', MPI.Get_version()),
+                             ('Vendor:', MPI.get_vendor()),
+                             ('Level of thread support:', t[MPI.Query_thread()]),
+                             ('Is threaded:', MPI.Is_thread_main()),
+                             ('Threads requested:', mpi4py.rc.threads),
+                             ('Thread level requested:', mpi4py.rc.thread_level),
+                             ('Hosts:', hosts),
+                             ('Communicator size:', MPI.COMM_WORLD.size)]:
+            s.append(KEY_VAL_FORMAT.format(label, value))
+        return '\n'.join(s)
+    else:
+        return ''
+
+
+def getEnvVariables(envVars=[('OMP_NUM_THREADS', True)]):
+    from os import environ
+    s = []
+    for var, printNotSet in envVars:
+        if var in environ:
+            varVal = environ[var]
+        elif printNotSet:
+            varVal = 'not set'
+        else:
+            continue
+        s.append(KEY_VAL_FORMAT.format(var+':', varVal))
+    return '\n'.join(s)
+
+
+def getSystemInfo(argv=None, envVars=[('OMP_NUM_THREADS', True)]):
+    from sys import executable
+    s = '\n'
+    if argv is not None:
+        s += KEY_VAL_FORMAT.format('Running:', executable + ' ' + ' '.join(argv)) + '\n'
+    else:
+        s += KEY_VAL_FORMAT.format('Running:', executable) + '\n'
+    import mpi4py
+    mpi4py.rc.initialize = False
+    from mpi4py import MPI
+    if MPI.Is_initialized():
+        s += getMPIinfo()+'\n'
+    s += getEnvVariables(envVars)+'\n'
+    import pkg_resources
+    from PyNucleus import subpackages
+    versions = {}
+    for pkg in ['numpy', 'scipy', 'mpi4py', 'cython']:
+        version = pkg_resources.get_distribution(pkg).version
+        try:
+            versions[version].append(pkg)
+        except KeyError:
+            versions[version] = [pkg]
+    for version in versions:
+        s += KEY_VAL_FORMAT.format(','.join(versions[version])+':', version)+'\n'
+    versions = {}
+    for pkg in sorted(subpackages.keys()):
+        version = pkg_resources.get_distribution('PyNucleus_'+pkg).version
+        try:
+            versions[version].append(pkg)
+        except KeyError:
+            versions[version] = [pkg]
+    for version in versions:
+        s += KEY_VAL_FORMAT.format('PyNucleus_'+(','.join(versions[version]))+':', version)+'\n'
+    return s
+
+
+class MPIFileHandler(logging.Handler):
+    """
+    A handler class which writes formatted logging records to disk files.
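+    Records are written through a shared MPI file pointer with atomicity
+    enabled, so output from all ranks is serialized into a single file.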
+ """ + def __init__(self, filename, comm, mode=MPI.MODE_WRONLY | MPI.MODE_CREATE): + from pathlib import Path + # filename = os.fspath(filename) + # keep the absolute path, otherwise derived classes which use this + # may come a cropper when the current directory changes + self.baseFilename = os.path.abspath(filename) + if Path(self.baseFilename).exists() and comm.rank == 0: + from os import remove + remove(self.baseFilename) + self.mpiFile = MPI.File.Open(comm, self.baseFilename, mode) + self.mpiFile.Set_atomicity(True) + logging.Handler.__init__(self) + + def emit(self, record): + try: + msg = self.format(record)+'\n' + mv = memoryview(bytes(msg, encoding='utf-8')) + self.mpiFile.Write_shared((mv, len(mv), MPI.BYTE)) + self.flush() + except Exception: + self.handleError(record) + + def sync(self): + self.mpiFile.Sync() + + def __repr__(self): + level = logging.getLevelName(self.level) + return '<%s %s (%s)>' % (self.__class__.__name__, self.baseFilename, level) + + def close(self): + """ + Closes the stream. + """ + self.acquire() + try: + self.mpiFile.Close() + finally: + self.release() + + +def columns(lines, returnColWidth=False, colWidth=0): + if colWidth == 0: + for l, _, _ in lines: + colWidth = max(len(l), colWidth) + s = [] + for l, f, v in lines: + if isinstance(f, str): + lf = '{:<'+str(colWidth+2)+'}'+f + s.append(lf.format(l+':', v)) + else: + lf = '{:<'+str(colWidth+2)+'}'+'{}' + s.append(lf.format(l+':', f(v))) + s = '\n'.join(s) + if returnColWidth: + return s, colWidth + else: + return s + + +class outputParam: + def __init__(self, label, value, format=None, aTol=None, rTol=None, tested=False): + self.label = label + if format is None: + if isinstance(value, bool): + format = '{}' + elif isinstance(value, (float, REAL)): + format = '{:.3}' + elif isinstance(value, (int, INDEX)): + format = '{:,}' + elif isinstance(value, np.ndarray): + formatter = {'float_kind': lambda x: '{:.3}'.format(x)} + format = lambda s: np.array2string(s, formatter=formatter, max_line_width=200) + else: + format = '{}' + self.format = format + self.value = value + self.aTol = aTol + self.rTol = rTol + self.tested = tested + + +class outputGroup: + def __init__(self, aTol=None, rTol=None, tested=False): + self.entries = [] + self.tested = tested + self.aTol = aTol + self.rTol = rTol + + def add(self, label, value, format=None, aTol=None, rTol=None, tested=None): + if aTol is None: + aTol = self.aTol + if rTol is None: + rTol = self.rTol + if tested is None: + tested = self.tested + p = outputParam(label, value, format, aTol, rTol, tested) + self.entries.append(p) + + def __repr__(self): + lines = [(p.label, p.format, p.value) for p in self.entries] + return columns(lines) + + def __add__(self, other): + c = outputGroup() + from copy import deepcopy + d = deepcopy(self.entries) + d += other.entries + c.entries = d + return c + + def toDict(self, tested=False): + if not tested: + return {p.label: p.value for p in self.entries} + else: + return {p.label: p.value for p in self.entries if p.tested} + + def fromDict(self, d): + for key, value in d.items(): + self.add(key, value) + + def __getattr__(self, key): + for p in self.entries: + if p.label == key: + return p.value + raise KeyError(key) + + def diff(self, d): + result = {} + d = deepcopy(d) + for p in self.entries: + if p.tested: + if p.label in d: + aTol = p.aTol if p.aTol is not None else 1e-12 + rTol = p.rTol if p.rTol is not None else 1e-12 + if isinstance(p.value, (np.ndarray, list)): + if len(p.value) == len(d[p.label]): + if not 
np.allclose(p.value, d[p.label], + rtol=rTol, atol=aTol): + result[p.label] = (p.value, d[p.label]) + else: + result[p.label] = (p.value, d[p.label]) + elif isinstance(p.value, (int, INDEX, REAL, float)): + if not np.allclose(p.value, d[p.label], + rtol=rTol, atol=aTol): + print(p.label, p.value, d[p.label], rTol, aTol, p.rTol, p.aTol) + result[p.label] = (p.value, d[p.label]) + else: + if p.value != d[p.label]: + result[p.label] = (p.value, d[p.label]) + d.pop(p.label) + else: + result[p.label] = (p.value, 'Not available') + for key in d: + result[key] = ('Not available', d[key]) + return result + + +class timerOutputGroup(outputGroup): + def __init__(self): + super(timerOutputGroup, self).__init__() + + def __repr__(self): + lines = [] + if len(self.entries) > 0 and len(self.entries[0].value) > 1: + header = ['timer', 'numCalls', 'minCall', 'meanCall', 'maxCall', 'minSum', 'meanSum', 'medSum', 'maxSum'] + else: + header = ['timer', 'numCalls', 'minCall', 'meanCall', 'maxCall', 'sum'] + for p in self.entries: + key = p.label + data = p.value + numCalls = np.array([p[0] for p in data]) + minNumCalls = np.min(numCalls) + meanNumCalls = np.mean(numCalls) + medNumCalls = np.median(numCalls) + maxNumCalls = np.max(numCalls) + + minCall = np.min([p[1] for p in data]) + meanCall = np.sum([p[0]*p[2] for p in data])/numCalls.sum() + maxCall = np.median([p[4] for p in data]) + + sums = [p[0]*p[2] for p in data] + if len(sums) > 1: + minSum = np.min(sums) + meanSum = np.mean(sums) + medSum = np.median(sums) + maxSum = np.max(sums) + if minNumCalls != maxNumCalls: + calls = (minNumCalls, meanNumCalls, medNumCalls, maxNumCalls) + else: + calls = maxNumCalls + lines.append((key, calls, minCall, meanCall, maxCall, minSum, meanSum, medSum, maxSum)) + else: + lines.append((key, meanNumCalls, minCall, meanCall, maxCall, sums[0])) + from tabulate import tabulate + return tabulate(lines, headers=header) + + +class seriesOutputGroup: + def __init__(self, name, aTol=None, rTol=None, tested=False): + self.name = name + self.aTol = aTol + self.rTol = rTol + self.tested = tested + self.groups = {} + + def addGroup(self, label): + label = str(label) + if label in self.groups: + group = self.groups[label] + else: + group = outputGroup(aTol=self.aTol, rTol=self.rTol, tested=self.tested) + self.groups[label] = group + return group + + def get(self, keyName, valueNames=[], sortBy=None, reverse=False): + if sortBy is None: + sortBy = keyName + if not isinstance(valueNames, (list, tuple)): + valueNames = [valueNames] + keys = [] + values = {valueName: [] for valueName in valueNames} + sortKeys = [] + for label in sorted(self.groups): + try: + key = getattr(self.groups[label], keyName) + sortKey = getattr(self.groups[label], sortBy) + v = {} + for valueName in valueNames: + v[valueName] = getattr(self.groups[label], valueName) + keys.append(key) + for valueName in valueNames: + values[valueName].append(v[valueName]) + sortKeys.append(sortKey) + except KeyError: + pass + idx = np.argsort(sortKeys) + if reverse: + idx = idx[::-1] + keys = np.array(keys)[idx] + for valueName in valueNames: + values[valueName] = np.array(values[valueName])[idx] + if len(valueNames) > 0: + return keys, tuple([values[valueName] for valueName in valueNames]) + else: + return keys + + def getPair(self, keyName, valueName, sortBy=None, reverse=False): + if sortBy is None: + sortBy = keyName + keys = [] + values = [] + sortKeys = [] + for label in sorted(self.groups): + try: + key = getattr(self.groups[label], keyName) + value = 
getattr(self.groups[label], valueName) + sortKey = getattr(self.groups[label], sortBy) + keys.append(key) + values.append(value) + sortKeys.append(sortKey) + except KeyError: + pass + idx = np.argsort(sortKeys) + if reverse: + idx = idx[::-1] + keys = np.array(keys)[idx] + values = np.array(values)[idx] + return keys, values + + def roc(self, keyName, valueName, reverse=False): + keys, values = self.get(keyName, [valueName], reverse=reverse) + return roc(keys, values).flatten() + + def toDict(self, tested=False): + d = {'type': 'series'} + for label in self.groups: + d[label] = self.groups[label].toDict(tested) + return d + + def fromDict(self, d): + for label in d: + if label != 'type': + group = self.addGroup(label) + group.fromDict(d[label]) + + def getTable(self, keyName, valueNames=[], reverse=False): + rocs = [] + if isinstance(valueNames, list): + assert len(valueNames) > 0 + newValueNames = [] + for k in range(len(valueNames)): + if valueNames[k][:3] == 'roc': + rocs.append(('RoC '+valueNames[k][3:], + np.concatenate(([None], self.roc(keyName, valueNames[k][3:], reverse=reverse))), + k)) + else: + newValueNames.append(valueNames[k]) + valueNames = newValueNames + keys, values = self.get(keyName, valueNames, reverse=reverse) + values = list(values) + for label, value, pos in rocs: + valueNames.insert(pos, label) + values.insert(pos, value) + header = [keyName]+valueNames + lines = np.vstack((keys, *values)).T + from tabulate import tabulate + return tabulate(lines, headers=header) + + def plot(self, keyName, valueName, bnd=None, **kwargs): + import matplotlib.pyplot as plt + key, (value, ) = self.get(keyName, valueName) + plt.plot(key, value, **kwargs) + if bnd is not None: + assert isinstance(bnd, dict) + exponent = bnd.pop('exponent') + pos = bnd.pop('pos', None) + y = key**exponent + if pos is None: + pos = value.argmin() + y *= value[pos]/y[pos] + plt.plot(key, y, **bnd) + + def diff(self, d): + result = {} + for label in self.groups: + p = self.groups[label].diff(d[label]) + if len(p) > 0: + result[label] = p + return result + + +class driverArgGroup: + def __init__(self, parent, group): + self.parent = parent + self.group = group + + def add(self, *args, **kwargs): + if self.parent is not None: + kwargs['group'] = self.group + self.parent.add(*args, **kwargs) + + +class driver: + def __init__(self, comm=None, setCommExitHandler=True, masterRank=0): + self.comm = comm + self._identifier = '' + self.processHook = [] + self.masterRank = masterRank + self.isMaster = (self.comm is None or self.comm.rank == self.masterRank) + self.argGroups = {} + self.outputGroups = {} + self._logger = None + self._timer = None + self._figures = {} + self._display_available = None + if self.comm is not None and setCommExitHandler: + exitHandler(self.comm) + if self.isMaster: + # self.parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter) + from os import environ + width = int(environ.get('COLUMNS', 200)) + self.parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.ArgumentDefaultsHelpFormatter(prog, max_help_position=50, width=width)) + self.mainGroup = self.parser.add_argument_group('main') + io = self.addGroup('input/output') + io.add('disableHeader', False, help='Disable verbose header') + io.add('displayConfig', False, help='Display configuration') + io.add('displayRanks', False, help='Display MPI ranks in log') + io.add('disableFileLog', False, help='Disable logging to file') + io.add('showTimers', True, help='Display timers')
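+        # memory reporting, test mode, and config/output/cache file options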
io.add('showMemory', False, help='Show memory info in timers') + io.add('test', False, help='Run in test mode') + io.add('yamlInput', '', help='YAML config file') + io.add('hdf5Input', '', help='HDF5 config file') + io.add('yamlOutput', '', help='YAML output file') + io.add('hdf5Output', '', help='HDF5 output file') + io.add('testCache', '', help='YAML cache file') + io.add('overwriteCache', False, help='Overwrite the test cache file') + + def setIdentifier(self, identifier): + self._identifier = identifier + + @property + def identifier(self): + return self._identifier + + def setLogFile(self, filename=None): + if filename is None: + from pathlib import Path as path + logs = path('logs') + assert self._identifier != '' + filename = logs/(self._identifier+'.log') + filename.parent.mkdir(exist_ok=True, parents=True) + if self.comm is not None and self.comm.size > 1: + fileHandler = MPIFileHandler(filename, self.comm) + else: + fileHandler = logging.FileHandler(filename, 'w') + logging.getLogger().addHandler(fileHandler) + fileHandler.setFormatter(logging.Formatter('{asctime} {name:40} {message}', + style='{', + datefmt="%Y-%m-%d %H:%M:%S")) + + def addGroup(self, name): + if name in self.argGroups: + return self.argGroups[name] + if self.isMaster: + argGroup = driverArgGroup(self, self.parser.add_argument_group(name)) + self.argGroups[name] = argGroup + return argGroup + else: + return driverArgGroup(None, None) + + def add(self, name, defaultValue=None, acceptedValues=[], help=' ', argInterpreter=None, group=None): + if self.isMaster: + if group is None: + group = self.mainGroup + if len(acceptedValues) > 0: + if defaultValue is None: + defaultValue = acceptedValues[0] + else: + if defaultValue not in acceptedValues: + acceptedValues.insert(0, defaultValue) + else: + acceptedValues = None + if isinstance(defaultValue, bool): + if defaultValue is True: + action = 'store_false' + flagname = 'no-'+name + elif defaultValue is False: + action = 'store_true' + flagname = name + if len(flagname) == 1: + flagname = ('-'+flagname, '--'+flagname) + else: + flagname = ('--'+flagname, ) + group.add_argument(*flagname, + action=action, + help=help, + dest=name) + else: + if acceptedValues is not None: + types = [a for a in acceptedValues if type(a) == type] + + if len(types) > 0 and argInterpreter is None: + acceptedValues2 = [a for a in acceptedValues if type(a) != type] + + def argInterpreter(s): + from ast import literal_eval + if s in acceptedValues2: + return s + for t in types: + try: + x = literal_eval(s) + if isinstance(x, t): + return x + except Exception as e: + print(e) + raise argparse.ArgumentTypeError() + + acceptedValues = None + + if argInterpreter is None: + argInterpreter = type(defaultValue) + if len(name) == 1: + name = ('-'+name, '--'+name) + else: + name = ('--'+name, ) + group.add_argument(*name, + default=defaultValue, + type=argInterpreter, + choices=acceptedValues, + help=help) + + def addPositional(self, name, nargs=1, group=None): + if self.isMaster: + if group is None: + group = self.mainGroup + group.add_argument(name, nargs=nargs) + + + def addToProcessHook(self, fun): + self.processHook.append(fun) + + def process(self, override={}): + doTerminate = False + if self.isMaster: + if 'plots' in self.argGroups: + io = self.addGroup('input/output') + io.add('plotFolder', '', help='folder for saving plots') + io.add('plotFormat', acceptedValues=['pdf', 'png', 'jpeg', 'eps', 'ps', 'svg']) + try: + args, unknown = self.parser.parse_known_args() + except SystemExit: + doTerminate 
= True + if self.comm: + doTerminate = self.comm.bcast(doTerminate, root=0) + if doTerminate: + exit(0) + if self.isMaster: + if len(unknown) > 0: + self.logger.warning('Unknown args: {}'.format(unknown)) + params = vars(args) + if params['yamlInput'] != '': + import yaml + yaml_filename = params['yamlInput'] + conf = yaml.load(open(yaml_filename, 'r'), Loader=yaml.FullLoader) + params.update(conf) + if params['hdf5Input'] != '': + import h5py + hdf5_filename = params['hdf5Input'] + f = h5py.File(hdf5_filename, 'r') + conf = loadDictFromHDF5(f) + f.close() + params.update(conf) + if params['test']: + params['displayConfig'] = True + if self.comm: + params['mpiGlobalCommSize'] = self.comm.size + else: + params['mpiGlobalCommSize'] = 1 + params.update(override) + else: + params = {} + if self.comm: + params = self.comm.bcast(params, root=0) + self.params = params + self._timer = TimerManager(self.logger, comm=self.comm, memoryProfiling=params['showMemory']) + + for fun in self.processHook: + fun(self.params) + + if self._identifier != '' and not self.params['disableFileLog']: + self.setLogFile() + + if params['displayRanks'] and self.comm is not None and self.comm.size > 1: + prefix = '{}: '.format(self.comm.rank) + formatter = logging.Formatter(fmt=prefix+'{asctime} {name:40} {message}', + style='{', + datefmt="%Y-%m-%d %H:%M:%S") + logging.getLogger().setLevel(logging.INFO) + for handler in logging.getLogger().handlers: + handler.setFormatter(formatter) + if params['displayConfig']: + from pprint import pformat + self.logger.info('\n'+pformat(params)) + if not params['disableHeader']: + from sys import argv + sysInfo = getSystemInfo(argv) + if self.isMaster: + self.logger.info(sysInfo) + return params + + def set(self, key, value): + if hasattr(self, 'params'): + self.params[key] = value + else: + raise KeyError + + def getLogger(self): + if self._logger is None: + import logging + if self.isMaster: + level = logging.INFO + else: + level = logging.WARNING + logging.basicConfig(level=level, + format='{asctime} {name:40} {message}', + style='{', + datefmt="%Y-%m-%d %H:%M:%S") + self._logger = logging.getLogger('__main__') + return self._logger + + @property + def logger(self): + return self.getLogger() + + def getTimer(self): + return self._timer + + @property + def timer(self): + return self.getTimer() + + def addOutputGroup(self, name, group=None, aTol=None, rTol=None, tested=False): + if name in self.outputGroups: + group = self.outputGroups[name] + else: + if group is None: + group = outputGroup(tested=tested, aTol=aTol, rTol=rTol) + self.outputGroups[name] = group + assert group.tested == tested + assert group.aTol == aTol + assert group.rTol == rTol + return group + + def addOutputSeries(self, name, aTol=None, rTol=None, tested=False): + group = seriesOutputGroup(name, aTol, rTol, tested) + group = self.addOutputGroup(name, group, aTol, rTol, tested) + return group + + def outputToDict(self, tested=False): + d = {} + for group in self.outputGroups: + d[group] = self.outputGroups[group].toDict(tested=tested) + return d + + def timerReport(self): + t = self.addOutputGroup('Timers', timerOutputGroup()) + self.timer.setOutputGroup(self.masterRank, t) + self.logger.info('\n'+str(t)) + + def saveOutput(self): + if self.isMaster: + failAfterOutput = False + if self.params['testCache'] != '': + try: + import yaml + cache = yaml.load(open(self.params['testCache'], 'r'), Loader=yaml.FullLoader) + diff = {} + for name in self.outputGroups: + diff[name] = 
self.outputGroups[name].diff(cache.get(name, {})) + if len(diff[name]) == 0: + diff.pop(name) + from pprint import pformat + if len(diff) > 0: + if self.params['overwriteCache']: + failAfterOutput = True + self.params['yamlOutput'] = self.params['testCache'] + self.logger.info('No match\n' + str(pformat(diff))) + else: + assert False, 'No match\n' + str(pformat(diff)) + else: + self.logger.info('\nAll matched') + except FileNotFoundError: + self.params['yamlOutput'] = self.params['testCache'] + failAfterOutput = True + + if self.params['hdf5Output'] == 'auto' and self._identifier != '': + self.params['hdf5Output'] = self._identifier + '.hdf5' + if self.params['yamlOutput'] == 'auto' and self._identifier != '': + self.params['yamlOutput'] = self._identifier + '.yaml' + + if self.params['hdf5Output'] != '' or self.params['yamlOutput'] != '': + d = self.outputToDict(tested=self.params['test']) + if not self.params['test']: + d.update(self.params) + + from pathlib import Path + if self.params['hdf5Output'] != '': + import h5py + self.logger.info('Saving to {}'.format(self.params['hdf5Output'])) + Path(self.params['hdf5Output']).parent.mkdir(exist_ok=True, parents=True) + f = h5py.File(self.params['hdf5Output'], 'w') + saveDictToHDF5(d, f) + f.close() + if self.params['yamlOutput'] != '': + import yaml + self.logger.info('Saving to {}'.format(self.params['yamlOutput'])) + Path(self.params['yamlOutput']).parent.mkdir(exist_ok=True, parents=True) + d = processDictForYaml(d) + yaml.dump(d, open(self.params['yamlOutput'], 'w')) + assert not failAfterOutput, 'No cache file' + + @property + def display_available(self): + if self._display_available is None: + from os import environ + available = ("DISPLAY" in environ and + "SSH_CONNECTION" not in environ and + not self.params['skipPlots']) + if available: + try: + import matplotlib.pyplot as plt + except ImportError: + self.logger.warning('No Matplotlib') + available = False + self._display_available = available + return self._display_available + + def declareFigure(self, name, description='', default=True): + if self.isMaster: + addSkipOption = 'plots' not in self.argGroups + plots = self.addGroup('plots') + if addSkipOption: + plots.add('skipPlots', False, help='Do not plot anything') + plots.add('plot_'+name, default, help=description) + self._figures[name] = None + + def willPlot(self, name): + key = 'plot_'+name + return key in self.params and self.params[key] and (self.display_available or self.params['plotFolder'] != '') + + def startPlot(self, name, **kwargs): + if self.isMaster: + if (('plot_'+name not in self.params or self.params['plot_'+name]) and (self.display_available or self.params['plotFolder'] != '')): + import matplotlib.pyplot as plt + from . plot_utils import latexOptions
MPLconf = latexOptions(**kwargs) + plt.rcParams.update(MPLconf) + if (name not in self._figures) or (self._figures[name] is None): + fig = plt.figure() + self._figures[name] = fig + plt.get_current_fig_manager().set_window_title(name) + else: + plt.figure(self._figures[name].number) + return self._figures[name] + elif 'plot_'+name in self.params: + del self._figures[name] + return None + else: + return None + + def finishPlots(self, **kwargs): + newFigures = {} + for name in self._figures: + if self._figures[name] is not None: + newFigures[name] = self._figures[name] + self._figures = newFigures + if len(self._figures) > 0: + if self.params['plotFolder'] != '': + for name in self._figures: + if self._figures[name] is not None: + if self._identifier != '': + filename = self._identifier+'_'+name + filename = filename.replace('_', '-') + filename = filename.replace(' ', '-') + filename = filename.replace('=', '') + else: + filename = name + self._figures[name].tight_layout() + from pathlib import Path + Path(self.params['plotFolder']+'/'+filename+'.'+self.params['plotFormat']).parent.mkdir(exist_ok=True, parents=True) + self._figures[name].savefig(self.params['plotFolder']+'/'+filename+'.'+self.params['plotFormat'], bbox_inches='tight', **kwargs) + else: + self.logger.warning('Figure \'{}\' not created'.format(name)) + else: + import matplotlib.pyplot as plt + plt.show() + + def finish(self, **kwargs): + t = self.addOutputGroup('Timers', timerOutputGroup()) + self.timer.setOutputGroup(self.masterRank, t) + if self.params['showTimers'] and self.isMaster: + self.logger.info('\n'+str(t)) + self.saveOutput() + self.finishPlots(**kwargs) + + def __getattr__(self, name): + if 'params' in self.__dict__ and name in self.params: + return self.params[name] + raise AttributeError(name) + + +def diffDict(d1, d2, aTol, relTol): + diff = {} + for key in d1: + if isinstance(d1[key], dict): + if key not in d2: + p = diffDict(d1[key], {}, aTol, relTol) + if len(p) > 0: + diff[key] = p + else: + p = diffDict(d1[key], d2[key], aTol, relTol) + if len(p) > 0: + diff[key] = p + else: + if key not in d2: + diff[key] = (d1[key], 'Not available') + else: + if isinstance(d1[key], (int, INDEX, REAL, float, np.ndarray, list)): + if not np.allclose(d1[key], d2[key], + rtol=relTol, atol=aTol): + diff[key] = (d1[key], d2[key]) + elif d1[key] != d2[key]: + diff[key] = (d1[key], d2[key]) + for key in d2: + if isinstance(d2[key], dict): + if key not in d1: + p = diffDict({}, d2[key], aTol, relTol) + if len(p) > 0: + diff[key] = p + else: + if key not in d1: + diff[key] = ('Not available', d2[key]) + return diff + + +def runDriver(path, py, python=None, timeout=600, ranks=None, cacheDir='', + overwriteCache=False, + aTol=1e-12, relTol=1e-2, extra=None): + from subprocess import Popen, PIPE, STDOUT, TimeoutExpired + import logging + import os + from pathlib import Path + logger = logging.getLogger('__main__') + if not isinstance(py, (list, tuple)): + py = [py] + autotesterOutput = Path('/home/caglusa/autotester/html') + if autotesterOutput.exists(): + plotDir = autotesterOutput/('test-plots/'+''.join(py)+'/') + else: + extra = None + if cacheDir != '': + cache = cacheDir+'/cache_' + ''.join(py) + runOutput = cacheDir+'/run_' + ''.join(py) + if ranks is not None: + cache += str(ranks) + runOutput += str(ranks) + py += ['--test', '--testCache={}'.format(cache)] + if overwriteCache: + py += ['--overwriteCache'] + else: + py += ['--test'] + if extra is not None: + plotDir.mkdir(exist_ok=True, parents=True) + py += ['--plotFolder={}'.format(plotDir), '--plotFormat=png']
else: + py += ['--skipPlots'] + assert (Path(path)/py[0]).exists(), 'Driver "{}" does not exist'.format(Path(path)/py[0]) + if ranks is not None and ranks > 1: + if python is None: + import sys + python = sys.executable + cmd = [python] + py + cmd = ['mpiexec', '--bind-to', 'none', '-n', str(ranks)]+cmd + + logger.info('Launching "{}" from "{}"'.format(' '.join(cmd), path)) + my_env = {} + for key in os.environ: + if key.find('OMPI') == -1: + my_env[key] = os.environ[key] + proc = Popen(cmd, cwd=path, + stdout=PIPE, stderr=PIPE, + universal_newlines=True, + env=my_env) + try: + stdout, stderr = proc.communicate(timeout=timeout) + except TimeoutExpired: + proc.kill() + raise + if len(stdout) > 0: + logger.info(stdout) + if len(stderr) > 0: + logger.error(stderr) + assert proc.returncode == 0, 'Return code {}'.format(proc.returncode) + else: + import sys + from unittest.mock import patch + import importlib.util + with patch.object(sys, 'argv', py): + sys.path.insert(0, path) + spec = importlib.util.spec_from_file_location("driver", str(Path(path)/py[0])) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + sys.path.remove(path) + if extra is not None: + from pytest_html import extras + for img in plotDir.glob('*.png'): + filename = img.relative_to(autotesterOutput) + filename = 'http://geminga.sandia.gov:8080/'+str(filename) + extra.append(extras.png(str(filename))) + + +class parametrizedArg: + def __init__(self, name, params=[]): + if not isinstance(params, (list, tuple)): + params = [params] + self.name = name + self.params = params + fields = [] + for p in self.params: + if p == str: + fields.append('[a-zA-Z]+') + elif p == int: + fields.append('[+-]?[0-9]+') + elif p == float: + fields.append(r'[+-]?[0-9]+\.[0-9]*') + elif p == bool: + fields.append('True|False') + else: + raise NotImplementedError() + self.regexp = re.compile(name+r'\('+','.join([r'\s*(' + f + r')\s*' for f in fields])+r'\)') + + def match(self, s): + return self.regexp.match(s) is not None + + def interpret(self, s): + m = self.regexp.match(s) + return [p(v) for v, p in zip(m.groups(), self.params)] + + +class problem: + def __init__(self, driver): + self.__values__ = {} + self.__args__ = {} + self._driver = driver + self.setDriverArgs(driver) + self._driver.addToProcessHook(self.process) + + def addParametrizedArg(self, name, params=[]): + self.__args__[name] = parametrizedArg(name, params) + + def parametrizedArg(self, name): + return self.__args__[name] + + def argInterpreter(self, parametrizedArgs, acceptedValues=[]): + from argparse import ArgumentTypeError + + def interpreter(v): + if v in acceptedValues: + return v + for p in parametrizedArgs: + if self.parametrizedArg(p).match(v): + return v + raise ArgumentTypeError() + + return interpreter + + def conversionInterpreter(self): + def interpreter(v): + try: + return int(v) + except ValueError: + pass + try: + return float(v) + except ValueError: + pass + return v + + return interpreter + + def setDriverArgs(self, driver): + pass + + def processImpl(self, params): + pass + + def process(self, params): + self.__values__.update(params) + self.processImpl(params) + self._driver.setIdentifier(self.getIdentifier(params)) + for key in self.__values__: + params[key] = self.__values__[key] + + def __setattr__(self, name, value): + if name in ('__values__', '_driver', '__args__'): + object.__setattr__(self, name, value) + else: + self.__values__[name] = value + self._driver.params[name] = value + + def
__getattr__(self, name): + if name in ('__values__', '_driver', '__args__'): + raise AttributeError() + else: + return self.__values__[name] + + def __repr__(self): + lines = [(label, '{}', e) for label, e in self.__values__.items()] + return columns(lines) + + def getIdentifier(self, params): + return '' diff --git a/base/setup.cfg b/base/setup.cfg new file mode 100644 index 0000000..31e816c --- /dev/null +++ b/base/setup.cfg @@ -0,0 +1,7 @@ + +[versioneer] +VCS = git +style = pep440 +versionfile_source = PyNucleus_base/_version.py +tag_prefix = +parentdir_prefix = \ No newline at end of file diff --git a/base/setup.py b/base/setup.py new file mode 100644 index 0000000..149f8af --- /dev/null +++ b/base/setup.py @@ -0,0 +1,153 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from shutil import copy +from pathlib import Path + +try: + import cython +except ImportError as e: + raise ImportError('PyNucleus requires \'Cython\'. Please install it.') from e + +try: + import numpy +except ImportError as e: + raise ImportError('PyNucleus requires \'numpy\'. Please install it.') from e + +try: + import scipy +except ImportError as e: + raise ImportError('PyNucleus requires \'scipy\'. Please install it.') from e + +try: + import mpi4py +except ImportError as e: + raise ImportError('PyNucleus requires \'mpi4py\'. Please install it.') from e + +try: + from PyNucleus_packageTools import package, fillTemplate +except ImportError as e: + raise ImportError('\'PyNucleus_packageTools\' needs to be installed first.') from e + +p = package('PyNucleus_base') +p.addOption('USE_BLAS', 'useBLAS', True) +p.addOption('USE_MKL', 'useMKL', False) +p.addOption('USE_CHOLMOD', 'use_cholmod', True, ['scikit-sparse']) +p.addOption('USE_PYAMG', 'use_pyamg', False, ['pyamg']) +p.addOption('MKL_LIBRARY', 'mklLibrary', 'mkl_rt') +p.addOption('USE_MKL_TRISOLVE', 'useMKL_trisolve', False) +p.addOption('FILL_UNINITIALIZED', 'fillUninitialized', True) +p.loadConfig(extra_config={'annotate': True}) + +# set up variable types +if cython.inline('return sizeof(a)', a=1) == 4: + archDetected = '32bit' +else: + archDetected = '64bit' +print('Arch detected: {}'.format(archDetected)) +if p.config['arch'] == 'detect': + p.config['arch'] = archDetected +if p.config['arch'] == '32bit': + if not (p.hash_file(p.folder+'myTypes32.pyx') == + p.hash_file(p.folder+'myTypes.pyx') and + p.hash_file(p.folder+'myTypes32.pxd') == + p.hash_file(p.folder+'myTypes.pxd') and + p.hash_file(p.folder+'myTypes32.h') == + p.hash_file(p.folder+'myTypes.h')): + print('Configuring for 32 bit system') + copy(p.folder+'myTypes32.pyx', p.folder+'myTypes.pyx') + copy(p.folder+'myTypes32.pxd', p.folder+'myTypes.pxd') + copy(p.folder+'myTypes32.h', p.folder+'myTypes.h') +elif p.config['arch'] == '64bit': + if not (p.hash_file(p.folder+'myTypes64.pyx') == + p.hash_file(p.folder+'myTypes.pyx') and + p.hash_file(p.folder+'myTypes64.pxd') == + p.hash_file(p.folder+'myTypes.pxd') and + p.hash_file(p.folder+'myTypes64.h') == + p.hash_file(p.folder+'myTypes.h')): + print('Configuring for 64 bit system') + copy(p.folder+'myTypes64.pyx', 
p.folder+'myTypes.pyx') + copy(p.folder+'myTypes64.pxd', p.folder+'myTypes.pxd') + copy(p.folder+'myTypes64.h', p.folder+'myTypes.h') +else: + raise NotImplementedError() + +p.addExtension("myTypes", + sources=[p.folder+"myTypes.pyx"]) +p.addExtension("blas", + sources=[p.folder+"blas.pyx"], + libraries=[p.config['mklLibrary']] if p.config['useMKL'] else []) +p.addExtension("performanceLogger", + sources=[p.folder+"performanceLogger.pyx"]) +p.addExtension("utilsCy", + sources=[p.folder+"utilsCy.pyx"]) + + +print('Filling templates') + +templates = ['LinearOperator_{SCALAR}.pxi', 'LinearOperator_decl_{SCALAR}.pxi', + 'LinearOperatorWrapper_{SCALAR}.pxi', 'LinearOperatorWrapper_decl_{SCALAR}.pxi', + 'DenseLinearOperator_{SCALAR}.pxi', 'DenseLinearOperator_decl_{SCALAR}.pxi', + 'CSR_LinearOperator_{SCALAR}.pxi', 'CSR_LinearOperator_decl_{SCALAR}.pxi', + 'SSS_LinearOperator_{SCALAR}.pxi', 'SSS_LinearOperator_decl_{SCALAR}.pxi', + 'DiagonalLinearOperator_{SCALAR}.pxi', 'DiagonalLinearOperator_decl_{SCALAR}.pxi', + 'IJOperator_{SCALAR}.pxi', 'IJOperator_decl_{SCALAR}.pxi'] +replacementGroups = [[('{SCALAR}', 'REAL'), + ('{SCALAR_label}', ''), + ('{SCALAR_label_lc}', ''), + ('{SCALAR_label_lc_}', '')], + [('{SCALAR}', 'COMPLEX'), + ('{SCALAR_label}', 'Complex'), + ('{SCALAR_label_lc}', 'complex'), + ('{SCALAR_label_lc_}', 'complex_'), + # for some reason, complex cannot handle += etc + ('\s([^\s]+\[[^\]]*\])\s([\*\+-])=', ' \\1 = \\1 \\2'), + ('\s([^\s]+)\s([\*\+-])=', ' \\1 = \\1 \\2')]] +fillTemplate(Path(p.folder), templates, replacementGroups) + +templates = [ + 'tupleDict_{VALUE}.pxi', 'tupleDict_decl_{VALUE}.pxi' +] +replacementGroups = [[('{VALUE}', 'INDEX'), + ('{VALUE_dtype}', 'INDEX'), + ('{VALUE_t}', 'INDEX_t'), + ('{LENGTH_dtype}', 'np.uint8'), + ('{LENGTH_t}', 'np.uint8_t')], + [('{VALUE}', 'MASK'), + ('{VALUE_dtype}', 'np.uint64'), + ('{VALUE_t}', 'np.uint64_t'), + ('{LENGTH_dtype}', 'np.uint16'), + ('{LENGTH_t}', 'np.uint16_t')]] +fillTemplate(Path(p.folder), templates, replacementGroups) + + +p.addExtension("linear_operators", + sources=[p.folder+"linear_operators.pyx"]) +p.addExtension("sparseGraph", + sources=[p.folder+"sparseGraph.pyx"]) +p.addExtension("solvers", + sources=[p.folder+"solvers.pyx"]) +p.addExtension("linalg", + sources=[p.folder+"linalg.pyx"], + libraries=[p.config['mklLibrary']] if p.config['useMKL'] else []) +p.addExtension("sparsityPattern", + sources=[p.folder+"sparsityPattern.pyx"]) +p.addExtension("convergence", + sources=[p.folder+"convergence.pyx"]) +p.addExtension("ip_norm", + sources=[p.folder+"ip_norm.pyx"]) +p.addExtension("intTuple", + sources=[p.folder+"intTuple.pyx"]) +p.addExtension("tupleDict", + sources=[p.folder+"tupleDict.pyx"]) + + +p.setup(description="Helper functions for PyNucleus.", + install_requires=['numpy', 'scipy', 'cython', 'mpi4py>=2.0.0', 'matplotlib', 'tabulate', 'h5py', 'pyyaml'], + + ) diff --git a/base/versioneer.py b/base/versioneer.py new file mode 100644 index 0000000..d9c300b --- /dev/null +++ b/base/versioneer.py @@ -0,0 +1,2116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +# Version: 0.21 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. 
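+For example, the keyword rewriting relies on an `export-subst` entry that
+`versioneer install` adds to `.gitattributes` (an illustrative sketch for
+this package's layout, not part of the upstream text):
+
+    PyNucleus_base/_version.py export-subst
+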
+ +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. 
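+
+As an illustrative sketch (values modeled on the examples above, not taken
+from this repository):
+
+    >>> import versioneer
+    >>> versioneer.get_versions()
+    {'version': '0.11+2.g1076c97.dirty',
+     'full-revisionid': '1076c978a8d3cfc70f408fe5974aa6c092c949ac',
+     'dirty': True, 'error': None, 'date': '2021-10-12T11:08:00-0600'}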
+ +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are +two common reasons why `setup.py` might not be in the root: + +* Source trees which contain multiple subprojects, such as + [Buildbot](https://github.com/buildbot/buildbot), which contains both + "master" and "slave" subprojects, each with their own `setup.py`, + `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI + distributions (and upload multiple independently-installable tarballs). +* Source trees whose main purpose is to contain a C library, but which also + provide bindings to Python (and perhaps other languages) in subdirectories. + +Versioneer will look for `.git` in parent directories, and most operations +should get the right version string. However `pip` and `setuptools` have bugs +and implementation details which frequently cause `pip install .` from a +subproject directory to fail to find a correct version string (so it usually +defaults to `0+unknown`). + +`pip install --editable .` should work correctly. `setup.py install` might +work too. + +Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in +some later version. + +[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking +this issue. The discussion in +[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the +issue from the Versioneer side in more detail. +[pip PR#3176](https://github.com/pypa/pip/pull/3176) and +[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve +pip to let Versioneer work correctly. + +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising.
+ +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . + +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer + +""" +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments + +import configparser +import errno +import json +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . 
+ """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . 
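+    # For reference, a minimal [versioneer] section (this patch's
+    # base/setup.cfg is one example):
+    #     [versioneer]
+    #     VCS = git
+    #     style = pep440
+    #     versionfile_source = PyNucleus_base/_version.py
+    #     tag_prefix =
+    #     parentdir_prefix =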
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%%d" %% (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%%d" %% pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags.
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
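+            # (Illustrative, hypothetical value: 'v1.2-3-g0123abc' is the
+            # TAG-NUM-gHEX form this regex expects; the '-dirty' suffix was
+            # already stripped off above.)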
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+
+import json
+
+version_json = '''
+%s
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    return json.loads(version_json)
+"""
+
+
+def versions_from_file(filename):
+    """Try to determine the version from _version.py if present."""
+    try:
+        with open(filename) as f:
+            contents = f.read()
+    except OSError:
+        raise NotThisMethod("unable to read _version.py")
+    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
+                   contents, re.M | re.S)
+    if not mo:
+        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
+                       contents, re.M | re.S)
+    if not mo:
+        raise NotThisMethod("no version_json in _version.py")
+    return json.loads(mo.group(1))
+
+
+def write_to_version_file(filename, versions):
+    """Write the given version number to the given _version.py file."""
+    os.unlink(filename)
+    contents = json.dumps(versions, sort_keys=True,
+                          indent=1, separators=(",", ": "))
+    with open(filename, "w") as f:
+        f.write(SHORT_VERSION_PY % contents)
+
+    print("set %s to '%s'" % (filename, versions["version"]))
+
+
+def plus_or_dot(pieces):
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_branch(pieces):
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).
+
+    Exceptions:
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver):
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment
+    is present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render(pieces, style):
+    """Render the given version pieces into the requested style."""
+    if pieces["error"]:
+        return {"version": "unknown",
+                "full-revisionid": pieces.get("long"),
+                "dirty": None,
+                "error": pieces["error"],
+                "date": None}
+
+    if not style or style == "default":
+        style = "pep440"  # the default
+
+    if style == "pep440":
+        rendered = render_pep440(pieces)
+    elif style == "pep440-branch":
+        rendered = render_pep440_branch(pieces)
+    elif style == "pep440-pre":
+        rendered = render_pep440_pre(pieces)
+    elif style == "pep440-post":
+        rendered = render_pep440_post(pieces)
+    elif style == "pep440-post-branch":
+        rendered = render_pep440_post_branch(pieces)
+    elif style == "pep440-old":
+        rendered = render_pep440_old(pieces)
+    elif style == "git-describe":
+        rendered = render_git_describe(pieces)
+    elif style == "git-describe-long":
+        rendered = render_git_describe_long(pieces)
+    else:
+        raise ValueError("unknown style '%s'" % style)
+
+    return {"version": rendered, "full-revisionid": pieces["long"],
+            "dirty": pieces["dirty"], "error": None,
+            "date": pieces.get("date")}
+
+
+class VersioneerBadRootError(Exception):
+    """The project root directory is unknown or missing key files."""
+
+
+def get_versions(verbose=False):
+    """Get the project version from whatever source is available.
+
+    Returns dict with the keys 'version', 'full-revisionid', 'dirty',
+    'error', and 'date'.
+    """
+    if "versioneer" in sys.modules:
+        # see the discussion in cmdclass.py:get_cmdclass()
+        del sys.modules["versioneer"]
+
+    root = get_root()
+    cfg = get_config_from_root(root)
+
+    assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg"
+    handlers = HANDLERS.get(cfg.VCS)
+    assert handlers, "unrecognized VCS '%s'" % cfg.VCS
+    verbose = verbose or cfg.verbose
+    assert cfg.versionfile_source is not None, \
+        "please set versioneer.versionfile_source"
+    assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix"
+
+    versionfile_abs = os.path.join(root, cfg.versionfile_source)
+
+    # extract version from first of: _version.py, VCS command (e.g. 'git
+    # describe'), parentdir.  This is meant to work for developers using a
+    # source checkout, for users of a tarball created by 'setup.py sdist',
+    # and for users of a tarball/zipball created by 'git archive' or github's
+    # download-from-tag feature or the equivalent in other VCSes.
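+    #
+    # As a sketch of this precedence (hypothetical numbers): in a developer
+    # checkout three commits past a tag 'v1.0' with uncommitted changes,
+    # the VCS path below wins and the default pep440 style renders
+    # '1.0+3.g0123abc.dirty' (assuming tag_prefix is 'v').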
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
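+    # With, e.g., versionfile_source = 'PKG/_version.py' (a hypothetical
+    # placeholder), the entry that do_vcs_install() ensures in
+    # .gitattributes reads:
+    #     PKG/_version.py export-subst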
+    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
+    return 0
+
+
+def scan_setup_py():
+    """Validate the contents of setup.py against Versioneer's expectations."""
+    found = set()
+    setters = False
+    errors = 0
+    with open("setup.py", "r") as f:
+        for line in f.readlines():
+            if "import versioneer" in line:
+                found.add("import")
+            if "versioneer.get_cmdclass()" in line:
+                found.add("cmdclass")
+            if "versioneer.get_version()" in line:
+                found.add("get_version")
+            if "versioneer.VCS" in line:
+                setters = True
+            if "versioneer.versionfile_source" in line:
+                setters = True
+    if len(found) != 3:
+        print("")
+        print("Your setup.py appears to be missing some important items")
+        print("(but I might be wrong). Please make sure it has something")
+        print("roughly like the following:")
+        print("")
+        print(" import versioneer")
+        print(" setup( version=versioneer.get_version(),")
+        print(" cmdclass=versioneer.get_cmdclass(), ...)")
+        print("")
+        errors += 1
+    if setters:
+        print("You should remove lines like 'versioneer.VCS = ' and")
+        print("'versioneer.versionfile_source = ' . This configuration")
+        print("now lives in setup.cfg, and should be removed from setup.py")
+        print("")
+        errors += 1
+    return errors
+
+
+if __name__ == "__main__":
+    cmd = sys.argv[1]
+    if cmd == "setup":
+        errors = do_setup()
+        errors += scan_setup_py()
+        if errors:
+            sys.exit(1)
diff --git a/data/matnip.png b/data/matnip.png
new file mode 100644
index 0000000000000000000000000000000000000000..8cbd0700470538c3de80fa1f181a31594fe42202
GIT binary patch
literal 27610
[base85-encoded binary payload for data/matnip.png (27610 bytes) omitted]
zX*CdhpNiQzANL|AqSF&xCTt{^eMT>Z%oev8!YmI!IZoJ|MZVP{Oxcl-bKd#Im;Q{# zsY@?^OC2||GJyASCCKkk1q=02PEhl9E(RnPd_NUikFy(3v{P*MOz(Ed&wb?@FJ4JE zMvii`X`7&Kk;B}3q2V5Zb)p186wy)~_)iO&5NCyzq3~%`kb#v*p-Qz}p)bWb-yB&M zk_t|}0wD{gizVERVq#1n9xhLZ`&ukkw8{1wi~j`RwL?lfjx1Q(s#I0950g>j_Xa6D zZugxV2d}p%9Cw!W%%P8RGx_SNQ~H|5kA)fqNdRb` z(J-}&EPu3xLD)?XPh3SXy98p^H9u`kfDn?W6rZ?B^;otKfB(IYwpti47XsTF1e2fc zaVgZ-0ndqRK{px-UqkJ6behe*0gZki27hSDDxzn{ z56m(#VzDt|4|C<@iJe)+y$Thu_yB;8%oLsZc#yp$8p~0i zIHk9z^HptH^EwwDJQPWw>zBYG$4-yu76L}0Lb%G)~;Wb$M)<>#)|Aom##Ab_}U9Ym4T}`hw)=Jq3_|}q2t>Vg|$t*U=&SAqEj3H%dsz2g5Z9j9E^D3#${)H%O z$%<-)mEDmS5|bN1M7a)8P{t9bt%z?h;(PmXK!qssEN(|?+Jm(g)*?gQgD|v6RRa}s z*}e*7|M8vN>T#T^%LkU8WcLBB9Wa-WJTZRj1Xv;2q#WoV$SePu@>$sfnf3Perq7lp zYUR^uJBp4ZO>Rja1z!SZu2W5a=UpIhv@cP-;QN(RtHav`ySlK}GvnODOF|n9 zhYuSGOX%7*Y~0BAFC}p^)2$S$m*X$4Bc+vIz4slmW^m+3TDn7Es7((p!=4J)P4XD6 z&v1_r-_wTXY-c9x2#yQ**E!YNm;FOnrGFP}h!B%fwUqWP23h?grRINvD9w-T-}M z$XuangqGrrRv?=QXGL1ISGhi&ecSM@Pc~T{NyZbxr9q;hq!qeSA&dq&FEDS&G`-U^er!GSy)6X8Vrz?ou@fK zofjoLut3mPf|Fvt7t5`V!GEkp54ed3Aitn4Unluoihq6Oll!(88~XvwKkf54aB zzD}%9F3jp#^e?5OE>hq!a2T5jQz2}Iw2A2{)lQL*n@Kj!80q=LijJ2uoCuvec{Iz- z@{L{jI&sH_vm2!?)jz+vyao7dCIo-?KaI@9QhzB-JWRjlIS(=I4t{U%RrpioKeujA zzNjjqNs0-unH_CeK~yfP3wpo?S@1jZyxvRp><4M_??JWDdOj$HF8T?=-Z|7K7XR5#b|)N4hrJIKDlIWfbIbO9?0 z5^v^Q^kL+W*Jbs2&vmwm`HngsK9SXz2R2a<+Q*~XmybQ5SIZ{Au|Ir$Lh@mf_1NLL z3FyhdTi)TV{zU9EfrLG2>rCR2{B>HcTjK%QQb9tSHhl?SR2^sElQT{#8)CvG1;118 zghK>FEWT{w>HC+$0Jz)}Sue8zGlm#!r?FC%nm^$0nvOgkoP+qJne_sDK10g;yQfq) z#JKs*=VC>?k?1fvKA)}V+(a1==o^{MnndjXO|tf)5sfp2WYG<%Ni^Fw8gj+fpw6!; zBwMa30ctc@$*>|qG|t?4u3`6T7l{tAIXAjpw7*#lEq4C_AVp6MgPT#EP8qoO*dbed zXDWLdb(CIL?>BaFcC~$xU`gDe6*HO~0gtu>$=eC(J$9-;rd(-ByIAZ`x3hk69#dR( zgvxw|c*Q93QFW+FbboYv;erY^?F)TmOsGw*B0Zay@Td0%m=o_W3&;%`XQc^ul^xtS z=3I}zkR2Yf=WX#kKjf@XH!R_i|S zXse)7e{`w}UU3KQb-TuOb+MDycMtCAUZ2*^NgmF8E-=9m6nTN6p`mC}kSZhiCmkLk zrS!aC76onI3OY-bz_a=i@>#DauL=DHr;mGZbb5c{7B=HYshI^B zw0*7YzgC0_0MHREdwP0E|769}4V0tR77dxSbrq%)4INWVmiW#Nls|aA#Ft2qr7!JQ z7lyu!TMN-bcCf^+h;NCJZGM+Z?Rx4T&$;vK)&qi^%F}l>q3{DVNi5%(&8A^n&sVRR z1gKrTw0D)Xz5*cI;63ASK^Qst`u03X7OaC##T-+i+sr#KE;oPe)4LX$qV#_OpZSq= z`txC>T!#N7>N4VervSfL{__j=T7$C+Z^7enM-_rh&Bzf2?8cju!y_%sXQ|4@cVjz( zyq0@bz01lmG_hTj^|U-PDU>F@NR5g#1YFsthSTk)6||QA*RqG5TbCMjU59@8R0oq6 z_$u4_x@>B|WEkVp)%DME@}Kbfm+c*$=HnoYFilQa(^pFJNK2IEbG5$h=+_S+yhWf{ z-o`d*Rv=wh#ybM_UI)Z5d2q6v(p&v%_;bXZy%R|)8*bcqb@pkYRQ|~Ddi%i7xs%iq zYrqNRY;up_3Qa42z#j<{j7_|;ASei4H$`i9K}T_dx8N>)o2160Kz`nm>VNP`kl@T` zq{mFmF8<2FZkO8D@`rE|FvN%AIWf5YxD|%?xW|1nkdGlT*A(@dKFqGE{jM z%hb!p4p^1zyzg*x@4YaS$RGX$n}6c9CA0+~b-tcBwP&-)uzqMR^8Y9AxHB%6&-u*H zQVNpbFv>a}em17B)>V|-cPyD9v8H^$=L_DFbJ@Jhvt;7MlhoB_a@Jf5>NHC{%wqRZF0h>2ANt78Ir0!%NLlFUcJ#2%;Zvv4I4Tk!TRl6$rDYPFS!{@Gdj*=L z#u3WabVpG;ujUcVASq#=Z6QWpMOv21br=)qWJ2uRqkqOQX5G3$M=#U|FsJpn9Qmcr zy}Q(?s500V9>}@u+*t>9ZWbv_&^Zd%3i5NAJ8OV#Bd6UF;@V{;t18QEwLYk(QX{y- zt+0EvTt+sA$njKKpSaMOd?66i%BX{)Xr=bjs8Y<%VSWt6W8`aH^e@`GumCPAJ<~46 zL&~Oz*P9U!j~F*U)|Vx3^mq*$fLVR_2P&QQM*?xWx-A#-0DA}pZ|R|_hcDc=N+f@E zoWR(tY-uPqMxQsUI}>QA4^J20)Dv{2+N^==JKc0X9S@dK`6R31$}+oOmx=dP0% zS+1KWp9|-ZW8x^WRjE+?O?}KA z0fhPJ%X(br#ZSh~BXcbD=(JzRi-i4(@0@|@^TbfMF61t`_!5*9M7B=bm$Zhb(WT_nn3k&amW?3 zVC7byi)^H+H~6xv*E;f+p4w4(s&K(m97shMQu>p}^#r2n$eUbRf5_A@IvMXRrjy^R zaty{2Ld@@sGM!f7(S%3JF@F?8ZJpBfMIUXqU~HTRN;Hnu|TkwXctlVp`GYa-D_}cI@{1V3!XGSMJU!{A2ybEWyB_;rm)?;F#4GdpS<`TmC zYHr`anV0Yxt0Ui`MQ|#V=qxE*bE+#;t7*=3W+AeT{?w|===|ZIw38dir$MR|g#KqNQLFVpe!TU~3jLnU18rq%uU&418Jpcdz literal 0 HcmV?d00001 diff --git a/docs/Makefile b/docs/Makefile new file mode 100644 index 0000000..7e372b2 --- /dev/null +++ b/docs/Makefile @@ -0,0 
+1,15 @@ + +PYTHON ?= python3 + + +example1: + $(PYTHON) example1.py --export example1_stepMesh.py --finalTarget mesh + $(PYTHON) example1.py --export example1_stepDoFMap.py --finalTarget dofmap + $(PYTHON) example1.py --export example1_stepFunctions.py --finalTarget functions + $(PYTHON) example1.py --export example1_stepSolvers.py --finalTarget solvers + $(PYTHON) example1.py --export example1_stepErrors.py --finalTarget innerNorm + + $(PYTHON) example2.py --export example2_stepKernelFracInf.py --finalTarget kernelFracInf + $(PYTHON) example2.py --export example2_stepMeshFracInf.py --finalTarget meshFracInf + $(PYTHON) example2.py --export example2_stepSolveFracInf.py --finalTarget solveFracInf + $(PYTHON) example2.py --export example2_stepFiniteHorizon.py --finalTarget finiteHorizon diff --git a/docs/PyNucleus_base.rst b/docs/PyNucleus_base.rst new file mode 100644 index 0000000..23eb402 --- /dev/null +++ b/docs/PyNucleus_base.rst @@ -0,0 +1,118 @@ + + +PyNucleus.base package +======================= + +.. automodule:: PyNucleus_base + + +PyNucleus.base.blas module +--------------------------- + +.. automodule:: PyNucleus_base.blas + + +PyNucleus.base.convergence module +---------------------------------- + +.. automodule:: PyNucleus_base.convergence + + +PyNucleus.base.factory module +------------------------------ + +.. automodule:: PyNucleus_base.factory + + +PyNucleus.base.intTuple module +------------------------------- + +.. automodule:: PyNucleus_base.intTuple + + +PyNucleus.base.ip.norm module +------------------------------- + +.. automodule:: PyNucleus_base.ip_norm + + +PyNucleus.base.linalg module +----------------------------- + +.. automodule:: PyNucleus_base.linalg + + +PyNucleus.base.linear.operators module +---------------------------------------- + +.. automodule:: PyNucleus_base.linear_operators + + +PyNucleus.base.memProfile module +--------------------------------- + +.. automodule:: PyNucleus_base.memProfile + + +PyNucleus.base.myTypes module +------------------------------ + +.. automodule:: PyNucleus_base.myTypes + + +PyNucleus.base.performanceLogger module +---------------------------------------- + +.. automodule:: PyNucleus_base.performanceLogger + + +PyNucleus.base.plot.utils module +---------------------------------- + +.. automodule:: PyNucleus_base.plot_utils + + +PyNucleus.base.setupUtils module +--------------------------------- + +.. automodule:: PyNucleus_base.setupUtils + + +PyNucleus.base.solver.factory module +-------------------------------------- + +.. automodule:: PyNucleus_base.solver_factory + + +PyNucleus.base.solvers module +------------------------------ + +.. automodule:: PyNucleus_base.solvers + +PyNucleus.base.sparseGraph module +---------------------------------- + +.. automodule:: PyNucleus_base.sparseGraph + +PyNucleus.base.sparsityPattern module +-------------------------------------- + +.. automodule:: PyNucleus_base.sparsityPattern + + +PyNucleus.base.tupleDict module +-------------------------------- + +.. automodule:: PyNucleus_base.tupleDict + + +PyNucleus.base.utilsCy module +------------------------------ + +.. automodule:: PyNucleus_base.utilsCy + + +PyNucleus.base.utilsFem module +------------------------------- + +.. automodule:: PyNucleus_base.utilsFem diff --git a/docs/PyNucleus_fem.rst b/docs/PyNucleus_fem.rst new file mode 100644 index 0000000..fbf10f1 --- /dev/null +++ b/docs/PyNucleus_fem.rst @@ -0,0 +1,102 @@ + + +PyNucleus.fem package +====================== + +.. 
automodule:: PyNucleus_fem + + +PyNucleus.fem.DoFMaps module +----------------------------- + +.. automodule:: PyNucleus_fem.DoFMaps + + +PyNucleus.fem.algebraicOverlaps module +--------------------------------------- + +.. automodule:: PyNucleus_fem.algebraicOverlaps + + +PyNucleus.fem.boundaryLayerCy module +------------------------------------- + +.. automodule:: PyNucleus_fem.boundaryLayerCy + + +PyNucleus.fem.distributed.operators module +-------------------------------------------- + +.. automodule:: PyNucleus_fem.distributed_operators + + +PyNucleus.fem.femCy module +--------------------------- + +.. automodule:: PyNucleus_fem.femCy + + +PyNucleus.fem.functions module +------------------------------- + +.. automodule:: PyNucleus_fem.functions + + +PyNucleus.fem.mesh module +-------------------------- + +.. automodule:: PyNucleus_fem.mesh + + +PyNucleus.fem.meshConstruction module +-------------------------------------- + +.. automodule:: PyNucleus_fem.meshConstruction + + +PyNucleus.fem.meshCy module +---------------------------- + +.. automodule:: PyNucleus_fem.meshCy + + +PyNucleus.fem.meshOverlaps module +---------------------------------- + +.. automodule:: PyNucleus_fem.meshOverlaps + + +PyNucleus.fem.meshPartitioning module +-------------------------------------- + +.. automodule:: PyNucleus_fem.meshPartitioning + + +PyNucleus.fem.pdeProblems module +--------------------------------- + +.. automodule:: PyNucleus_fem.pdeProblems + + +PyNucleus.fem.quadrature module +-------------------------------- + +.. automodule:: PyNucleus_fem.quadrature + + +PyNucleus.fem.repartitioner module +----------------------------------- + +.. automodule:: PyNucleus_fem.repartitioner + + +PyNucleus.fem.simplexMapper module +----------------------------------- + +.. automodule:: PyNucleus_fem.simplexMapper + + +PyNucleus.fem.splitting module +------------------------------- + +.. automodule:: PyNucleus_fem.splitting diff --git a/docs/PyNucleus_metisCy.rst b/docs/PyNucleus_metisCy.rst new file mode 100644 index 0000000..2b89a0b --- /dev/null +++ b/docs/PyNucleus_metisCy.rst @@ -0,0 +1,18 @@ + + +PyNucleus.metisCy package +========================== + +.. automodule:: PyNucleus_metisCy + + +PyNucleus.metisCy.metisCy module +--------------------------------- + +.. automodule:: PyNucleus_metisCy.metisCy + + +PyNucleus.metisCy.parmetisCy module +------------------------------------ + +.. automodule:: PyNucleus_metisCy.parmetisCy diff --git a/docs/PyNucleus_multilevelSolver.rst b/docs/PyNucleus_multilevelSolver.rst new file mode 100644 index 0000000..66e4c8e --- /dev/null +++ b/docs/PyNucleus_multilevelSolver.rst @@ -0,0 +1,54 @@ + + +PyNucleus.multilevelSolver package +=================================== + +.. automodule:: PyNucleus_multilevelSolver + + +PyNucleus.multilevelSolver.coarseSolvers module +------------------------------------------------ + +.. automodule:: PyNucleus_multilevelSolver.coarseSolvers + + +PyNucleus.multilevelSolver.connectors module +--------------------------------------------- + +.. automodule:: PyNucleus_multilevelSolver.connectors + + +PyNucleus.multilevelSolver.geometricMG module +---------------------------------------------- + +.. automodule:: PyNucleus_multilevelSolver.geometricMG + + +PyNucleus.multilevelSolver.hierarchies module +---------------------------------------------- + +.. automodule:: PyNucleus_multilevelSolver.hierarchies + + +PyNucleus.multilevelSolver.levels module +----------------------------------------- + +.. 
automodule:: PyNucleus_multilevelSolver.levels
+
+
+PyNucleus.multilevelSolver.multigrid module
+--------------------------------------------
+
+.. automodule:: PyNucleus_multilevelSolver.multigrid
+
+
+PyNucleus.multilevelSolver.restrictionProlongation module
+----------------------------------------------------------
+
+.. automodule:: PyNucleus_multilevelSolver.restrictionProlongation
+
+
+PyNucleus.multilevelSolver.smoothers module
+--------------------------------------------
+
+.. automodule:: PyNucleus_multilevelSolver.smoothers
diff --git a/docs/PyNucleus_nl.rst b/docs/PyNucleus_nl.rst
new file mode 100644
index 0000000..5d72dde
--- /dev/null
+++ b/docs/PyNucleus_nl.rst
@@ -0,0 +1,122 @@
+
+
+PyNucleus.nl package
+=====================
+
+Submodules
+----------
+
+
+PyNucleus.nl.clusterMethodCy module
+------------------------------------
+
+.. automodule:: PyNucleus_nl.clusterMethodCy
+
+
+PyNucleus.nl.config module
+---------------------------
+
+.. automodule:: PyNucleus_nl.config
+
+
+PyNucleus.nl.fractionalLaplacian1D module
+------------------------------------------
+
+.. automodule:: PyNucleus_nl.fractionalLaplacian1D
+
+
+PyNucleus.nl.fractionalLaplacian2D module
+------------------------------------------
+
+.. automodule:: PyNucleus_nl.fractionalLaplacian2D
+
+
+PyNucleus.nl.fractionalOrders module
+-------------------------------------
+
+.. automodule:: PyNucleus_nl.fractionalOrders
+
+
+PyNucleus.nl.integral\_problems module
+---------------------------------------
+
+.. automodule:: PyNucleus_nl.integral_problems
+
+
+PyNucleus.nl.interactionDomains module
+---------------------------------------
+
+.. automodule:: PyNucleus_nl.interactionDomains
+
+
+PyNucleus.nl.kernels module
+----------------------------
+
+.. automodule:: PyNucleus_nl.kernels
+
+
+PyNucleus.nl.kernels2 module
+-----------------------------
+
+.. automodule:: PyNucleus_nl.kernels2
+
+
+PyNucleus.nl.kernelsCy module
+------------------------------
+
+.. automodule:: PyNucleus_nl.kernelsCy
+
+
+PyNucleus.nl.nonlocalLaplacian module
+--------------------------------------
+
+.. automodule:: PyNucleus_nl.nonlocalLaplacian
+
+
+PyNucleus.nl.nonlocalLaplacianBase module
+------------------------------------------
+
+.. automodule:: PyNucleus_nl.nonlocalLaplacianBase
+
+
+PyNucleus.nl.nonlocalLaplacianND module
+----------------------------------------
+
+.. automodule:: PyNucleus_nl.nonlocalLaplacianND
+
+
+PyNucleus.nl.nonlocalProblems module
+-------------------------------------
+
+.. automodule:: PyNucleus_nl.nonlocalProblems
+
+
+PyNucleus.nl.twoPointFunctions module
+--------------------------------------
+
+.. automodule:: PyNucleus_nl.twoPointFunctions
+
+
+Module contents
+---------------
+
+.. automodule:: PyNucleus_nl
diff --git a/docs/conf.py b/docs/conf.py
new file mode 100644
index 0000000..15d7856
--- /dev/null
+++ b/docs/conf.py
@@ -0,0 +1,80 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +# Configuration file for the Sphinx documentation builder. +# +# This file only contains a selection of the most common options. For a full +# list see the documentation: +# https://www.sphinx-doc.org/en/master/usage/configuration.html + +# -- Path setup -------------------------------------------------------------- + +# If extensions (or modules to document with autodoc) are in another directory, +# add these directories to sys.path here. If the directory is relative to the +# documentation root, use os.path.abspath to make it absolute, like shown here. +# +import os +import sys +sys.path.insert(0, os.path.abspath('..')) + + +# -- Project information ----------------------------------------------------- + +project = 'PyNucleus' +copyright = '2021, Christian Glusa' +author = 'Christian Glusa' + + +# -- General configuration --------------------------------------------------- + +# Add any Sphinx extension module names here, as strings. They can be +# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom +# ones. +extensions = [ + 'sphinx.ext.autodoc', + 'sphinx.ext.viewcode', + 'sphinxcontrib.programoutput', + 'matplotlib.sphinxext.plot_directive' +] + +autodoc_default_options = { + 'members': True, + 'member-order': 'bysource', + 'undoc-members': True, + 'show-inheritance': True, + 'special-members': '__init__,__call__' +} + +# Add any paths that contain templates here, relative to this directory. +# templates_path = ['_templates'] + +# List of patterns, relative to source directory, that match files and +# directories to ignore when looking for source files. +# This pattern also affects html_static_path and html_extra_path. +exclude_patterns = ['build', 'Thumbs.db', '.DS_Store'] + + +# -- Options for HTML output ------------------------------------------------- + +# The theme to use for HTML and HTML Help pages. See the documentation for +# a list of builtin themes. +# +html_theme = 'bizstyle' + +html_theme_options = { + 'sidebarwidth': '300px' +} + + +# Add any paths that contain custom static files (such as style sheets) here, +# relative to this directory. They are copied after the builtin static files, +# so a file named "default.css" will overwrite the builtin "default.css". +# html_static_path = ['_static'] + + +# autodoc_mock_imports = ["PyNucleus.base"] diff --git a/docs/example1.py b/docs/example1.py new file mode 100644 index 0000000..692a07b --- /dev/null +++ b/docs/example1.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from PyNucleus.packageTools.sphinxTools import codeRegionManager + +mgr = codeRegionManager() + +with mgr.add('imports'): + import matplotlib.pyplot as plt + from numpy import sqrt + +with mgr.add('mesh'): + ###################################################################### + # Get a mesh and refine it + + from PyNucleus import meshFactory + +with mgr.add('mesh', onlyIfFinal=True): + # show available options + meshFactory.print() + +with mgr.add('mesh'): + mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.) + for _ in range(3): + mesh = mesh.refine() +with mgr.add('mesh', onlyIfFinal=True): + print('Mesh:', mesh) + plt.figure().gca().set_title('Mesh') + mesh.plot() + +with mgr.add('dofmap'): + ###################################################################### + # Construct a finite element space + from PyNucleus import dofmapFactory + +with mgr.add('dofmap', onlyIfFinal=True): + # show available options + dofmapFactory.print() + +with mgr.add('dofmap'): + # We use piecewise linears + dm = dofmapFactory('P1', mesh) +with mgr.add('dofmap', onlyIfFinal=True): + print('DoFMap:', dm) + + plt.figure().gca().set_title('DoFMap') + dm.plot() + +with mgr.add('functions'): + ###################################################################### + # Construct some simple functions + from PyNucleus import functionFactory + +with mgr.add('functions', onlyIfFinal=True): + # show available options + functionFactory.print() + +with mgr.add('functions'): + # functions defined via Python lambdas + rhs_1 = functionFactory('Lambda', lambda x: 2*x[0]*(1-x[0]) + 2*x[1]*(1-x[1])) + exact_solution_1 = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1])) + + # Functions defined via Cython implementations -> faster evaluation + rhs_2 = functionFactory('rhsFunSin2D') + exact_solution_2 = functionFactory('solSin2D') + + # assemble right-hand side vectors and interpolate the exact solutions + b1 = dm.assembleRHS(rhs_1) + u_interp_1 = dm.interpolate(exact_solution_1) + +with mgr.add('functions', onlyIfFinal=True): + print('Linear system RHS:', b1) + print('Interpolated solution:', u_interp_1) + +with mgr.add('functions'): + b2 = dm.assembleRHS(rhs_2) + u_interp_2 = dm.interpolate(exact_solution_2) + +with mgr.add('functions', onlyIfFinal=True): + plt.figure().gca().set_title('Interpolated solution') + u_interp_1.plot() + +with mgr.add('matrices'): + ###################################################################### + # Assemble mass and Laplacian stiffness matrices + mass = dm.assembleMass() + laplacian = dm.assembleStiffness() +with mgr.add('matrices', onlyIfFinal=True): + print('Linear system matrix:', laplacian) + +with mgr.add('solvers'): + ###################################################################### + # Construct solvers + from PyNucleus import solverFactory + +with mgr.add('solvers', onlyIfFinal=True): + # show available options + solverFactory.print() + +with mgr.add('solvers'): + solver_direct = solverFactory('lu', A=laplacian) + solver_direct.setup() +with mgr.add('solvers', onlyIfFinal=True): + print('Direct solver:', solver_direct) + +with mgr.add('solvers'): + solver_krylov = solverFactory('cg', A=laplacian) + solver_krylov.setup() + solver_krylov.maxIter = 100 + solver_krylov.tolerance = 1e-8 +with mgr.add('solvers', onlyIfFinal=True): + print('Krylov solver:', solver_krylov) + +with mgr.add('solvers'): + u1 = dm.zeros() + solver_direct(b1, u1) + + u2 = dm.zeros() + numIter = 
solver_krylov(b2, u2)
+with mgr.add('solvers', onlyIfFinal=True):
+    print('Number of iterations:', numIter)
+
+    plt.figure().gca().set_title('Error')
+    (u_interp_1-u1).plot(flat=True)
+
+with mgr.add('innerNorm', onlyIfFinal=True):
+    ######################################################################
+    # Inner products and norms
+    print('Residual norm 1st solve: ', (b1-laplacian*u1).norm())
+    print('Residual norm 2nd solve: ', (b2-laplacian*u2).norm())
+
+with mgr.add('innerNorm'):
+    # Compute errors
+    H10_error_1 = sqrt(b1.inner(u_interp_1-u1))
+    L2_error_1 = sqrt((u_interp_1-u1).inner(mass*(u_interp_1-u1)))
+    H10_error_2 = sqrt(b2.inner(u_interp_2-u2))
+    L2_error_2 = sqrt((u_interp_2-u2).inner(mass*(u_interp_2-u2)))
+
+with mgr.add('innerNorm', onlyIfFinal=True):
+    print('1st problem - H10:', H10_error_1, 'L2:', L2_error_1)
+    print('2nd problem - H10:', H10_error_2, 'L2:', L2_error_2)
+
+
+with mgr.add('final'):
+    plt.show()
diff --git a/docs/example1.rst b/docs/example1.rst
new file mode 100644
index 0000000..c1d6131
--- /dev/null
+++ b/docs/example1.rst
@@ -0,0 +1,140 @@
+
+
+Example 1 - A simple PDE problem
+================================
+
+In this first example, we will construct a finite element discretization of a classical PDE problem and solve it.
+The full code of this example can be found in ``drivers/example1.py``.
+
+Factories
+---------
+
+Different groups of objects, such as finite element spaces or meshes, are created via factories.
+The available classes that a factory provides can be displayed by calling the ``print()`` method of the factory.
+An object is built by passing the name of the desired class and additional parameters to the factory.
+If this sounds vague now, don't worry: the examples below will make it clear.
+
+Meshes
+------
+
+The first object we need to create is a mesh to support the finite element discretization.
+We start by constructing a mesh for a square domain :math:`\Omega=[0, 1] \times [0, 1]` and refining it uniformly three times:
+
+.. literalinclude:: ../drivers/example1.py
+   :start-after: Get a mesh
+   :end-before: #################
+   :lineno-match:
+
+The output of the above code snippet is given below.
+In particular, we see what other meshes we could have constructed using the ``meshFactory``, apart from 'square', and what parameters we can pass to the factory.
+We also see that we created a 2d mesh with 289 vertices and 512 cells.
+
+.. program-output:: python3 example1.py --finalTarget mesh
+
+.. plot:: example1_stepMesh.py
+
+Like the mesh we just created, many PyNucleus objects have a ``plot`` method.
+
+DoFMaps
+-------
+
+In the next step, we create a finite element space on the mesh.
+By default, we assume a Dirichlet condition on the entire boundary of the domain.
+We build a piecewise linear finite element space.
+
+.. literalinclude:: ../drivers/example1.py
+   :start-after: Construct a finite element space
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example1.py --finalTarget dofmap
+
+.. plot:: example1_stepDoFMap.py
+
+Functions and vectors
+---------------------
+
+Functions can be defined either in Python or in Cython.
+The advantage of the latter is that their code is compiled, which speeds up evaluation significantly.
+A couple of compiled functions are already available via the ``functionFactory``.
+A generic Python function can be used via the ``Lambda`` function class.
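+As a quick illustration, here is a minimal sketch (reusing the DoFMap ``dm`` from the previous step) of wrapping a Python lambda and interpolating it:
+
+.. code-block:: python
+
+   from PyNucleus import functionFactory
+
+   # wrap a Python callable as a PyNucleus function
+   f = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1]))
+   # interpolate it into the finite element space
+   f_h = dm.interpolate(f)
+
+We will be solving the problem
+
+..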
math:: + + -\Delta u &= f & \text{ in } \Omega, \\ + u &= 0 & \text{ on } \partial \Omega, + +for two different forcing functions :math:`f`. + +We assemble the right-hand side + +.. math:: + + \int_\Omega f v + +of the linear system by calling the ``assembleRHS`` method of the DoFMap object, and interpolate the exact solutions into the finite element space. + + +.. literalinclude:: ../drivers/example1.py + :start-after: Construct some simple functions + :end-before: ################# + :lineno-match: + +.. program-output:: python3 example1.py --finalTarget functions + +.. plot:: example1_stepFunctions.py + +Matrices +-------- + +We assemble two matrices, the mass matrix + +.. math:: + + \int_\Omega u v + +and the stiffness matrix associated with the Laplacian + +.. math:: + + \int_\Omega \nabla u \cdot \nabla v + +.. literalinclude:: ../drivers/example1.py + :start-after: Assemble mass + :end-before: ####### + :lineno-match: + +.. program-output:: python3 example1.py --finalTarget matrices + +Solvers +------- + +Now that we have assembled our linear system, we want to solve it. +We choose to solve one system using an LU solver, and the other one using a CG solver. + +.. literalinclude:: ../drivers/example1.py + :start-after: Construct solvers + :end-before: ################# + :lineno-match: + +.. program-output:: python3 example1.py --finalTarget solvers + +.. plot:: example1_stepSolvers.py + +Norms and inner products +------------------------ + +Finally, we want to check that we actually solved the system by computing residual errors. +We also compute errors in :math:`H^1_0` and :math:`L^2` norms. + +.. literalinclude:: ../drivers/example1.py + :start-after: Inner products + :end-before: plt.show + :lineno-match: + +.. program-output:: python3 example1.py --finalTarget innerNorm + +This concludes our first example. +Next, we turn to nonlocal equations. diff --git a/docs/example2.py b/docs/example2.py new file mode 100644 index 0000000..fb4a96f --- /dev/null +++ b/docs/example2.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus.packageTools.sphinxTools import codeRegionManager + +mgr = codeRegionManager() + +with mgr.add('imports'): + import matplotlib.pyplot as plt + from time import time + +with mgr.add('kernelFracInf'): + ###################################################################### + # Get a fractional kernel + from PyNucleus import kernelFactory + +with mgr.add('kernelFracInf', onlyIfFinal=True): + # show available options + kernelFactory.print() + +with mgr.add('kernelFracInf'): + from numpy import inf + kernelFracInf = kernelFactory('fractional', dim=2, s=0.75, horizon=inf) + +with mgr.add('kernelFracInf', onlyIfFinal=True): + print(kernelFracInf) + plt.figure().gca().set_title('Fractional kernel') + kernelFracInf.plot() + + +with mgr.add('meshFracInf'): + ###################################################################### + # Generate an appropriate mesh + from PyNucleus import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET + + # Get a mesh that is appropriate for the problem, i.e. 
with the required interaction domain. + meshFracInf, _ = nonlocalMeshFactory('disc', kernel=kernelFracInf, boundaryCondition=HOMOGENEOUS_DIRICHLET, hTarget=0.15) + +with mgr.add('meshFracInf', onlyIfFinal=True): + print(meshFracInf) + plt.figure().gca().set_title('Mesh for fractional kernel') + meshFracInf.plot() + +with mgr.add('assemblyFracInf'): + ###################################################################### + # Assemble the operator + from PyNucleus import dofmapFactory, functionFactory + + dmFracInf = dofmapFactory('P1', meshFracInf) + + rhs = functionFactory('constant', 1.) + exact_solution = functionFactory('solFractional', dim=2, s=0.75) + + b = dmFracInf.assembleRHS(rhs) + u_exact = dmFracInf.interpolate(exact_solution) + u = dmFracInf.zeros() + + # Assemble the operator in dense format. + start = time() + A_fracInf = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='dense') +with mgr.add('assemblyFracInf', onlyIfFinal=True): + print('Dense assembly took {}s'.format(time()-start)) + +with mgr.add('assemblyFracInf'): + start = time() + A_fracInf_h2 = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='h2') +with mgr.add('assemblyFracInf', onlyIfFinal=True): + print('Hierarchical assembly took {}s'.format(time()-start)) + + print(A_fracInf) + print(A_fracInf_h2) + +with mgr.add('solveFracInf'): + ###################################################################### + # Solve the linear system + from PyNucleus import solverFactory + from numpy import sqrt + + solver = solverFactory('lu', A=A_fracInf, setup=True) + solver(b, u) + + Hs_err = sqrt(abs(b.inner(u-u_exact))) + +with mgr.add('solveFracInf', onlyIfFinal=True): + print('Hs error: {}'.format(Hs_err)) + plt.figure().gca().set_title('Numerical solution, fractional kernel') + u.plot() + +with mgr.add('finiteHorizon'): + ###################################################################### + # Solve a problem with finite horizon + kernelConst = kernelFactory('constant', dim=2, horizon=0.2) + +with mgr.add('finiteHorizon', onlyIfFinal=True): + print(kernelConst) + plt.figure().gca().set_title('Constant kernel') + kernelConst.plot() + +with mgr.add('finiteHorizon'): + from PyNucleus import DIRICHLET + + meshConst, nIConst = nonlocalMeshFactory('square', kernel=kernelConst, boundaryCondition=DIRICHLET, hTarget=0.18) + +with mgr.add('finiteHorizon', onlyIfFinal=True): + print(meshConst) + plt.figure().gca().set_title('Mesh for constant kernel') + meshConst.plot() + +with mgr.add('finiteHorizon'): + dmConst = dofmapFactory('P1', meshConst, nIConst['domain']) + dmConstInteraction = dmConst.getComplementDoFMap() + + A_const = dmConst.assembleNonlocal(kernelConst, matrixFormat='sparse') + B_const = dmConst.assembleNonlocal(kernelConst, dm2=dmConstInteraction, matrixFormat='sparse') + + g = functionFactory('Lambda', lambda x: -(x[0]**2 + x[1]**2)/4) + g_interp = dmConstInteraction.interpolate(g) + + b = dmConst.assembleRHS(rhs)-(B_const*g_interp) + u = dmConst.zeros() + + solver = solverFactory('cg', A=A_const, setup=True) + solver.maxIter = 1000 + solver.tolerance = 1e-8 + + solver(b, u) + + u_global = dmConst.augmentWithBoundaryData(u, g_interp) + + plt.figure().gca().set_title('Numerical solution, constant kernel') + u_global.plot() + + plt.figure().gca().set_title('Analytic solution, constant kernel') + u_global.dm.interpolate(g).plot() + +with mgr.add('finiteHorizon', onlyIfFinal=True): + print(A_const) + +with mgr.add('final'): + ###################################################################### + plt.show() 
diff --git a/docs/example2.rst b/docs/example2.rst
new file mode 100644
index 0000000..67ac2e7
--- /dev/null
+++ b/docs/example2.rst
@@ -0,0 +1,127 @@
+
+
+Example 2 - Nonlocal problems
+=============================
+
+In this second example, we will assemble and solve several nonlocal equations.
+The full code of this example can be found in ``drivers/example2.py``.
+
+PyNucleus can assemble operators of the form
+
+.. math::
+
+   \mathcal{L}u(x) = \int_{\mathbb{R}^d} [u(y)-u(x)] \gamma(x, y) dy.
+
+The kernel :math:`\gamma` is of the form
+
+.. math::
+
+   \gamma(x,y) = \phi(x, y) |x-y|^{-\beta(x,y)} \chi_{V_\delta(x)}(y).
+
+Here, :math:`\phi` is a positive function, and :math:`\chi` is the indicator function.
+:math:`0<\delta\le\infty` is called the horizon and determines the size of the kernel support :math:`V_\delta(x) \subset \mathbb{R}^d`.
+The singularity :math:`\beta` of the kernel depends on the family of kernels:
+
+- fractional type: :math:`\beta(x,y)=d+2s(x,y)`, where :math:`d` is the spatial dimension and :math:`s(x,y)` is the fractional order,
+- constant type: :math:`\beta(x,y)=0`,
+- peridynamic type: :math:`\beta(x,y)=-1`.
+
+At present, the only implemented interaction regions are balls in the 2-norm:
+
+.. math::
+
+   V_{\delta}^{(2)}(x) = \{y \in \mathbb{R}^d \mid \|x-y\|_2 < \delta\}.
+
+
+A fractional kernel
+-------------------
+
+We start off by creating a fractional kernel with infinite horizon and constant fractional order :math:`s=0.75`.
+
+.. literalinclude:: ../drivers/example2.py
+   :start-after: Get a fractional kernel
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example2.py --finalTarget kernelFracInf
+
+.. plot:: example2_stepKernelFracInf.py
+
+By default, kernels are normalized. This can be disabled by passing ``normalized=False``.
+
+
+Nonlocal assembly
+-----------------
+
+We will be solving the problem
+
+.. math::
+
+   -\mathcal{L} u &= f && \text{ in } \Omega=B(0,1)\subset\mathbb{R}^2, \\
+   u &= 0 && \text{ in } \mathbb{R}^2 \setminus \Omega,
+
+for constant forcing function :math:`f=1`.
+
+First, we generate a mesh.
+Instead of the ``meshFactory`` used in the previous example, we now use the ``nonlocalMeshFactory``.
+The advantage is that this factory can generate meshes with appropriate interaction domains.
+For this particular example, the factory will not generate any interaction domain, since the homogeneous Dirichlet condition on :math:`\mathbb{R}^2\setminus\Omega` can be enforced via a boundary integral.
+
+.. literalinclude:: ../drivers/example2.py
+   :start-after: Generate an appropriate mesh
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example2.py --finalTarget meshFracInf
+
+.. plot:: example2_stepMeshFracInf.py
+
+Next, we obtain a piecewise linear, continuous DoFMap on the mesh, assemble the RHS, and interpolate the known analytic solution.
+We assemble the nonlocal operator by passing the kernel to the ``assembleNonlocal`` method of the DoFMap object.
+The optional parameter ``matrixFormat`` determines what kind of linear operator is assembled.
+We time the assembly of the operator as a dense matrix and as a hierarchical matrix, and inspect the resulting objects.
+
+.. literalinclude:: ../drivers/example2.py
+   :start-after: Assemble the operator
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example2.py --finalTarget assemblyFracInf
+
+It can be observed that both assembly routines take roughly the same amount of time.
+The reason for this is that the operator itself has quite small dimensions.
+For larger numbers of unknowns, we expect the hierarchical assembly to scale like :math:`\mathcal{O}(N \log^{2d} N)`, whereas the dense assembly scales like :math:`\mathcal{O}(N^2)`.
+
+As in the local PDE example, we can then solve the resulting linear system and compute the error in the energy norm.
+
+.. literalinclude:: ../drivers/example2.py
+   :start-after: Solve the linear system
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example2.py --finalTarget solveFracInf
+
+.. plot:: example2_stepSolveFracInf.py
+
+
+A finite horizon case
+---------------------
+
+Next, we solve a nonlocal Poisson problem involving a constant kernel with finite horizon.
+We will choose :math:`\gamma(x,y) \sim \chi_{V_{\delta}^{(2)}(x)}(y)` for :math:`\delta=0.2`, and solve
+
+.. math::
+
+   -\mathcal{L} u &= f && \text{ in } \Omega=[0,1]^2, \\
+   u &= -(x_1^2 + x_2^2)/4 && \text{ in } \mathcal{I},
+
+where :math:`\mathcal{I}:=\{y\in\mathbb{R}^2\setminus\Omega \mid \exists x\in\Omega: \gamma(x,y)\neq 0\}` is the interaction domain.
+
+.. literalinclude:: ../drivers/example2.py
+   :start-after: Solve a problem with finite horizon
+   :end-before: #################
+   :lineno-match:
+
+.. program-output:: python3 example2.py --finalTarget finiteHorizon
+
+.. plot:: example2_stepFiniteHorizon.py
diff --git a/docs/features.rst b/docs/features.rst
new file mode 100644
index 0000000..c85b467
--- /dev/null
+++ b/docs/features.rst
@@ -0,0 +1,38 @@
+
+
+Features
+========
+
+* Simplicial meshes in 1D, 2D, 3D
+
+* Dense and sparse (CSR, symmetric CSR, hierarchical) matrix formats
+
+* Finite Elements:
+
+  * continuous P1, P2, P3 spaces,
+  * discontinuous P0 space
+
+* Solvers/preconditioners:
+
+  * LU,
+  * Cholesky,
+  * incomplete LU & Cholesky,
+  * CG,
+  * BiCGStab,
+  * GMRES,
+  * geometric multigrid
+
+* Assembly of local operators
+
+* Nonlocal kernels:
+
+  * Finite and infinite horizon
+  * Interaction domains defined with respect to different norms
+  * Singularities: fractional, peridynamic, indicator kernel
+  * Spatially variable kernels
+
+* Nonlocal assembly (1D and 2D) into dense, sparse and hierarchical matrices
+
+* Distributed computing using MPI
+
+* Partitioning using METIS / ParMETIS
diff --git a/docs/index.rst b/docs/index.rst
new file mode 100644
index 0000000..0cc5ec4
--- /dev/null
+++ b/docs/index.rst
@@ -0,0 +1,107 @@
+
+
+Welcome to PyNucleus' documentation!
+=====================================
+
+PyNucleus is a finite element code that specifically targets nonlocal operators of the form
+
+.. math::
+
+   \int_{\mathbb{R}^d} [u(x)-u(y)] \gamma(x, y) dy
+
+for nonlocal kernels :math:`\gamma` with finite or infinite horizon and of integrable or fractional type.
+Specific examples of such operators include the integral and regional fractional Laplacians, their truncated and tempered variants, and operators arising from peridynamics.
+
+The package aims to provide efficient discretization and assembly routines with :math:`O(N \log N)` quasi-optimal complexity.
+The resulting sets of equations can be solved using optimal linear solvers.
+The code is fully NumPy/SciPy compatible, allowing easy integration into application codes.
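+As a minimal sketch (condensed from Example 1; it assumes the factories introduced there), solving a Poisson problem looks like this:
+
+.. code-block:: python
+
+   from PyNucleus import meshFactory, dofmapFactory, functionFactory, solverFactory
+
+   # build and refine a mesh, then a piecewise linear finite element space
+   mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.)
+   for _ in range(3):
+       mesh = mesh.refine()
+   dm = dofmapFactory('P1', mesh)
+
+   # assemble the stiffness matrix and a right-hand side, then solve
+   laplacian = dm.assembleStiffness()
+   b = dm.assembleRHS(functionFactory('rhsFunSin2D'))
+   u = dm.zeros()
+   solver = solverFactory('lu', A=laplacian, setup=True)
+   solver(b, u)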
+
+
+Features
+--------
+
+* Simplicial meshes in 1D, 2D, 3D
+
+* Finite Elements:
+
+  * continuous P1, P2, P3 spaces,
+  * discontinuous P0 space
+
+* Assembly of local operators
+
+* Nonlocal kernels:
+
+  * Finite and infinite horizon
+  * Singularities: fractional, peridynamic, constant kernel
+  * Spatially variable kernels: variable fractional order and variable coefficients
+
+* Nonlocal assembly (1D and 2D) into dense, sparse and hierarchical matrices
+
+* Solvers/preconditioners:
+
+  * LU,
+  * Cholesky,
+  * incomplete LU & Cholesky,
+  * Jacobi,
+  * CG,
+  * BiCGStab,
+  * GMRES,
+  * geometric multigrid
+
+* Distributed computing using MPI
+
+* Computationally expensive parts of the code are compiled via Cython.
+
+* Partitioning using METIS / ParMETIS
+
+Getting started
+---------------
+
+.. toctree::
+   :maxdepth: 1
+
+   installation
+   references
+
+
+Examples
+--------
+
+.. toctree::
+   :maxdepth: 2
+
+   example1
+   example2
+
+
+Funding
+-------
+
+PyNucleus' development is funded through the MATNIP project (PI: Marta D'Elia) of the LDRD program at Sandia National Laboratories.
+
+.. image:: ../data/matnip.png
+   :height: 100px
+
+*The MATNIP project develops for the first time a rigorous nonlocal interface theory based on physical principles that is consistent with the classical theory of partial differential equations when the nonlocality vanishes and is mathematically well-posed.
+This will improve the predictive capability of nonlocal models and increase their usability at Sandia and, more generally, in the computational-science and engineering community.
+Furthermore, this theory will provide the groundwork for the development of nonlocal solvers, reducing the burden of prohibitively expensive computations.*
+
+API
+--------
+
+.. toctree::
+   :maxdepth: 1
+
+   PyNucleus_base
+   PyNucleus_fem
+   PyNucleus_metisCy
+   PyNucleus_multilevelSolver
+   PyNucleus_nl
+
+
+Indices and tables
+==================
+
+* :ref:`genindex`
+* :ref:`modindex`
+* :ref:`search`
diff --git a/docs/installation.rst b/docs/installation.rst
new file mode 100644
index 0000000..e21eba8
--- /dev/null
+++ b/docs/installation.rst
@@ -0,0 +1,76 @@
+
+
+Prerequisites
+=============
+
+In order to install PyNucleus, you will need
+
+* Python 3,
+* MPI,
+* METIS,
+* ParMETIS,
+* SuiteSparse,
+* make (optional, only for editable installs).
+
+On Debian, Ubuntu, etc., the required dependencies can be installed with
+
+.. code-block:: shell
+
+   sudo apt-get install python3 mpi-default-bin mpi-default-dev libmetis-dev libparmetis-dev libsuitesparse-dev
+
+Installation
+============
+
+PyNucleus is installed via
+
+.. code-block:: shell
+
+   python3 -m pip install .
+
+or via
+
+.. code-block:: shell
+
+   make
+
+If you want to easily modify the source code without re-installing the package every time, an editable install is available as
+
+.. code-block:: shell
+
+   make dev
+
+PyNucleus depends on other Python packages that will be installed automatically:
+
+* NumPy
+* SciPy
+* Matplotlib
+* Cython
+* mpi4py
+* tabulate
+* PyYAML
+* H5py
+* modepy
+* meshpy
+* scikit-sparse
+
+
+Docker container
+================
+
+A Docker container that contains all the required dependencies can be built as well:
+
+.. code-block:: shell
+
+   ./build-docker.sh
+
+Once the build is done, it can be launched as
+
+.. code-block:: shell
+
+   ./run-docker-linux.sh
+
+or
+
+..
code-block:: shell + + ./run-docker-mac.sh diff --git a/drivers/example1.py b/drivers/example1.py new file mode 100644 index 0000000..cfda6e7 --- /dev/null +++ b/drivers/example1.py @@ -0,0 +1,124 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import matplotlib.pyplot as plt +from numpy import sqrt + +###################################################################### +# Get a mesh and refine it + +from PyNucleus import meshFactory + +# show available options +meshFactory.print() + +mesh = meshFactory('square', ax=0., ay=0., bx=1., by=1.) +for _ in range(3): + mesh = mesh.refine() + +print('Mesh:', mesh) +plt.figure().gca().set_title('Mesh') +mesh.plot() + +###################################################################### +# Construct a finite element space +from PyNucleus import dofmapFactory + +# show available options +dofmapFactory.print() + +# We use piecewise linears +dm = dofmapFactory('P1', mesh) + +print('DoFMap:', dm) + +plt.figure().gca().set_title('DoFMap') +dm.plot() + +###################################################################### +# Construct some simple functions +from PyNucleus import functionFactory + +# show available options +functionFactory.print() + +# functions defined via Python lambdas +rhs_1 = functionFactory('Lambda', lambda x: 2*x[0]*(1-x[0]) + 2*x[1]*(1-x[1])) +exact_solution_1 = functionFactory('Lambda', lambda x: x[0]*(1-x[0])*x[1]*(1-x[1])) + +# Functions defined via Cython implementations -> faster evaluation +rhs_2 = functionFactory('rhsFunSin2D') +exact_solution_2 = functionFactory('solSin2D') + +# assemble right-hand side vectors and interpolate the exact solutions +b1 = dm.assembleRHS(rhs_1) +u_interp_1 = dm.interpolate(exact_solution_1) + +print('Linear system RHS:', b1) +print('Interpolated solution:', u_interp_1) + +b2 = dm.assembleRHS(rhs_2) +u_interp_2 = dm.interpolate(exact_solution_2) + +plt.figure().gca().set_title('Interpolated solution') +u_interp_1.plot() + +###################################################################### +# Assemble mass and Laplacian stiffness matrices +mass = dm.assembleMass() +laplacian = dm.assembleStiffness() + +print('Linear system matrix:', laplacian) + +###################################################################### +# Construct solvers +from PyNucleus import solverFactory + +# show available options +solverFactory.print() + +solver_direct = solverFactory('lu', A=laplacian) +solver_direct.setup() + +print('Direct solver:', solver_direct) + +solver_krylov = solverFactory('cg', A=laplacian) +solver_krylov.setup() +solver_krylov.maxIter = 100 +solver_krylov.tolerance = 1e-8 + +print('Krylov solver:', solver_krylov) + +u1 = dm.zeros() +solver_direct(b1, u1) + +u2 = dm.zeros() +numIter = solver_krylov(b2, u2) + +print('Number of iterations:', numIter) + +plt.figure().gca().set_title('Error') +(u_interp_1-u1).plot(flat=True) + +###################################################################### +# Inner products and norms +print('Residual norm 1st solve: ', (b1-laplacian*u1).norm()) +print('Residual norm 2nd solve: ', (b2-laplacian*u2).norm()) + +# 
Compute errors +H10_error_1 = sqrt(b1.inner(u_interp_1-u1)) +L2_error_1 = sqrt((u_interp_1-u1).inner(mass*(u_interp_1-u1))) +H10_error_2 = sqrt(b2.inner(u_interp_2-u2)) +L2_error_2 = sqrt((u_interp_2-u2).inner(mass*(u_interp_2-u2))) + +print('1st problem - H10:', H10_error_1, 'L2:', L2_error_1) +print('2nd problem - H10:', H10_error_2, 'L2:', L2_error_2) + +plt.show() + diff --git a/drivers/example2.py b/drivers/example2.py new file mode 100644 index 0000000..6908382 --- /dev/null +++ b/drivers/example2.py @@ -0,0 +1,125 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import matplotlib.pyplot as plt +from time import time + +###################################################################### +# Get a fractional kernel +from PyNucleus import kernelFactory + +# show available options +kernelFactory.print() + +from numpy import inf +kernelFracInf = kernelFactory('fractional', dim=2, s=0.75, horizon=inf) + +print(kernelFracInf) +plt.figure().gca().set_title('Fractional kernel') +kernelFracInf.plot() + +###################################################################### +# Generate an appropriate mesh +from PyNucleus import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET + +# Get a mesh that is appropriate for the problem, i.e. with the required interaction domain. +meshFracInf, _ = nonlocalMeshFactory('disc', kernel=kernelFracInf, boundaryCondition=HOMOGENEOUS_DIRICHLET, hTarget=0.15) + +print(meshFracInf) +plt.figure().gca().set_title('Mesh for fractional kernel') +meshFracInf.plot() + +###################################################################### +# Assemble the operator +from PyNucleus import dofmapFactory, functionFactory + +dmFracInf = dofmapFactory('P1', meshFracInf) + +rhs = functionFactory('constant', 1.) +exact_solution = functionFactory('solFractional', dim=2, s=0.75) + +b = dmFracInf.assembleRHS(rhs) +u_exact = dmFracInf.interpolate(exact_solution) +u = dmFracInf.zeros() + +# Assemble the operator in dense format. 
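+# (Note: the matrixFormat parameter selects the storage format; this driver
+# compares 'dense' and 'h2', and uses 'sparse' for the finite horizon
+# problem further below.)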
+start = time() +A_fracInf = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='dense') + +print('Dense assembly took {}s'.format(time()-start)) + +start = time() +A_fracInf_h2 = dmFracInf.assembleNonlocal(kernelFracInf, matrixFormat='h2') + +print('Hierarchical assembly took {}s'.format(time()-start)) + +print(A_fracInf) +print(A_fracInf_h2) + +###################################################################### +# Solve the linear system +from PyNucleus import solverFactory +from numpy import sqrt + +solver = solverFactory('lu', A=A_fracInf, setup=True) +solver(b, u) + +Hs_err = sqrt(abs(b.inner(u-u_exact))) + +print('Hs error: {}'.format(Hs_err)) +plt.figure().gca().set_title('Numerical solution, fractional kernel') +u.plot() + +###################################################################### +# Solve a problem with finite horizon +kernelConst = kernelFactory('constant', dim=2, horizon=0.2) + +print(kernelConst) +plt.figure().gca().set_title('Constant kernel') +kernelConst.plot() + +from PyNucleus import DIRICHLET + +meshConst, nIConst = nonlocalMeshFactory('square', kernel=kernelConst, boundaryCondition=DIRICHLET, hTarget=0.18) + +print(meshConst) +plt.figure().gca().set_title('Mesh for constant kernel') +meshConst.plot() + +dmConst = dofmapFactory('P1', meshConst, nIConst['domain']) +dmConstInteraction = dmConst.getComplementDoFMap() + +A_const = dmConst.assembleNonlocal(kernelConst, matrixFormat='sparse') +B_const = dmConst.assembleNonlocal(kernelConst, dm2=dmConstInteraction, matrixFormat='sparse') + +g = functionFactory('Lambda', lambda x: -(x[0]**2 + x[1]**2)/4) +g_interp = dmConstInteraction.interpolate(g) + +b = dmConst.assembleRHS(rhs)-(B_const*g_interp) +u = dmConst.zeros() + +solver = solverFactory('cg', A=A_const, setup=True) +solver.maxIter = 1000 +solver.tolerance = 1e-8 + +solver(b, u) + +u_global = dmConst.augmentWithBoundaryData(u, g_interp) + +plt.figure().gca().set_title('Numerical solution, constant kernel') +u_global.plot() + +plt.figure().gca().set_title('Analytic solution, constant kernel') +u_global.dm.interpolate(g).plot() + +print(A_const) + +###################################################################### +plt.show() + diff --git a/drivers/interfaceProblem.py b/drivers/interfaceProblem.py new file mode 100644 index 0000000..cef8068 --- /dev/null +++ b/drivers/interfaceProblem.py @@ -0,0 +1,322 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import numpy as np +from PyNucleus.base import REAL, driver, solverFactory +from PyNucleus.base.ip_norm import ip_serial, norm_serial +from PyNucleus.fem import (simpleInterval, uniformSquare, + squareIndicator, P1_DoFMap, + constant, Lambda, NO_BOUNDARY, INTERIOR, PHYSICAL, + getSurfaceDoFMap) +from PyNucleus.fem.DoFMaps import fe_vector +from PyNucleus.fem.splitting import meshSplitter, dofmapSplitter + + +d = driver() +d.add('domain', acceptedValues=['doubleInterval', 'doubleSquare']) +d.add('problem', acceptedValues=['polynomial', 'sin', 'sin-solJump-fluxJump', 'sin-nojump']) +d.add('coeff1', 1.0) +d.add('coeff2', 1.0) +d.add('hTarget', 0.05) +d.add('solver', acceptedValues=['lu', 'alternatingSchwarz', 'RAS']) + +d.declareFigure('solutions-flat') +d.declareFigure('solutions-3d') +d.declareFigure('errors') + +params = d.process() + +if d.domain == 'doubleInterval': + a, b, c = 0, 2, 1 + mesh = simpleInterval(a, b) + mesh = mesh.refine() + + eps = 1e-9 + domainIndicator1 = squareIndicator(np.array([a+eps], dtype=REAL), + np.array([c-eps], dtype=REAL)) + domainIndicator2 = squareIndicator(np.array([c+eps], dtype=REAL), + np.array([b-eps], dtype=REAL)) + interfaceIndicator = squareIndicator(np.array([c-eps], dtype=REAL), + np.array([c+eps], dtype=REAL)) + dirichletIndicator1 = constant(1.)-domainIndicator1-interfaceIndicator + dirichletIndicator2 = constant(1.)-domainIndicator2-interfaceIndicator + + if d.problem == 'polynomial': + sol_1 = Lambda(lambda x: x[0]**2) + sol_2 = Lambda(lambda x: (x[0]-1)**2) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = constant(-2*d.coeff1) + forcing_right = constant(-2*d.coeff2) + sol_jump = sol_2-sol_1 + flux_jump = constant(2*d.coeff1) + elif d.problem == 'sin-solJump-fluxJump': + sol_1 = Lambda(lambda x: np.sin(np.pi*x[0])) + sol_2 = Lambda(lambda x: 1.+np.sin(np.pi*(x[0]-1))) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = Lambda(lambda x: np.pi**2*np.sin(np.pi*x[0])*d.coeff1) + forcing_right = Lambda(lambda x: np.pi**2*np.sin(np.pi*(x[0]-1))*d.coeff2) + sol_jump = sol_2-sol_1 + flux_jump = constant(-np.pi*d.coeff1 - np.pi*d.coeff2) + elif d.problem == 'sin-nojump': + sol_1 = Lambda(lambda x: np.sin(np.pi*x[0])/d.coeff1) + sol_2 = Lambda(lambda x: np.sin(np.pi*x[0])/d.coeff2) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = Lambda(lambda x: np.pi**2*np.sin(np.pi*x[0])) + forcing_right = Lambda(lambda x: np.pi**2*np.sin(np.pi*(x[0]))) + sol_jump = sol_2-sol_1 + flux_jump = constant(0) + elif d.problem == 'sin-soljump': + sol_1 = Lambda(lambda x: np.sin(np.pi*x[0])) + sol_2 = Lambda(lambda x: 1.+np.sin(np.pi*(x[0]-1))) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = Lambda(lambda x: np.pi**2*np.sin(np.pi*x[0])*d.coeff1) + forcing_right = Lambda(lambda x: np.pi**2*np.sin(np.pi*(x[0]-1))*d.coeff2) + sol_jump = sol_2-sol_1 + flux_jump = constant(-np.pi*d.coeff1 - np.pi*d.coeff2) + else: + raise NotImplementedError(d.problem) + +elif d.domain == 'doubleSquare': + ax = 0 + ay = 0 + bx = 2 + by = 1 + cx = 1 + mesh = uniformSquare(2, 2, ax, ay, bx, by) + mesh = mesh.refine() + + eps = 1e-9 + domainIndicator1 = squareIndicator(np.array([ax+eps, ay+eps], dtype=REAL), + np.array([cx-eps, by-eps], dtype=REAL)) + domainIndicator2 = squareIndicator(np.array([cx+eps, ay+eps], dtype=REAL), + np.array([bx-eps, by-eps], dtype=REAL)) + interfaceIndicator = squareIndicator(np.array([cx-eps, ay+eps], dtype=REAL), + np.array([cx+eps, by-eps], 
dtype=REAL)) + dirichletIndicator1 = constant(1.)-domainIndicator1-interfaceIndicator + dirichletIndicator2 = constant(1.)-domainIndicator2-interfaceIndicator + + if d.problem == 'polynomial': + sol_1 = Lambda(lambda x: x[0]**2) + sol_2 = Lambda(lambda x: (x[0]-1)**2) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = constant(-2*d.coeff1) + forcing_right = constant(-2*d.coeff2) + sol_jump = sol_2-sol_1 + flux_jump = constant(2*d.coeff1) + elif d.problem == 'sin': + sol_1 = Lambda(lambda x: np.sin(np.pi*x[0])) + sol_2 = Lambda(lambda x: np.sin(np.pi*(x[0]-1))) + diri_left = sol_1 + diri_right = sol_2 + forcing_left = Lambda(lambda x: np.pi**2*np.sin(np.pi*x[0])*d.coeff1) + forcing_right = Lambda(lambda x: np.pi**2*np.sin(np.pi*(x[0]-1))*d.coeff2) + sol_jump = sol_2-sol_1 + flux_jump = constant(-np.pi*d.coeff1 - np.pi*d.coeff2) + else: + raise NotImplementedError(d.problem) +else: + raise NotImplementedError(d.domain) + +###################################################################### + +while mesh.h > params['hTarget']: + mesh = mesh.refine() + +# Global DoFMap used for getting consistent indexing across the two domains +dm = P1_DoFMap(mesh, NO_BOUNDARY) + +split = meshSplitter(mesh, {'mesh1': domainIndicator1, + 'mesh2': domainIndicator2}) + +# submesh for domain 1 +domain1Mesh = split.getSubMesh('mesh1') +domain1Mesh.tagBoundaryVertices(lambda x: INTERIOR if interfaceIndicator(x) > 0.5 else PHYSICAL) +domain1Mesh.tagBoundaryEdges(lambda x, y: INTERIOR if (interfaceIndicator(x) > 0.5 and interfaceIndicator(y) > 0.5) else PHYSICAL) +dm1 = split.getSubMap('mesh1', dm) +R1, P1 = split.getRestrictionProlongation('mesh1', dm, dm1) + +# Surface mesh used for assembling the flux jump condition +interface = domain1Mesh.get_surface_mesh(INTERIOR) +dmInterface = getSurfaceDoFMap(mesh, interface, dm1) + +# submesh for domain 2 +domain2Mesh = split.getSubMesh('mesh2') +dm2 = split.getSubMap('mesh2', dm) +R2, P2 = split.getRestrictionProlongation('mesh2', dm, dm2) + +# The interface DoFs are discretized by domain 1. Hence, we split dm1 +# into interior+interface and boundary. We will also need an interface +# restriction. +dmSplit1 = dofmapSplitter(dm1, {'interface': interfaceIndicator, + 'domain': domainIndicator1+interfaceIndicator, + 'bc': dirichletIndicator1}) +R1I, P1I = dmSplit1.getRestrictionProlongation('interface') +R1D, P1D = dmSplit1.getRestrictionProlongation('domain') +R1B, P1B = dmSplit1.getRestrictionProlongation('bc') + +dmSplit2 = dofmapSplitter(dm2, {'interface': interfaceIndicator, + 'domain': domainIndicator2+interfaceIndicator, + 'bc': dirichletIndicator2}) +R2I, P2I = dmSplit2.getRestrictionProlongation('interface') +R2D, P2D = dmSplit2.getRestrictionProlongation('domain') +R2B, P2B = dmSplit2.getRestrictionProlongation('bc') + +# np.testing.assert_equal(P1D.num_columns+P1B.num_columns, P1D.num_rows) +# np.testing.assert_equal(P2D.num_columns+P2B.num_columns+P2I.num_columns, P2D.num_rows) +# np.testing.assert_allclose((P1*P1D*np.ones((P1D.num_columns))+P2*P2D*np.ones((P2D.num_columns))).max(), 1.) +# np.testing.assert_equal(P1I.num_columns, P2I.num_columns) + + +A1 = dm1.assembleStiffness() +A1.scale(d.coeff1) +A2 = dm2.assembleStiffness() +A2.scale(d.coeff2) + +# domain-domain interaction +A = (P1*P1D*(R1D*A1*P1D)*R1D*R1) + \ + (P2*P2D*(R2D*A2*P2D)*R2D*R2) +# Fake Dirichlet condition. We really only want to solve on interior +# and interface unknowns. We make the boundary unknowns of the global +# problem an identity block and set the rhs to zero for these. 
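+# With zero rhs entries on those rows, the solve returns zero there; the
+# actual Dirichlet values g1, g2 are added back into u1 and u2 below.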
+A += (P1*P1B*R1B*R1) + \ + (P2*P2B*R2B*R2) + +# forcing +b = P1*P1D*dmSplit1.getSubMap('domain').assembleRHS(forcing_left) + \ + P2*P2D*dmSplit2.getSubMap('domain').assembleRHS(forcing_right) +# flux jump forcing term +b += P1*dmInterface.assembleRHS(flux_jump) +# solution jump +h = dmSplit2.getSubMap('interface').interpolate(sol_jump) +b -= (P2*P2D*(R2D*A2*P2I))*h +# Dirichlet BCs +g1 = dmSplit1.getSubMap('bc').interpolate(diri_left) +g2 = dmSplit2.getSubMap('bc').interpolate(diri_right) +b -= P1*P1D*(R1D*A1*P1B)*g1 +b -= P2*P2D*(R2D*A2*P2B)*g2 + +u = dm.zeros() +with d.timer('solve'): + if d.solver == 'lu': + lu = solverFactory.build('lu', A=A, setup=True) + lu(b, u) + elif (d.solver == 'alternatingSchwarz') or (d.solver == 'RAS'): + a1inv = solverFactory.build('lu', A=R1*A*P1, setup=True) + a2inv = solverFactory.build('lu', A=R2*A*P2, setup=True) + u1 = dm1.zeros() + u2 = dm2.zeros() + r = dm.zeros() + A.residual_py(u, b, r) + norm = norm_serial() + k = 0 + residualNorm0 = residualNorm = norm(r) + if d.solver == 'alternatingSchwarz': + while k < 100 and residualNorm/residualNorm0 > 1e-5: + b1 = R1*r + a1inv(b1, u1) + u += P1*u1 + A.residual_py(u, b, r) + + b2 = R2*r + a2inv(b2, u2) + u += P2*u2 + A.residual_py(u, b, r) + + residualNorm = norm(r) + k += 1 + d.logger.info('Alternating Schwarz solver obtained residual norm {}/{} = {} after {} iterations'.format(residualNorm, residualNorm0, residualNorm/residualNorm0, k)) + else: + u1.assign(1.) + u2.assign(1.) + dg = P1*u1+P2*u2 + d1inv = fe_vector(1./(R1*dg), dm1) + d2inv = fe_vector(1./(R2*dg), dm2) + while k < 100 and residualNorm/residualNorm0 > 1e-5: + b1 = R1*r + a1inv(b1, u1) + u += P1*(u1*d1inv) + + b2 = R2*r + a2inv(b2, u2) + u += P2*(u2*d2inv) + + A.residual_py(u, b, r) + residualNorm = norm(r) + k += 1 + d.logger.info('RAS solver obtained residual norm {}/{} = {} after {} iterations'.format(residualNorm, residualNorm0, residualNorm/residualNorm0, k)) + + else: + raise NotImplementedError(d.solver) + +u1 = dm1.zeros() +u1.assign(R1*u + P1B*g1) + +u2 = dm2.zeros() +u2.assign(R2*u + P2I*h + P2B*g2) + +M1 = dm1.assembleMass() +M2 = dm2.assembleMass() +u1ex = dm1.interpolate(sol_1) +u2ex = dm2.interpolate(sol_2) + +inner = ip_serial() + +results = d.addOutputGroup('results') +results.add('domain1L2err', np.sqrt(inner(M1*(u1-u1ex), u1-u1ex)), rTol=1e-2) +results.add('domain2L2err', np.sqrt(inner(M2*(u2-u2ex), u2-u2ex)), rTol=1e-2) +d.logger.info('\n'+str(results)) + +data = d.addOutputGroup('data', tested=False) +data.add('fullDomain1Mesh', u1.dm.mesh) +data.add('fullDomain1DoFMap', u1.dm) +data.add('full_u1', u1) +data.add('fullDomain2Mesh', u2.dm.mesh) +data.add('fullDomain2DoFMap', u2.dm) +data.add('full_u2', u2) + +if d.startPlot('solutions-flat'): + if mesh.dim == 1: + u1.plot() + u2.plot() + else: + vmin = min(u1.min(), u2.min()) + vmax = max(u1.max(), u2.max()) + plotKwargs = {} + plotKwargs['vmin'] = vmin + plotKwargs['vmax'] = vmax + plotKwargs['flat'] = True + u1.plot(**plotKwargs) + u2.plot(**plotKwargs) + +if mesh.dim == 2 and dm.num_dofs < 60000 and d.startPlot('solutions-3d'): + vmin = min(u1.min(), u2.min()) + vmax = max(u1.max(), u2.max()) + plotKwargs = {} + if mesh.dim == 2: + plotKwargs['vmin'] = vmin + plotKwargs['vmax'] = vmax + ax = u1.plot(**plotKwargs) + if mesh.dim == 2: + plotKwargs['ax'] = ax + ax = u2.plot(**plotKwargs) + +if d.startPlot('errors'): + plotKwargs = {} + if dm.num_dofs >= 60000: + plotKwargs['flat'] = True + ax = (u1-u1ex).plot(**plotKwargs) + if mesh.dim == 2: + plotKwargs['ax'] = ax + ax = 
(u2-u2ex).plot(**plotKwargs) +d.finish() diff --git a/drivers/runHelmholtz.py b/drivers/runHelmholtz.py new file mode 100644 index 0000000..d27c485 --- /dev/null +++ b/drivers/runHelmholtz.py @@ -0,0 +1,193 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from mpi4py import MPI +import numpy as np +from PyNucleus.base import COMPLEX, driver, solverFactory +from PyNucleus.base.linear_operators import wrapRealToComplexCSR +from PyNucleus.fem import (assembleSurfaceMass, + getSurfaceDoFMap, + PHYSICAL, + NO_BOUNDARY, + real, imag, + helmholtzProblem, + plotManager) +from PyNucleus.multilevelSolver import (EmptyHierarchy, + hierarchyManager, + inputConnector, + paramsForMG) + + +d = driver(MPI.COMM_WORLD) +p = helmholtzProblem(d) + +d.add('partitioner', 'regular') +d.add('partitionerParams', {}) +d.add('debugOverlaps', False) +d.add('maxiter', 300) + +d.declareFigure('solution') +d.declareFigure('error') + +params = d.process() + +params['reaction'] = None +params['buildMass'] = True +params['tag'] = NO_BOUNDARY + + +with d.timer('setup levels'): + hierarchies, connectors = paramsForMG(p.noRef, + range(d.comm.size), + params, p.dim, p.element) + connectors['input'] = {'type': inputConnector, + 'params': {'domain': p.domain}} + FINE = 'fine' + hierarchies[-1]['label'] = FINE + + hM = hierarchyManager(hierarchies, connectors, params, d.comm) + hM.setup() + hM.display() + + h = hM[FINE].meshLevels[-1].mesh.h + tol = {'P1': 0.1*h**2, + 'P2': 0.001*h**3, + 'P3': 0.001*h**4}['P1'] + tol = 1e-5 + tol = max(tol, 2e-9) + + +def getOp(S, M, MB, frequency, shift=0): + A = S - (M * frequency**2) + (MB * (1j*frequency)) + if shift == 0: + return A.to_csr_linear_operator() + else: + B = M * (1j*shift*frequency**2) + return (A + B).to_csr_linear_operator() + + +for h in hM.builtHierarchies: + if isinstance(h, EmptyHierarchy): + continue + mesh = h.meshLevels[-1].mesh + dm = h.algebraicLevels[-1].DoFMap + + surface = mesh.get_surface_mesh(PHYSICAL) + MB = h.algebraicLevels[-1].S.copy() + MB.setZero() + assembleSurfaceMass(mesh, surface, dm, MB, sss_format=p.symmetric) + h.algebraicLevels[-1].MB = MB + + for lvl in range(len(h.algebraicLevels)-2, -1, -1): + if h.algebraicLevels[lvl].A is None: + continue + h.algebraicLevels[lvl].MB = h.algebraicLevels[lvl].A.copy() + h.algebraicLevels[lvl].MB.setZero() + h.algebraicLevels[lvl+1].R.restrictMatrix(h.algebraicLevels[lvl+1].MB, + h.algebraicLevels[lvl].MB) + # for lvl in range(len(h.algebraicLevels)-2, -1, -1): + # if h.algebraicLevels[lvl].A is None: + # continue + # h.algebraicLevels[lvl].MB = h.algebraicLevels[lvl].A.copy() + # h.algebraicLevels[lvl].MB.data[:] = 0. 
+ # mesh = h.meshLevels[lvl].mesh + # dm = h.algebraicLevels[lvl].DoFMap + # surface = mesh.get_surface_mesh(PHYSICAL) + # assembleSurfaceMass(mesh, surface, dm, h.algebraicLevels[lvl].MB) + + for lvl in range(len(h.algebraicLevels)): + if h.algebraicLevels[lvl].S is None: + continue + if h.algebraicLevels[lvl].P is not None: + h.algebraicLevels[lvl].P = wrapRealToComplexCSR(h.algebraicLevels[lvl].P) + if h.algebraicLevels[lvl].R is not None: + h.algebraicLevels[lvl].R = wrapRealToComplexCSR(h.algebraicLevels[lvl].R) + h.algebraicLevels[lvl].A = getOp(h.algebraicLevels[lvl].S, + h.algebraicLevels[lvl].M, + h.algebraicLevels[lvl].MB, + p.frequency, + shift=0.5) + +overlaps = hM[FINE].multilevelAlgebraicOverlapManager + +ml = solverFactory.build('complex_mg', + hierarchy=hM, + smoother=('jacobi', + {'omega': 0.8, + # 'omega': min(2./3., 8./(4+3*self.dim)), + 'presmoothingSteps': 2, + 'postsmoothingSteps': 2}), + setup=True) +msg = '\n'+str(ml) +d.logger.info(msg) + +mesh = hM[FINE].meshLevels[-1].mesh +dm = hM[FINE].algebraicLevels[-1].DoFMap +A = getOp(hM[FINE].algebraicLevels[-1].S, + hM[FINE].algebraicLevels[-1].M, + hM[FINE].algebraicLevels[-1].MB, + p.frequency) +M = wrapRealToComplexCSR(hM[FINE].algebraicLevels[-1].M) +interfaces = hM[FINE].meshLevels[-1].interfaces + +with d.timer('assemble RHS'): + b = dm.assembleRHS(p.rhs) + + if p.boundaryCond is not None: + surface = mesh.get_surface_mesh(PHYSICAL) + dmS = getSurfaceDoFMap(mesh, surface, dm) + b += dmS.assembleRHS(p.boundaryCond) + +x = dm.zeros(dtype=COMPLEX) +gmres = solverFactory.build('complex_gmres', A=A, maxIter=d.maxiter, tolerance=tol, setup=True) +gmres.setPreconditioner(ml.asPreconditioner(), left=False) +gmres.setNormInner(ml.norm, ml.inner) +res = [] +with d.timer('solve'): + gmres(b, x) + res = gmres.residuals + +results = d.addOutputGroup('results', tested=True) +results.add('Tolerance', tol) +results.add('numIter', len(res)) +results.add('res', res[-1], rTol=3e-1) +L2 = np.sqrt(abs(ml.inner(M*x, x))) +results.add('solution L2 norm', L2, rTol=1e-6) +if p.solEx is not None: + solExReal = real(p.solEx) + solExImag = imag(p.solEx) + xEx = dm.interpolate(solExReal)+1j*dm.interpolate(solExImag) + L2err = np.sqrt(abs(ml.inner(M*(x-xEx), x-xEx))) + results.add('L2 error', L2err, rTol=2.) +d.logger.info('\n'+str(results)) + +if mesh.dim < 3: + plotDefaults = {} + if mesh.dim == 2: + plotDefaults['flat'] = True + plotDefaults['shading'] = 'gouraud' + if d.willPlot('solution'): + pM = plotManager(mesh, dm, defaults=plotDefaults, interfaces=interfaces) + pM.add(x.real, label='solution (real)') + pM.add(x.imag, label='solution (imag)') + if p.solEx is not None: + pM.add(xEx.real, label='exact solution (real)') + pM.add(xEx.imag, label='exact solution (imag)') + pM.preparePlots(tag=NO_BOUNDARY) + if d.startPlot('solution'): + pM.plot() + if p.solEx is not None: + if d.willPlot('error'): + pMerr = plotManager(mesh, dm, defaults=plotDefaults, interfaces=interfaces) + pMerr.add((x-xEx).real, label='error (real)') + pMerr.add((x-xEx).imag, label='error (imag)') + pMerr.preparePlots(tag=NO_BOUNDARY) + if d.startPlot('error'): + pMerr.plot() +d.finish() diff --git a/drivers/runNonlocal.py b/drivers/runNonlocal.py new file mode 100644 index 0000000..9e4e4bb --- /dev/null +++ b/drivers/runNonlocal.py @@ -0,0 +1,207 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus.base import driver, solverFactory, INDEX +from PyNucleus.base.linear_operators import Dense_LinearOperator +from PyNucleus.fem import (simplexXiaoGimbutas, plotManager, + P0_DoFMap, NO_BOUNDARY) +from PyNucleus.multilevelSolver import hierarchyManager +from PyNucleus.nl import (multilevelDirichletCondition, + paramsForFractionalHierarchy, + nonlocalProblem) +from PyNucleus.nl.nonlocalProblems import (DIRICHLET, HOMOGENEOUS_DIRICHLET, + NEUMANN, HOMOGENEOUS_NEUMANN) + +d = driver() +p = nonlocalProblem(d) + +d.add('solver', acceptedValues=['lu', 'mg', 'cg-mg']) +d.add('dense', False) +d.add('forceRebuild', True) +d.add('genKernel', False) +d.add('maxiter', 100) +d.add('tol', 1e-6) + +d.add('plotRHS', False) +d.add('plotOnFullDomain', True) + +d.declareFigure('solution') +d.declareFigure('analyticSolution') + +params = d.process() + +if d.kernel != 'fractional': + # Hierarchical matrices are only implemented for fractional kernels + d.dense = True + +with d.timer('hierarchy'): + params['domain'] = p.mesh + params['keepMeshes'] = 'all' + params['keepAllDoFMaps'] = True + params['assemble'] = 'ALL' if params['solver'].find('mg') >= 0 else 'last' + params['dense'] = d.dense + params['logging'] = True + params['genKernel'] = d.genKernel + hierarchies, connectors = paramsForFractionalHierarchy(p.noRef, params) + hM = hierarchyManager(hierarchies, connectors, params) + hM.setup() +mesh = hM['fine'].meshLevels[-1].mesh +assert 2*mesh.h < p.horizon.value, "h = {}, horizon = {}".format(mesh.h, p.horizon.value) + +if not p.boundaryCondition == HOMOGENEOUS_DIRICHLET: + bc = multilevelDirichletCondition(hM.getLevelList(), p.domainIndicator, p.fluxIndicator) + fullDoFMap = bc.fullDoFMap + naturalDoFMap = bc.naturalDoFMap + b = naturalDoFMap.assembleRHS(p.rhs, qr=simplexXiaoGimbutas(3, mesh.dim)) + bc.setDirichletData(p.dirichletData) + bc.applyRHScorrection(b) + hierarchy = bc.naturalLevels +else: + hierarchy = hM.getLevelList() + naturalDoFMap = hierarchy[-1]['DoFMap'] + b = naturalDoFMap.assembleRHS(p.rhs, qr=simplexXiaoGimbutas(3, mesh.dim)) + + +# pure Neumann condition -> project out nullspace +if p.boundaryCondition in (NEUMANN, HOMOGENEOUS_NEUMANN): + assert bc.dirichletDoFMap.num_dofs == 0, bc.dirichletDoFMap + if params['solver'].find('mg') >= 0: + bc.naturalLevels[0]['A'] = bc.naturalLevels[0]['A'] + Dense_LinearOperator.ones(*bc.naturalLevels[0]['A'].shape) + const = bc.naturalDoFMap.ones() + b -= b.inner(const)/const.inner(const)*const + +u = naturalDoFMap.zeros() + +with d.timer('solve'): + if params['solver'].find('mg') >= 0: + ml = solverFactory.build('mg', hierarchy=hierarchy, setup=True, tolerance=params['tol'], maxIter=params['maxiter']) + d.logger.info('\n'+str(ml)) + if d.solver == 'mg': + its = ml(b, u) + res = ml.residuals + elif d.solver == 'cg-mg': + cg = solverFactory.build('cg', A=hierarchy[-1]['A'], setup=True, tolerance=params['tol'], maxIter=params['maxiter']) + cg.setPreconditioner(ml.asPreconditioner()) + its = cg(b, u) + res = cg.residuals + elif d.solver == 'lu': + lu = solverFactory.build(d.solver, A=hierarchy[-1]['A'], setup=True) + its = lu(b, u) + else: + raise NotImplementedError(d.solver) + +# pure Neumann condition -> add nullspace 
components to match analytic solution +if p.boundaryCondition in (NEUMANN, HOMOGENEOUS_NEUMANN) and p.analyticSolution is not None: + uEx = bc.naturalDoFMap.interpolate(p.analyticSolution) + u += (const.inner(uEx)-const.inner(u))/const.inner(const) * const + +vectors = d.addOutputGroup('vectors') +vectors.add('u', u) + +meshes = d.addOutputGroup('meshes') +meshes.add('fullMesh', mesh) + +results = d.addOutputGroup('results') +results.add('full h', mesh.h) +results.add('natural DoFs', naturalDoFMap.num_dofs) +results.add('iterations', its) + +if p.boundaryCondition in (DIRICHLET, ): + results.add('full DoFs', bc.fullDoFMap.num_dofs) + u_full = bc.augmentDirichlet(u) + vectors.add('u_full', u_full) +else: + u_full = bc.naturalP*u + +errors = d.addOutputGroup('errors', tested=True) +resNorm = (b-hierarchy[-1]['A']*u).norm(False) +errors.add('residual norm', resNorm) + +if p.analyticSolution is not None: + uEx = bc.naturalDoFMap.interpolate(p.analyticSolution) + M_natural = naturalDoFMap.assembleMass() + L2err_natural = np.sqrt(abs((u-uEx).inner(M_natural*(u-uEx)))) + relL2err_natural = L2err_natural/np.sqrt(abs(uEx.inner(M_natural*uEx))) + + uEx_domain = bc.domainDoFMap.interpolate(p.analyticSolution) + M_domain = bc.domainDoFMap.assembleMass() + u_domain = bc.domainDoFMap.fromArray(bc.domainR*u_full) + L2err_domain = np.sqrt(abs((u_domain-uEx_domain).inner(M_domain*(u_domain-uEx_domain)))) + relL2err_domain = L2err_domain/np.sqrt(abs(uEx_domain.inner(M_domain*uEx_domain))) + + Linferr_natural = np.abs((u-uEx)).max() + relLinferr_natural = Linferr_natural/np.abs(uEx).max() + vectors.add('uEx', uEx) + errors.add('L2 error natural', L2err_natural, rTol=3e-2) + errors.add('rel L2 error natural', relL2err_natural, rTol=3e-2) + errors.add('L2 error domain', L2err_domain, rTol=3e-2) + errors.add('rel L2 error domain', relL2err_domain, rTol=3e-2) + errors.add('Linf error natural', Linferr_natural, rTol=3e-2) + errors.add('rel Linf error natural', relLinferr_natural, rTol=3e-2) + + if p.boundaryCondition in (DIRICHLET, NEUMANN): + uEx_full = bc.fullDoFMap.interpolate(p.analyticSolution) + M_full = bc.fullDoFMap.assembleMass() + L2err_full = np.sqrt(abs((uEx_full-u_full).inner(M_full*(uEx_full-u_full)))) + vectors.add('uEx_full', uEx_full) + errors.add('L2 error including Dirichlet domain', L2err_full, rTol=3e-2) +d.logger.info('\n'+str(results+errors)) + +if d.startPlot('solution'): + import matplotlib.pyplot as plt + + plotDefaults = {} + if p.dim == 2: + plotDefaults['flat'] = True + if p.element != 'P0': + plotDefaults['shading'] = 'gouraud' + if p.boundaryCondition in (DIRICHLET, NEUMANN): + pM = plotManager(bc.fullDoFMap.mesh, bc.fullDoFMap, defaults=plotDefaults) + if p.dim == 1: + pMerr = plotManager(bc.fullDoFMap.mesh, bc.fullDoFMap, defaults=plotDefaults) + else: + pMerr = pM + pM.add(u_full, label='solution') + if d.plotRHS: + pM.add(bc.augmentDirichlet(b), label='rhs') + if p.analyticSolution is not None: + pM.add(uEx_full, label='analytic solution') + pMerr.add(u_full-uEx_full, label='error') + else: + if d.plotOnFullDomain: + pM = plotManager(naturalDoFMap.mesh, naturalDoFMap, defaults=plotDefaults) + if p.dim == 1: + pMerr = plotManager(naturalDoFMap.mesh, naturalDoFMap, defaults=plotDefaults) + else: + pMerr = pM + else: + indicator = P0_DoFMap(naturalDoFMap.mesh, NO_BOUNDARY).interpolate(p.domainIndicator) + selectedCells = np.flatnonzero(indicator.toarray() >= 1e-9).astype(INDEX) + reducedDM = naturalDoFMap.getReducedMeshDoFMap(selectedCells) + pM = plotManager(reducedDM.mesh, 
reducedDM, defaults=plotDefaults) + if p.dim == 1: + pMerr = plotManager(reducedDM.mesh, reducedDM, defaults=plotDefaults) + else: + pMerr = pM + pM.add(u, label='solution') + if d.plotRHS: + pM.add(b, label='rhs') + if p.analyticSolution is not None: + pM.add(uEx, label='analytic solution') + if p.dim == 1: + pMerr.add(u-uEx, label='error') + if p.dim == 1 and p.analyticSolution is not None: + plt.subplot(1, 2, 1) + pM.plot() + plt.subplot(1, 2, 2) + pMerr.plot() + else: + pM.plot() +d.finish() diff --git a/drivers/runParallelGMG.py b/drivers/runParallelGMG.py new file mode 100644 index 0000000..cdbe948 --- /dev/null +++ b/drivers/runParallelGMG.py @@ -0,0 +1,377 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division, print_function +from mpi4py import MPI +import numpy as np +from PyNucleus.base import driver, solverFactory +from PyNucleus.fem import (accumulate2global, + diffusionProblem) +from PyNucleus.multilevelSolver import (V, FMG_V, + hierarchyManager, + inputConnector, + + paramsForMG) + +d = driver(MPI.COMM_WORLD) +p = diffusionProblem(d) + +d.add('checkSolution', False) +d.add('saveVTK', False) + +d.add('doMG', True) +d.add('doFMG', True) +d.add('doCG', False) +d.add('doPCG', True) +d.add('doFMGPCG', True) +d.add('doBICGSTAB', False) +d.add('doPBICGSTAB', True) +d.add('doGMRES', False) +d.add('doPGMRES', True) +d.add('doFMGPGMRES', True) +d.add('commType', 'standard', acceptedValues=['oneSided', 'persistent']) + +d.add('partitioner', 'regular') +d.add('partitionerParams', {}) +d.add('debugOverlaps', False) + +solver = d.addGroup('solver') +solver.add('smoother', 'jacobi', acceptedValues=['gauss_seidel', 'chebyshev']) +solver.add('doPCoarsen', False) +solver.add('maxiter', 50) + +d.declareFigure('residuals', default=False) +d.declareFigure('spSolve') +d.declareFigure('spSolveError') +d.declareFigure('spSolveExactSolution') + +params = d.process() + +with d.timer('setup levels'): + if d.doPCoarsen and d.element != 'P1': + raise NotImplementedError() + else: + hierarchies, connectors = paramsForMG(p.noRef, + range(d.comm.size), + params, p.dim, p.element) + connectors['input'] = {'type': inputConnector, + 'params': {'domain': d.domain}} + + FINE = 'fine' + hierarchies[-1]['label'] = FINE + + hM = hierarchyManager(hierarchies, connectors, params, d.comm) + hM.setup() + hM.display() + + subdomain = hM[FINE].meshLevels[-1].mesh + DoFMap_fine = hM[FINE].algebraicLevels[-1].DoFMap + overlaps = hM[FINE].multilevelAlgebraicOverlapManager + h = hM[FINE].meshLevels[-1].mesh.global_h(overlaps.comm) + hmin = hM[FINE].meshLevels[-1].mesh.global_hmin(overlaps.comm) + tol = {'P1': 0.5*h**2, + 'P2': 0.001*h**3, + 'P3': 0.001*h**4}[d.element] + tol = max(tol, 2e-9) + +# assemble rhs on finest grid +with d.timer('Assemble rhs on finest grid'): + rhs = DoFMap_fine.assembleRHS(p.rhsFun) +if p.boundaryCond: + with d.timer('BC'): + boundaryDoFMap = DoFMap_fine.getComplementDoFMap() + boundary_data = boundaryDoFMap.interpolate(p.boundaryCond) + A_boundary = DoFMap_fine.assembleStiffness(dm2=boundaryDoFMap) + rhs -= 
A_boundary*boundary_data + +with d.timer('Setup solver'): + smootherParams = {'jacobi': {'presmoothingSteps': 2, + 'postsmoothingSteps': 2}, + 'gauss_seidel': {'presmoothingSteps': 1, + 'postsmoothingSteps': 1}, + 'chebyshev': {'degree': 3}} + ml = solverFactory.build('mg', + hierarchy=hM, + smoother=(d.smoother, smootherParams[d.smoother]), + maxIter=d.maxiter, tolerance=tol, + setup=True) +info = d.addOutputGroup('info') +info.add('Subdomains', d.comm.size) +info.add('Refinement steps', p.noRef) +info.add('Elements', d.comm.allreduce(subdomain.num_cells)) +info.add('DoFs', overlaps.countDoFs()) +info.add('h', h) +info.add('hmin', hmin) +info.add('Tolerance', tol) +d.logger.info('\n' + str(info) + '\n') +d.logger.info('\n'+str(ml)) +d.comm.Barrier() + +x = DoFMap_fine.zeros() +r = DoFMap_fine.zeros() +A = hM[FINE].algebraicLevels[-1].A +acc = hM[FINE].algebraicLevels[-1].accumulateOperator +A.residual_py(x, rhs, r) +r0 = r.norm(False) + + +rate = d.addOutputGroup('rates', tested=True, aTol=1e-2) +its = d.addOutputGroup('iterations', tested=True) +res = d.addOutputGroup('residuals', tested=True, rTol=3e-1) +resHist = d.addOutputGroup('resHist', tested=True, aTol=5e-8) +errs = d.addOutputGroup('errors', tested=True, rTol=2.) + +for cycle, label in [(V, 'MG'), + (FMG_V, 'FMG')]: + if getattr(d, 'do'+label): + ml.cycle = cycle + with d.timer('Solve '+label): + numIter = ml(rhs, x) + residuals = ml.residuals + A.residual_py(x, rhs, r) + resNorm = r.norm(False) + rate.add('Rate of convergence '+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations '+label, numIter) + res.add('Residual norm '+label, resNorm) + resHist.add(label, residuals) + +# set up cg +cg = solverFactory.build('cg', A=A, maxIter=d.maxiter, tolerance=tol, setup=True) +cg.setNormInner(ml.norm, ml.inner) +# set up gmres +gmres = solverFactory.build('gmres', A=A, maxIter=d.maxiter//5, restarts=5, tolerance=tol, setup=True) +gmres.setNormInner(ml.norm, ml.inner) +# set up bicgstab +bicgstab = solverFactory.build('bicgstab', A=A, maxIter=d.maxiter, tolerance=tol, setup=True) +bicgstab.setNormInner(ml.norm, ml.inner) + +for solver, label in [ + (cg, 'CG'), + (gmres, 'GMRES'), + (bicgstab, 'BICGSTAB')]: + if getattr(d, 'do'+label): + solver.setPreconditioner(acc) + solver.setInitialGuess() + with d.timer('Solve '+label): + numIter = solver(rhs, x) + residuals = solver.residuals + A.residual_py(x, rhs, r) + resNorm = r.norm(False) + rate.add('Rate of convergence '+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations '+label, numIter) + res.add('Residual norm '+label, resNorm) + resHist.add(label, residuals) + if getattr(d, 'doP'+label): + solver.setPreconditioner(ml.asPreconditioner(cycle=V), False) + solver.setInitialGuess() + with d.timer('Solve P'+label): + numIter = solver(rhs, x) + residuals = solver.residuals + A.residual_py(x, rhs, r) + resNorm = r.norm(False) + rate.add('Rate of convergence P'+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations P'+label, numIter) + res.add('Residual norm P'+label, resNorm) + resHist.add('P'+label, residuals) + + +if d.saveVTK and p.boundaryCond: + y = DoFMap_fine.augmentWithBoundaryData(x, + boundary_data) + subdomain.exportSolutionVTK(y, y.dm, '{}{}.vtk'.format(d.problem, d.comm.rank), + rank=d.comm.rank) + +if d.doFMGPCG: + ml.cycle = FMG_V + ml.maxIter = 1 + cg.setPreconditioner(ml.asPreconditioner(cycle=V)) + with d.timer('Solve FMG-PCG'): + ml(rhs, x) + cg.setInitialGuess(x) + numIter = cg(rhs, x) + residuals = cg.residuals + numIter += 1 + 
A.residual_py(x, rhs, r) + resNorm = r.norm(False) + rate.add('Rate of convergence FMG-PCG', (resNorm/r0)**(1/numIter)) + its.add('Number of iterations FMG-PCG', numIter) + res.add('Residual norm FMG-PCG', resNorm) + resHist.add('FMG-PCG', residuals) + +if d.doFMGPGMRES: + ml.cycle = FMG_V + ml.maxIter = 1 + gmres.setPreconditioner(ml.asPreconditioner(cycle=V), False) + with d.timer('Solve FMG-PGMRES'): + ml(rhs, x) + gmres.setInitialGuess(x) + numIter = gmres(rhs, x) + residuals = gmres.residuals + numIter += 1 + A.residual_py(x, rhs, r) + resNorm = r.norm(False) + rate.add('Rate of convergence FMG-PGMRES', (resNorm/r0)**(1/numIter)) + its.add('Number of iterations FMG-PGMRES', numIter) + res.add('Residual norm FMG-PGMRES', resNorm) + resHist.add('FMG-PGMRES', residuals) + +d.comm.Barrier() + +if d.doPCoarsen: + from PyNucleus_base.linear_operators import CSR_LinearOperator + from PyNucleus_base.myTypes import INDEX, REAL + + A = [] + P = [] + nullspace = [] + coords = [] + if d.element == 'P2': + lvls = [-2, -1] + elif d.element == 'P3': + lvls = [-3, -2, -1] + for lvl in lvls: + numGlobalDoFs0 = overlaps.countDoFs(level=lvl) + idx0 = overlaps.getGlobalIndices(lvl) + dm0 = hM[FINE].algebraicLevels[lvl].DoFMap + assert idx0.shape[0] == dm0.num_dofs + P0 = CSR_LinearOperator(idx0, + np.arange(dm0.num_dofs+1, dtype=INDEX), + np.ones((idx0.shape[0]), dtype=REAL)) + P0.num_columns = numGlobalDoFs0 + P.append(P0.to_csr()) + + coords.append(d.comm.reduce(P[-1].T @ (overlaps.getDistributeAsDiagonalOperator(lvl).to_csr()@hM[FINE].algebraicLevels[lvl].DoFMap.getDoFCoordinates()))) + nullspace.append(d.comm.reduce(P[-1].T @ (overlaps.getDistributeAsDiagonalOperator(lvl).to_csr()@hM[FINE].algebraicLevels[lvl].DoFMap.ones()))) + + A.append(d.comm.reduce((P[-1].T @ hM[FINE].algebraicLevels[lvl].A.to_csr() @ P[-1]).tocsr())) + + b_global = d.comm.reduce(P[-1].T @ rhs.toarray()) + u_global = d.comm.reduce(P[-1].T @ (overlaps.getDistributeOperator()*x)) + + P_ops = [] + for k in range(len(lvls)-1): + lvlC = lvls[k] + lvlF = lvls[k+1] + P_ops.append(d.comm.reduce((P[lvlF].T @ overlaps.getDistributeAsDiagonalOperator(lvlF).to_csr() @ hM[FINE].algebraicLevels[lvlF].P.to_csr() @ P[lvlC]).tocsr())) + + if d.comm.rank == 0: + from scipy.io import mmwrite + from PyNucleus import solverFactory + + for element, a in zip(pSchedule, A): + mmwrite('A_'+element, a, symmetry='general') + for k in range(len(pSchedule)-1): + elementC = pSchedule[k] + elementF = pSchedule[k+1] + mmwrite('P_'+elementF+'_'+elementC, P_ops[k], symmetry='general') + mmwrite('b_'+d.element, b_global[:, np.newaxis], symmetry='general') + mmwrite('x_'+d.element, u_global[:, np.newaxis], symmetry='general') + for element, coord in zip(pSchedule, coords): + mmwrite('coords_'+element, coord) + for element, ns in zip(pSchedule, nullspace): + mmwrite('nullspace_'+element, ns[:, np.newaxis]) + # print(np.linalg.norm(b_global-A_global@u_global)) + # print(np.linalg.norm(b_global)) + # print(repr(A_global)) + + # A_global = CSR_LinearOperator.from_csr(A_global) + # A2_global = CSR_LinearOperator.from_csr(A2_global) + # P_global = CSR_LinearOperator.from_csr(P_global) + # mg = solverFactory('mg', hierarchy=[{'A': A2_global}, {'A': A_global, 'P': P_global, 'R': P_global.transpose()}], setup=True, smoother=('jacobi', {'presmoothingSteps': 2, + # 'postsmoothingSteps': 2})) + # print(mg) + # cg = solverFactory('cg', A=A_global, setup=True, maxIter=d.maxiter, tolerance=tol) + # cg.setPreconditioner(mg.asPreconditioner(), False) + # cg.setInitialGuess() + # 
cg(b_global, u_global) + # print(cg.residuals, len(cg.residuals)) + +if p.L2ex: + if p.boundaryCond: + d.logger.warning('L2 error is wrong for inhomogeneous BCs') + with d.timer('Mass matrix'): + M = DoFMap_fine.assembleMass(sss_format=d.symmetric) + z = DoFMap_fine.assembleRHS(p.exactSolution) + L2err = np.sqrt(np.absolute(x.inner(M*x, True, False) - + 2*z.inner(x, False, True) + + p.L2ex)) + del z + errs.add('L^2 error', L2err) +if p.H10ex: + if p.boundaryCond: + d.logger.warning('H^1_0 error is wrong for inhomogeneous BCs') + H10err = np.sqrt(np.absolute(p.H10ex - rhs.inner(x, False, True))) + errs.add('H^1_0 error', H10err) +d.logger.info('\n'+str(rate+its+res+errs)) + +if d.startPlot('residuals'): + import matplotlib.pyplot as plt + if d.doMG: + plt.plot(resHist.MG, '-*', label='MG') + if d.doFMG: + plt.plot(resHist.FMG, '-.', label='FMG') + if d.doCG: + plt.plot(resHist.CG, '--', label='CG') + if d.doPCG: + plt.plot(resHist.PCG, '-*', label='MG-PCG') + if d.doFMGPCG: + plt.plot(resHist.FMGPCG, '-*', label='FMG-PCG') + if d.doGMRES: + plt.plot(resHist.GMRES, '.', label='GMRES') + if d.doPGMRES: + plt.plot(resHist.PGMRES, '-.', label='MG-GMRES') + if d.doFMGPGMRES: + plt.plot(resHist.FMGPGMRES, '-*', label='FMG-PGMRES') + plt.yscale('log') + plt.legend() + +if d.checkSolution: + interfaces = hM[FINE].meshLevels[-1].interfaces + (global_mesh, + global_solution, + global_dm) = accumulate2global(subdomain, interfaces, DoFMap_fine, x, + comm=d.comm) + if d.isMaster: + from scipy.sparse.linalg import spsolve + from numpy.linalg import norm + A = global_dm.assembleStiffness() + rhs = global_dm.assembleRHS(p.rhsFun) + if p.boundaryCond: + global_boundaryDoFMap = global_dm.getComplementDoFMap() + global_boundary_data = global_boundaryDoFMap.interpolate(p.boundaryCond) + global_A_boundary = global_dm.assembleStiffness(dm2=global_boundaryDoFMap) + rhs -= global_A_boundary*global_boundary_data + with d.timer('SpSolver'): + y = spsolve(A.to_csr(), rhs) + if p.boundaryCond: + sol_augmented, dm_augmented = global_dm.augmentWithBoundaryData(global_solution, global_boundary_data) + global_mass = dm_augmented.assembleMass() + global_z = dm_augmented.assembleRHS(p.exactSolution) + L2err = np.sqrt(np.absolute(np.vdot(sol_augmented, global_mass*sol_augmented) - + 2*np.vdot(global_z, sol_augmented) + + p.L2ex)) + else: + global_mass = global_dm.assembleMass() + global_z = global_dm.assembleRHS(p.exactSolution) + L2err = np.sqrt(np.absolute(np.vdot(global_solution, global_mass*global_solution) - + 2*np.vdot(global_z, global_solution) + + p.L2ex)) + errsSpSolve = d.addOutputGroup('errSpSolve') + errsSpSolve.add('L2', L2err) + errsSpSolve.add('2-norm', norm(global_solution-y, 2)) + errsSpSolve.add('max-norm', np.abs(global_solution-y).max()) + d.logger.info('\n'+str(errsSpSolve)) + if d.startPlot('spSolve'): + import matplotlib.pyplot as plt + global_solution.plot() + if p.exactSolution and d.startPlot('spSolveExactSolution'): + global_dm.interpolate(p.exactSolution).plot() + if d.startPlot('spSolveError'): + (global_solution-y).plot() +d.finish() diff --git a/drivers/runSerialGMG.py b/drivers/runSerialGMG.py new file mode 100644 index 0000000..3c6fca8 --- /dev/null +++ b/drivers/runSerialGMG.py @@ -0,0 +1,204 @@ +#!/usr/bin/env python3 +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. 
Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import mpi4py.MPI as MPI +import numpy as np +from PyNucleus.base import driver, solverFactory +from PyNucleus.fem import diffusionProblem +from PyNucleus.multilevelSolver import (V, FMG_V, + hierarchyManager, + inputConnector, + paramsForSerialMG) + + +comm = MPI.COMM_WORLD +assert comm.size == 1 + +d = driver() +p = diffusionProblem(d) + +solver = d.addGroup('solver') +solver.add('amg', False) +solver.add('lu', False) +solver.add('chol', False) +solver.add('smoother', 'jacobi', acceptedValues=['gauss_seidel', 'sor', 'chebyshev', 'ilu']) +solver.add('maxiter', 50) + +d.declareFigure('residuals', default=False) +d.declareFigure('solution', default=False) + +params = d.process() + +info = d.addOutputGroup('info') + +with d.timer('setup levels'): + if not params['amg']: + hierarchies, connectors = paramsForSerialMG(p.noRef, params) + connectors['input'] = {'type': inputConnector, + 'params': {'domain': d.domain}} + FINE = 'fine' + hierarchies[-1]['label'] = FINE + + hM = hierarchyManager(hierarchies, connectors, params, comm) + hM.setup() + hM.display() + + levels = hM + + mesh = hM[FINE].meshLevels[-1].mesh + DoFMap = hM[FINE].algebraicLevels[-1].DoFMap + else: + raise NotImplementedError() + +if d.element == 'P1': + tol = 0.5*mesh.h**2 +elif d.element == 'P2': + tol = 0.001*mesh.h**3 +elif d.element == 'P3': + tol = 0.001*mesh.h**4 + +with d.timer('RHS'): + rhs = DoFMap.assembleRHS(p.rhsFun) +if p.boundaryCond: + with d.timer('BC'): + boundary_data = DoFMap.getBoundaryData(p.boundaryCond) + levels[-1]['A'] = DoFMap.assembleStiffness(boundary_data, rhs, + sss_format=True, + reorder=d.reorder) + +info.add('DoFs', rhs.shape[0]) +info.add('element', params['element']) +info.add('Tol', tol) +d.logger.info('\n'+str(info)) + +smootherParams = {'jacobi': {'presmoothingSteps': 2, + 'postsmoothingSteps': 2}, + 'gauss_seidel': {'presmoothingSteps': 1, + 'postsmoothingSteps': 1}, + 'sor': {}, + 'chebyshev': {'degree': 3}, + 'ilu': {}} +ml = solverFactory.build('mg', hierarchy=levels, smoother=(d.smoother, smootherParams[d.smoother]), maxIter=d.maxiter, tolerance=tol, setup=True) +d.logger.info('\n'+str(ml)) + +A = hM[FINE].algebraicLevels[-1].A +x = DoFMap.zeros() +r = DoFMap.zeros() +A.residual_py(x, rhs, r) +r0 = ml.norm(r, False) + +rate = d.addOutputGroup('rates', tested=True, aTol=1e-2) +its = d.addOutputGroup('iterations', tested=True) +res = d.addOutputGroup('residuals', tested=True, rTol=3e-1) +resHist = d.addOutputGroup('resHist', tested=True, aTol=5e-8) +errors = d.addOutputGroup('errors', tested=True, rTol=2.) 
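+# The rates reported below are the geometric mean of the per-iteration
+# residual reduction, rate = (resNorm/r0)**(1/numIter); e.g. a residual
+# drop from 1e0 to 1e-8 over 10 iterations corresponds to a rate of
+# roughly 10**(-0.8) ~ 0.16.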
+ +for cycle, label in [(V, 'MG'), + (FMG_V, 'FMG')]: + with d.timer('Solve MG'): + ml.cycle = cycle + numIter = ml(rhs, x) + residuals = ml.residuals + A.residual_py(x, rhs, r) + resNorm = ml.norm(r, False) + rate.add('Rate of convergence '+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations '+label, numIter) + res.add('Residual norm '+label, resNorm) + resHist.add(label, residuals) + +if p.boundaryCond: + y = DoFMap.augmentWithBoundaryData(x, boundary_data) + mesh.exportSolutionVTK(y, y.dm, 'fichera.vtk') + +# set up cg +cg = solverFactory.build('cg', A=A, maxIter=d.maxiter, tolerance=tol, setup=True) +# set up gmres +gmres = solverFactory.build('gmres', A=A, maxIter=d.maxiter//5, restarts=5, tolerance=tol, setup=True) +# set up bicgstab +bicgstab = solverFactory.build('bicgstab', A=A, maxIter=d.maxiter, tolerance=tol, setup=True) + +for solver, label in [(cg, 'CG'), + (gmres, 'GMRES'), + (bicgstab, 'BICGSTAB')]: + with d.timer('Solve '+label): + numIter = solver(rhs, x) + residuals = solver.residuals + A.residual_py(x, rhs, r) + resNorm = ml.norm(r, False) + rate.add('Rate of convergence '+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations '+label, numIter) + res.add('Residual norm '+label, resNorm) + resHist.add(label, residuals) + + with d.timer('Solve P'+label): + solver.setPreconditioner(ml.asPreconditioner(cycle=V)) + numIter = solver(rhs, x) + residuals = solver.residuals + A.residual_py(x, rhs, r) + resNorm = ml.norm(r, False) + rate.add('Rate of convergence P'+label, (resNorm/r0)**(1/numIter)) + its.add('Number of iterations P'+label, numIter) + res.add('Residual norm P'+label, resNorm) + resHist.add('P'+label, residuals) + + +if d.lu: + # set up lu + with d.timer('Setup LU'): + lu = solverFactory.build('lu', A, setup=True) + with d.timer('Solve LU'): + lu(rhs, x) + A.residual_py(x, rhs, r) + resNorm = ml.norm(r, False) + res.add('Residual norm LU', resNorm) + +if d.chol: + # set up cholesky + with d.timer('Setup CHOL'): + chol = solverFactory.build('chol', A, setup=True) + with d.timer('Solve CHOL'): + chol(rhs, x) + A.residual_py(x, rhs, r) + resNorm = ml.norm(r, False) + res.add('Residual norm CHOL', resNorm) + +del ml + +if p.L2ex: + with d.timer('Mass matrix'): + M = DoFMap.assembleMass(sss_format=True) + z = DoFMap.assembleRHS(p.exactSolution) + L2err = np.sqrt(np.absolute(np.vdot(x, M*x) - 2*np.vdot(z, x) + p.L2ex)) + errors.add('L^2 error', L2err) + errors.add('L^2 error constant', L2err/mesh.h**2) +if p.H10ex: + H10err = np.sqrt(np.absolute(p.H10ex - np.vdot(rhs, x))) + errors.add('H^1_0 error', H10err) + errors.add('H^1_0 error constant', H10err/mesh.h) + +d.logger.info('\n'+str(rate+its+res+errors)) + +if d.startPlot('residuals'): + import matplotlib.pyplot as plt + plt.plot(resHist.MG, '-*', label='MG') + plt.plot(resHist.FMG, '-.', label='FMG') + plt.plot(resHist.CG, '--', label='CG') + plt.plot(resHist.PCG, '-*', label='MG-PCG') + plt.plot(resHist.GMRES, '.', label='GMRES') + plt.plot(resHist.PGMRES, '-o', label='MG-GMRES') + plt.plot(resHist.BICGSTAB, '.', label='BICGSTAB') + plt.plot(resHist.PBICGSTAB, '-o', label='MG-BICGSTAB') + plt.yscale('log') + plt.legend() + +if d.startPlot('solution') and hasattr(mesh, 'plotFunction'): + mesh.plotFunction(x) + +d.finish() diff --git a/drivers/variableOrder.py b/drivers/variableOrder.py new file mode 100644 index 0000000..8894ea2 --- /dev/null +++ b/drivers/variableOrder.py @@ -0,0 +1,218 @@ +#!/usr/bin/env python3 
+################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from numpy.linalg import norm as npnorm +from PyNucleus.base.ip_norm import ip_serial, norm_serial +from PyNucleus.base import driver, solverFactory, krylov_solver +from PyNucleus.base.linear_operators import invDiagonal +from PyNucleus.fem import (str2DoFMap, + meshFactory, + functionFactory) +from PyNucleus.fem.mesh import plotManager +from PyNucleus.nl import (nonlocalBuilder, + constFractionalOrder, + variableConstFractionalOrder, + leftRightFractionalOrder, + smoothedLeftRightFractionalOrder, + innerOuterFractionalOrder, + getFractionalKernel) + +d = driver() +d.add('domain', acceptedValues=['interval', 'square', 'circle']) +d.add('do_dense', True) +d.add('do_h2', False) +d.add('do_transpose', False) +d.add('solver', acceptedValues=['lu', 'cg', 'gmres']) +d.add('maxIter', 1000) +d.add('tol', 1e-7) +d.add('element', acceptedValues=['P1', 'P0']) +d.add('s1', 0.25) +d.add('s2', 0.75) +d.add('normalizePlot', False) + +d.declareFigure('variableOrder') +d.declareFigure('error') + +params = d.process() + +s1 = d.s1 +s2 = d.s2 +sol1 = sol2 = None +smean = 0.5*(s1+s2) +if d.domain == 'interval': + mesh = meshFactory.build(d.domain, noRef=10, a=-1, b=1) + if d.element == 'P0': + assert s1 < 0.5 and s2 < 0.5 + sVals = [constFractionalOrder(s1), + constFractionalOrder(s2), + leftRightFractionalOrder(s1, s2), + leftRightFractionalOrder(s1, s2, s1, smean), + leftRightFractionalOrder(s1, s2, s2, smean), + ] + elif d.element == 'P1': + sNonSym = leftRightFractionalOrder(s1, s2) + sNonSym.symmetric = False + sVals = [ + # constFractionalOrder(s1), + # constFractionalOrder(s2), + # variableConstFractionalOrder(s1), + # variableConstFractionalOrder(s2), + leftRightFractionalOrder(s1, s2, s1, s1), + leftRightFractionalOrder(s1, s2, smean, smean), + leftRightFractionalOrder(s1, s2, s2, s2), + # sNonSym, + # smoothedLeftRightFractionalOrder(s1, s2, slope=200.), + # smoothedLeftRightFractionalOrder(s1, s2, slope=1000.), + # leftRightFractionalOrder(s1, s2, s1, (s1+s2)/2), + # leftRightFractionalOrder(s1, s2, s2, (s1+s2)/2) + ] + rhs = functionFactory.build('constant', value=1.) + sol1 = functionFactory.build('solFractional', s=s1, dim=mesh.dim) + sol2 = functionFactory.build('solFractional', s=s2, dim=mesh.dim) +elif d.domain == 'square': + mesh = meshFactory.build(d.domain, noRef=5, N=2, M=2, ax=-1, ay=-1, bx=1, by=1) + sVals = [ + # constFractionalOrder(s1), + # constFractionalOrder(s2), + leftRightFractionalOrder(s1, s2) + # innerOuterFractionalOrder(mesh.dim, s1, s2, 0.3) + ] + # rhs = functionFactory.build('Lambda', fun=lambda x: 1. if x[0] > 0 else 0.) + rhs = functionFactory.build('constant', value=1.) +elif d.domain == 'circle': + mesh = meshFactory.build(d.domain, noRef=5, n=8) + sVals = [ + innerOuterFractionalOrder(mesh.dim, s2, s1, 0.5), + ] + rhs = functionFactory.build('constant', value=1.) 
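+    # 'solFractional' is the closed-form solution of the constant-order
+    # problem (-Delta)^s u = 1 on the unit ball with u = 0 outside
+    # (Getoor's formula, u(x) = C(d, s)*(1-|x|^2)_+^s). It is exact only
+    # for constant s; sol1 and sol2 are plotted as references for the two
+    # constant-order extremes s1 and s2.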
+ sol1 = functionFactory.build('solFractional', s=s1, dim=mesh.dim) + sol2 = functionFactory.build('solFractional', s=s2, dim=mesh.dim) +else: + raise NotImplementedError() + +DoFMap = str2DoFMap(d.element) +dm = DoFMap(mesh) +d.logger.info(mesh) +d.logger.info(dm) + +centerDoF = np.argmin(npnorm(dm.getDoFCoordinates(), axis=1)) +horizon = functionFactory.build('constant', value=np.inf) +norm = norm_serial() +inner = ip_serial() + + +if d.willPlot('variableOrder'): + plotDefaults = {} + if mesh.dim == 2: + plotDefaults['flat'] = True + pM = plotManager(mesh, dm, defaults=plotDefaults) + if d.do_dense and d.do_h2: + pMerr = plotManager(mesh, dm, defaults=plotDefaults) + +for s in sVals: + b = dm.assembleRHS(rhs) + err = None + kernel = getFractionalKernel(mesh.dim, s, horizon) + builder = nonlocalBuilder(mesh, dm, kernel, comm=d.comm, logging=True, + # params={'maxLevels': 3} + # params={'eta': 0.9} + ) + for build, label, do in [(builder.getDense, 'dense', d.do_dense), + (builder.getH2, 'H2', d.do_h2)]: + if not do: + continue + with d.timer(label+' assemble '+str(s)): + A = build() + import matplotlib.pyplot as plt + # from fractionalLaplacian.clusterMethodCy import getFractionalOrders + if label == 'H2': + plt.figure() + A2 = builder.getDense() + plt.pcolormesh(np.log10(np.absolute(A.toarray()-A2.toarray()))) + plt.colorbar() + if mesh.dim == 1: + plt.figure() + _, Pnear = builder.getH2(True) + + for c in Pnear: + c.plot() + # cell_orders = [] + # for c1 in c.n1.cells: + # for c2 in c.n2.cells: + # cell_orders.append(orders[c1, c2]) + # box1 = c.n1.box + # box2 = c.n2.box + # plt.text(0.5*(box1[0, 0]+box1[0, 1]), 0.5*(box2[0, 0]+box2[0, 1]), '({},{})'.format(min(cell_orders), max(cell_orders))) + for lvl in A.Pfar: + for c in A.Pfar[lvl]: + c.plot(color='blue' if c.constantKernel else 'green') + # cell_orders = [] + # for c1 in c.n1.cells: + # for c2 in c.n2.cells: + # cell_orders.append(orders[c1, c2]) + # box1 = c.n1.box + # box2 = c.n2.box + # plt.text(0.5*(box1[0, 0]+box1[0, 1]), 0.5*(box2[0, 0]+box2[0, 1]), '({},{})'.format(min(cell_orders), max(cell_orders))) + else: + plt.figure() + plt.spy(A.Anear.toarray()) + d.logger.info(str(A)) + + with d.timer(label+' solve '+str(s)): + solver = solverFactory.build(d.solver, A=A, + maxIter=d.maxIter, tolerance=d.tol, + setup=True) + if isinstance(solver, krylov_solver): + Dinv = invDiagonal(A) + solver.setPreconditioner(Dinv, False) + x = dm.zeros() + numIter = solver(b, x) + if err is None: + err = x.copy() + else: + err -= x + pMerr.add(err, label=str(s)) + M = dm.assembleMass() + L2err = np.sqrt(abs(inner(err, M*err))) + d.logger.info('L2 error: {}'.format(L2err)) + d.logger.info('{}: resNorm {} in {} iters, norm {}'.format(s, norm(A*x-b), numIter, norm(x))) + if d.normalizePlot: + x /= x[centerDoF] + if d.willPlot('variableOrder'): + pM.add(x, label=label+' '+str(s)) + if not s.symmetric and d.do_transpose and d.do_dense: + At = A.transpose() + with d.timer(label+' solve transpose '+str(s)): + solver = solverFactory.build(d.solver, A=At, + maxIter=d.maxIter, tolerance=d.tol, + setup=True) + if isinstance(solver, krylov_solver): + Dinv = invDiagonal(At) + solver.setPreconditioner(Dinv, False) + xt = dm.zeros() + numIter = solver(b, xt) + d.logger.info('{} transpose: resNorm {} in {} iters'.format(s, norm(At*xt-b), numIter)) + if d.normalizePlot: + xt /= xt[centerDoF] + if d.willPlot('variableOrder'): + pM.add(xt, label=label+' transpose '+str(s)) + +if d.startPlot('variableOrder'): + for s, sol in [(s1, sol1), (s2, sol2)]: + if sol is not 
None: + x = dm.interpolate(sol) + if d.normalizePlot: + x /= x[centerDoF] + pM.add(x, label='exact '+str(s), ls='--') + pM.plot(legendOutside=d.plotFolder != '') +if d.do_dense and d.do_h2: + if d.startPlot('error'): + pMerr.plot(legendOutside=d.plotFolder != '') +d.finish() diff --git a/fem/.gitattributes b/fem/.gitattributes new file mode 100644 index 0000000..14042bf --- /dev/null +++ b/fem/.gitattributes @@ -0,0 +1,2 @@ + +PyNucleus_fem/_version.py export-subst diff --git a/fem/MANIFEST.in b/fem/MANIFEST.in new file mode 100644 index 0000000..7ad9346 --- /dev/null +++ b/fem/MANIFEST.in @@ -0,0 +1,3 @@ + +include versioneer.py +include PyNucleus_fem/_version.py diff --git a/fem/PyNucleus_fem/DoFMaps.pxd b/fem/PyNucleus_fem/DoFMaps.pxd new file mode 100644 index 0000000..d3d9986 --- /dev/null +++ b/fem/PyNucleus_fem/DoFMaps.pxd @@ -0,0 +1,81 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport REAL_t, COMPLEX_t, INDEX_t, BOOL_t +from PyNucleus_base.ip_norm cimport ipBase, normBase, complexipBase, complexNormBase +from . meshCy cimport meshBase, vertices_t, cells_t +from . functions cimport function, complexFunction + +include "vector_decl_REAL.pxi" +include "vector_decl_COMPLEX.pxi" + + +cdef class DoFMap: + cdef: + public meshBase mesh + readonly INDEX_t dim + BOOL_t reordered + public list localShapeFunctions + public REAL_t[:, ::1] nodes + public INDEX_t num_dofs + public INDEX_t num_boundary_dofs + public INDEX_t[:, ::1] dofs + public INDEX_t polynomialOrder + public list tag + public INDEX_t dofs_per_vertex + public INDEX_t dofs_per_edge + public INDEX_t dofs_per_face + public INDEX_t dofs_per_cell + public INDEX_t dofs_per_element + public ipBase inner + public normBase norm + public complexipBase complex_inner + public complexNormBase complex_norm + cdef INDEX_t cell2dof(self, + const INDEX_t cellNo, + const INDEX_t perCellNo) + cpdef void reorder(self, + const INDEX_t[::1] perm) + cdef void getNodalCoordinates(self, REAL_t[:, ::1] cell, REAL_t[:, ::1] coords) + cpdef void getVertexDoFs(self, INDEX_t[:, ::1] v2d) + cpdef void resetUsingIndicator(self, function indicator) + cpdef void resetUsingFEVector(self, REAL_t[::1] ind) + + +cdef class P1_DoFMap(DoFMap): + pass + + +cdef class P2_DoFMap(DoFMap): + pass + + +cdef class P0_DoFMap(DoFMap): + pass + + +cdef class P3_DoFMap(DoFMap): + pass + + + +cdef class shapeFunction: + cdef: + REAL_t[::1] bary + cdef REAL_t eval(self, const REAL_t[::1] lam) + cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride) + cdef REAL_t evalGlobal(self, REAL_t[:, ::1] simplex, REAL_t[::1] x) + + +cdef class vectorShapeFunction: + cdef: + INDEX_t dim + INDEX_t[::1] cell + cpdef void setCell(self, INDEX_t[::1] cell) + cdef void eval(self, const REAL_t[::1] lam, const REAL_t[:, ::1] gradLam, REAL_t[::1] value) + cdef void evalGlobal(self, const REAL_t[:, ::1] simplex, const REAL_t[::1] x, REAL_t[::1] value) diff --git a/fem/PyNucleus_fem/DoFMaps.pyx b/fem/PyNucleus_fem/DoFMaps.pyx new file mode 100644 index 0000000..1390ed3 --- /dev/null +++ b/fem/PyNucleus_fem/DoFMaps.pyx @@ -0,0 +1,1919 @@ 
+################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from libc.math cimport isnan +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, BOOL +from cpython cimport Py_buffer +from PyNucleus_base.blas cimport assign, assign3, assignScaled, matmat +from PyNucleus_base.ip_norm cimport vector_t, ip_serial, norm_serial, wrapRealInnerToComplex, wrapRealNormToComplex +from PyNucleus_base import uninitialized +from PyNucleus_base.sparsityPattern cimport sparsityPattern +from PyNucleus_base.linear_operators cimport LinearOperator +from PyNucleus_base.tupleDict cimport tupleDictINDEX +from . meshCy cimport (sortEdge, sortFace, + encode_edge, + faceVals, + getBarycentricCoords1D, + getBarycentricCoords2D, + cellFinder, cellFinder2, + intTuple) +from . meshCy import getSubmesh2 +from . quadrature cimport simplexQuadratureRule, simplexXiaoGimbutas +from PyNucleus_base.linear_operators cimport (CSR_LinearOperator, + SSS_LinearOperator, + sparseGraph) +from PyNucleus_base.sparseGraph import cuthill_mckee + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline REAL_t evalP0(REAL_t[:, ::1] simplex, REAL_t[::1] uloc, REAL_t[::1] x): + return uloc[0] + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline REAL_t evalP12D(REAL_t[:, ::1] simplex, REAL_t[::1] uloc, REAL_t[::1] x): + cdef: + REAL_t vol, l3, res, l + INDEX_t i + vol = ((simplex[0, 0]-simplex[1, 0])*(simplex[2, 1]-simplex[1, 1]) - + (simplex[0, 1]-simplex[1, 1])*(simplex[2, 0]-simplex[1, 0])) + l3 = vol + res = 0. 
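+    # Barycentric evaluation of the P1 interpolant: vol is twice the signed
+    # area of the triangle, each l is twice the signed area of the
+    # sub-triangle spanned by x and the edge opposite vertex i, and
+    # l3 = vol - l0 - l1 is the remaining coordinate, so the returned
+    # res/vol equals sum_i uloc[i]*lambda_i(x).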
+ for i in range(2): + l = ((x[0]-simplex[(i+1) % 3, 0])*(simplex[(i+2) % 3, 1]-simplex[(i+1) % 3, 1]) - + (x[1]-simplex[(i+1) % 3, 1])*(simplex[(i+2) % 3, 0]-simplex[(i+1) % 3, 0])) + l3 -= l + res += uloc[i]*l + res += uloc[2]*l3 + return res/vol + + +include "vector_REAL.pxi" +include "vector_COMPLEX.pxi" + + +cdef class DoFMap: + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def __init__(self, + meshBase mesh, + INDEX_t dofs_per_vertex, + INDEX_t dofs_per_edge, + INDEX_t dofs_per_face, + INDEX_t dofs_per_cell, + tag=None, + INDEX_t skipCellsAfter=-1): + + cdef: + INDEX_t vertices_per_element + INDEX_t edges_per_element + INDEX_t faces_per_element + INDEX_t manifold_dim = mesh.manifold_dim + + self.mesh = mesh + self.dim = self.mesh.dim + + if isinstance(tag, function): + self.tag = [-10] + elif isinstance(tag, list): + self.tag = tag + else: + self.tag = [tag] + if manifold_dim == 0: + vertices_per_element = 1 + edges_per_element = 0 + faces_per_element = 0 + elif manifold_dim == 1: + vertices_per_element = 2 + edges_per_element = 0 + faces_per_element = 0 + elif manifold_dim == 2: + vertices_per_element = 3 + edges_per_element = 3 + faces_per_element = 0 + elif manifold_dim == 3: + vertices_per_element = 4 + edges_per_element = 6 + faces_per_element = 4 + else: + raise NotImplementedError() + + self.dofs_per_vertex = dofs_per_vertex + if edges_per_element > 0: + self.dofs_per_edge = dofs_per_edge + else: + self.dofs_per_edge = 0 + if faces_per_element > 0: + self.dofs_per_face = dofs_per_face + else: + self.dofs_per_face = 0 + self.dofs_per_cell = dofs_per_cell + self.dofs_per_element = (vertices_per_element*dofs_per_vertex + + edges_per_element*dofs_per_edge + + faces_per_element*dofs_per_face + + dofs_per_cell) + + cdef: + INDEX_t[:, ::1] cells = mesh.cells + INDEX_t nc = cells.shape[0] + INDEX_t c0, c1, c2, c3, i, j, k, dof, numDoFs, numBdofs, v + INDEX_t[:, ::1] temp = uninitialized((6, 2), dtype=INDEX) + INDEX_t[::1] e01 = temp[0, :] + INDEX_t[::1] e12 = temp[1, :] + INDEX_t[::1] e20 = temp[2, :] + INDEX_t[::1] e03 = temp[3, :] + INDEX_t[::1] e13 = temp[4, :] + INDEX_t[::1] e23 = temp[5, :] + INDEX_t[::1] edgeOrientations = uninitialized((edges_per_element), dtype=INDEX) + INDEX_t[:, ::1] temp2 = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f012 = temp2[0, :] + INDEX_t[::1] f013 = temp2[1, :] + INDEX_t[::1] f123 = temp2[2, :] + INDEX_t[::1] f023 = temp2[3, :] + np.ndarray[INDEX_t, ndim=2] dofs_mem = -MAX_INT*np.ones((nc, + self.dofs_per_element), + dtype=INDEX) + INDEX_t[:, ::1] dofs = dofs_mem + tupleDictINDEX eV + faceVals fV + INDEX_t[::1] boundaryVertices + INDEX_t[:, ::1] boundaryEdges + INDEX_t[:, ::1] boundaryFaces + INDEX_t[::1] vertices = MAX_INT*np.ones((mesh.num_vertices), + dtype=INDEX) + + self.dofs = dofs + + numBdofs = -1 + if dofs_per_vertex > 0: + if manifold_dim > 0: + boundaryVertices = mesh.getBoundaryVerticesByTag(tag) + for v in boundaryVertices: + vertices[v] = numBdofs + numBdofs -= dofs_per_vertex + if dofs_per_edge > 0: + eV = tupleDictINDEX(mesh.num_vertices, deleteHits=False) + boundaryEdges = mesh.getBoundaryEdgesByTag(tag) + for i in range(boundaryEdges.shape[0]): + sortEdge(boundaryEdges[i, 0], boundaryEdges[i, 1], e01) + eV.enterValue(e01, numBdofs) + numBdofs -= dofs_per_edge + if dofs_per_face > 0: + fV = faceVals(mesh.num_vertices, deleteHits=True) + boundaryFaces = mesh.getBoundaryFacesByTag(tag) + for i in range(boundaryFaces.shape[0]): + sortFace(boundaryFaces[i, 0], boundaryFaces[i, 1], 
boundaryFaces[i, 2], f012) + fV.enterValue(f012, numBdofs) + numBdofs -= dofs_per_face + self.num_boundary_dofs = -numBdofs-1 + + if skipCellsAfter == -1: + skipCellsAfter = nc + numDoFs = 0 + for i in range(nc): + # dofs on the vertices + if dofs_per_vertex > 0: + for k in range(vertices_per_element): + v = cells[i, k] + dof = vertices[v] + if dof != MAX_INT: + # Vertex already has a DoF + if dof >= 0: + for j in range(k*dofs_per_vertex, + (k+1)*dofs_per_vertex): + dofs[i, j] = dof + dof += 1 + else: + for j in range(k*dofs_per_vertex, + (k+1)*dofs_per_vertex): + dofs[i, j] = dof + dof -= 1 + else: + # Vertex does not already have a DoF + # Do we want to assign one? + if i < skipCellsAfter: + vertices[v] = numDoFs + dof = numDoFs + for j in range(k*dofs_per_vertex, + (k+1)*dofs_per_vertex): + dofs[i, j] = dof + dof += 1 + numDoFs += dofs_per_vertex + # dofs on the edges + if dofs_per_edge > 0: + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e01) + sortEdge(c1, c2, e12) + sortEdge(c0, c2, e20) + edgeOrientations[0] = c1-c0 + edgeOrientations[1] = c2-c1 + edgeOrientations[2] = c0-c2 + if manifold_dim == 3: + c3 = cells[i, 3] + sortEdge(c0, c3, e03) + sortEdge(c1, c3, e13) + sortEdge(c2, c3, e23) + edgeOrientations[3] = c3-c0 + edgeOrientations[4] = c3-c1 + edgeOrientations[5] = c3-c2 + + for k in range(edges_per_element): + # Try to enter new DoF for edge + dof = eV.enterValue(temp[k, :], numDoFs) + # We got the DoF of that edge back. + if dof == numDoFs: + # It's the new one we assigned + if edgeOrientations[k] < 0: + dof += dofs_per_edge-1 + if i < skipCellsAfter: + for j in range(vertices_per_element*dofs_per_vertex + k*dofs_per_edge, + vertices_per_element*dofs_per_vertex + (k+1)*dofs_per_edge): + dofs[i, j] = dof + if edgeOrientations[k] < 0: + dof -= 1 + else: + dof += 1 + numDoFs += dofs_per_edge + else: + eV.removeValue(temp[k, :]) + else: + # It was already present + if dof >= 0: + if edgeOrientations[k] < 0: + dof += dofs_per_edge-1 + for j in range(vertices_per_element*dofs_per_vertex + k*dofs_per_edge, + vertices_per_element*dofs_per_vertex + (k+1)*dofs_per_edge): + dofs[i, j] = dof + if edgeOrientations[k] < 0: + dof -= 1 + else: + dof += 1 + else: + if edgeOrientations[k] < 0: + dof += dofs_per_edge-1 + for j in range(vertices_per_element*dofs_per_vertex + k*dofs_per_edge, + vertices_per_element*dofs_per_vertex + (k+1)*dofs_per_edge): + dofs[i, j] = dof + if edgeOrientations[k] < 0: + dof += 1 + else: + dof -= 1 + # dofs on the faces + if dofs_per_face > 0: + c0, c1, c2, c3 = cells[i, 0], cells[i, 1], cells[i, 2], cells[i, 3] + sortFace(c0, c1, c2, f012) + sortFace(c0, c1, c3, f013) + sortFace(c1, c2, c3, f123) + sortFace(c0, c2, c3, f023) + for k in range(faces_per_element): + # Try to enter new DoF for face + dof = fV.enterValue(temp2[k, :], numDoFs) + # We got the DoF of that face back. + if dof == numDoFs: + # It's the new one we assigned + if i < skipCellsAfter: + for j in range(vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + k*dofs_per_face, + vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + (k+1)*dofs_per_face): + dofs[i, j] = dof + dof += 1 + numDoFs += dofs_per_face + else: + # FIX: This should not be commented out! + # fV.removeValue(temp2[k, :]) # Not implemented yet!! 
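+                        # Since faceVals.removeValue is unimplemented, the
+                        # tentatively entered face key stays in fV for cells
+                        # past skipCellsAfter; harmless in the default case
+                        # skipCellsAfter == nc, where this branch never runs.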
+ pass + else: + # It was already present + if dof >= 0: + dof += dofs_per_face-1 + for j in range(vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + k*dofs_per_face, + vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + (k+1)*dofs_per_face): + dofs[i, j] = dof + dof -= 1 + else: + for j in range(vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + k*dofs_per_face, + vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + (k+1)*dofs_per_face): + dofs[i, j] = dof + dof -= 1 + if i < skipCellsAfter: + # dofs in the interior of the cell + for k in range(vertices_per_element*dofs_per_vertex + edges_per_element*dofs_per_edge + faces_per_element*dofs_per_face, + self.dofs_per_element): + dofs[i, k] = numDoFs + numDoFs += 1 + self.num_dofs = numDoFs + + if isinstance(tag, function): + self.resetUsingIndicator(tag) + + self.inner = ip_serial() + self.norm = norm_serial() + self.complex_inner = wrapRealInnerToComplex(ip_serial()) + self.complex_norm = wrapRealNormToComplex(norm_serial()) + + cpdef void resetUsingIndicator(self, function indicator): + cdef: + fe_vector ind + ind = self.interpolate(indicator) + self.resetUsingFEVector(ind) + + cpdef void resetUsingFEVector(self, REAL_t[::1] ind): + cdef: + INDEX_t[:, ::1] new_dofs = uninitialized((self.mesh.num_cells, + self.dofs_per_element), dtype=INDEX) + INDEX_t cellNo, dofNo, dofOld, dofNew = 0, dofNewBoundary = -1 + dict old2new = {} + for cellNo in range(self.mesh.num_cells): + for dofNo in range(self.dofs_per_element): + dofOld = self.cell2dof(cellNo, dofNo) + try: + new_dofs[cellNo, dofNo] = old2new[dofOld] + except KeyError: + if dofOld >= 0 and ind[dofOld] > 0: + new_dofs[cellNo, dofNo] = dofNew + old2new[dofOld] = dofNew + dofNew += 1 + else: + new_dofs[cellNo, dofNo] = dofNewBoundary + old2new[dofOld] = dofNewBoundary + dofNewBoundary -= 1 + self.dofs = new_dofs + self.num_dofs = dofNew + self.num_boundary_dofs = -dofNewBoundary-1 + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef INDEX_t cell2dof(self, + const INDEX_t cellNo, + const INDEX_t perCellNo): + return self.dofs[cellNo, perCellNo] + + def cell2dof_py(self, INDEX_t cellNo, INDEX_t perCellNo): + return self.cell2dof(cellNo, perCellNo) + + cpdef void reorder(self, const INDEX_t[::1] perm): + cdef INDEX_t i, j, dof + for i in range(self.dofs.shape[0]): + for j in range(self.dofs.shape[1]): + dof = self.dofs[i, j] + if dof >= 0: + self.dofs[i, j] = perm[dof] + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def buildSparsityPattern(self, const cells_t cells, + INDEX_t start_idx=-1, INDEX_t end_idx=-1, + BOOL_t symmetric=False, + BOOL_t reorder=False): + cdef: + INDEX_t i, j, k, I, J, jj + INDEX_t num_dofs = self.num_dofs + INDEX_t num_cells = cells.shape[0] + INDEX_t nnz = 0 + INDEX_t[::1] indptr, indices + REAL_t[::1] data, diagonal + sparsityPattern sPat = sparsityPattern(num_dofs) + + if self.reordered: + reorder = False + + if start_idx == -1: + start_idx = 0 + if end_idx == -1: + end_idx = num_cells + + if symmetric and not reorder: + for i in range(start_idx, end_idx): + for j in range(self.dofs_per_element): + I = self.cell2dof(i, j) + if I < 0: + continue + for k in range(self.dofs_per_element): + J = self.cell2dof(i, k) + # This is the only line that differs from the + # non-symmetric code + if J < 0 or I <= J: + continue + # create entry (I, J) + sPat.add(I, J) + else: + for i in range(start_idx, 
end_idx): + for j in range(self.dofs_per_element): + I = self.cell2dof(i, j) + if I < 0: + continue + for k in range(self.dofs_per_element): + J = self.cell2dof(i, k) + if J < 0: + continue + # create entry (I, J) + sPat.add(I, J) + + indptr, indices = sPat.freeze() + del sPat + if reorder: + perm = uninitialized((indptr.shape[0]-1), dtype=INDEX) + graph = sparseGraph(indices, indptr, indptr.shape[0]-1, indptr.shape[0]-1) + cuthill_mckee(graph, perm) + # get inverse of permutation + iperm = uninitialized((perm.shape[0]), dtype=INDEX) + for i in range(perm.shape[0]): + iperm[perm[i]] = i + + sPat = sparsityPattern(num_dofs) + if symmetric: + for i in range(perm.shape[0]): + I = perm[i] + for jj in range(indptr[I], indptr[I+1]): + J = indices[jj] + j = iperm[J] + if i > j: + sPat.add(i, j) + elif i < j: + sPat.add(j, i) + else: + for i in range(perm.shape[0]): + I = perm[i] + for jj in range(indptr[I], indptr[I+1]): + J = indices[jj] + j = iperm[J] + sPat.add(i, j) + indptr, indices = sPat.freeze() + self.reorder(iperm) + self.reordered = True + del sPat + nnz = indptr[num_dofs] + data = np.zeros((nnz), dtype=REAL) + if symmetric: + diagonal = np.zeros((num_dofs), dtype=REAL) + A = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + A = CSR_LinearOperator(indices, indptr, data) + A.sort_indices() + return A + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def buildNonSymmetricSparsityPattern(self, + const cells_t cells, + DoFMap dmOther, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1): + cdef: + INDEX_t i, j, k, I, J + INDEX_t num_dofs = self.num_dofs + INDEX_t num_cells = cells.shape[0] + INDEX_t nnz = 0 + INDEX_t[::1] indptr, indices + REAL_t[::1] data + sparsityPattern sPat = sparsityPattern(num_dofs) + + if start_idx == -1: + start_idx = 0 + if end_idx == -1: + end_idx = num_cells + + for i in range(start_idx, end_idx): + for j in range(self.dofs_per_element): + I = self.cell2dof(i, j) + if I < 0: + continue + for k in range(dmOther.dofs_per_element): + J = dmOther.cell2dof(i, k) + if J < 0: + continue + # create entry (I, J) + sPat.add(I, J) + + indptr, indices = sPat.freeze() + del sPat + nnz = indptr[num_dofs] + data = np.zeros((nnz), dtype=REAL) + A = CSR_LinearOperator(indices, indptr, data) + A.num_columns = dmOther.num_dofs + A.sort_indices() + return A + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def interpolate(self, function function): + cdef: + REAL_t[::1] v = self.full(fill_value=np.nan, dtype=REAL) + fe_vector vec = fe_vector(v, self) + INDEX_t cellNo, i, dof + REAL_t[:, ::1] simplex = uninitialized((self.mesh.dim+1, + self.mesh.dim), dtype=REAL) + REAL_t[:, ::1] pos = uninitialized((self.dofs_per_element, self.mesh.dim), dtype=REAL) + for cellNo in range(self.mesh.num_cells): + self.mesh.getSimplex(cellNo, simplex) + matmat(self.nodes, simplex, pos) + for i in range(self.dofs_per_element): + dof = self.cell2dof(cellNo, i) + if dof >= 0 and isnan(v[dof]): + v[dof] = function.eval(pos[i, :]) + return vec + + def getDoFCoordinates(self): + from . functions import coordinate + coords = uninitialized((self.num_dofs, self.mesh.dim), dtype=REAL) + for i in range(self.mesh.dim): + coords[:, i] = self.interpolate(coordinate(i)) + return coords + + def project(self, function, DoFMap=None, simplexQuadratureRule qr=None): + from . 
femCy import assembleRHSfromFEfunction + from scipy.sparse.linalg import spsolve + if isinstance(function, np.ndarray): + rhs = assembleRHSfromFEfunction(self.mesh, function, DoFMap, self, qr=qr) + else: + rhs = self.assembleRHS(function, qr=qr) + mass = self.assembleMass() + x = spsolve(mass.to_csr(), rhs) + return fe_vector(x, self) + + def assembleMass(self, + vector_t boundary_data=None, + vector_t rhs_contribution=None, + LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + BOOL_t sss_format=False, + BOOL_t reorder=False, + INDEX_t[::1] cellIndices=None, + DoFMap dm2=None): + if dm2 is None: + from . femCy import assembleMass + return assembleMass(self, + boundary_data, + rhs_contribution, + A, + start_idx, end_idx, + sss_format, + reorder, + cellIndices) + else: + assert self.mesh == dm2.mesh + from . femCy import assembleMassNonSym + return assembleMassNonSym(self.mesh, self, dm2, A, start_idx, end_idx) + + def assembleStiffness(self, + vector_t boundary_data=None, + vector_t rhs_contribution=None, + LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + BOOL_t sss_format=False, + BOOL_t reorder=False, + function diffusivity=None, + INDEX_t[::1] cellIndices=None, + DoFMap dm2=None): + from . femCy import assembleStiffness + return assembleStiffness(self, + boundary_data, + rhs_contribution, + A, + start_idx, end_idx, + sss_format, + reorder, + diffusivity, + cellIndices, + dm2=dm2) + + def assembleRHS(self, + fun, + simplexQuadratureRule qr=None): + from . femCy import assembleRHS, assembleRHScomplex + if isinstance(fun, complexFunction): + return assembleRHScomplex(fun, self, qr) + else: + return assembleRHS(fun, self, qr) + + def assembleNonlocal(self, kernel, str matrixFormat='DENSE', DoFMap dm2=None, **kwargs): + """Assemble a nonlocal operator of the form + + .. math:: + + \int_D (u(x)-u(y)) \gamma(x, y) dy + + :param kernel: The kernel function :math:`\gamma` + + :param matrixFormat: The matrix format for the assembly. Valid values are `dense`, `sparse`, `H2` and `H2corrected`. + `H2` assembles into a hierarchical matrix format. `H2corrected` also assembles a hierarchical matrix for an infinite + horizon kernel and a correction term.
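+ + A minimal usage sketch (assuming a DoFMap `dm` and a `kernel` constructed + via PyNucleus_nl; hierarchical assembly shown): + + >>> A = dm.assembleNonlocal(kernel, matrixFormat='H2') # doctest: +SKIP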
+ + """ + try: + from PyNucleus_nl import nonlocalBuilder + builder = nonlocalBuilder(self.mesh, self, kernel, dm2=dm2, **kwargs) + if matrixFormat.upper() == 'DENSE': + return builder.getDense() + elif matrixFormat.upper() == 'SPARSE': + return builder.getDense(trySparsification=True) + elif matrixFormat.upper() == 'H2': + return builder.getH2() + elif matrixFormat.upper() == 'H2CORRECTED': + A = builder.getH2FiniteHorizon() + A.setKernel(kernel) + return A + else: + raise NotImplementedError('Unknown matrix format: {}'.format(matrixFormat)) + except ImportError as e: + raise ImportError('\'PyNucleus_nl\' needs to be installed first.') from e + + def __getstate__(self): + return (self.mesh, + np.array(self.dofs), + self.dofs_per_vertex, + self.dofs_per_edge, + self.dofs_per_face, + self.dofs_per_cell, + self.dofs_per_element, + self.num_dofs, + self.num_boundary_dofs, + self.tag, + self.mesh, + self.localShapeFunctions, + np.array(self.nodes)) + + def __setstate__(self, state): + self.mesh = state[0] + self.dofs = state[1] + self.dofs_per_vertex = state[2] + self.dofs_per_edge = state[3] + self.dofs_per_face = state[4] + self.dofs_per_cell = state[5] + self.dofs_per_element = state[6] + self.num_dofs = state[7] + self.num_boundary_dofs = state[8] + self.tag = state[9] + self.mesh = state[10] + self.localShapeFunctions = state[11] + self.nodes = state[12] + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef void getNodalCoordinates(self, REAL_t[:, ::1] cell, REAL_t[:, ::1] coords): + cdef: + INDEX_t numCoords = self.nodes.shape[0] + INDEX_t dim = cell.shape[1] + INDEX_t i, j, k + for i in range(numCoords): + for k in range(dim): + coords[i, k] = 0. + for j in range(dim+1): + for k in range(dim): + coords[i, k] += self.nodes[i, j]*cell[j, k] + + def getNodalCoordinates_py(self, REAL_t[:, ::1] cell): + coords = np.zeros((self.nodes.shape[0], cell.shape[1]), dtype=REAL) + self.getNodalCoordinates(cell, coords) + return coords + + def zeros(self, INDEX_t numVecs=1, BOOL_t collection=False, dtype=REAL): + if numVecs == 1: + if dtype == REAL: + return fe_vector(np.zeros((self.num_dofs), dtype=REAL), self) + elif dtype == COMPLEX: + return complex_fe_vector(np.zeros((self.num_dofs), dtype=COMPLEX), self) + else: + return np.zeros((self.num_dofs), dtype=dtype) + else: + if collection: + return np.zeros((numVecs, self.num_dofs), dtype=dtype) + else: + return np.zeros((self.num_dofs, numVecs), dtype=dtype) + + def ones(self, INDEX_t numVecs=1, BOOL_t collection=False, dtype=REAL): + if numVecs == 1: + if dtype == REAL: + return fe_vector(np.ones((self.num_dofs), dtype=REAL), self) + elif dtype == COMPLEX: + return complex_fe_vector(np.ones((self.num_dofs), dtype=COMPLEX), self) + else: + np.ones((self.num_dofs), dtype=dtype) + else: + if collection: + return np.ones((numVecs, self.num_dofs), dtype=dtype) + else: + return np.ones((self.num_dofs, numVecs), dtype=dtype) + + def full(self, REAL_t fill_value, INDEX_t numVecs=1, BOOL_t collection=False, dtype=REAL): + if numVecs == 1: + if dtype == REAL: + return fe_vector(np.full((self.num_dofs), fill_value=fill_value, dtype=REAL), self) + elif dtype == COMPLEX: + return complex_fe_vector(np.full((self.num_dofs), fill_value=fill_value, dtype=COMPLEX), self) + else: + return np.full((self.num_dofs), fill_value=fill_value, dtype=dtype) + else: + if collection: + return np.full((numVecs, self.num_dofs), fill_value=fill_value, dtype=dtype) + else: + return np.full((self.num_dofs, numVecs), fill_value=fill_value, 
dtype=dtype) + + def empty(self, INDEX_t numVecs=1, BOOL_t collection=False, dtype=REAL): + if numVecs == 1: + if dtype == REAL: + return fe_vector(uninitialized((self.num_dofs), dtype=REAL), self) + elif dtype == COMPLEX: + return complex_fe_vector(uninitialized((self.num_dofs), dtype=COMPLEX), self) + else: + return uninitialized((self.num_dofs), dtype=dtype) + else: + if collection: + return uninitialized((numVecs, self.num_dofs), dtype=dtype) + else: + return uninitialized((self.num_dofs, numVecs), dtype=dtype) + + def fromArray(self, data): + assert data.shape[0] == self.num_dofs + if data.dtype == COMPLEX: + return complex_fe_vector(data, self) + else: + return fe_vector(data, self) + + def evalFun(self, const REAL_t[::1] u, INDEX_t cellNo, REAL_t[::1] x): + cdef: + REAL_t[:, ::1] simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + REAL_t[::1] bary = uninitialized((self.dim+1), dtype=REAL) + shapeFunction shapeFun + REAL_t val + self.mesh.getSimplex(cellNo, simplex) + if self.dim == 1: + getBarycentricCoords1D(simplex, x, bary) + elif self.dim == 2: + getBarycentricCoords2D(simplex, x, bary) + else: + raise NotImplementedError() + val = 0. + for k in range(self.dofs_per_element): + dof = self.cell2dof(cellNo, k) + if dof >= 0: + shapeFun = self.localShapeFunctions[k] + val += shapeFun.eval(bary)*u[dof] + return val + + def getGlobalShapeFunction(self, INDEX_t dof): + return globalShapeFunction(self, dof) + + def getCellLookup(self): + cdef: + INDEX_t cellNo, dofNo, dof + list d2c + d2c = [set() for dof in range(self.num_dofs)] + for cellNo in range(self.mesh.num_cells): + for dofNo in range(self.dofs_per_element): + dof = self.cell2dof(cellNo, dofNo) + if dof >= 0: + d2c[dof].add(cellNo) + return d2c + + def getPatchLookup(self): + return self.getCellLookup() + + def linearPart(self, fe_vector x): + cdef: + INDEX_t i, j, dof, dofP1 + DoFMap dm + fe_vector y + REAL_t[::1] yy + if isinstance(self, P1_DoFMap): + return x, self + dm = P1_DoFMap(self.mesh, self.tag) + y = dm.zeros(dtype=REAL) + yy = y + for i in range(self.mesh.num_cells): + for j in range(0, (self.dim+1)*self.dofs_per_vertex, self.dofs_per_vertex): + dofP1 = dm.cell2dof(i, j//self.dofs_per_vertex) + if dofP1 >= 0: + dof = self.cell2dof(i, j) + if dof >= 0: + yy[dofP1] = x[dof] + else: + yy[dofP1] = 0. + return y, dm + + def getComplementDoFMap(self): + from copy import deepcopy + cdef: + bdm = deepcopy(self) + INDEX_t i, j + for i in range(self.mesh.num_cells): + for j in range(self.dofs_per_element): + bdm.dofs[i, j] = -bdm.dofs[i, j]-1 + bdm.num_dofs, bdm.num_boundary_dofs = self.num_boundary_dofs, self.num_dofs + bdm.inner = ip_serial() + bdm.norm = norm_serial() + bdm.complex_inner = wrapRealInnerToComplex(ip_serial()) + bdm.complex_norm = wrapRealNormToComplex(norm_serial()) + return bdm + + def augmentWithZero(self, const REAL_t[::1] x): + cdef: + DoFMap dm = type(self)(self.mesh, tag=MAX_INT) + fe_vector y = dm.empty(dtype=REAL) + REAL_t[::1] yy = y + INDEX_t i, k, dof, dof2, num_cells = self.mesh.num_cells + for i in range(num_cells): + for k in range(self.dofs_per_element): + dof = self.cell2dof(i, k) + dof2 = dm.cell2dof(i, k) + if dof >= 0: + yy[dof2] = x[dof] + else: + yy[dof2] = 0. 
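+ # dm was built with tag=MAX_INT and thus has no boundary conditions; y extends + # x by zeros onto this larger DoFMap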
+ return y, dm + + def augmentWithBoundaryData(self, + const REAL_t[::1] x, + const REAL_t[::1] boundaryData): + cdef: + DoFMap dm = type(self)(self.mesh, tag=MAX_INT) + fe_vector y = dm.empty(dtype=REAL) + REAL_t[::1] yy = y + INDEX_t i, k, dof, dof2, num_cells = self.mesh.num_cells + + for i in range(num_cells): + for k in range(self.dofs_per_element): + dof = self.cell2dof(i, k) + dof2 = dm.cell2dof(i, k) + if dof >= 0: + yy[dof2] = x[dof] + else: + yy[dof2] = boundaryData[-dof-1] + return y + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def getBoundaryData(self, function boundaryFunction): + cdef: + REAL_t[::1] data = uninitialized((self.num_boundary_dofs), dtype=REAL) + cells_t cells = self.mesh.cells + INDEX_t num_cells = self.mesh.num_cells + vertices_t vertices = self.mesh.vertices + INDEX_t dim = self.mesh.dim + INDEX_t i, k, dof, vertexNo, vertexNo2 + REAL_t[::1] vertex = uninitialized((dim), dtype=REAL) + + # This is not how it should be done, because it's not flexible. We + # are only evaluating at vertices and edge midpoints. + + # FIX: Avoid visiting every dof several times + if self.dofs_per_vertex > 0: + for i in range(num_cells): + for k in range(dim+1): + dof = self.cell2dof(i, k*self.dofs_per_vertex) + if dof < 0: + vertexNo = cells[i, k] + data[-dof-1] = boundaryFunction.eval(vertices[vertexNo, :]) + if self.dofs_per_edge > 0: + if dim == 2: + for i in range(num_cells): + for k in range(dim+1): + dof = self.cell2dof(i, (dim+1)*self.dofs_per_vertex+k*self.dofs_per_edge) + if dof < 0: + vertexNo = cells[i, k] + vertexNo2 = cells[i, (k+1) % (dim+1)] + for m in range(dim): + vertex[m] = 0.5*(vertices[vertexNo, m]+vertices[vertexNo2, m]) + data[-dof-1] = boundaryFunction.eval(vertex) + elif dim == 3: + for i in range(num_cells): + for k in range(3): + dof = self.cell2dof(i, (dim+1)*self.dofs_per_vertex+k*self.dofs_per_edge) + if dof < 0: + vertexNo = cells[i, k] + vertexNo2 = cells[i, (k+1) % dim] + for m in range(dim): + vertex[m] = 0.5*(vertices[vertexNo, m]+vertices[vertexNo2, m]) + data[-dof-1] = boundaryFunction.eval(vertex) + for k in range(3, 6): + dof = self.cell2dof(i, (dim+1)*self.dofs_per_vertex+k*self.dofs_per_edge) + if dof < 0: + vertexNo = cells[i, k-3] + vertexNo2 = cells[i, 3] + for m in range(dim): + vertex[m] = 0.5*(vertices[vertexNo, m]+vertices[vertexNo2, m]) + data[-dof-1] = boundaryFunction.eval(vertex) + return np.array(data, copy=False) + + cpdef void getVertexDoFs(self, INDEX_t[:, ::1] v2d): + cdef: + INDEX_t vertices_per_element + meshBase mesh = self.mesh + INDEX_t dim = mesh.manifold_dim + INDEX_t cellNo, j, v, k, dof + if dim == 1: + vertices_per_element = 2 + elif dim == 2: + vertices_per_element = 3 + elif dim == 3: + vertices_per_element = 4 + else: + raise NotImplementedError() + + for cellNo in range(mesh.num_cells): + for j in range(vertices_per_element): + v = mesh.cells[cellNo, j] + for k in range(self.dofs_per_vertex): + dof = self.cell2dof(cellNo, self.dofs_per_vertex*j+k) + v2d[v, k] = dof + + def getCoordinateBlocks(self, INDEX_t[::1] idxDims, delta=1e-5): + cdef: + REAL_t[:, ::1] c + dict blocks = {} + INDEX_t[::1] key + REAL_t[::1] fac, mins, maxs + INDEX_t i, j + intTuple hv + INDEX_t numBlocks, block, temp, nnz + c = self.getDoFCoordinates() + key = np.empty((idxDims.shape[0]), dtype=INDEX) + fac = np.empty((idxDims.shape[0]), dtype=REAL) + delta = 1e-5 + mins = np.array(c, copy=False).min(axis=0)[idxDims] + maxs = np.array(c, copy=False).max(axis=0)[idxDims] + for j in range(idxDims.shape[0]):
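+ # fac[j] turns a coordinate offset along dimension idxDims[j] into an integer + # bin index; DoFs whose coordinates differ by less than ~delta*(maxs[j]-mins[j]) + # receive the same key below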
+ fac[j] = 1./(maxs[j] - mins[j]) / delta + numBlocks = 0 + for i in range(self.num_dofs): + for j in range(idxDims.shape[0]): + key[j] = (fac[j] * (c[i, idxDims[j]] - mins[j]) + 0.5) + hv = intTuple.create(key) + try: + blocks[hv] += 1 + except KeyError: + blocks[hv] = 1 + numBlocks += 1 + indptr = np.empty((numBlocks+1), dtype=INDEX) + numBlocks = 0 + for hv in blocks: + indptr[numBlocks] = blocks[hv] + blocks[hv] = numBlocks + numBlocks += 1 + nnz = 0 + for i in range(numBlocks): + temp = indptr[i] + indptr[i] = nnz + nnz += temp + indptr[numBlocks] = nnz + indices = np.empty((nnz), dtype=INDEX) + for i in range(self.num_dofs): + for j in range(idxDims.shape[0]): + key[j] = (fac[j] * (c[i, idxDims[j]] - mins[j]) + 0.5) + hv = intTuple.create(key) + block = blocks[hv] + indices[indptr[block]] = i + indptr[block] += 1 + for i in range(numBlocks, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + return sparseGraph(indices, indptr, numBlocks, self.num_dofs) + + def getReducedMeshDoFMap(self, INDEX_t[::1] selectedCells=None): + # Same DoFs, but on mesh that only contains elements with DoFs + # Warning: This can also discard domain corners in some cases. + cdef: + INDEX_t cellNo, dofNo, cellNoNew, dof + meshBase newMesh + DoFMap newDM + dict boundaryDoFMapping = {} + BOOL_t[::1] dofCheck + + if selectedCells is None: + selectedCells = uninitialized((self.mesh.num_cells), dtype=INDEX) + cellNoNew = 0 + for cellNo in range(self.mesh.num_cells): + for dofNo in range(self.dofs_per_element): + if self.cell2dof(cellNo, dofNo) >= 0: + selectedCells[cellNoNew] = cellNo + cellNoNew += 1 + break + selectedCells = selectedCells[:cellNoNew] + else: + dofCheck = np.full((self.num_dofs), dtype=BOOL, fill_value=False) + for cellNoNew in range(selectedCells.shape[0]): + cellNo = selectedCells[cellNoNew] + for dofNo in range(self.dofs_per_element): + dof = self.cell2dof(cellNo, dofNo) + if dof >= 0: + dofCheck[dof] = True + assert np.all(dofCheck), "New mesh does not contain all previous DoFs" + + newMesh = getSubmesh2(self.mesh, selectedCells) + newDM = type(self)(newMesh) + + boundaryDoFNew = -1 + for cellNoNew in range(newMesh.num_cells): + cellNo = selectedCells[cellNoNew] + for dofNo in range(self.dofs_per_element): + dof = self.dofs[cellNo, dofNo] + if dof >= 0: + newDM.dofs[cellNoNew, dofNo] = self.dofs[cellNo, dofNo] + else: + try: + newDM.dofs[cellNoNew, dofNo] = boundaryDoFMapping[dof] + except KeyError: + newDM.dofs[cellNoNew, dofNo] = boundaryDoFNew + boundaryDoFMapping[dof] = boundaryDoFNew + boundaryDoFNew -= 1 + newDM.num_dofs = self.num_dofs + newDM.num_boundary_dofs = -boundaryDoFNew-1 + return newDM + + def plot(self, *args, **kwargs): + self.mesh.plotDoFMap(self, *args, **kwargs) + + def sort(self): + cdef: + INDEX_t[::1] idx, invIdx + INDEX_t k, dof, dof2 + coords = self.getDoFCoordinates() + if self.mesh.dim == 1: + idx = np.argsort(coords, axis=0).ravel().astype(INDEX) + elif self.mesh.dim == 2: + idx = np.argsort(coords.view('d,d'), order=['f1', 'f0'], axis=0).flat[:coords.shape[0]].astype(INDEX) + elif self.mesh.dim == 3: + idx = np.argsort(coords.view('d,d,d'), order=['f2', 'f1', 'f0'], axis=0).flat[:coords.shape[0]].astype(INDEX) + else: + raise NotImplementedError() + invIdx = uninitialized((self.num_dofs), dtype=INDEX) + k = 0 + for dof in range(self.num_dofs): + dof2 = idx[dof] + invIdx[dof2] = k + k += 1 + self.reorder(invIdx) + + def HDF5write(self, node): + COMPRESSION = 'gzip' + node.attrs['type'] = 'DoFMap' + node.create_group('mesh') + self.mesh.HDF5write(node['mesh']) + 
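# scalar metadata is stored as HDF5 attributes; array-valued members go into compressed datasets below +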
node.attrs['dim'] = self.dim + node.attrs['reordered'] = self.reordered + # localShapeFunctions + if isinstance(self, P0_DoFMap): + node.attrs['element'] = 'P0' + elif isinstance(self, P1_DoFMap): + node.attrs['element'] = 'P1' + elif isinstance(self, P2_DoFMap): + node.attrs['element'] = 'P2' + elif isinstance(self, P3_DoFMap): + node.attrs['element'] = 'P3' + node.create_dataset('nodes', data=self.nodes, + compression=COMPRESSION) + node.attrs['num_dofs'] = self.num_dofs + node.attrs['num_boundary_dofs'] = self.num_boundary_dofs + node.create_dataset('dofs', data=self.dofs, + compression=COMPRESSION) + node.attrs['polynomialOrder'] = self.polynomialOrder + # tag + node.attrs['dofs_per_vertex'] = self.dofs_per_vertex + node.attrs['dofs_per_edge'] = self.dofs_per_edge + node.attrs['dofs_per_face'] = self.dofs_per_face + node.attrs['dofs_per_cell'] = self.dofs_per_cell + node.attrs['dofs_per_element'] = self.dofs_per_element + + @staticmethod + def HDF5read(node): + from . mesh import meshNd + mesh = meshNd.HDF5read(node['mesh']) + if node.attrs['element'] == 'P0': + dm = P0_DoFMap(mesh) + elif node.attrs['element'] == 'P1': + dm = P1_DoFMap(mesh) + elif node.attrs['element'] == 'P2': + dm = P2_DoFMap(mesh) + elif node.attrs['element'] == 'P3': + dm = P3_DoFMap(mesh) + else: + dm = DoFMap(mesh, + node.attrs['dofs_per_vertex'], + node.attrs['dofs_per_edge'], + node.attrs['dofs_per_face'], + node.attrs['dofs_per_cell']) + dm.reordered = node.attrs['reordered'] + dm.nodes = np.array(node['nodes'], dtype=REAL) + dm.num_dofs = node.attrs['num_dofs'] + dm.num_boundary_dofs = node.attrs['num_boundary_dofs'] + dm.dofs = np.array(node['dofs'], dtype=INDEX) + dm.polynomialOrder = node.attrs['polynomialOrder'] + return dm + + def __repr__(self): + return 'DoFMap with {} DoFs and {} boundary DoFs.'.format(self.num_dofs, + self.num_boundary_dofs) + + def set_ip_norm(self, ipBase inner, normBase norm): + self.inner = inner + self.norm = norm + + def set_complex_ip_norm(self, complexipBase inner, complexNormBase norm): + self.complex_inner = inner + self.complex_norm = norm + + def combine(self, DoFMap other): + from copy import deepcopy + + cdef: + INDEX_t cellNo, dofNo, dof1, dof2 + DoFMap dmCombined + + assert type(self) == type(other) + assert self.mesh == other.mesh + assert self.num_dofs == other.num_boundary_dofs + assert self.num_boundary_dofs == other.num_dofs + + dmCombined = deepcopy(self) + dmCombined.num_dofs = self.num_dofs+other.num_dofs + dmCombined.num_boundary_dofs = 0 + for cellNo in range(self.mesh.num_cells): + for dofNo in range(self.dofs_per_element): + dof1 = self.cell2dof(cellNo, dofNo) + dof2 = other.cell2dof(cellNo, dofNo) + if dof1 >= 0 and dof2 < 0: + dmCombined.dofs[cellNo, dofNo] = dof1 + elif dof1 < 0 and dof2 >= 0: + dmCombined.dofs[cellNo, dofNo] = self.num_dofs+dof2 + else: + raise NotImplementedError() + return dmCombined + + +cdef class globalShapeFunction(function): + cdef: + REAL_t[:, :, ::1] simplices + INDEX_t[::1] dofNos + REAL_t[::1] bary + DoFMap dm + + def __init__(self, DoFMap dm, INDEX_t dof): + cdef: + list cellNos = [] + list dofNos = [] + INDEX_t cellNo, dofNo, k + for cellNo in range(dm.mesh.num_cells): + for dofNo in range(dm.dofs_per_element): + if dm.cell2dof(cellNo, dofNo) == dof: + cellNos.append(cellNo) + dofNos.append(dofNo) + self.simplices = uninitialized((len(cellNos), dm.mesh.dim+1, dm.mesh.dim), dtype=REAL) + k = 0 + for cellNo in cellNos: + dm.mesh.getSimplex(cellNo, self.simplices[k, :, :]) + k += 1 + self.dofNos = np.array(dofNos, 
dtype=INDEX) + self.bary = uninitialized((4), dtype=REAL) + self.dm = dm + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t dim = self.simplices.shape[2], dofNo, k, j + BOOL_t doEval + REAL_t result = 0. + shapeFunction phi + if dim == 1: + for k in range(self.simplices.shape[0]): + getBarycentricCoords1D(self.simplices[k, :, :], x, self.bary) + doEval = True + for j in range(dim+1): + if self.bary[j] < 0: + doEval = False + break + if doEval: + dofNo = self.dofNos[k] + phi = self.dm.localShapeFunctions[dofNo] + result += phi.eval(self.bary) + elif dim == 2: + for k in range(self.simplices.shape[0]): + getBarycentricCoords2D(self.simplices[k, :, :], x, self.bary) + doEval = True + for j in range(dim+1): + if self.bary[j] < 0: + doEval = False + break + if doEval: + dofNo = self.dofNos[k] + phi = self.dm.localShapeFunctions[dofNo] + result += phi.eval(self.bary) + return result + + +cdef class shapeFunction: + def __init__(self): + self.bary = uninitialized((4), dtype=REAL) + + def __call__(self, lam): + return self.eval(np.array(lam)) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + pass + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t evalGlobal(self, REAL_t[:, ::1] simplex, REAL_t[::1] x): + if simplex.shape[1] == 1: + getBarycentricCoords1D(simplex, x, self.bary) + elif simplex.shape[1] == 2: + getBarycentricCoords2D(simplex, x, self.bary) + else: + raise NotImplementedError() + return self.eval(self.bary) + + def evalGlobalPy(self, REAL_t[:, ::1] simplex, REAL_t[::1] x): + return self.evalGlobal(simplex, x) + + def __getstate__(self): + return + + def __setstate__(self, state): + self.bary = uninitialized((4), dtype=REAL) + + +cdef class vectorShapeFunction: + def __init__(self, INDEX_t dim): + self.dim = dim + self.cell = uninitialized((dim+1), dtype=INDEX) + + cpdef void setCell(self, INDEX_t[::1] cell): + cdef: + INDEX_t i + for i in range(self.dim+1): + self.cell[i] = cell[i] + + def __call__(self, lam, gradLam): + value = uninitialized((self.dim), dtype=REAL) + self.eval(np.array(lam), np.array(gradLam), value) + return value + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, const REAL_t[::1] lam, const REAL_t[:, ::1] gradLam, REAL_t[::1] value): + pass + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void evalGlobal(self, const REAL_t[:, ::1] simplex, const REAL_t[::1] x, REAL_t[::1] value): + raise NotImplementedError() + + def evalGlobalPy(self, REAL_t[:, ::1] simplex, REAL_t[::1] x): + value = uninitialized((self.dim), dtype=REAL) + self.evalGlobal(simplex, x, value) + return value + + +cdef class shapeFunctionP0(shapeFunction): + def __call__(self, lam): + return 1. + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return 1. + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride): + return 1. 
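+ + # Local shape functions are evaluated in barycentric coordinates. A short sketch + # (commented out; assumes numpy imported as np, as elsewhere in this module): + # + # phi = shapeFunctionP0() + # phi(np.array([1./3., 1./3., 1./3.])) # P0: identically 1 on the element + # + # phi1 = shapeFunctionP1(0) # defined further below + # phi1(np.array([0.5, 0.25, 0.25])) # P1: the first barycentric coordinate, 0.5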
+ + +cdef class P0_DoFMap(DoFMap): + def __init__(self, meshBase mesh, tag=None, + INDEX_t skipCellsAfter=-1): + self.polynomialOrder = 0 + if mesh.dim == 1: + self.localShapeFunctions = [shapeFunctionP0()] + self.nodes = np.array([[0.5, 0.5]], dtype=REAL) + super(P0_DoFMap, self).__init__(mesh, 0, 0, 0, 1, tag, skipCellsAfter) + elif mesh.dim == 2: + self.localShapeFunctions = [shapeFunctionP0()] + self.nodes = np.array([[1./3., 1./3., 1./3.]], dtype=REAL) + super(P0_DoFMap, self).__init__(mesh, 0, 0, 0, 1, tag, skipCellsAfter) + elif mesh.dim == 3: + self.localShapeFunctions = [shapeFunctionP0()] + self.nodes = np.array([[0.25, 0.25, 0.25, 0.25]], dtype=REAL) + super(P0_DoFMap, self).__init__(mesh, 0, 0, 0, 1, tag, skipCellsAfter) + else: + raise NotImplementedError() + + def __repr__(self): + return 'P0 DoFMap with {} DoFs and {} boundary DoFs.'.format(self.num_dofs, + self.num_boundary_dofs) + + def interpolateFE(self, mesh, P0_DoFMap dm, REAL_t[::1] u): + cdef: + REAL_t[::1] uFine = np.zeros((self.num_dofs)) + REAL_t[::1] uloc = uninitialized((dm.dofs_per_element)) + REAL_t[:, ::1] other_vertices = mesh.vertices + INDEX_t[:, ::1] other_cells = mesh.cells + REAL_t[:, ::1] other_simplex = uninitialized((mesh.dim+1, + mesh.dim), dtype=REAL) + REAL_t[:, ::1] my_vertices = self.mesh.vertices + INDEX_t[:, ::1] my_cells = self.mesh.cells + REAL_t[:, ::1] my_simplex = uninitialized((self.mesh.dim+1, + self.mesh.dim), dtype=REAL) + INDEX_t k, other_dof, i, j, my_dof, my_cell, other_cell + REAL_t[:, ::1] coords = uninitialized((dm.dofs_per_element, mesh.vertices.shape[1]), dtype=REAL) + REAL_t[::1] vertex + + from . meshCy import cellFinder + cF = cellFinder(mesh) + for my_cell in range(self.mesh.num_cells): + for i in range(my_cells.shape[1]): + for j in range(my_vertices.shape[1]): + my_simplex[i, j] = my_vertices[my_cells[my_cell, i], j] + self.getNodalCoordinates(my_simplex, coords) + for k in range(self.dofs_per_element): + my_dof = self.cell2dof(my_cell, k) + if my_dof >= 0 and uFine[my_dof] == 0.: + vertex = coords[k, :] + other_cell = cF.findCell(vertex) + for i in range(other_cells.shape[1]): + for j in range(other_vertices.shape[1]): + other_simplex[i, j] = other_vertices[other_cells[other_cell, i], j] + for j in range(dm.dofs_per_element): + other_dof = dm.cell2dof(other_cell, j) + if other_dof >= 0: + uloc[j] = u[other_dof] + else: + uloc[j] = 0. 
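+ # evaluate the donor finite element function on the located cell and copy its + # value into the fine vector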
+ uFine[my_dof] = evalP0(other_simplex, uloc, vertex) + return np.array(uFine, copy=False) + + +cdef class shapeFunctionP1(shapeFunction): + cdef: + INDEX_t vertexNo + + def __init__(self, INDEX_t vertexNo): + self.vertexNo = vertexNo + + def __call__(self, lam): + return lam[self.vertexNo] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return lam[self.vertexNo] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t evalStrided(self, const REAL_t* lam, INDEX_t stride): + return lam[self.vertexNo*stride] + + def __getstate__(self): + return self.vertexNo + + def __setstate__(self, state): + self.vertexNo = state + self.bary = uninitialized((4), dtype=REAL) + + +cdef class P1_DoFMap(DoFMap): + def __init__(self, meshBase mesh, tag=None, + INDEX_t skipCellsAfter=-1): + self.polynomialOrder = 1 + if mesh.dim == 1: + self.localShapeFunctions = [shapeFunctionP1(0), + shapeFunctionP1(1)] + self.nodes = np.array([[1., 0.], + [0., 1.]], dtype=REAL) + elif mesh.dim == 2: + self.localShapeFunctions = [shapeFunctionP1(0), + shapeFunctionP1(1), + shapeFunctionP1(2)] + self.nodes = np.array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.]], dtype=REAL) + elif mesh.dim == 3: + self.localShapeFunctions = [shapeFunctionP1(0), + shapeFunctionP1(1), + shapeFunctionP1(2), + shapeFunctionP1(3)] + self.nodes = np.array([[1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.]], dtype=REAL) + super(P1_DoFMap, self).__init__(mesh, 1, 0, 0, 0, tag, skipCellsAfter) + + def __repr__(self): + return 'P1 DoFMap with {} DoFs and {} boundary DoFs.'.format(self.num_dofs, + self.num_boundary_dofs) + + +cdef class shapeFunctionP2_vertex(shapeFunction): + cdef: + INDEX_t vertexNo + + def __init__(self, INDEX_t vertexNo): + self.vertexNo = vertexNo + + def __call__(self, lam): + return lam[self.vertexNo]*(2.*lam[self.vertexNo]-1.) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return lam[self.vertexNo]*(2.*lam[self.vertexNo]-1.) 
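+ + # lam*(2*lam - 1) is the quadratic vertex function: it equals 1 at its own vertex + # (lam=1) and vanishes at the other P2 nodes (lam=0 and lam=1/2)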
+ + def __getstate__(self): + return self.vertexNo + + def __setstate__(self, state): + self.vertexNo = state + self.bary = uninitialized((4), dtype=REAL) + + +cdef class shapeFunctionP2_edge(shapeFunction): + cdef: + INDEX_t vertexNo1, vertexNo2 + + def __init__(self, INDEX_t vertexNo1, INDEX_t vertexNo2): + self.vertexNo1 = vertexNo1 + self.vertexNo2 = vertexNo2 + + def __call__(self, lam): + return 4.*lam[self.vertexNo1]*lam[self.vertexNo2] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return 4.*lam[self.vertexNo1]*lam[self.vertexNo2] + + def __getstate__(self): + return (self.vertexNo1, self.vertexNo2) + + def __setstate__(self, state): + self.vertexNo1 = state[0] + self.vertexNo2 = state[1] + self.bary = uninitialized((4), dtype=REAL) + + +cdef class P2_DoFMap(DoFMap): + def __init__(self, meshBase mesh, tag=None, + INDEX_t skipCellsAfter=-1): + self.polynomialOrder = 2 + if mesh.dim == 1: + self.localShapeFunctions = [shapeFunctionP2_vertex(0), + shapeFunctionP2_vertex(1), + shapeFunctionP2_edge(0, 1)] + self.nodes = np.array([[1., 0.], + [0., 1.], + [0.5, 0.5]], dtype=REAL) + super(P2_DoFMap, self).__init__(mesh, 1, 0, 0, 1, tag, skipCellsAfter) + elif mesh.dim == 2: + self.localShapeFunctions = [shapeFunctionP2_vertex(0), + shapeFunctionP2_vertex(1), + shapeFunctionP2_vertex(2), + shapeFunctionP2_edge(0, 1), + shapeFunctionP2_edge(1, 2), + shapeFunctionP2_edge(0, 2)] + self.nodes = np.array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.], + [0.5, 0.5, 0.], + [0., 0.5, 0.5], + [0.5, 0., 0.5]], dtype=REAL) + super(P2_DoFMap, self).__init__(mesh, 1, 1, 0, 0, tag, skipCellsAfter) + elif mesh.dim == 3: + self.localShapeFunctions = [shapeFunctionP2_vertex(0), + shapeFunctionP2_vertex(1), + shapeFunctionP2_vertex(2), + shapeFunctionP2_vertex(3), + shapeFunctionP2_edge(0, 1), + shapeFunctionP2_edge(1, 2), + shapeFunctionP2_edge(0, 2), + shapeFunctionP2_edge(0, 3), + shapeFunctionP2_edge(1, 3), + shapeFunctionP2_edge(2, 3)] + self.nodes = np.array([[1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + [0.5, 0.5, 0., 0.], + [0., 0.5, 0.5, 0.], + [0.5, 0., 0.5, 0.], + [0.5, 0., 0., 0.5], + [0., 0.5, 0., 0.5], + [0., 0., 0.5, 0.5]], dtype=REAL) + super(P2_DoFMap, self).__init__(mesh, 1, 1, 0, 0, tag, skipCellsAfter) + + def __repr__(self): + return 'P2 DoFMap with {} DoFs and {} boundary DoFs.'.format(self.num_dofs, + self.num_boundary_dofs) + + +cdef class shapeFunctionP3_vertex(shapeFunction): + cdef: + INDEX_t vertexNo + + def __init__(self, INDEX_t vertexNo): + self.vertexNo = vertexNo + + def __call__(self, lam): + return 4.5*lam[self.vertexNo]*(lam[self.vertexNo]-1./3.)*(lam[self.vertexNo]-2./3.) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return 4.5*lam[self.vertexNo]*(lam[self.vertexNo]-1./3.)*(lam[self.vertexNo]-2./3.) + + def __getstate__(self): + return self.vertexNo + + def __setstate__(self, state): + self.vertexNo = state + self.bary = uninitialized((4), dtype=REAL) + + +cdef class shapeFunctionP3_edge(shapeFunction): + cdef: + INDEX_t vertexNo1, vertexNo2 + + def __init__(self, INDEX_t vertexNo1, INDEX_t vertexNo2): + self.vertexNo1 = vertexNo1 + self.vertexNo2 = vertexNo2 + + def __call__(self, lam): + return 13.5*lam[self.vertexNo1]*lam[self.vertexNo2]*(lam[self.vertexNo1]-1./3.) 
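+ + # each edge carries two cubic functions 13.5*lam1*lam2*(lam1 - 1/3), with the + # roles of vertexNo1 and vertexNo2 swapped; each equals 1 at one of the two + # interior edge nodes and 0 at the other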
+ + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return 13.5*lam[self.vertexNo1]*lam[self.vertexNo2]*(lam[self.vertexNo1]-1./3.) + + def __getstate__(self): + return (self.vertexNo1, self.vertexNo2) + + def __setstate__(self, state): + self.vertexNo1 = state[0] + self.vertexNo2 = state[1] + self.bary = uninitialized((4), dtype=REAL) + + +cdef class shapeFunctionP3_face(shapeFunction): + cdef: + INDEX_t vertexNo1, vertexNo2, vertexNo3 + + def __init__(self, INDEX_t vertexNo1, INDEX_t vertexNo2, INDEX_t vertexNo3): + self.vertexNo1 = vertexNo1 + self.vertexNo2 = vertexNo2 + self.vertexNo3 = vertexNo3 + + def __call__(self, lam): + return 27.*lam[self.vertexNo1]*lam[self.vertexNo2]*lam[self.vertexNo3] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, const REAL_t[::1] lam): + return 27.*lam[self.vertexNo1]*lam[self.vertexNo2]*lam[self.vertexNo3] + + def __getstate__(self): + return (self.vertexNo1, self.vertexNo2, self.vertexNo3) + + def __setstate__(self, state): + self.vertexNo1 = state[0] + self.vertexNo2 = state[1] + self.vertexNo3 = state[2] + self.bary = uninitialized((4), dtype=REAL) + + +cdef class P3_DoFMap(DoFMap): + def __init__(self, meshBase mesh, tag=None, + INDEX_t skipCellsAfter=-1): + self.polynomialOrder = 3 + if mesh.dim == 1: + self.localShapeFunctions = [shapeFunctionP3_vertex(0), + shapeFunctionP3_vertex(1), + shapeFunctionP3_edge(0, 1), + shapeFunctionP3_edge(1, 0)] + self.nodes = np.array([[1., 0.], + [0., 1.], + [2./3., 1./3.], + [1./3., 2./3.]], dtype=REAL) + super(P3_DoFMap, self).__init__(mesh, 1, 0, 0, 2, tag, skipCellsAfter) + elif mesh.dim == 2: + self.localShapeFunctions = [shapeFunctionP3_vertex(0), + shapeFunctionP3_vertex(1), + shapeFunctionP3_vertex(2), + shapeFunctionP3_edge(0, 1), + shapeFunctionP3_edge(1, 0), + shapeFunctionP3_edge(1, 2), + shapeFunctionP3_edge(2, 1), + shapeFunctionP3_edge(2, 0), + shapeFunctionP3_edge(0, 2), + shapeFunctionP3_face(0, 1, 2)] + self.nodes = np.array([[1., 0., 0.], + [0., 1., 0.], + [0., 0., 1.], + [2./3., 1./3., 0.], + [1./3., 2./3., 0.], + [0., 2./3., 1./3.], + [0., 1./3., 2./3.], + [1./3., 0., 2./3.], + [2./3., 0., 1./3.], + [1./3., 1./3., 1./3.]], dtype=REAL) + super(P3_DoFMap, self).__init__(mesh, 1, 2, 0, 1, tag, skipCellsAfter) + elif mesh.dim == 3: + self.localShapeFunctions = [shapeFunctionP3_vertex(0), + shapeFunctionP3_vertex(1), + shapeFunctionP3_vertex(2), + shapeFunctionP3_vertex(3), + shapeFunctionP3_edge(0, 1), + shapeFunctionP3_edge(1, 0), + shapeFunctionP3_edge(1, 2), + shapeFunctionP3_edge(2, 1), + shapeFunctionP3_edge(2, 0), + shapeFunctionP3_edge(0, 2), + shapeFunctionP3_edge(0, 3), + shapeFunctionP3_edge(3, 0), + shapeFunctionP3_edge(1, 3), + shapeFunctionP3_edge(3, 1), + shapeFunctionP3_edge(2, 3), + shapeFunctionP3_edge(3, 2), + shapeFunctionP3_face(0, 1, 2), + shapeFunctionP3_face(0, 1, 3), + shapeFunctionP3_face(1, 2, 3), + shapeFunctionP3_face(2, 0, 3)] + self.nodes = np.array([[1., 0., 0., 0.], + [0., 1., 0., 0.], + [0., 0., 1., 0.], + [0., 0., 0., 1.], + [2./3., 1./3., 0., 0.], + [1./3., 2./3., 0., 0.], + [0., 2./3., 1./3., 0.], + [0., 1./3., 2./3., 0.], + [1./3., 0., 2./3., 0.], + [2./3., 0., 1./3., 0.], + [2./3., 0., 0., 1./3.], + [1./3., 0., 0., 2./3.], + [0., 2./3., 0., 1./3.], + [0., 1./3., 0., 2./3.], + [0., 0., 2./3., 1./3.], + [0., 0., 1./3., 2./3.], + [1./3., 1./3., 1./3., 0.], + [1./3., 1./3., 0., 1./3.], + [0., 1./3., 1./3., 1./3.], + [1./3., 0., 1./3., 1./3.]], dtype=REAL) + 
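# 20 barycentric nodes: one per vertex (4), two per edge (6 edges), one per face (4 faces) +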
super(P3_DoFMap, self).__init__(mesh, 1, 2, 1, 0, tag, skipCellsAfter) + else: + raise NotImplementedError() + + def __repr__(self): + return 'P3 DoFMap with {} DoFs and {} boundary DoFs.'.format(self.num_dofs, + self.num_boundary_dofs) + + + + +def str2DoFMap(element): + if element == 'P0': + return P0_DoFMap + elif element == 'P1': + return P1_DoFMap + elif element == 'P2': + return P2_DoFMap + elif element == 'P3': + return P3_DoFMap + + else: + raise NotImplementedError('Unknown DoFMap: {}'.format(element)) + + +def getAvailableDoFMaps(): + return ['P0', 'P1', 'P2', 'P3', + + ] + + +def str2DoFMapOrder(element): + if element in ('P0', 0, '0'): + return 0 + elif element in ('P1', 1, '1'): + return 1 + elif element in ('P2', 2, '2'): + return 2 + elif element in ('P3', 3, '3'): + return 3 + + else: + raise NotImplementedError('Unknown DoFMap: {}'.format(element)) + + +def getSubMap(DoFMap dm, indicator): + if isinstance(indicator, function): + indicator = dm.interpolate(indicator) + else: + assert indicator.shape[0] == dm.num_dofs + + cdef: + DoFMap dmSub_GD + REAL_t[::1] ind = indicator + INDEX_t i, dofOld, dofNew, dofNewBoundary + dict old2new + + old2new = {} + dmSub_GD = type(dm)(dm.mesh) + dofNew = 0 + dofNewBoundary = -1 + for i in range(dmSub_GD.dofs.shape[0]): + for k in range(dmSub_GD.dofs_per_element): + dofOld = dm.cell2dof(i, k) + if dofOld >= 0 and ind[dofOld] > 0: + try: + dmSub_GD.dofs[i, k] = old2new[dofOld] + except KeyError: + dmSub_GD.dofs[i, k] = dofNew + old2new[dofOld] = dofNew + dofNew += 1 + else: + try: + dmSub_GD.dofs[i, k] = old2new[dofOld] + except KeyError: + dmSub_GD.dofs[i, k] = dofNewBoundary + old2new[dofOld] = dofNewBoundary + dofNewBoundary -= 1 + dmSub_GD.num_dofs = dofNew + dmSub_GD.num_boundary_dofs = -dofNewBoundary-1 + return dmSub_GD + + +def getSubMapRestrictionProlongation(DoFMap dm, DoFMap dmSub, indicator=None): + cdef: + INDEX_t numDoFsSub = dmSub.num_dofs + INDEX_t i, k, cellNo, dofNo, dof, dofSub + meshBase mesh = dm.mesh + set subMapAdded + + if indicator is not None: + data = np.ones((numDoFsSub), dtype=REAL) + indptr = np.arange((numDoFsSub+1), dtype=INDEX) + indices = np.zeros((numDoFsSub), dtype=INDEX) + k = 0 + for i in range(dm.num_dofs): + if indicator[i] > 0: + indices[k] = i + k += 1 + else: + assert dm.mesh.num_vertices == dmSub.mesh.num_vertices + assert dm.mesh.num_cells == dmSub.mesh.num_cells + subMapAdded = set() + k = 0 + for cellNo in range(mesh.num_cells): + for dofNo in range(dm.dofs_per_element): + dof = dm.cell2dof(cellNo, dofNo) + dofSub = dmSub.cell2dof(cellNo, dofNo) + if dof >= 0 and dofSub >= 0 and dofSub not in subMapAdded: + subMapAdded.add(dofSub) + k += 1 + data = np.ones((k), dtype=REAL) + indptr = np.arange((numDoFsSub+1), dtype=INDEX) + indices = np.zeros((k), dtype=INDEX) + subMapAdded = set() + k = 0 + for cellNo in range(mesh.num_cells): + for dofNo in range(dm.dofs_per_element): + dof = dm.cell2dof(cellNo, dofNo) + dofSub = dmSub.cell2dof(cellNo, dofNo) + if dof >= 0 and dofSub >= 0 and dofSub not in subMapAdded: + indices[dofSub] = dof + subMapAdded.add(dofSub) + k += 1 + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = dm.num_dofs + P = R.transpose() + return R, P + + +def getSubMapRestrictionProlongation2(meshBase mesh, DoFMap dm, DoFMap dmSub, INDEX_t[::1] newCellIndices): + cdef: + INDEX_t cellNo, newCellNo, dofNo, dofNew, dof + INDEX_t[::1] indices, indptr + REAL_t[::1] data + CSR_LinearOperator opUnreduced2Reduced + indices = uninitialized((dmSub.num_dofs), dtype=INDEX) + for 
cellNo in range(mesh.num_cells): + newCellNo = newCellIndices[cellNo] + if newCellNo >= 0: + for dofNo in range(dm.dofs_per_element): + dofNew = dmSub.cell2dof(newCellNo, dofNo) + if dofNew >= 0: + dof = dm.cell2dof(cellNo, dofNo) + indices[dofNew] = dof + + indptr = np.arange((dmSub.num_dofs+1), dtype=INDEX) + data = np.ones((dmSub.num_dofs), dtype=REAL) + opUnreduced2Reduced = CSR_LinearOperator(indices, indptr, data) + opUnreduced2Reduced.num_columns = dm.num_dofs + return opUnreduced2Reduced + + +def generateLocalMassMatrix(DoFMap dm, DoFMap dm2=None): + cdef: + simplexQuadratureRule qr + REAL_t[::1] entries + REAL_t[::1] node + INDEX_t m, i, j, k, l + REAL_t s + from . femCy import generic_matrix + node = uninitialized((dm.dim+1), dtype=REAL) + if dm2 is None: + qr = simplexXiaoGimbutas(2*dm.polynomialOrder+1, dm.dim) + entries = uninitialized(((dm.dofs_per_element*(dm.dofs_per_element+1))//2), dtype=REAL) + m = 0 + for i in range(len(dm.localShapeFunctions)): + for j in range(i, len(dm.localShapeFunctions)): + s = 0. + for k in range(qr.num_nodes): + for l in range(dm.dim+1): + node[l] = qr.nodes[l, k] + s += dm.localShapeFunctions[i](node) * dm.localShapeFunctions[j](node) * qr.weights[k] + entries[m] = s + m += 1 + else: + qr = simplexXiaoGimbutas(dm.polynomialOrder+dm2.polynomialOrder+1, dm.dim) + entries = uninitialized((dm.dofs_per_element*dm2.dofs_per_element), dtype=REAL) + m = 0 + for i in range(len(dm.localShapeFunctions)): + for j in range(len(dm2.localShapeFunctions)): + s = 0. + for k in range(qr.num_nodes): + for l in range(dm.dim+1): + node[l] = qr.nodes[l, k] + s += dm.localShapeFunctions[i](node) * dm2.localShapeFunctions[j](node) * qr.weights[k] + entries[m] = s + m += 1 + return generic_matrix(entries) + + +cdef class lookupFunction(function): + cdef: + meshBase mesh + DoFMap dm + REAL_t[::1] u + public cellFinder2 cellFinder + + def __init__(self, meshBase mesh, DoFMap dm, REAL_t[::1] u, cellFinder2 cF=None): + self.mesh = mesh + self.dm = dm + self.u = u + if cF is None: + self.cellFinder = cellFinder2(self.mesh) + else: + self.cellFinder = cF + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, REAL_t[::1] x): + cdef: + shapeFunction shapeFun + REAL_t val + INDEX_t cellNo, dof, k + cellNo = self.cellFinder.findCell(x) + if cellNo == -1: + return 0. + val = 0. + for k in range(self.dm.dofs_per_element): + dof = self.dm.cell2dof(cellNo, k) + if dof >= 0: + shapeFun = self.dm.localShapeFunctions[k] + val += shapeFun.eval(self.cellFinder.bary)*self.u[dof] + return val diff --git a/fem/PyNucleus_fem/__init__.py b/fem/PyNucleus_fem/__init__.py new file mode 100644 index 0000000..3620f81 --- /dev/null +++ b/fem/PyNucleus_fem/__init__.py @@ -0,0 +1,177 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . mesh import mesh1d, mesh2d, mesh3d, meshNd +from . 
mesh import (simpleInterval, simpleSquare, simpleLshape, simpleBox, + circle, graded_circle, cutoutCircle, twinCircle, dumbbell, wrench, + Hshape, ball, rectangle, crossSquare, + gradedSquare, gradedBox, + disconnectedInterval, disconnectedDomain, + double_graded_interval, + simpleFicheraCube, uniformSquare, + standardSimplex2D, standardSimplex3D, + intervalWithInteraction, + double_graded_interval_with_interaction, + discWithIslands, + squareWithInteractions, + discWithInteraction, + plotFunctions) +from . mesh import (PHYSICAL, INTERIOR_NONOVERLAPPING, INTERIOR, NO_BOUNDARY, + DIRICHLET, HOMOGENEOUS_DIRICHLET, + NEUMANN, HOMOGENEOUS_NEUMANN, + NORM, boundaryConditions) +from . mesh import meshFactory as meshFactoryClass +from . meshCy import radialMeshTransformation +from . DoFMaps import (P0_DoFMap, P1_DoFMap, P2_DoFMap, P3_DoFMap, + + str2DoFMap, str2DoFMapOrder, getAvailableDoFMaps, + lookupFunction) +from . femCy import (assembleDrift, + assembleMassNonSym, + assembleSurfaceMass, + + assembleMatrix, + assembleRHSfromFEfunction, + getSurfaceDoFMap) +from . femCy import (scalar_coefficient_stiffness_1d_sym_P1, + scalar_coefficient_stiffness_2d_sym_P1, + scalar_coefficient_stiffness_3d_sym_P1, + stiffness_2d_sym_anisotropic2_P1, + mass_0d_in_1d_sym_P1, + mass_1d_in_2d_sym_P1, + mass_1d_in_2d_sym_P2) +from . quadrature import simplexXiaoGimbutas +from . distributed_operators import (DistributedLinearOperator, + CSR_DistributedLinearOperator) +from . functions import (simpleAnisotropy, simpleAnisotropy2, inclusions, inclusionsHong, + motorPermeability) +from . functions import (function, + vectorFunction, + _rhsFunSin1D, _solSin1D, _rhsFunSin2D, _cos1D, _cos2D, _rhsCos2D, _grad_cos2d_n, + _rhsFunSin3D, _solSin2D, _solSin3D, Lambda, constant, + complexLambda, + real, imag, + _rhsFunSin3D_memoized, + _rhsFichera, _solFichera, + solCos1DHeat, rhsFunCos1DHeat, + rhsFunSource1D, rhsFunSource2D, + solCos2DHeat, rhsFunCos2DHeat, + solFractional, + rhsFractional1D, solFractional1D, + rhsFractional2D, solFractional2D, + solFractional2Dcombination, + rhsFractional2Dcombination, + rhsHr, + rhsHr2Ddisk, + indicatorFunctor, + squareIndicator, + radialIndicator, + fractalDiffusivity, expDiffusivity) +rhsFunSin1D = _rhsFunSin1D() +rhsFunSin2D = _rhsFunSin2D() +rhsFunSin3D = _rhsFunSin3D() +cos2D = _cos2D() +rhsCos2D = _rhsCos2D() +solSin1D = _solSin1D() +solSin2D = _solSin2D() +solSin3D = _solSin3D() +grad_cos2d_n = _grad_cos2d_n() +rhsFichera = _rhsFichera() +solFichera = _solFichera() + + +def solFractional2D_nonPeriodic(s): + import numpy as np + return solFractional2Dcombination(s, [{'n': 2, 'l': 2, 'angular_shift': 0.}, + {'n': 1, 'l': 5, 'angular_shift': np.pi/3.}]) + + +def rhsFractional2D_nonPeriodic(s): + import numpy as np + return rhsFractional2Dcombination(s, [{'n': 2, 'l': 2, 'angular_shift': 0.}, + {'n': 1, 'l': 5, 'angular_shift': np.pi/3.}]) + + +from . functions import (_rhsBoundaryLayer2D, _solBoundaryLayer2D, + _solCornerSingularity2D, rhsMotor, + rhsBoundarySingularity2D, solBoundarySingularity2D) + +rhsBoundaryLayer2D = _rhsBoundaryLayer2D() +solBoundaryLayer2D = _solBoundaryLayer2D() +solCornerSingularity2D = _solCornerSingularity2D() +# rhsMotor = _rhsMotor() + +# Parallel Stuff +from . mesh import (stitchSubdomains, accumulate2global, getGlobalPartitioning, + getSubMeshSolution, + getRestrictionProlongationSubmesh, + plotManager) +from . meshCy import getSubmesh +from . meshPartitioning import (regularMeshPartitioner, metisDofPartitioner, + metisMeshPartitioner) +from . 
boundaryLayerCy import boundaryLayer +from . pdeProblems import diffusionProblem, helmholtzProblem + + +meshFactory = meshFactoryClass() +meshFactory.register('simpleInterval', simpleInterval, 1, aliases=['interval']) +meshFactory.register('unitInterval', simpleInterval, 1, params={'a': 0., 'b': 1.}) +meshFactory.register('intervalWithInteraction', intervalWithInteraction, 1) +meshFactory.register('disconnectedInterval', disconnectedInterval, 1) +meshFactory.register('simpleSquare', simpleSquare, 2) +meshFactory.register('crossSquare', crossSquare, 2, aliases=['squareCross']) +meshFactory.register('unitSquare', uniformSquare, 2, + params={'N': 2, 'ax': 0., 'ay': 0., 'bx': 1., 'by': 1.}, + aliases=['square']) +meshFactory.register('gradedSquare', gradedSquare, 2) +meshFactory.register('gradedBox', gradedBox, 3, aliases=['gradedCube']) +meshFactory.register('squareWithInteraction', squareWithInteractions, 2) +meshFactory.register('simpleLshape', simpleLshape, 2, aliases=['Lshape', 'L-shape']) +meshFactory.register('circle', circle, 2, aliases=['disc', 'unitDisc', 'ball2d', '2dball']) +meshFactory.register('graded_circle', graded_circle, 2, aliases=['gradedCircle']) +meshFactory.register('discWithInteraction', discWithInteraction, 2) +meshFactory.register('cutoutCircle', cutoutCircle, 2, aliases=['cutoutDisc']) +meshFactory.register('simpleBox', simpleBox, 3, aliases=['box', 'unitBox', 'cube', 'unitCube']) +meshFactory.register('simpleFicheraCube', simpleFicheraCube, 3, aliases=['fichera', 'ficheraCube']) + + + +from PyNucleus_base.factory import factory + +dofmapFactory = factory() +dofmapFactory.register('P0d', P0_DoFMap, aliases=['P0']) +dofmapFactory.register('P1c', P1_DoFMap, aliases=['P1']) +dofmapFactory.register('P2c', P2_DoFMap, aliases=['P2']) +dofmapFactory.register('P3c', P3_DoFMap, aliases=['P3']) + + +functionFactory = factory() +functionFactory.register('rhsFunSin1D', _rhsFunSin1D) +functionFactory.register('rhsFunSin2D', _rhsFunSin2D) +functionFactory.register('rhsFunSin3D', _rhsFunSin3D) +functionFactory.register('solSin1D', _solSin1D, aliases=['sin1d']) +functionFactory.register('solCos1D', _cos1D, aliases=['cos1d']) +functionFactory.register('solSin2D', _solSin2D, aliases=['sin2d']) +functionFactory.register('solCos2D', _cos2D, aliases=['cos2d']) +functionFactory.register('solSin3D', _solSin3D, aliases=['sin3d']) +functionFactory.register('solFractional', solFractional) +functionFactory.register('solFractional1D', solFractional1D) +functionFactory.register('solFractional2D', solFractional2D) +functionFactory.register('rhsFractional1D', rhsFractional1D) +functionFactory.register('rhsFractional2D', rhsFractional2D) +functionFactory.register('constant', constant) +functionFactory.register('Lambda', Lambda) +functionFactory.register('squareIndicator', squareIndicator) +functionFactory.register('radialIndicator', radialIndicator) +functionFactory.register('rhsBoundaryLayer2D', rhsBoundaryLayer2D) +functionFactory.register('solBoundaryLayer2D', solBoundaryLayer2D) +functionFactory.register('solCornerSingularity2D', solCornerSingularity2D) +functionFactory.register('lookup', lookupFunction) + +from . 
import _version +__version__ = _version.get_versions()['version'] diff --git a/fem/PyNucleus_fem/_version.py b/fem/PyNucleus_fem/_version.py new file mode 100644 index 0000000..8a82502 --- /dev/null +++ b/fem/PyNucleus_fem/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_fem/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return 
None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
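+        # In a detached checkout the output looks something like
+        #   * (HEAD detached at 1234abc)
+        #     master
+        # hence the special-casing of "(" and the 2-character strip below.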
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
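+        # e.g. "PyNucleus_fem/_version.py" has two path components, so the
+        # loop below walks two os.path.dirname steps up from this file.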
+ for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/fem/PyNucleus_fem/algebraicOverlaps.pxd b/fem/PyNucleus_fem/algebraicOverlaps.pxd new file mode 100644 index 0000000..ea9e9a4 --- /dev/null +++ b/fem/PyNucleus_fem/algebraicOverlaps.pxd @@ -0,0 +1,160 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from mpi4py cimport MPI + + +cdef enum flush_type: + no_flush, + flush_local, + flush_local_all, + flush, + flush_all + + +cdef class algebraicOverlap: + # Tracks the algebraic overlap with the DoFMap of another subdomain. + + cdef: + public INDEX_t num_subdomain_dofs, num_shared_dofs, mySubdomainNo, otherSubdomainNo, numSharedVecs + public INDEX_t memOffset, totalMemSize + public INDEX_t[::1] memOffsetOther, memOffsetTemp + public INDEX_t[::1] shared_dofs + REAL_t[:, ::1] exchangeIn, exchangeOut + REAL_t[::1] myExchangeIn, myExchangeOut + COMPLEX_t[:, ::1] exchangeInComplex, exchangeOutComplex + COMPLEX_t[::1] myExchangeInComplex, myExchangeOutComplex + MPI.Comm comm + INDEX_t tagNoSend, tagNoRecv + + cdef MPI.Request send(self, + const REAL_t[::1] vec, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef MPI.Request receive(self, + const REAL_t[::1] vec, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef MPI.Request sendComplex(self, + const COMPLEX_t[::1] vec, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef MPI.Request receiveComplex(self, + const COMPLEX_t[::1] vec, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef void accumulateProcess(self, + REAL_t[::1] vec, + INDEX_t vecNo=*) + cdef void accumulateProcessComplex(self, + COMPLEX_t[::1] vec, + INDEX_t vecNo=*) + cdef void setOverlapLocal(self, + REAL_t[::1] vec, + INDEX_t vecNo=*) + cdef void uniqueProcess(self, + REAL_t[::1] vec, + INDEX_t vecNo=*) + + +cdef class algebraicOverlapPersistent(algebraicOverlap): + cdef: + MPI.Prequest SendRequest, RecvRequest + + +cdef class algebraicOverlapOneSidedGet(algebraicOverlap): + cdef: + MPI.Win Window + + +cdef class algebraicOverlapOneSidedPut(algebraicOverlap): + cdef: + MPI.Win Window + + +cdef class algebraicOverlapOneSidedPutLockAll(algebraicOverlap): + cdef: + MPI.Win Window + + +cdef class algebraicOverlapManager: + # Tracks the algebraic overlap with the DoFMaps of all other subdomains. 
+ cdef: + public INDEX_t numSubdomains, num_subdomain_dofs, mySubdomainNo + INDEX_t _max_cross + public dict overlaps + public MPI.Comm comm + list requestsSend + # public, because Gauss-Seidel needs it + public INDEX_t[::1] Didx, DidxNonOverlapping + public REAL_t[::1] Dval, DvalNonOverlapping + public REAL_t[:, ::1] exchangeIn, exchangeOut + public str type + MPI.Win Window + BOOL_t distribute_is_prepared, non_overlapping_distribute_is_prepared + + cpdef void distribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=*, + BOOL_t nonOverlapping=*, + INDEX_t level=*) + cdef void distributeComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] vec2=*, + BOOL_t nonOverlapping=*) + cdef void redistribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=*, + BOOL_t nonOverlapping=*, + BOOL_t asynchronous=*, + INDEX_t vecNo=*) + cpdef void accumulate(self, + REAL_t[::1] vec, + REAL_t[::1] return_vec=*, + BOOL_t asynchronous=*, + INDEX_t vecNo=*, + INDEX_t level=*) + cdef void accumulateComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] return_vec=*, + BOOL_t asynchronous=*, + INDEX_t vecNo=*) + cdef void send(self, + REAL_t[::1] vec, + BOOL_t asynchronous=*, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef void receive(self, + REAL_t[::1] return_vec, + BOOL_t asynchronous=*, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef void sendComplex(self, + COMPLEX_t[::1] vec, + BOOL_t asynchronous=*, + INDEX_t vecNo=*, + flush_type flushOp=*) + cdef void receiveComplex(self, + COMPLEX_t[::1] return_vec, + BOOL_t asynchronous=*, + INDEX_t vecNo=*, + flush_type flushOp=*) + + +cdef class multilevelAlgebraicOverlapManager: + cdef: + public list levels + public MPI.Comm comm + REAL_t[::1] reduceMem + public MPI.Win ReduceWindow + public BOOL_t useLockAll + public BOOL_t useAsynchronousComm + list ReduceWindows + list ReduceMems + INDEX_t rank diff --git a/fem/PyNucleus_fem/algebraicOverlaps.pyx b/fem/PyNucleus_fem/algebraicOverlaps.pyx new file mode 100644 index 0000000..efa7b76 --- /dev/null +++ b/fem/PyNucleus_fem/algebraicOverlaps.pyx @@ -0,0 +1,1981 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t +from PyNucleus_base.ip_norm cimport mydot +from PyNucleus_base import uninitialized +from . DoFMaps cimport DoFMap +from . mesh import INTERIOR_NONOVERLAPPING, INTERIOR +from . boundaryLayerCy import boundaryLayer +from PyNucleus_base.linear_operators import (LinearOperator_wrapper, + diagonalOperator) +import numpy as np +cimport numpy as np +from numpy.linalg import norm +cimport cython + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from sys import stdout + + +################################################################################ +# overlap objects for DoFMaps + +cdef class algebraicOverlap: + # Tracks the algebraic overlap with the DoFMap of another subdomain. 
+ def __init__(self, + INDEX_t num_subdomain_dofs, + INDEX_t[::1] shared_dofs, + INDEX_t mySubdomainNo, + INDEX_t otherSubdomainNo, + comm, + INDEX_t numSharedVecs=1): + self.num_subdomain_dofs = num_subdomain_dofs + self.shared_dofs = shared_dofs + self.num_shared_dofs = shared_dofs.shape[0] + self.mySubdomainNo = mySubdomainNo + self.otherSubdomainNo = otherSubdomainNo + self.numSharedVecs = numSharedVecs + self.comm = comm + self.tagNoSend = 50 + self.tagNoRecv = 50 + self.exchangeInComplex = None + self.exchangeOutComplex = None + self.myExchangeInComplex = None + self.myExchangeOutComplex = None + + def setMemory(self, REAL_t[:, ::1] exchangeIn, REAL_t[:, ::1] exchangeOut, + INDEX_t memOffset, INDEX_t totalMemSize): + self.exchangeIn = exchangeIn + self.exchangeOut = exchangeOut + self.memOffset = memOffset + self.totalMemSize = totalMemSize + self.myExchangeIn = self.exchangeIn[0, self.memOffset:self.memOffset+self.num_shared_dofs] + self.myExchangeOut = self.exchangeOut[0, self.memOffset:self.memOffset+self.num_shared_dofs] + + def setComplex(self): + self.exchangeInComplex = uninitialized((self.exchangeIn.shape[0], self.exchangeIn.shape[1]), dtype=COMPLEX) + self.exchangeOutComplex = uninitialized((self.exchangeOut.shape[0], self.exchangeOut.shape[1]), dtype=COMPLEX) + self.myExchangeInComplex = self.exchangeInComplex[0, self.memOffset:self.memOffset+self.num_shared_dofs] + self.myExchangeOutComplex = self.exchangeOutComplex[0, self.memOffset:self.memOffset+self.num_shared_dofs] + + def flushMemory(self, INDEX_t vecNo=0, REAL_t value=0.): + self.exchangeIn[:, :] = value + self.exchangeOut[:, :] = value + + def __repr__(self): + return ('{} of subdomain {}' + + ' with {} has {}/{} dofs').format(self.__class__.__name__, + self.mySubdomainNo, + self.otherSubdomainNo, + self.num_shared_dofs, + self.num_subdomain_dofs) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef MPI.Request send(self, + const REAL_t[::1] vec, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + cdef: + INDEX_t j + INDEX_t[::1] shared_dofs = self.shared_dofs + for j in range(self.num_shared_dofs): + self.myExchangeOut[j] = vec[shared_dofs[j]] + self.tagNoSend += 1 + return self.comm.Isend(self.myExchangeOut, + dest=self.otherSubdomainNo, + tag=self.tagNoSend) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef MPI.Request receive(self, + const REAL_t[::1] vec, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + self.tagNoRecv += 1 + return self.comm.Irecv(self.myExchangeIn, + source=self.otherSubdomainNo, + tag=self.tagNoRecv) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void accumulateProcess(self, + REAL_t[::1] vec, + INDEX_t vecNo=0): + cdef: + INDEX_t j + INDEX_t[::1] shared_dofs = self.shared_dofs + for j in range(self.num_shared_dofs): + vec[shared_dofs[j]] += self.myExchangeIn[j] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef MPI.Request sendComplex(self, + const COMPLEX_t[::1] vec, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + cdef: + INDEX_t j + INDEX_t[::1] shared_dofs = self.shared_dofs + if self.myExchangeOutComplex is None: + self.setComplex() + for j in range(self.num_shared_dofs): + self.myExchangeOutComplex[j] = vec[shared_dofs[j]] + self.tagNoSend += 1 + return self.comm.Isend(self.myExchangeOutComplex, + dest=self.otherSubdomainNo, + tag=self.tagNoSend) + + 
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request receiveComplex(self,
+                                    const COMPLEX_t[::1] vec,
+                                    INDEX_t vecNo=0,
+                                    flush_type flushOp=no_flush):
+        if self.myExchangeInComplex is None:
+            self.setComplex()
+        self.tagNoRecv += 1
+        return self.comm.Irecv(self.myExchangeInComplex,
+                               source=self.otherSubdomainNo,
+                               tag=self.tagNoRecv)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void accumulateProcessComplex(self,
+                                       COMPLEX_t[::1] vec,
+                                       INDEX_t vecNo=0):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+        for j in range(self.num_shared_dofs):
+            vec[shared_dofs[j]] = vec[shared_dofs[j]] + self.myExchangeInComplex[j]
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void setOverlapLocal(self, REAL_t[::1] vec, INDEX_t vecNo=0):
+        cdef:
+            INDEX_t j
+        for j in range(self.num_shared_dofs):
+            vec[j] = self.myExchangeIn[j]
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void uniqueProcess(self,
+                            REAL_t[::1] vec,
+                            INDEX_t vecNo=0):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            vec[shared_dofs[j]] = exchangeIn[j]
+
+    def HDF5write(self, node):
+        compression = 'gzip'
+        node.attrs['num_subdomain_dofs'] = self.num_subdomain_dofs
+        node.attrs['mySubdomainNo'] = self.mySubdomainNo
+        node.attrs['otherSubdomainNo'] = self.otherSubdomainNo
+        node.create_dataset('shared_dofs', data=self.shared_dofs,
+                            compression=compression)
+
+    @staticmethod
+    def HDF5read(node, comm):
+        overlap = algebraicOverlap(node.attrs['num_subdomain_dofs'],
+                                   np.array(node['shared_dofs'], dtype=INDEX),
+                                   node.attrs['mySubdomainNo'],
+                                   node.attrs['otherSubdomainNo'],
+                                   comm)
+        return overlap
+
+
+cdef class algebraicOverlapPersistent(algebraicOverlap):
+    def __init__(self,
+                 INDEX_t num_subdomain_dofs,
+                 INDEX_t[::1] shared_dofs,
+                 INDEX_t mySubdomainNo,
+                 INDEX_t otherSubdomainNo,
+                 comm,
+                 INDEX_t numSharedVecs=1):
+        algebraicOverlap.__init__(self,
+                                  num_subdomain_dofs,
+                                  shared_dofs,
+                                  mySubdomainNo,
+                                  otherSubdomainNo,
+                                  comm,
+                                  numSharedVecs)
+
+    def setMemory(self, REAL_t[:, ::1] exchangeIn, REAL_t[:, ::1] exchangeOut,
+                  INDEX_t memOffset, INDEX_t totalMemSize):
+        # forward the shared buffer layout to the base class, then set up
+        # the persistent send/receive requests on this overlap's windows
+        super(algebraicOverlapPersistent, self).setMemory(exchangeIn, exchangeOut,
+                                                          memOffset, totalMemSize)
+        cdef:
+            INDEX_t vecNo = 0
+        self.SendRequest = self.comm.Send_init(self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs],
+                                               dest=self.otherSubdomainNo, tag=55)
+        self.RecvRequest = self.comm.Recv_init(self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs],
+                                               source=self.otherSubdomainNo, tag=55)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request send(self,
+                          const REAL_t[::1] vec,
+                          INDEX_t vecNo=0,
+                          flush_type flushOp=no_flush):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            exchangeOut[j] = vec[shared_dofs[j]]
+        self.SendRequest.Start()
+        return self.SendRequest
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request receive(self,
+                             const REAL_t[::1] vec,
+                             INDEX_t vecNo=0,
+                             flush_type flushOp=no_flush):
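+        # persistent mode: restart the pre-posted receive request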
+        self.RecvRequest.Start()
+        return self.RecvRequest
+
+
+cdef class algebraicOverlapBlocking(algebraicOverlap):
+    def __init__(self,
+                 INDEX_t num_subdomain_dofs,
+                 INDEX_t[::1] shared_dofs,
+                 INDEX_t mySubdomainNo,
+                 INDEX_t otherSubdomainNo,
+                 comm,
+                 INDEX_t numSharedVecs=1):
+        algebraicOverlap.__init__(self,
+                                  num_subdomain_dofs,
+                                  shared_dofs,
+                                  mySubdomainNo,
+                                  otherSubdomainNo,
+                                  comm,
+                                  numSharedVecs)
+
+    def setMemory(self, REAL_t[:, ::1] exchangeIn, REAL_t[:, ::1] exchangeOut,
+                  INDEX_t memOffset, INDEX_t totalMemSize):
+        super(algebraicOverlapBlocking, self).setMemory(exchangeIn, exchangeOut,
+                                                        memOffset, totalMemSize)
+        cdef:
+            INDEX_t vecNo = 0
+        self.SendRequest = self.comm.Send_init(self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs],
+                                               dest=self.otherSubdomainNo, tag=55)
+        self.RecvRequest = self.comm.Recv_init(self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs],
+                                               source=self.otherSubdomainNo, tag=55)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request send(self,
+                          const REAL_t[::1] vec,
+                          INDEX_t vecNo=0,
+                          flush_type flushOp=no_flush):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            exchangeOut[j] = vec[shared_dofs[j]]
+        self.SendRequest.Start()
+        return self.SendRequest
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request receive(self,
+                             const REAL_t[::1] vec,
+                             INDEX_t vecNo=0,
+                             flush_type flushOp=no_flush):
+        self.RecvRequest.Start()
+        return self.RecvRequest
+
+
+cdef class algebraicOverlapOneSidedGet(algebraicOverlap):
+    def __init__(self,
+                 INDEX_t num_subdomain_dofs,
+                 INDEX_t[::1] shared_dofs,
+                 INDEX_t mySubdomainNo,
+                 INDEX_t otherSubdomainNo,
+                 comm,
+                 INDEX_t numSharedVecs=1):
+        algebraicOverlap.__init__(self,
+                                  num_subdomain_dofs,
+                                  shared_dofs,
+                                  mySubdomainNo,
+                                  otherSubdomainNo,
+                                  comm,
+                                  numSharedVecs)
+
+    def setWindow(self, MPI.Win w):
+        self.Window = w
+
+    def exchangeMemOffsets(self, comm, INDEX_t tag=0):
+        self.memOffsetOther = uninitialized((1), dtype=INDEX)
+        self.memOffsetTemp = uninitialized((1), dtype=INDEX)
+        self.memOffsetTemp[0] = self.memOffset
+        return (comm.Isend(self.memOffsetTemp, dest=self.otherSubdomainNo, tag=tag),
+                comm.Irecv(self.memOffsetOther, source=self.otherSubdomainNo, tag=tag))
+
+    def flushMemory(self, INDEX_t vecNo=0, REAL_t value=0.):
+        cdef:
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        exchangeOut[:] = value
+        self.Window.Lock(self.mySubdomainNo, MPI.LOCK_EXCLUSIVE)
+        self.Window.Put(exchangeOut, self.mySubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffset)*MPI.REAL.size,
+                                exchangeOut.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.mySubdomainNo)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request send(self,
+                          const REAL_t[::1] vec,
+                          INDEX_t vecNo=0,
+                          flush_type flushOp=no_flush):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            exchangeOut[j] = vec[shared_dofs[j]]
+        self.Window.Lock(self.mySubdomainNo, MPI.LOCK_EXCLUSIVE)
+        self.Window.Put(exchangeOut, self.mySubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffset)*MPI.REAL.size,
+                                exchangeOut.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.mySubdomainNo)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request receive(self,
+                             const REAL_t[::1] vec,
+                             INDEX_t vecNo=0,
+                             flush_type flushOp=no_flush):
+        cdef:
+            REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        self.Window.Lock(self.otherSubdomainNo, MPI.LOCK_SHARED)
+        self.Window.Get(exchangeIn, self.otherSubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffsetOther[0])*MPI.REAL.size,
+                                exchangeIn.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.otherSubdomainNo)
+
+
+cdef class algebraicOverlapOneSidedPut(algebraicOverlap):
+    def __init__(self,
+                 INDEX_t num_subdomain_dofs,
+                 INDEX_t[::1] shared_dofs,
+                 INDEX_t mySubdomainNo,
+                 INDEX_t otherSubdomainNo,
+                 comm,
+                 INDEX_t numSharedVecs=1):
+        algebraicOverlap.__init__(self,
+                                  num_subdomain_dofs,
+                                  shared_dofs,
+                                  mySubdomainNo,
+                                  otherSubdomainNo,
+                                  comm,
+                                  numSharedVecs)
+
+    def setWindow(self, MPI.Win w):
+        self.Window = w
+
+    def exchangeMemOffsets(self, comm, INDEX_t tag=0):
+        self.memOffsetOther = uninitialized((1), dtype=INDEX)
+        self.memOffsetTemp = uninitialized((1), dtype=INDEX)
+        self.memOffsetTemp[0] = self.memOffset
+        return (comm.Isend(self.memOffsetTemp, dest=self.otherSubdomainNo, tag=tag),
+                comm.Irecv(self.memOffsetOther, source=self.otherSubdomainNo, tag=tag))
+
+    def flushMemory(self, INDEX_t vecNo=0, REAL_t value=0.):
+        cdef:
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        exchangeOut[:] = value
+        # flushing writes to this rank's own window region, so the local
+        # window is locked
+        self.Window.Lock(self.mySubdomainNo, MPI.LOCK_EXCLUSIVE)
+        self.Window.Put(exchangeOut, self.mySubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffset)*MPI.REAL.size,
+                                exchangeOut.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.mySubdomainNo)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request send(self,
+                          const REAL_t[::1] vec,
+                          INDEX_t vecNo=0,
+                          flush_type flushOp=no_flush):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            exchangeOut[j] = vec[shared_dofs[j]]
+        self.Window.Lock(self.otherSubdomainNo, MPI.LOCK_SHARED)
+        self.Window.Put(exchangeOut, self.otherSubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffsetOther[0])*MPI.REAL.size,
+                                exchangeOut.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.otherSubdomainNo)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef MPI.Request receive(self,
+                             const REAL_t[::1] vec,
+                             INDEX_t vecNo=0,
+                             flush_type flushOp=no_flush):
+        cdef:
+            REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        self.Window.Lock(self.mySubdomainNo, MPI.LOCK_EXCLUSIVE)
+        self.Window.Get(exchangeIn, self.mySubdomainNo,
+                        target=((vecNo*self.totalMemSize+self.memOffset)*MPI.REAL.size,
+                                exchangeIn.shape[0],
+                                MPI.REAL))
+        self.Window.Unlock(self.mySubdomainNo)
+
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void accumulateProcess(self,
+                                REAL_t[::1] vec,
+                                INDEX_t vecNo=0):
+        cdef:
+            INDEX_t j
+            INDEX_t[::1] shared_dofs = self.shared_dofs
+            REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs]
+        for j in range(self.num_shared_dofs):
+            vec[shared_dofs[j]] +=
exchangeIn[j] + + +cdef class algebraicOverlapOneSidedPutLockAll(algebraicOverlap): + def __init__(self, + INDEX_t num_subdomain_dofs, + INDEX_t[::1] shared_dofs, + INDEX_t mySubdomainNo, + INDEX_t otherSubdomainNo, + comm, + INDEX_t numSharedVecs=1): + algebraicOverlap.__init__(self, + num_subdomain_dofs, + shared_dofs, + mySubdomainNo, + otherSubdomainNo, + comm, + numSharedVecs) + + def setWindow(self, MPI.Win w): + self.Window = w + + def exchangeMemOffsets(self, comm, INDEX_t tag=0): + self.memOffsetOther = uninitialized((1), dtype=INDEX) + self.memOffsetTemp = uninitialized((1), dtype=INDEX) + self.memOffsetTemp[0] = self.memOffset + return (comm.Isend(self.memOffsetTemp, dest=self.otherSubdomainNo, tag=tag), + comm.Irecv(self.memOffsetOther, source=self.otherSubdomainNo, tag=tag)) + + def flushMemory(self, INDEX_t vecNo=0, REAL_t value=0.): + cdef: + REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs] + exchangeOut[:] = value + self.Window.Put(exchangeOut, self.otherSubdomainNo, + target=((vecNo*self.totalMemSize+self.memOffsetOther[0])*MPI.REAL.size, + exchangeOut.shape[0], + MPI.REAL)) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef MPI.Request send(self, + const REAL_t[::1] vec, + INDEX_t vecNo=0, + flush_type flushOp=flush): + cdef: + INDEX_t j + INDEX_t[::1] shared_dofs = self.shared_dofs + REAL_t[::1] exchangeOut = self.exchangeOut[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs] + for j in range(self.num_shared_dofs): + exchangeOut[j] = vec[shared_dofs[j]] + self.Window.Put(exchangeOut, self.otherSubdomainNo, + target=((vecNo*self.totalMemSize+self.memOffsetOther[0])*MPI.REAL.size, + exchangeOut.shape[0], + MPI.REAL)) + if flushOp == no_flush: + pass + elif flushOp == flush: + self.Window.Flush(self.otherSubdomainNo) + elif flushOp == flush_local: + self.Window.Flush_local(self.otherSubdomainNo) + elif flushOp == flush_local_all: + self.Window.Flush_local_all() + elif flushOp == flush_all: + self.Window.Flush_all() + else: + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef MPI.Request receive(self, + const REAL_t[::1] vec, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + cdef: + REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs] + if flushOp == no_flush: + pass + elif flushOp == flush: + self.Window.Flush(self.mySubdomainNo) + elif flushOp == flush_local: + self.Window.Flush_local(self.mySubdomainNo) + elif flushOp == flush_local_all: + self.Window.Flush_local_all() + elif flushOp == flush_all: + self.Window.Flush_all() + else: + raise NotImplementedError() + self.Window.Get(exchangeIn, self.mySubdomainNo, + target=((vecNo*self.totalMemSize+self.memOffset)*MPI.REAL.size, + exchangeIn.shape[0], + MPI.REAL)) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void accumulateProcess(self, + REAL_t[::1] vec, + INDEX_t vecNo=0): + cdef: + INDEX_t j + INDEX_t[::1] shared_dofs = self.shared_dofs + REAL_t[::1] exchangeIn = self.exchangeIn[vecNo, self.memOffset:self.memOffset+self.num_shared_dofs] + for j in range(self.num_shared_dofs): + vec[shared_dofs[j]] += exchangeIn[j] + + +cdef class algebraicOverlapManager: + # Tracks the algebraic overlap with the DoFMaps of all other subdomains. 
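+    # accumulate()/send()/receive() dispatch on the concrete overlap type:
+    # two-sided Isend/Irecv, persistent requests, or one-sided Put/Get,
+    # adding a barrier when one-sided communication is not asynchronous.
+    # Rough usage sketch (`mgr`, `r` are placeholder names):
+    #     mgr.accumulate(r)   # sum subdomain contributions on shared dofs
+    #     mgr.distribute(r)   # scale back by the inverse dof multiplicity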
+ def __init__(self, numSubdomains, num_subdomain_dofs, comm): + self.numSubdomains = numSubdomains + self.num_subdomain_dofs = num_subdomain_dofs + self.overlaps = {} + self.mySubdomainNo = comm.rank + self.comm = comm + self.requestsSend = [] + self._max_cross = 0 + self.distribute_is_prepared = False + self.non_overlapping_distribute_is_prepared = False + + def setComplex(self): + for subdomain in self.overlaps: + self.overlaps[subdomain].setComplex() + + def get_max_cross(self): + if self._max_cross > 0: + return self._max_cross + else: + return None + + max_cross = property(fget=get_max_cross) + + def get_num_shared_dofs(self, unique=False): + if unique: + return self.get_shared_dofs().shape[0] + else: + num_dofs = 0 + for otherSubdomain in self.overlaps: + num_dofs += self.overlaps[otherSubdomain].num_shared_dofs + return num_dofs + + num_shared_dofs = property(fget=get_num_shared_dofs) + + def get_shared_dofs(self): + shared_dofs = set() + for otherSubdomain in self.overlaps: + shared_dofs |= set(list(np.array(self.overlaps[otherSubdomain].shared_dofs))) + return np.array(list(shared_dofs), dtype=INDEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def prepareDistribute(self): + cdef: + INDEX_t i, subdomainNo, k + dict dofCount = {} + INDEX_t[::1] Didx + REAL_t[::1] Dval + for subdomainNo in self.overlaps: + for dof in self.overlaps[subdomainNo].shared_dofs: + try: + dofCount[dof] += 1 + except KeyError: + dofCount[dof] = 2 + self.Didx = uninitialized((len(dofCount)), dtype=INDEX) + self.Dval = uninitialized((len(dofCount)), dtype=REAL) + Didx = self.Didx + Dval = self.Dval + k = 0 + for i in dofCount: + Didx[k] = i + Dval[k] = 1.0/dofCount[i] + k += 1 + self.distribute_is_prepared = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def prepareDistributeRepartitionSend(self, DoFMap DoFMap): + cdef: + REAL_t[::1] x + x = np.ones((DoFMap.num_dofs), dtype=REAL) + self.send(x, asynchronous=False) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + def prepareDistributeRepartition(self, DoFMap DoFMap, BOOL_t doSend=True): + cdef: + INDEX_t i + INDEX_t dofCount = 0 + REAL_t[::1] y + if doSend: + self.prepareDistributeRepartitionSend(DoFMap) + y = np.zeros((DoFMap.num_dofs), dtype=REAL) + self.receive(y, asynchronous=False) + for i in range(DoFMap.num_dofs): + if abs(y[i]-1.0) > 1e-8: + dofCount += 1 + assert np.array(y, copy=False).min() >= 1.0, (self.mySubdomainNo, np.array(y, copy=False), np.array(y, copy=False).min()) + self.Didx = uninitialized((dofCount), dtype=INDEX) + self.Dval = uninitialized((dofCount), dtype=REAL) + dofCount = 0 + for i in range(DoFMap.num_dofs): + if abs(y[i]-1.0) > 1e-8: + self.Didx[dofCount] = i + self.Dval[dofCount] = 1.0/y[i] + dofCount += 1 + self.distribute_is_prepared = True + + # returns 1-(distance from original subdomain)/meshsize + # only sets the vertex dofs + def getProtoPartition(self, REAL_t[::1] x, DoFMap DoFMap, vertexLayers, INDEX_t depth): + cdef: + INDEX_t d, cellNo, vertexNo, dof + for d in range(1, len(vertexLayers)+1): + for cellNo, vertexNo in vertexLayers[d-1]: + dof = DoFMap.cell2dof(cellNo, vertexNo*DoFMap.dofs_per_vertex) + if dof >= 0: + if depth > 1: + x[dof] = max(1.0 - (d)/((depth-1)), 0.) + else: + x[dof] = 0. 
+ # return x + + # returns 1 on initial domain, 0 otherwise + # only sets the vertex dofs + def getProtoPartitionNonOverlapping(self, REAL_t[::1] x, DoFMap DoFMap, vertexLayers, INDEX_t depth): + cdef: + INDEX_t d, cellNo, vertexNo, dof + for d in range(1, len(vertexLayers)+1): + for cellNo, vertexNo in vertexLayers[d-1]: + dof = DoFMap.cell2dof(cellNo, vertexNo*DoFMap.dofs_per_vertex) + if dof >= 0: + x[dof] = 0. + + @cython.cdivision(True) + def prepareDistributeMeshOverlap(self, mesh, INDEX_t nc, DoFMap DoFMap, INDEX_t depth, meshOverlaps): + + # returns layers of vertices around the original subdomain + def getVertexLayers(subdomain): + cdef: + dict v2c + list boundaryVertices2 + set alreadyAdded, boundaryVertices + set boundaryCellsSet + list boundaryCellsList + INDEX_t[::1] cellLayer + INDEX_t v, cell, vertexNo, localVertexNo + INDEX_t[:, ::1] cells = subdomain.cells + + boundaryCellsSet = set() + boundaryVertices2 = [] + dim = subdomain.dim + if dim == 1: + alreadyAdded = set(list(subdomain.getBoundaryVerticesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + boundaryVertices = set(list(subdomain.getBoundaryVerticesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + elif dim == 2: + alreadyAdded = set(list(subdomain.getBoundaryEdgesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + boundaryVertices = set(list(subdomain.getBoundaryEdgesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + elif dim == 3: + alreadyAdded = set(list(subdomain.getBoundaryFacesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + boundaryVertices = set(list(subdomain.getBoundaryFacesByTag([INTERIOR_NONOVERLAPPING]).ravel())) + else: + raise NotImplementedError() + + exteriorBL = boundaryLayer(subdomain, depth, + afterRefinements=0, startCell=nc) + v2c = exteriorBL.vertex2cells(subdomain.cells[nc:, :]) + + for v in boundaryVertices: + boundaryCellsSet |= set(v2c[v]) + boundaryCellsList = exteriorBL.getLayer(depth, boundaryCellsSet, True, subdomain.cells[nc:, :]) + for cellLayer in boundaryCellsList: + boundaryVertices2.append([]) + for cell in cellLayer: + for vertexNo in range(cells.shape[1]): + localVertexNo = cells[nc+cell, vertexNo] + if not localVertexNo in alreadyAdded: + boundaryVertices2[-1].append((nc+cell, vertexNo)) + alreadyAdded.add(localVertexNo) + return boundaryVertices2 + + # get linear interpolant in all dofs (the proto-partition is only given in the vertex dofs) + def linearInterpolant(x): + cdef: + INDEX_t cellNo, vertexNo, i, dof + REAL_t[::1] corner_vals + # the values in the vertices + corner_vals = uninitialized((mesh.dim+1), dtype=REAL) + # linear interpolation for all other dofs + for cellNo in sorted(overlapCells, reverse=True): + # get values in the vertices + for vertexNo in range(mesh.dim+1): + dof = DoFMap.cell2dof(cellNo, vertexNo*DoFMap.dofs_per_vertex) + if dof >= 0: + corner_vals[vertexNo] = x[dof] + elif cellNo < nc: + corner_vals[vertexNo] = 1. + else: + corner_vals[vertexNo] = 0. + # set the correct linear interpolant + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof(cellNo, i) + if dof >= 0: + x[dof] = mydot(DoFMap.nodes[i, :], corner_vals) + + def cutOff(x): + cdef: + INDEX_t cellNo, vertexNo, dof, i + REAL_t[::1] corner_vals + corner_vals = uninitialized((mesh.dim+1), dtype=REAL) + for cellNo in sorted(overlapCells, reverse=True): + for vertexNo in range(mesh.dim+1): + dof = DoFMap.cell2dof(cellNo, vertexNo*DoFMap.dofs_per_vertex) + if dof >= 0: + corner_vals[vertexNo] = x[dof] + elif cellNo < nc: + corner_vals[vertexNo] = 1. + else: + corner_vals[vertexNo] = 0. 
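+                # snap the non-vertex dofs to {0, 1}: a dof keeps the value 1
+                # only if the linear interpolant there is not below 1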
+ for i in range((mesh.dim+1)*DoFMap.dofs_per_vertex, DoFMap.dofs_per_element): + dof = DoFMap.cell2dof(cellNo, i) + if dof >= 0: + if mydot(DoFMap.nodes[i, :], corner_vals) < 1.0: + x[dof] = 0. + else: + x[dof] = 1. + + cdef: + INDEX_t k, dof, subdomainNo, cellNo, m + INDEX_t[::1] Didx + REAL_t[::1] Dval + dict overlapCells = {} + REAL_t[::1] x, y + set sharedDofs + + for subdomainNo in meshOverlaps.overlaps: + for cellNo in meshOverlaps.overlaps[subdomainNo].cells: + try: + overlapCells[cellNo] += 1 + except KeyError: + overlapCells[cellNo] = 2 + m = 0 + for cellNo in overlapCells: + m = max(m, overlapCells[cellNo]) + self._max_cross = m + + vertexLayers = getVertexLayers(mesh) + x = np.ones((DoFMap.num_dofs), dtype=REAL) + self.getProtoPartition(x, DoFMap, vertexLayers, depth) + linearInterpolant(x) + + y = uninitialized((DoFMap.num_dofs), dtype=REAL) + self.accumulate(x, y, asynchronous=False) + assert np.all(np.absolute(y) > 0), (np.array(x), np.array(y)) + for m in range(x.shape[0]): + x[m] /= y[m] + assert np.all(np.isfinite(x)), np.array(x) + + sharedDofs = set() + for subdomainNo in self.overlaps: + for dof in self.overlaps[subdomainNo].shared_dofs: + sharedDofs.add(dof) + self.Didx = uninitialized((len(sharedDofs)), dtype=INDEX) + self.Dval = uninitialized((len(sharedDofs)), dtype=REAL) + Didx = self.Didx + Dval = self.Dval + k = 0 + for dof in sharedDofs: + Didx[k] = dof + Dval[k] = x[dof] + k += 1 + self.distribute_is_prepared = True + + x[:] = 1.0 + self.getProtoPartitionNonOverlapping(x, DoFMap, vertexLayers, depth) + cutOff(x) + self.accumulate(x, y, asynchronous=False) + for m in range(x.shape[0]): + x[m] /= y[m] + assert np.all(np.isfinite(x)), np.array(x) + + self.DidxNonOverlapping = uninitialized((len(sharedDofs)), dtype=INDEX) + self.DvalNonOverlapping = uninitialized((len(sharedDofs)), dtype=REAL) + Didx = self.DidxNonOverlapping + Dval = self.DvalNonOverlapping + k = 0 + for dof in sharedDofs: + Didx[k] = dof + Dval[k] = x[dof] + k += 1 + self.non_overlapping_distribute_is_prepared = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef void distribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=None, + BOOL_t nonOverlapping=False, + INDEX_t level=0): + """ + Distribute an accumulated vector. + """ + cdef: + INDEX_t[::1] Didx + REAL_t[::1] Dval + INDEX_t i, dof, n = self.num_subdomain_dofs + if nonOverlapping: + assert self.non_overlapping_distribute_is_prepared, "Non-overlapping distribute has not been prepared for this algebraic overlap." + Didx = self.DidxNonOverlapping + Dval = self.DvalNonOverlapping + else: + assert self.distribute_is_prepared, "Distribute has not been prepared for this algebraic overlap." + Didx = self.Didx + Dval = self.Dval + + if vec2 is None: + vec2 = vec + else: + for dof in range(n): + vec2[dof] = vec[dof] + for i in range(Didx.shape[0]): + dof = Didx[i] + vec2[dof] *= Dval[i] + + def distribute_py(self, vec, vec2=None, nonOverlapping=False): + self.distribute(vec, vec2, nonOverlapping) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void distributeComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] vec2=None, + BOOL_t nonOverlapping=False): + """ + Distribute an accumulated vector. + """ + cdef: + INDEX_t[::1] Didx + REAL_t[::1] Dval + INDEX_t i, dof, n = self.num_subdomain_dofs + if nonOverlapping: + assert self.non_overlapping_distribute_is_prepared, "Non-overlapping distribute has not been prepared for this algebraic overlap." 
+ Didx = self.DidxNonOverlapping + Dval = self.DvalNonOverlapping + else: + assert self.distribute_is_prepared, "Distribute has not been prepared for this algebraic overlap." + Didx = self.Didx + Dval = self.Dval + + if vec2 is None: + vec2 = vec + else: + for dof in range(n): + vec2[dof] = vec[dof] + for i in range(Didx.shape[0]): + dof = Didx[i] + vec2[dof] = vec2[dof]*Dval[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void redistribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=None, + BOOL_t nonOverlapping=False, + BOOL_t asynchronous=False, + INDEX_t vecNo=0): + if vec2 is None: + self.accumulate(vec, None, asynchronous=asynchronous, vecNo=vecNo) + self.distribute(vec, None, nonOverlapping=nonOverlapping) + else: + self.accumulate(vec, vec2, asynchronous=asynchronous, vecNo=vecNo) + self.distribute(vec2, None, nonOverlapping=nonOverlapping) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef void accumulate(self, + REAL_t[::1] vec, + REAL_t[::1] return_vec=None, + BOOL_t asynchronous=False, + INDEX_t vecNo=0, + INDEX_t level=0): + """ + Exchange information in the overlap. + """ + cdef: + INDEX_t j, subdomainNo + list requestsReceive = [], requestsSend = self.requestsSend, requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + BOOL_t setBarrier + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + MPI.Request.Waitall(requestsSend) + del requestsSend[:] + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + # FIX here + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + ov1S.send(vec, vecNo=vecNo) + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + ov1SP.send(vec, vecNo=vecNo) + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.send(vec, vecNo=vecNo) + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsSend.append(ov.send(vec, vecNo=vecNo)) + requestsReceive.append(ov.receive(vec, vecNo=vecNo)) + # if len(requestsOneSidedPutLockAll) > 0: + # self.Window.Flush_all() + + if return_vec is None: + return_vec = vec + else: + for j in range(vec.shape[0]): + return_vec[j] = vec[j] + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + setBarrier = True + self.comm.Barrier() + else: + setBarrier = False + + while len(requestsReceive) > 0: + status = MPI.Status() + done = MPI.Request.Waitany(requestsReceive, status) + assert status.error == 0 + requestsReceive.pop(done) + subdomainNo = status.source + ov = self.overlaps[subdomainNo] + ov.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedGet: + ov1S = self.overlaps[subdomainNo] + ov1S.receive(return_vec, vecNo=vecNo) + ov1S.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPut: + ov1SP = self.overlaps[subdomainNo] + ov1SP.receive(return_vec, vecNo=vecNo) + ov1SP.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPutLockAll: + ov1SPLA = self.overlaps[subdomainNo] + 
ov1SPLA.receive(return_vec, vecNo=vecNo) + ov1SPLA.accumulateProcess(return_vec, vecNo=vecNo) + if setBarrier: + self.comm.Barrier() + + def accumulate_py(self, + vec, + return_vec=None, + asynchronous=False, + vecNo=0): + self.accumulate(vec, return_vec, asynchronous, vecNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void accumulateComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] return_vec=None, + BOOL_t asynchronous=False, + INDEX_t vecNo=0): + """ + Exchange information in the overlap. + """ + cdef: + INDEX_t j, subdomainNo + list requestsReceive = [], requestsSend = self.requestsSend, requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + BOOL_t setBarrier + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + MPI.Request.Waitall(requestsSend) + del requestsSend[:] + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + # FIX here + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + ov1S.sendComplex(vec, vecNo=vecNo) + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + ov1SP.sendComplex(vec, vecNo=vecNo) + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.sendComplex(vec, vecNo=vecNo) + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsSend.append(ov.sendComplex(vec, vecNo=vecNo)) + requestsReceive.append(ov.receiveComplex(vec, vecNo=vecNo)) + # if len(requestsOneSidedPutLockAll) > 0: + # self.Window.Flush_all() + + if return_vec is None: + return_vec = vec + else: + for j in range(vec.shape[0]): + return_vec[j] = vec[j] + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + setBarrier = True + self.comm.Barrier() + else: + setBarrier = False + + while len(requestsReceive) > 0: + status = MPI.Status() + done = MPI.Request.Waitany(requestsReceive, status) + assert status.error == 0 + requestsReceive.pop(done) + subdomainNo = status.source + ov = self.overlaps[subdomainNo] + ov.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedGet: + ov1S = self.overlaps[subdomainNo] + ov1S.receiveComplex(return_vec, vecNo=vecNo) + ov1S.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPut: + ov1SP = self.overlaps[subdomainNo] + ov1SP.receiveComplex(return_vec, vecNo=vecNo) + ov1SP.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPutLockAll: + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.receiveComplex(return_vec, vecNo=vecNo) + ov1SPLA.accumulateProcessComplex(return_vec, vecNo=vecNo) + if setBarrier: + self.comm.Barrier() + + def unique(self, REAL_t[::1] vec, INDEX_t vecNo=0): + """ + Return an accumulated vector by taking values from the highest + rank. + """ + cdef: + INDEX_t i, subdomainNo + dict requestsReceive = {} + list requestsSend = [] + algebraicOverlap ov + for subdomainNo in self.overlaps: + # FIX: This is inefficient, we would only need to send + # from the highest rank, not all higher ranks.. 
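+            # values flow from higher to lower ranks: send to lower-numbered
+            # neighbors, overwrite own entries with data received from
+            # higher-numbered ones in uniqueProcess()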
+ ov = self.overlaps[subdomainNo] + if subdomainNo < self.comm.rank: + requestSend = ov.send(vec, vecNo=vecNo) + requestsSend.append(requestSend) + else: + requestReceive = ov.receive(vec, vecNo=vecNo) + requestsReceive[subdomainNo] = requestReceive + + for subdomainNo in self.overlaps: + ov = self.overlaps[subdomainNo] + if subdomainNo < self.comm.rank: + continue + status = MPI.Status() + done = MPI.Request.Wait(requestsReceive[subdomainNo], status) + i = status.source + ov.uniqueProcess(vec, vecNo=vecNo) + MPI.Request.Waitall(requestsSend) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void send(self, + REAL_t[::1] vec, + BOOL_t asynchronous=False, + INDEX_t vecNo=0, + flush_type flushOp=flush): + """ + Send information in the overlap. + """ + cdef: + INDEX_t subdomainNo + list requestsSend = self.requestsSend, requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + MPI.Request.Waitall(requestsSend) + del requestsSend[:] + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + ov1S.send(vec, vecNo=vecNo) + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + ov1SP.send(vec, vecNo=vecNo) + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.send(vec, vecNo=vecNo, flushOp=flushOp) + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsSend.append(ov.send(vec, vecNo=vecNo)) + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + self.comm.Barrier() + + def send_py(self, vec, asynchronous=False, vecNo=0): + self.send(vec, asynchronous, vecNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void sendComplex(self, + COMPLEX_t[::1] vec, + BOOL_t asynchronous=False, + INDEX_t vecNo=0, + flush_type flushOp=flush): + """ + Send information in the overlap. 
+ """ + cdef: + INDEX_t subdomainNo + list requestsSend = self.requestsSend, requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + MPI.Request.Waitall(requestsSend) + del requestsSend[:] + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + ov1S.sendComplex(vec, vecNo=vecNo) + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + ov1SP.sendComplex(vec, vecNo=vecNo) + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.sendComplex(vec, vecNo=vecNo, flushOp=flushOp) + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsSend.append(ov.sendComplex(vec, vecNo=vecNo)) + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + self.comm.Barrier() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void receive(self, + REAL_t[::1] return_vec, + BOOL_t asynchronous=False, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + """ + Exchange information in the overlap. + """ + cdef: + INDEX_t subdomainNo + list requestsReceive = [], requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + BOOL_t setBarrier + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + # FIX here + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsReceive.append(ov.receive(return_vec, vecNo=vecNo)) + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + setBarrier = True + else: + setBarrier = False + + while len(requestsReceive) > 0: + status = MPI.Status() + done = MPI.Request.Waitany(requestsReceive, status) + assert status.error == 0 + requestsReceive.pop(done) + subdomainNo = status.source + ov = self.overlaps[subdomainNo] + ov.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedGet: + ov1S = self.overlaps[subdomainNo] + ov1S.receive(return_vec, vecNo=vecNo) + ov1S.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPut: + ov1SP = self.overlaps[subdomainNo] + ov1SP.receive(return_vec, vecNo=vecNo) + ov1SP.accumulateProcess(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPutLockAll: + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.receive(return_vec, 
vecNo=vecNo, flushOp=flushOp) + ov1SPLA.accumulateProcess(return_vec, vecNo=vecNo) + if setBarrier: + self.comm.Barrier() + + def receive_py(self, return_vec, asynchronous=False, vecNo=0): + self.receive(return_vec, asynchronous, vecNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void receiveComplex(self, + COMPLEX_t[::1] return_vec, + BOOL_t asynchronous=False, + INDEX_t vecNo=0, + flush_type flushOp=no_flush): + """ + Exchange information in the overlap. + """ + cdef: + INDEX_t subdomainNo + list requestsReceive = [], requestsOneSidedGet, requestsOneSidedPut + list requestsOneSidedPutLockAll + BOOL_t setBarrier + algebraicOverlap ov + algebraicOverlapOneSidedGet ov1S + algebraicOverlapOneSidedPut ov1SP + algebraicOverlapOneSidedPutLockAll ov1SPLA + requestsOneSidedGet = [] + requestsOneSidedPut = [] + requestsOneSidedPutLockAll = [] + for subdomainNo in self.overlaps: + # FIX here + if isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedGet): + ov1S = self.overlaps[subdomainNo] + requestsOneSidedGet.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPut): + ov1SP = self.overlaps[subdomainNo] + requestsOneSidedPut.append(subdomainNo) + elif isinstance(self.overlaps[subdomainNo], algebraicOverlapOneSidedPutLockAll): + ov1SPLA = self.overlaps[subdomainNo] + requestsOneSidedPutLockAll.append(subdomainNo) + else: + ov = self.overlaps[subdomainNo] + requestsReceive.append(ov.receiveComplex(return_vec, vecNo=vecNo)) + + if ((len(requestsOneSidedGet) > 0 or + len(requestsOneSidedPut) > 0 or + len(requestsOneSidedPutLockAll) > 0) and not asynchronous): + setBarrier = True + else: + setBarrier = False + + while len(requestsReceive) > 0: + status = MPI.Status() + done = MPI.Request.Waitany(requestsReceive, status) + assert status.error == 0 + requestsReceive.pop(done) + subdomainNo = status.source + ov = self.overlaps[subdomainNo] + ov.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedGet: + ov1S = self.overlaps[subdomainNo] + ov1S.receiveComplex(return_vec, vecNo=vecNo) + ov1S.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPut: + ov1SP = self.overlaps[subdomainNo] + ov1SP.receiveComplex(return_vec, vecNo=vecNo) + ov1SP.accumulateProcessComplex(return_vec, vecNo=vecNo) + for subdomainNo in requestsOneSidedPutLockAll: + ov1SPLA = self.overlaps[subdomainNo] + ov1SPLA.receiveComplex(return_vec, vecNo=vecNo, flushOp=flushOp) + ov1SPLA.accumulateProcessComplex(return_vec, vecNo=vecNo) + if setBarrier: + self.comm.Barrier() + + def countDoFs(self): + lv = set() + for subdomainNo in self.overlaps: + if subdomainNo < self.comm.rank: + lv |= set(list(self.overlaps[subdomainNo].shared_dofs)) + return self.comm.allreduce(self.num_subdomain_dofs-len(lv)) + + def getGlobalIndices(self): + cdef: + INDEX_t rank = self.comm.rank + REAL_t[::1] v = rank*np.ones((self.num_subdomain_dofs), + dtype=REAL) + INDEX_t i, k, m + + self.unique(v) + k = 0 + for i in range(self.num_subdomain_dofs): + if v[i] == rank: + k += 1 + m = self.comm.scan(k, MPI.SUM) + m = m-k + for i in range(self.num_subdomain_dofs): + if v[i] == rank: + v[i] = m + m += 1 + self.unique(v) + return np.array(v, copy=False, dtype=INDEX) + + def __repr__(self): + s = '' + for subdomainNo in self.overlaps: + s += self.overlaps[subdomainNo].__repr__() + '\n' + return s + + def HDF5write(self, node): + compression = 'gzip' + node.attrs['numSubdomains'] = self.numSubdomains + 
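getGlobalIndices above builds a globally consistent DoF numbering in three steps: unique assigns each shared DoF to the highest rank touching it, comm.scan turns the per-rank counts of owned DoFs into offsets, and a second unique publishes the chosen numbers to the other ranks. A serial sketch of the offset computation, with itertools.accumulate standing in for the inclusive MPI scan and hypothetical per-rank counts:

from itertools import accumulate

owned_counts = [3, 2, 4]                 # DoFs owned by ranks 0, 1, 2
incl = list(accumulate(owned_counts))    # m = comm.scan(k, MPI.SUM) on each rank
offsets = [m - k for m, k in zip(incl, owned_counts)]  # exclusive scan: m = m - k
# rank r numbers its owned DoFs offsets[r], offsets[r]+1, ...
assert offsets == [0, 3, 5]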
node.attrs['num_subdomain_dofs'] = self.num_subdomain_dofs + node.attrs['max_cross'] = self._max_cross + for subdomainNo in self.overlaps: + grp = node.create_group(str(subdomainNo)) + self.overlaps[subdomainNo].HDF5write(grp) + node.create_dataset('Dval', data=self.Dval, + compression=compression) + node.create_dataset('Didx', data=self.Didx, + compression=compression) + if hasattr(self, 'DidxNonOverlapping'): + node.create_dataset('DvalNonOverlapping', + data=self.DvalNonOverlapping, + compression=compression) + node.create_dataset('DidxNonOverlapping', + data=self.DidxNonOverlapping, + compression=compression) + + @staticmethod + def HDF5read(node, comm): + overlaps = algebraicOverlapManager(node.attrs['numSubdomains'], + node.attrs['num_subdomain_dofs'], + comm) + overlaps._max_cross = node.attrs['max_cross'] + for grp in node: + if grp == 'Didx': + overlaps.Didx = np.array(node['Didx'], dtype=INDEX) + elif grp == 'Dval': + overlaps.Dval = np.array(node['Dval'], dtype=REAL) + elif grp == 'DidxNonOverlapping': + overlaps.DidxNonOverlapping = np.array(node['DidxNonOverlapping'], dtype=INDEX) + elif grp == 'DvalNonOverlapping': + overlaps.DvalNonOverlapping = np.array(node['DvalNonOverlapping'], dtype=REAL) + else: + overlaps.overlaps[int(grp)] = algebraicOverlap.HDF5read(node[grp], + comm) + return overlaps + + def check(self, mesh=None, DoFMap dm=None, interfaces=None, label="Algebraic overlap"): + if mesh is not None: + dof2Cell = uninitialized((dm.num_dofs, 2), dtype=INDEX) + dof2Cells = [set() for dof in range(dm.num_dofs)] + for cellNo in range(mesh.num_cells): + for dofNo in range(dm.dofs_per_element): + dof = dm.cell2dof(cellNo, dofNo) + if dof >= 0: + dof2Cell[dof, 0] = cellNo + dof2Cell[dof, 1] = dofNo + dof2Cells[dof].add((cellNo, dofNo)) + + requests = [] + requests2 = [] + success = True + myDofNodes = {} + for i in self.overlaps: + requests.append(self.comm.isend(self.overlaps[i].num_shared_dofs, dest=i, tag=5)) + + if mesh is not None: + dofNodes = np.zeros((self.overlaps[i].num_shared_dofs, mesh.dim), dtype=REAL) + nodalCoords = uninitialized((dm.dofs_per_element, mesh.dim), dtype=REAL) + k = 0 + for dof in self.overlaps[i].shared_dofs: + cellNo, dofNo = dof2Cell[dof, 0], dof2Cell[dof, 1] + dm.getNodalCoordinates(mesh.vertices_as_array[mesh.cells_as_array[cellNo, :], :], + nodalCoords) + dofNodes[k, :] = nodalCoords[dofNo, :] + k += 1 + myDofNodes[i] = dofNodes + requests2.append(self.comm.Isend(myDofNodes[i], dest=i, tag=6)) + + for i in self.overlaps: + numDoFsOther = self.comm.recv(source=i, tag=5) + if mesh is not None: + otherDofNodes = uninitialized((numDoFsOther, mesh.dim), dtype=REAL) + self.comm.Recv(otherDofNodes, source=i, tag=6) + if numDoFsOther != self.overlaps[i].num_shared_dofs: + print('{}: Subdomains {} and {} shared different number of DoFs: {} vs {}.'.format(label, self.comm.rank, i, + self.overlaps[i].num_shared_dofs, numDoFsOther)) + success = False + elif mesh is not None: + diff = norm(myDofNodes[i]-otherDofNodes, axis=1) + if diff.max() > 1e-9: + diffCount = (diff > 1e-9).sum() + s = '{}: Subdomains {} and {} shared {} different DoFs\n'.format(label, self.comm.rank, i, diffCount) + k = 0 + for dof in self.overlaps[i].shared_dofs: + if diff[k] > 1e-9: + cellNo, dofNo = dof2Cell[dof, 0], dof2Cell[dof, 1] + s += 'cellNo {} dofNo {}: {} != {}\n'.format(cellNo, dofNo, + myDofNodes[i][k, :], + otherDofNodes[k, :]) + if interfaces is not None: + interface = interfaces.interfaces[i] + for cellNo, dofNo in dof2Cells[dof]: + for j in 
range(interface.num_vertices): + if interface.vertices[j, 0] == cellNo: + s += 'vertex {}\n'.format(interface.vertices[j, 1]) + for j in range(interface.num_edges): + if interface.edges[j, 0] == cellNo: + s += 'edge {} {}\n'.format(interface.edges[j, 1], interface.edges[j, 2]) + for j in range(interface.num_faces): + if interface.faces[j, 0] == cellNo: + s += 'face {} {}\n'.format(interface.faces[j, 1], interface.faces[j, 2]) + for j in range(interface.num_cells): + if interface.cells[j] == cellNo: + s += 'cell\n' + + k += 1 + print(s) + success = False + + MPI.Request.Waitall(requests) + MPI.Request.Waitall(requests2) + assert success + self.comm.Barrier() + if self.comm.rank == 0: + print('{} check successful.'.format(label)) + stdout.flush() + self.comm.Barrier() + + def getAccumulateOperator(self): + a = lambda x, y: self.accumulate(x, y) + acc = LinearOperator_wrapper(self.num_subdomain_dofs, + self.num_subdomain_dofs, + a) + return acc + + def getDistributeOperator(self, BOOL_t nonOverlapping=False): + d = lambda x, y: self.distribute(x, y, nonOverlapping=nonOverlapping) + dist = LinearOperator_wrapper(self.num_subdomain_dofs, + self.num_subdomain_dofs, + d) + return dist + + def getDistributeAsDiagonalOperator(self, BOOL_t nonOverlapping=False): + d = np.ones((self.num_subdomain_dofs), dtype=REAL) + self.distribute(d, vec2=None, nonOverlapping=nonOverlapping) + return diagonalOperator(d) + + def flushMemory(self, INDEX_t vecNo=0, REAL_t value=0.): + for i in self.overlaps: + self.overlaps[i].flushMemory(vecNo=vecNo, value=value) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def findMinPerOverlap(self, REAL_t[::1] indicator): + # Takes local vector. + # Returns the elementwise minimum in each overlap, i.e. a dict of vectors. + cdef: + algebraicOverlap ov + INDEX_t j, subdomainNo, dof + dict localIndicators + REAL_t[::1] myLocalIndicator, otherLocalIndicator + # FIXME: We don't really want to accumulate into a temporary vector here. + indicatorTmp = uninitialized((indicator.shape[0]), dtype=REAL) + self.accumulate(indicator, indicatorTmp, asynchronous=False) + del indicatorTmp + localIndicators = {} + for subdomainNo in self.overlaps: + ov = self.overlaps[subdomainNo] + myLocalIndicator = uninitialized((ov.num_shared_dofs), dtype=REAL) + for j in range(ov.num_shared_dofs): + dof = ov.shared_dofs[j] + myLocalIndicator[j] = indicator[dof] + otherLocalIndicator = uninitialized((ov.num_shared_dofs), dtype=REAL) + ov.setOverlapLocal(otherLocalIndicator) + localIndicators[subdomainNo] = uninitialized((ov.num_shared_dofs), dtype=REAL) + np.minimum(myLocalIndicator, + otherLocalIndicator, + out=localIndicators[subdomainNo]) + return localIndicators + + def reduce(self, REAL_t v, BOOL_t asynchronous=False): + cdef: + REAL_t v2_mem[1] + REAL_t[::1] v2 = v2_mem + assert not asynchronous + v2[0] = v + self.comm.Allreduce(MPI.IN_PLACE, v2) + return v2[0] + + def cleanup(self): + MPI.Request.Waitall(self.requestsSend) + # make sure we don't delete any MPI windows that still hold data + self.comm.Barrier() + + +cdef class multilevelAlgebraicOverlapManager: + # Tracks the algebraic overlap with the DoFMaps of all other + # subdomains on all levels. 
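The multilevel manager below keeps one algebraicOverlapManager per grid level and dispatches on vector length: getLevel and the accumulate/distribute wrappers match vec.shape[0] against each level's num_subdomain_dofs, searching from the finest level down. A plain-Python sketch of that lookup (find_level is a hypothetical stand-in for getLevel):

def find_level(level_sizes, n):
    # level_sizes[i] = num_subdomain_dofs on level i, coarse to fine
    for level in range(len(level_sizes) - 1, -1, -1):
        if level_sizes[level] == n:
            return level
    raise NotImplementedError(
        "Cannot find a level of size {}.\nLevel sizes: {}".format(n, level_sizes))

assert find_level([10, 35, 130], 35) == 1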
+ + def __init__(self, comm): + self.levels = [] + self.comm = comm + self.useLockAll = False + numSubComms = 2 + maxCommSize = 300 + numSubComms = max(numSubComms, (comm.size-1)//maxCommSize+1) + commSplits = np.around(np.linspace(1, comm.size, numSubComms+1)).astype(INDEX) + comm.Barrier() + # split comm into subcomms, rank 0 is in the intersection + if self.comm.rank == 0: + time = MPI.Wtime() + subcomms = [] + self.ReduceWindows = [] + self.ReduceMems = [] + for splitNo in range(numSubComms): + subcomms.append(comm.Split(0)) + t1 = MPI.Wtime()-time + time = MPI.Wtime() + for subcomm in subcomms: + self.ReduceWindows.append(MPI.Win.Allocate(MPI.REAL.size*subcomm.size, comm=subcomm)) + self.ReduceMems.append(np.ones((subcomm.size), dtype=REAL)) + t2 = MPI.Wtime()-time + else: + time = MPI.Wtime() + for splitNo in range(numSubComms): + if commSplits[splitNo] <= comm.rank and comm.rank < commSplits[splitNo+1]: + color = 0 + else: + color = MPI.UNDEFINED + subcommTemp = comm.Split(color) + if subcommTemp != MPI.COMM_NULL: + subcomm = subcommTemp + t1 = MPI.Wtime()-time + time = MPI.Wtime() + self.rank = subcomm.rank + self.ReduceWindow = MPI.Win.Allocate(MPI.REAL.size, comm=subcomm) + t2 = MPI.Wtime()-time + self.useAsynchronousComm = False + + def LockAll(self): + cdef: + MPI.Win ReduceWindow + INDEX_t i + if self.useLockAll: + if self.comm.rank == 0: + for i in range(len(self.ReduceWindows)): + ReduceWindow = self.ReduceWindows[i] + ReduceWindow.Lock_all(MPI.MODE_NOCHECK) + else: + self.ReduceWindow.Lock_all(MPI.MODE_NOCHECK) + + def setComplex(self): + for level in range(len(self.levels)): + self.levels[level].setComplex() + + def getLevel(self, INDEX_t n): + cdef: + INDEX_t level + for level in range(len(self.levels)-1, -1, -1): + if self.levels[level].num_subdomain_dofs == n: + return level + else: + raise NotImplementedError("Cannot find a level of size {}.\nLevel sizes: {}".format(n, [self.levels[level].num_subdomain_dofs for level in range(len(self.levels))])) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def accumulate(self, + REAL_t[::1] vec, + REAL_t[::1] return_vec=None, + INDEX_t level=-1, + BOOL_t asynchronous=False, + INDEX_t vecNo=0): + cdef: + INDEX_t n + algebraicOverlapManager ovM + if level == -1: + n = vec.shape[0] + for level in range(len(self.levels)-1, -1, -1): + if self.levels[level].num_subdomain_dofs == n: + break + else: + raise NotImplementedError("Cannot find a level of size {}.\nLevel sizes: {}".format(n, [self.levels[level].num_subdomain_dofs for level in range(len(self.levels))])) + ovM = self.levels[level] + ovM.accumulate(vec, return_vec, asynchronous=asynchronous, vecNo=vecNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def accumulateComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] return_vec=None, + INDEX_t level=-1, + BOOL_t asynchronous=False, + INDEX_t vecNo=0): + cdef: + INDEX_t n + algebraicOverlapManager ovM + if level == -1: + n = vec.shape[0] + for level in range(len(self.levels)-1, -1, -1): + if self.levels[level].num_subdomain_dofs == n: + break + else: + raise NotImplementedError("Cannot find a level of size {}.\nLevel sizes: {}".format(n, [self.levels[level].num_subdomain_dofs for level in range(len(self.levels))])) + ovM = self.levels[level] + ovM.accumulateComplex(vec, return_vec, asynchronous=asynchronous, vecNo=vecNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def unique(self, + 
REAL_t[::1] vec, + INDEX_t vecNo=0): + cdef INDEX_t level, n = vec.shape[0] + for level in range(len(self.levels)): + if self.levels[level].num_subdomain_dofs == n: + break + else: + raise NotImplementedError() + self.levels[level].unique(vec, vecNo=vecNo) + + def prepareDistribute(self): + for i in range(len(self.levels)): + self.levels[i].prepareDistribute() + + def prepareDistributeMeshOverlap(self, mesh, nc, DoFMap DoFMap, depth, meshOverlaps): + for i in range(len(self.levels)-1): + self.levels[i].prepareDistribute() + self.levels[-1].prepareDistributeMeshOverlap(mesh, nc, DoFMap, depth, meshOverlaps) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def distribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=None, + INDEX_t level=-1, + BOOL_t nonOverlapping=False): + cdef: + INDEX_t n + algebraicOverlapManager ovM + if level == -1: + n = vec.shape[0] + for level in range(len(self.levels)-1, -1, -1): + if self.levels[level].num_subdomain_dofs == n: + break + else: + raise NotImplementedError() + ovM = self.levels[level] + ovM.distribute(vec, vec2, nonOverlapping=nonOverlapping) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def distributeComplex(self, + COMPLEX_t[::1] vec, + COMPLEX_t[::1] vec2=None, + INDEX_t level=-1, + BOOL_t nonOverlapping=False): + cdef: + INDEX_t n + algebraicOverlapManager ovM + if level == -1: + n = vec.shape[0] + for level in range(len(self.levels)-1, -1, -1): + if self.levels[level].num_subdomain_dofs == n: + break + else: + raise NotImplementedError() + ovM = self.levels[level] + ovM.distributeComplex(vec, vec2, nonOverlapping=nonOverlapping) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def redistribute(self, + REAL_t[::1] vec, + REAL_t[::1] vec2=None, + level=None, + BOOL_t nonOverlapping=False, + BOOL_t asynchronous=False, + INDEX_t vecNo=0): + cdef: + algebraicOverlapManager ovM + if level is None: + level = len(self.levels)-1 + ovM = self.levels[level] + ovM.redistribute(vec, vec2, + nonOverlapping=nonOverlapping, + asynchronous=asynchronous, + vecNo=vecNo) + + def countDoFs(self, localsize=None, level=None): + # FIX: remove the localsize argument + if level is None: + level = len(self.levels)-1 + return self.levels[level].countDoFs() + + def get_num_shared_dofs(self, unique=False): + return self.levels[len(self.levels)-1].get_num_shared_dofs(unique) + + num_shared_dofs = property(fget=get_num_shared_dofs) + + def __repr__(self): + s = '' + for i in range(len(self.levels)): + s += 'Level {}\n'.format(i)+self.levels[i].__repr__() + return s + + def HDF5write(self, node): + for i, lvl in enumerate(self.levels): + grp = node.create_group(str(i)) + self.levels[i].HDF5write(grp) + + @staticmethod + def HDF5read(node, comm): + overlaps = multilevelAlgebraicOverlapManager(comm) + levels = node.keys() + levels = sorted([int(lvl) for lvl in levels]) + for grp in levels: + overlaps.levels.append(algebraicOverlapManager.HDF5read(node[str(grp)], + comm)) + return overlaps + + def check(self, meshes, DoFMaps, label='Algebraic overlap'): + for i in range(len(self.levels)): + self.levels[i].check(meshes[i], DoFMaps[i], '{} Level {}'.format(label, i)) + print('Validation successful.') + + def reduce(self, REAL_t v, BOOL_t asynchronous=False): + cdef: + INDEX_t i + REAL_t v2_mem[1] + REAL_t[::1] v2 = v2_mem + MPI.Win ReduceWindow + REAL_t[::1] reduceMem + if asynchronous or self.useAsynchronousComm: + if self.comm.rank == 0: + if 
not asynchronous: + # let all ranks write local contributions + self.comm.Barrier() + # get all local residuals + for j in range(len(self.ReduceWindows)): + ReduceWindow = self.ReduceWindows[j] + reduceMem = self.ReduceMems[j] + if not self.useLockAll: + ReduceWindow.Lock(0, MPI.LOCK_EXCLUSIVE) + else: + ReduceWindow.Flush_all() + ReduceWindow.Get(reduceMem, 0) + if not self.useLockAll: + ReduceWindow.Unlock(0) + else: + ReduceWindow.Flush_local(0) + # sum up + for i in range(1, reduceMem.shape[0]): + v += reduceMem[i] + # put global residual in window + v2[0] = v + for j in range(len(self.ReduceWindows)): + ReduceWindow = self.ReduceWindows[j] + reduceMem = self.ReduceMems[j] + if not self.useLockAll: + ReduceWindow.Lock(0, MPI.LOCK_EXCLUSIVE) + # self.ReduceWindow.Put(v2, 0, target=(0, 1, MPI.REAL)) + for rank in range(1, reduceMem.shape[0]): + ReduceWindow.Put(v2, rank, target=(0, 1, MPI.REAL)) + if not self.useLockAll: + ReduceWindow.Unlock(0) + else: + ReduceWindow.Flush(0) + if not asynchronous: + # let all ranks access the result + self.comm.Barrier() + return v + else: + v2[0] = v + # put local residual into window on master + if not self.useLockAll: + self.ReduceWindow.Lock(0, MPI.LOCK_SHARED) + self.ReduceWindow.Put(v2, 0, target=(self.rank*MPI.REAL.size, 1, MPI.REAL)) + if not self.useLockAll: + self.ReduceWindow.Unlock(0) + # else: + # self.ReduceWindow.Flush_all() + if not asynchronous: + # before rank 0 accesses local contributions + self.comm.Barrier() + + if not asynchronous: + # wait until rank 0 has published the result + self.comm.Barrier() + # get global residual from window on master + if not self.useLockAll: + self.ReduceWindow.Lock(self.rank, MPI.LOCK_SHARED) + # self.ReduceWindow.Get(v2, 0, target=(0, 1, MPI.REAL)) + self.ReduceWindow.Get(v2, self.rank, target=(0, 1, MPI.REAL)) + if not self.useLockAll: + self.ReduceWindow.Unlock(self.rank) + else: + self.ReduceWindow.Flush_local(self.rank) + return v2[0] + else: + v2[0] = v + self.comm.Allreduce(MPI.IN_PLACE, v2) + return v2[0] + + def getAccumulateOperator(self, level=None): + if level is None: + level = len(self.levels)-1 + return self.levels[level].getAccumulateOperator() + + def getDistributeOperator(self, level=None, BOOL_t nonOverlapping=False): + if level is None: + level = len(self.levels)-1 + return self.levels[level].getDistributeOperator(nonOverlapping) + + def getDistributeAsDiagonalOperator(self, level=None, BOOL_t nonOverlapping=False): + if level is None: + level = len(self.levels)-1 + return self.levels[level].getDistributeAsDiagonalOperator(nonOverlapping) + + def getGlobalIndices(self, level=None): + if level is None: + level = len(self.levels)-1 + return self.levels[level].getGlobalIndices() + + def flushMemory(self, level=None, INDEX_t vecNo=0): + cdef: + REAL_t v2_mem[1] + REAL_t[::1] v2 = v2_mem + MPI.Win ReduceWindow + REAL_t[::1] reduceMem + INDEX_t i, j + if level is None: + level = len(self.levels)-1 + self.levels[level].flushMemory(vecNo=vecNo) + if self.comm.rank == 0: + for i in range(len(self.ReduceWindows)): + reduceMem = self.ReduceMems[i] + ReduceWindow = self.ReduceWindows[i] + for j in range(reduceMem.shape[0]): + reduceMem[j] = 1. + if not self.useLockAll: + ReduceWindow.Lock(0, MPI.LOCK_EXCLUSIVE) + ReduceWindow.Put(reduceMem, 0) + if not self.useLockAll: + ReduceWindow.Unlock(0) + else: + v2[0] = 1.
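The window-based branch of reduce above computes a global sum without collective calls: every rank puts its local value into its slot of an MPI window, the root sums the slots and puts the global value back, and the ranks read the published result. The following mpi4py sketch reproduces the pattern in simplified form, with a single window hosting all slots on rank 0 and none of the subcommunicator splitting or lock-all variants:

import numpy as np
from mpi4py import MPI

comm = MPI.COMM_WORLD
# one double-sized slot per rank; only rank 0's window memory is used
win = MPI.Win.Allocate(MPI.DOUBLE.size*comm.size, comm=comm)

v = np.array([float(comm.rank+1)])
win.Lock(0, MPI.LOCK_SHARED)
win.Put(v, 0, target=(comm.rank*MPI.DOUBLE.size, 1, MPI.DOUBLE))
win.Unlock(0)
comm.Barrier()                        # all local contributions are in place

if comm.rank == 0:
    buf = np.empty(comm.size)
    win.Lock(0, MPI.LOCK_EXCLUSIVE)
    win.Get(buf, 0)
    win.Unlock(0)                     # completes the Get
    total = np.array([buf.sum()])
    win.Lock(0, MPI.LOCK_EXCLUSIVE)
    for rank in range(comm.size):     # publish the global sum to every slot
        win.Put(total, 0, target=(rank*MPI.DOUBLE.size, 1, MPI.DOUBLE))
    win.Unlock(0)
comm.Barrier()                        # rank 0 has published the result

res = np.empty(1)
win.Lock(0, MPI.LOCK_SHARED)
win.Get(res, 0, target=(comm.rank*MPI.DOUBLE.size, 1, MPI.DOUBLE))
win.Unlock(0)
assert res[0] == comm.size*(comm.size+1)/2
win.Free()

In the asynchronous case the class skips the barriers and accepts that a rank may read a slightly stale global value, which is what enables asynchronous iterative solvers.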
+ if not self.useLockAll: + self.ReduceWindow.Lock(self.rank, MPI.LOCK_EXCLUSIVE) + self.ReduceWindow.Put(v2, self.rank) + if not self.useLockAll: + self.ReduceWindow.Unlock(self.rank) + self.comm.Barrier() + + def getOverlapLevel(self, num_subdomain_dofs): + for lvl in range(len(self.levels)): + if self.levels[lvl].num_subdomain_dofs == num_subdomain_dofs: + return lvl + raise NotImplementedError() diff --git a/fem/PyNucleus_fem/boundaryLayerCy.pyx b/fem/PyNucleus_fem/boundaryLayerCy.pyx new file mode 100644 index 0000000..2725abb --- /dev/null +++ b/fem/PyNucleus_fem/boundaryLayerCy.pyx @@ -0,0 +1,353 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import numpy as np +from PyNucleus_base.myTypes import INDEX +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base import uninitialized +from . meshCy cimport decode_edge, encode_edge, encode_face, decode_face +from . meshCy cimport sortEdge, sortFace, faceVals + +import mpi4py +mpi4py.rc.initialize = False +from mpi4py import MPI +cimport cython + + + +cdef class boundaryLayer(object): + cdef: + INDEX_t depth + INDEX_t afterRefinements + INDEX_t dim + dict cell_connectivity + list boundary_vertices + set boundary_cells + dict _v2c + BOOL_t v2c_set_up + + """ + Tracks cell connectivity in a boundary layer. + """ + def __init__(self, mesh, depth, afterRefinements, INDEX_t startCell=0): + self.v2c_set_up = False + self.getBoundaryAndConnectivity(mesh, startCell) + self.depth = depth + self.afterRefinements = afterRefinements + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getBoundaryAndConnectivity(self, mesh, INDEX_t startCell=0): + """ + Calculate the connectivity and the boundary cells and edges of the + given cells. 
+ """ + cdef: + INDEX_t[:, ::1] cells = mesh.cells[startCell:, :] + INDEX_t i, c0, c1, c2, c3, j, k, m + INDEX_t[:, ::1] tempEdge = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = tempEdge[0, :] + INDEX_t[::1] e1 = tempEdge[1, :] + INDEX_t[::1] e2 = tempEdge[2, :] + INDEX_t[:, ::1] tempFace = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f0 = tempFace[0, :] + INDEX_t[::1] f1 = tempFace[1, :] + INDEX_t[::1] f2 = tempFace[2, :] + INDEX_t[::1] f3 = tempFace[3, :] + ENCODE_t he + faceVals faceLookup + set boundary_cells + dict lookup + + self.dim = cells.shape[1]-1 + if self.dim == 1: + lookup = {} + self.cell_connectivity = {i: [-1]*2 for i in range(cells.shape[0])} + for i, c in enumerate(cells): + c0, c1 = c + for m, v in enumerate(c): + try: + j, k = lookup.pop(v) + self.cell_connectivity[i][m] = j + self.cell_connectivity[j][k] = i + except KeyError: + lookup[v] = i, m + self.boundary_vertices = list(lookup.keys()) + self.boundary_cells = set([t[0] for t in lookup.values()]) + elif self.dim == 2: + lookup = dict() + self.cell_connectivity = {i: [-1]*3 for i in range(cells.shape[0])} + for i in range(cells.shape[0]): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c1, c2, e1) + sortEdge(c2, c0, e2) + for m in range(3): + he = encode_edge(tempEdge[m, :]) + try: + j, k = lookup.pop(he) + self.cell_connectivity[i][m] = j + self.cell_connectivity[j][k] = i + except KeyError: + lookup[he] = i, m + boundary_cells = set() + for he in lookup: + boundary_cells.add(lookup[he][0]) + self.boundary_cells = boundary_cells + elif self.dim == 3: + faceLookup = faceVals(mesh.num_vertices) + self.cell_connectivity = {i: [] for i in range(cells.shape[0])} + for i in range(cells.shape[0]): + c0, c1, c2, c3 = cells[i, 0], cells[i, 1], cells[i, 2], cells[i, 3] + sortFace(c0, c1, c2, f0) + sortFace(c0, c1, c3, f1) + sortFace(c1, c2, c3, f2) + sortFace(c2, c0, c3, f3) + for m in range(4): + j = faceLookup.enterValue(tempFace[m, :], i) + if i != j: + self.cell_connectivity[i].append(j) + self.cell_connectivity[j].append(i) + boundary_cells = set() + faceLookup.startIter() + while faceLookup.next(f0, &i): + boundary_cells.add(i) + self.boundary_cells = boundary_cells + else: + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getLayer(self, INDEX_t depth, set ofCells=None, BOOL_t returnLayerNo=False, INDEX_t[:, ::1] cells=None): + """ + Returns depth layers of cells that are adjacent to ofCells. 
+ """ + cdef: + list bcells + INDEX_t k, v, i, j, numVerticesPerCell + dict v2c + set layerCells, candidates, bc, workset + bcells = [] + if ofCells is None: + bcells.append(self.boundary_cells) + else: + bcells.append(ofCells) + if depth == 0: + bcells = [] + + assert cells is not None + numVerticesPerCell = cells.shape[1] + v2c = self.vertex2cells(cells) + for k in range(depth-1): + candidates = set() + workset = bcells[k] + for i in workset: + for j in range(numVerticesPerCell): + v = cells[i, j] + candidates.add(v) + workset = set() + for v in candidates: + workset |= set(v2c[v]) + workset -= bcells[k] + if k > 0: + workset -= bcells[k-1] + bcells.append(workset) + if not returnLayerNo: + layerCells = set() + for i in range(len(bcells)): + layerCells |= bcells[i] + return np.array(list(layerCells), dtype=INDEX) + else: + return [np.array(list(bc), dtype=INDEX) for bc in bcells] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def prune(self, depth, pp=None, cells=None): + """ + Remove cells that are to far from the boundary. + """ + cdef: + INDEX_t i, j + INDEX_t[::1] layerCells + dict new_cell_connectivity = {} + layerCells = self.getLayer(depth, pp, cells=cells) + for i in layerCells: + new_cell_connectivity[i] = self.cell_connectivity.pop(i) + new_cell_connectivity[-1] = [-1]*(self.dim+1) + for i in new_cell_connectivity: + for j in range(self.dim+1): + try: + new_cell_connectivity[new_cell_connectivity[i][j]] + except KeyError: + new_cell_connectivity[i][j] = -2 + new_cell_connectivity.pop(-1) + self.cell_connectivity = new_cell_connectivity + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + def refine(self, newMesh): + """ + Refine the boundary layers. + """ + cdef: + INDEX_t i, j, posJ, k, posK, l, posL + dict new_cell_connectivity + set new_bcells + INDEX_t[:, ::1] newCells = newMesh.cells + INDEX_t c0, c1, c2, c3, cellNo, subcellNo, otherSubCellNo + INDEX_t[:, ::1] tempEdge = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = tempEdge[0, :] + INDEX_t[::1] e1 = tempEdge[1, :] + INDEX_t[::1] e2 = tempEdge[2, :] + INDEX_t[:, ::1] tempFace = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f0 = tempFace[0, :] + INDEX_t[::1] f1 = tempFace[1, :] + INDEX_t[::1] f2 = tempFace[2, :] + INDEX_t[::1] f3 = tempFace[3, :] + faceVals faceLookup + set pp + + new_cell_connectivity = {} + new_bcells = set() + if self.dim == 1: + for i in self.cell_connectivity: + new_cell_connectivity[2*i] = [-2]*2 + new_cell_connectivity[2*i+1] = [-2]*2 + j, k = self.cell_connectivity[i][:] + new_cell_connectivity[2*i][1] = 2*i+1 + new_cell_connectivity[2*i+1][0] = 2*i + if j > -1: + posJ = self.cell_connectivity[j].index(i) + new_cell_connectivity[2*i][0] = 2*j+posJ + elif j == -1: + new_cell_connectivity[2*i][0] = -1 + new_bcells.add(2*i) + if k > -1: + posK = self.cell_connectivity[k].index(i) + new_cell_connectivity[2*i+1][1] = 2*k+posK + elif k == -1: + new_cell_connectivity[2*i+1][1] = -1 + new_bcells.add(2*i+1) + elif self.dim == 2: + for i in self.cell_connectivity: + new_cell_connectivity[4*i] = [-2]*3 + new_cell_connectivity[4*i+1] = [-2]*3 + new_cell_connectivity[4*i+2] = [-2]*3 + new_cell_connectivity[4*i+3] = [4*i+1, 4*i+2, 4*i] + new_cell_connectivity[4*i][1] = 4*i+3 + new_cell_connectivity[4*i+1][1] = 4*i+3 + new_cell_connectivity[4*i+2][1] = 4*i+3 + j, k, l = self.cell_connectivity[i][:] + + # is there an adjacent cell? 
+ if j > -1: + # on which edge is this cell adjacent in the other cell? + posJ = self.cell_connectivity[j].index(i) + new_cell_connectivity[4*i+1][2] = 4*j+posJ + posJ = (1+posJ) % 3 + new_cell_connectivity[4*i][0] = 4*j+posJ + elif j == -1: + new_cell_connectivity[4*i+1][2] = -1 + new_cell_connectivity[4*i][0] = -1 + new_bcells.add(4*i) + new_bcells.add(4*i+1) + + # is there an adjacent cell? + if k > -1: + posK = self.cell_connectivity[k].index(i) + new_cell_connectivity[4*i+2][2] = 4*k+posK + posK = (1+posK) % 3 + new_cell_connectivity[4*i+1][0] = 4*k+posK + elif k == -1: + new_cell_connectivity[4*i+2][2] = -1 + new_cell_connectivity[4*i+1][0] = -1 + new_bcells.add(4*i+1) + new_bcells.add(4*i+2) + + # is there an adjacent cell? + if l > -1: + posL = self.cell_connectivity[l].index(i) + new_cell_connectivity[4*i][2] = 4*l+posL + posL = (1+posL) % 3 + new_cell_connectivity[4*i+2][0] = 4*l+posL + elif l == -1: + new_cell_connectivity[4*i][2] = -1 + new_cell_connectivity[4*i+2][0] = -1 + new_bcells.add(4*i+2) + new_bcells.add(4*i) + elif self.dim == 3: + faceLookup = faceVals(newMesh.num_vertices) + for cellNo in self.cell_connectivity: + for subcellNo in range(8*cellNo, 8*cellNo+8): + c0, c1, c2, c3 = newCells[subcellNo, 0], newCells[subcellNo, 1], newCells[subcellNo, 2], newCells[subcellNo, 3] + sortFace(c0, c1, c2, f0) + sortFace(c0, c1, c3, f1) + sortFace(c1, c2, c3, f2) + sortFace(c2, c0, c3, f3) + new_cell_connectivity[subcellNo] = [] + for m in range(4): + otherSubCellNo = faceLookup.enterValue(tempFace[m, :], subcellNo) + if otherSubCellNo != subcellNo: + new_cell_connectivity[subcellNo].append(otherSubCellNo) + new_cell_connectivity[otherSubCellNo].append(subcellNo) + faceLookup.startIter() + while faceLookup.next(f0, &subcellNo): + # cellNo = subcellNo//8 + cellNo = subcellNo>>3 + if cellNo in self.boundary_cells: + new_bcells.add(subcellNo) + new_cell_connectivity[subcellNo].append(-1) + else: + new_cell_connectivity[subcellNo].append(-1) + self.cell_connectivity = new_cell_connectivity + self.boundary_cells = new_bcells + + depth = int(np.ceil(self.depth/self.afterRefinements)) + if self.v2c_set_up: + self.v2c_set_up = False + v2c = self.vertex2cells(newMesh.cells) + + pp = set() + for i in set(newMesh.boundaryVertices): + pp |= set(v2c[i]) + + self.prune(depth, pp, cells=newMesh.cells) + self.afterRefinements -= 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def vertex2cells(self, const INDEX_t[:, ::1] cells): + """ + Return a lookup dict + vertex no -> cell no + """ + cdef: + dict v2c + INDEX_t i, j, k + INDEX_t numVerticesPerCell = cells.shape[1] + if self.v2c_set_up: + return self._v2c + else: + v2c = {} + for i in self.cell_connectivity: + for k in range(numVerticesPerCell): + j = cells[i, k] + try: + v2c[j].append(i) + except KeyError: + v2c[j] = [i] + self._v2c = v2c + self.v2c_set_up = True + return v2c diff --git a/fem/PyNucleus_fem/distributed_operators.pxd b/fem/PyNucleus_fem/distributed_operators.pxd new file mode 100644 index 0000000..1feceb6 --- /dev/null +++ b/fem/PyNucleus_fem/distributed_operators.pxd @@ -0,0 +1,14 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
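vertex2cells above inverts the cell-to-vertex incidence once and caches the result, since the layer sweeps query it repeatedly. A dictionary-based sketch of the same inversion:

def vertex2cells(cells):
    # invert the cellNo -> vertices relation into vertexNo -> [cellNos]
    v2c = {}
    for cellNo, cell in cells.items():
        for v in cell:
            v2c.setdefault(v, []).append(cellNo)
    return v2c

assert vertex2cells({0: (0, 1), 1: (1, 2)}) == {0: [0], 1: [0, 1], 2: [1]}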
# +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.linear_operators cimport LinearOperator, CSR_LinearOperator +from . algebraicOverlaps cimport algebraicOverlapManager + +include "distributed_operators_decl_REAL.pxi" +include "distributed_operators_decl_COMPLEX.pxi" diff --git a/fem/PyNucleus_fem/distributed_operators.pyx b/fem/PyNucleus_fem/distributed_operators.pyx new file mode 100644 index 0000000..2d69153 --- /dev/null +++ b/fem/PyNucleus_fem/distributed_operators.pyx @@ -0,0 +1,18 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX +from PyNucleus_base.blas cimport assign +from PyNucleus_base import uninitialized +cimport cython + + +include "distributed_operators_REAL.pxi" +include "distributed_operators_COMPLEX.pxi" diff --git a/fem/PyNucleus_fem/distributed_operators_decl_{SCALAR}.pxi b/fem/PyNucleus_fem/distributed_operators_decl_{SCALAR}.pxi new file mode 100644 index 0000000..1a2303a --- /dev/null +++ b/fem/PyNucleus_fem/distributed_operators_decl_{SCALAR}.pxi @@ -0,0 +1,30 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, {SCALAR}_t, BOOL_t +from PyNucleus_base.linear_operators cimport {SCALAR_label}LinearOperator, {SCALAR_label}CSR_LinearOperator +from . algebraicOverlaps cimport algebraicOverlapManager + + +cdef class {SCALAR_label}DistributedLinearOperator({SCALAR_label}LinearOperator): + cdef: + {SCALAR_label}LinearOperator A + algebraicOverlapManager overlaps + public BOOL_t asynchronous + public BOOL_t doDistribute + public BOOL_t keepDistributedResult + {SCALAR}_t[::1] tempMemX + public {SCALAR}_t[::1] tempMemY + cdef void allocateTempMemory(self, INDEX_t sizeX, INDEX_t sizeY) + cdef void setTempMemory(self, {SCALAR}_t[::1] tempMemX, {SCALAR}_t[::1] tempMemY) + + +cdef class {SCALAR_label}CSR_DistributedLinearOperator({SCALAR_label}DistributedLinearOperator): + cdef: + {SCALAR_label}CSR_LinearOperator csrA + INDEX_t[::1] overlap_indices diff --git a/fem/PyNucleus_fem/distributed_operators_{SCALAR}.pxi b/fem/PyNucleus_fem/distributed_operators_{SCALAR}.pxi new file mode 100644 index 0000000..e9b5e1d --- /dev/null +++ b/fem/PyNucleus_fem/distributed_operators_{SCALAR}.pxi @@ -0,0 +1,195 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. 
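The templated implementation that follows wires the overlap manager and a local operator together so that, in effect, y = accumulate(A · distribute(x)). A plain-Python sketch of that control flow, with callables standing in for the overlap manager's methods and NumPy buffers for tempMemX/tempMemY (DistributedOperatorSketch is an illustrative stand-in, not the class's actual API):

import numpy as np

class DistributedOperatorSketch:
    def __init__(self, A, distribute, accumulate,
                 do_distribute=False, keep_distributed_result=False):
        self.A = A
        self.distribute = distribute
        self.accumulate = accumulate
        self.do_distribute = do_distribute
        self.keep_distributed_result = keep_distributed_result
        self.tempX = np.empty(A.shape[1]) if do_distribute else None
        self.tempY = np.empty(A.shape[0]) if keep_distributed_result else None

    def matvec(self, x, y):
        # optionally turn an accumulated input into a distributed one
        if self.do_distribute:
            z = self.distribute(x, self.tempX)
        else:
            z = x
        # local subdomain matvec, kept in a separate buffer if requested
        w = self.tempY if self.keep_distributed_result else y
        w[:] = self.A @ z
        if self.keep_distributed_result:
            y[:] = w
        self.accumulate(y)  # sum subdomain contributions on shared DoFs
        return y

op = DistributedOperatorSketch(np.eye(3), None, lambda v: v)
y = np.empty(3)
assert np.allclose(op.matvec(np.arange(3.), y), [0., 1., 2.])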
# +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +cdef class {SCALAR_label}DistributedLinearOperator({SCALAR_label}LinearOperator): + def __init__(self, + {SCALAR_label}LinearOperator A, + algebraicOverlapManager overlaps, + BOOL_t doDistribute=False, + BOOL_t keepDistributedResult=False): + super({SCALAR_label}DistributedLinearOperator, self).__init__(A.num_rows, A.num_columns) + self.A = A + self.overlaps = overlaps + self.doDistribute = doDistribute + self.keepDistributedResult = keepDistributedResult + self.allocateTempMemory(A.shape[0], A.shape[1]) + self.asynchronous = False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void allocateTempMemory(self, INDEX_t sizeX, INDEX_t sizeY): + if self.doDistribute: + self.tempMemX = uninitialized((sizeX), dtype=REAL) + if self.keepDistributedResult: + self.tempMemY = uninitialized((sizeY), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setTempMemory(self, {SCALAR}_t[::1] tempMemX, {SCALAR}_t[::1] tempMemY): + if self.doDistribute: + self.tempMemX = tempMemX + if self.keepDistributedResult: + self.tempMemY = tempMemY + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + cdef: + {SCALAR}_t[::1] z, w + if self.doDistribute: + z = self.tempMemX + self.overlaps.distribute{SCALAR_label}(x, z) + else: + z = x + if self.keepDistributedResult: + w = self.tempMemY + else: + w = y + self.A.matvec(z, w) + if self.keepDistributedResult: + assign(y, w) + self.overlaps.accumulate{SCALAR_label}(y, return_vec=None, asynchronous=self.asynchronous) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] resAcc, + BOOL_t simpleResidual=False): + cdef: + {SCALAR}_t[::1] z, w + if self.doDistribute: + z = self.tempMemX + self.overlaps.distribute{SCALAR_label}(x, z) + else: + z = x + if self.keepDistributedResult: + w = self.tempMemY + else: + w = resAcc + self.A.residual(z, rhs, w, simpleResidual=simpleResidual) + if self.keepDistributedResult: + assign(resAcc, w) + self.overlaps.accumulate{SCALAR_label}(resAcc, return_vec=None, asynchronous=self.asynchronous) + + +cdef class {SCALAR_label}CSR_DistributedLinearOperator({SCALAR_label}DistributedLinearOperator): + def __init__(self, + {SCALAR_label}CSR_LinearOperator A, + algebraicOverlapManager overlaps, + BOOL_t doDistribute=False, + BOOL_t keepDistributedResult=False): + super({SCALAR_label}CSR_DistributedLinearOperator, self).__init__(A, overlaps, doDistribute, keepDistributedResult) + self.csrA = A + self.overlap_indices = self.overlaps.get_shared_dofs() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + cdef: + INDEX_t i, jj, j, k + {SCALAR}_t temp + {SCALAR}_t[::1] z, w + if self.doDistribute: + if self.tempMemory is None: + self.allocateTempMemory(x.shape[0], y.shape[0]) + z = self.tempMem + self.overlaps.distribute{SCALAR_label}(x, z) + else: + z = x + + if self.keepDistributedResult: + if self.tempMemY is None: + self.allocateTempMemory(x.shape[0], y.shape[0]) + w = self.tempMemY + 
else: + w = y + + for k in range(self.overlap_indices.shape[0]): + i = self.overlap_indices[k] + temp = 0.0 + for jj in range(self.csrA.indptr[i], self.csrA.indptr[i+1]): + j = self.csrA.indices[jj] + temp += self.csrA.data[jj]*z[j] + w[i] = temp + self.overlaps.send{SCALAR_label}(w, asynchronous=self.asynchronous) + + k = 0 + for i in range(self.num_rows): + if self.overlap_indices[k] == i: + k += 1 + continue + temp = 0.0 + for jj in range(self.csrA.indptr[i], self.csrA.indptr[i+1]): + j = self.csrA.indices[jj] + temp += self.csrA.data[jj]*z[j] + w[i] = temp + + if self.keepDistributedResult: + assign(y, w) + self.overlaps.receive{SCALAR_label}(y, asynchronous=self.asynchronous) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void residual(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] rhs, + {SCALAR}_t[::1] resAcc, + BOOL_t simpleResidual=False): + cdef: + INDEX_t i, jj, j, k + {SCALAR}_t temp + {SCALAR}_t[::1] z, w + + if self.doDistribute: + z = self.tempMemX + self.overlaps.distribute{SCALAR_label}(x, z) + else: + z = x + + if self.keepDistributedResult: + w = self.tempMemY + else: + w = resAcc + + for k in range(self.overlap_indices.shape[0]): + i = self.overlap_indices[k] + temp = rhs[i] + for jj in range(self.csrA.indptr[i], self.csrA.indptr[i+1]): + j = self.csrA.indices[jj] + temp -= self.csrA.data[jj]*z[j] + w[i] = temp + self.overlaps.send{SCALAR_label}(w, asynchronous=self.asynchronous) + + k = 0 + for i in range(self.num_rows): + if self.overlap_indices[k] == i: + k += 1 + continue + temp = rhs[i] + for jj in range(self.csrA.indptr[i], self.csrA.indptr[i+1]): + j = self.csrA.indices[jj] + temp -= self.csrA.data[jj]*z[j] + w[i] = temp + if self.keepDistributedResult: + assign(resAcc, w) + self.overlaps.receive{SCALAR_label}(resAcc, asynchronous=self.asynchronous) diff --git a/fem/PyNucleus_fem/femCy.pxd b/fem/PyNucleus_fem/femCy.pxd new file mode 100644 index 0000000..092fa1d --- /dev/null +++ b/fem/PyNucleus_fem/femCy.pxd @@ -0,0 +1,28 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport REAL_t, INDEX_t, BOOL_t +from numpy cimport uint8_t + + +ctypedef REAL_t(*volume_t)(REAL_t[:, ::1]) + + +cdef class local_matrix_t: + cdef: + INDEX_t dim + BOOL_t needsCellInfo + INDEX_t[::1] cell + BOOL_t additiveAssembly + + cdef void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib) + + cdef void setCell(self, + INDEX_t[::1] cell) diff --git a/fem/PyNucleus_fem/femCy.pyx b/fem/PyNucleus_fem/femCy.pyx new file mode 100644 index 0000000..67b1cf5 --- /dev/null +++ b/fem/PyNucleus_fem/femCy.pyx @@ -0,0 +1,2383 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
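The CSR specialization above overlaps communication with computation: the rows listed in overlap_indices are evaluated first and sent on their way, the interior rows are evaluated while the messages are in flight, and the remote contributions are folded in at the end. A NumPy/SciPy sketch of that row ordering, with no-op callables standing in for the overlap manager's send/receive:

import numpy as np
import scipy.sparse as sp

def overlapped_matvec(A, x, overlap_rows, send, receive):
    # A: scipy.sparse.csr_matrix, overlap_rows: rows shared with other subdomains
    y = np.empty(A.shape[0])
    for i in overlap_rows:                       # overlap rows first
        y[i] = A.data[A.indptr[i]:A.indptr[i+1]] @ x[A.indices[A.indptr[i]:A.indptr[i+1]]]
    send(y)                                      # start the exchange early
    mask = np.ones(A.shape[0], dtype=bool)
    mask[overlap_rows] = False
    for i in np.flatnonzero(mask):               # interior rows meanwhile
        y[i] = A.data[A.indptr[i]:A.indptr[i+1]] @ x[A.indices[A.indptr[i]:A.indptr[i+1]]]
    receive(y)                                   # accumulate remote contributions
    return y

A = sp.random(6, 6, density=0.5, format='csr', random_state=0)
x = np.arange(6.)
y = overlapped_matvec(A, x, np.array([0, 3]), send=lambda v: None, receive=lambda v: None)
assert np.allclose(y, A @ x)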
# +################################################################################### + + +from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan2, pow) +import numpy as np +cimport numpy as np +cimport cython + +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t +from PyNucleus_base import uninitialized +from PyNucleus_base.ip_norm cimport mydot, vector_t, complex_vector_t +from . meshCy cimport (meshBase, + vertices_t, cells_t, + vectorProduct, + volume1D, volume1Dnew, + volume1D_in_2D, + volume2Dnew, + volume3D, volume3Dnew, + volume2D_in_3D, + sortEdge, sortFace, + decode_edge, + encode_edge) +from . mesh import NO_BOUNDARY +from PyNucleus_base.linear_operators cimport (LinearOperator, + CSR_LinearOperator, + SSS_LinearOperator) +from PyNucleus_base.sparsityPattern cimport sparsityPattern +from . DoFMaps cimport (P0_DoFMap, P1_DoFMap, P2_DoFMap, P3_DoFMap, + + DoFMap, + vectorShapeFunction, + fe_vector, complex_fe_vector) +from . quadrature cimport simplexQuadratureRule, Gauss1D, Gauss2D, Gauss3D, simplexXiaoGimbutas +from . functions cimport function, complexFunction, vectorFunction +from . simplexMapper cimport simplexMapper + + + +cdef class local_matrix_t: + def __init__(self, INDEX_t dim): + self.dim = dim + self.needsCellInfo = False + self.cell = uninitialized((dim+1), dtype=INDEX) + self.additiveAssembly = True + + def __call__(self, + REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + return self.eval(simplex, contrib) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, + REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + pass + + @cython.wraparound(False) + @cython.initializedcheck(False) + @cython.boundscheck(False) + cdef void setCell(self, + INDEX_t[::1] cell): + cdef: + INDEX_t i + for i in range(self.dim+1): + self.cell[i] = cell[i] + + +cdef inline REAL_t simplexVolume1D(const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] temp): + # temp needs to bed of size 0x1 + # Calculate volume + return abs(simplex[1, 0]-simplex[0, 0]) + + +cdef inline REAL_t simplexVolume2D(const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] temp): + # temp needs to bed of size 2x2 + cdef: + INDEX_t j + + # Calculate volume + for j in range(2): + temp[0, j] = simplex[1, j]-simplex[0, j] + temp[1, j] = simplex[2, j]-simplex[0, j] + return volume2Dnew(temp) + + +cdef inline REAL_t simplexVolume1Din2D(const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] temp): + # temp needs to bed of size 1x2 + # Calculate volume + temp[0, 0] = simplex[1, 0]-simplex[0, 0] + temp[0, 1] = simplex[1, 1]-simplex[0, 1] + return volume1D_in_2D(temp) + + +cdef inline REAL_t simplexVolume3D(const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] temp): + # temp needs to be 4x3 + cdef: + INDEX_t j + + # Calculate volume + for j in range(3): + temp[0, j] = simplex[1, j]-simplex[0, j] # v01 + temp[1, j] = simplex[2, j]-simplex[0, j] # v02 + temp[2, j] = simplex[3, j]-simplex[0, j] # v03 + return volume3Dnew(temp[:3, :], temp[3, :]) + + +cdef inline REAL_t simplexVolume2Din3D(const REAL_t[:, ::1] simplex, + REAL_t[:, ::1] temp): + # temp needs to bed of size 2x3 + # Calculate volume + for j in range(3): + temp[0, j] = simplex[1, j]-simplex[0, j] + temp[1, j] = simplex[2, j]-simplex[0, j] + return volume2D_in_3D(temp[0, :], temp[1, :]) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef inline void coeffProducts1D(const REAL_t[:, ::1] simplex, + vectorFunction 
coeff, + REAL_t[::1] innerProducts, + REAL_t[:, ::1] temp): + # innerProducts needs to bed of size 2 + # temp needs to bed of size 2x1 + cdef: + INDEX_t i + REAL_t fac = 0.5 + temp[1, 0] = 0. + for i in range(2): + temp[1, 0] += simplex[i, 0] + temp[1, 0] *= fac + coeff.eval(temp[1, :], temp[0, :]) + + # inner product of barycentric gradients + innerProducts[0] = -1.*temp[0, 0] + innerProducts[1] = 1.*temp[0, 0] + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef inline REAL_t simplexVolumeAndProducts2D(const REAL_t[:, ::1] simplex, + REAL_t[::1] innerProducts, + REAL_t[:, ::1] temp): + # innerProducts needs to bed of size 6 + # temp needs to bed of size 3x2 + cdef: + REAL_t vol + INDEX_t j + + # Calculate volume + for j in range(2): + temp[0, j] = simplex[2, j]-simplex[1, j] + temp[1, j] = simplex[0, j]-simplex[2, j] + temp[2, j] = simplex[1, j]-simplex[0, j] + vol = volume2Dnew(temp[1:, :]) + # inner product of barycentric gradients + innerProducts[0] = mydot(temp[0, :], temp[0, :]) + innerProducts[1] = mydot(temp[0, :], temp[1, :]) + innerProducts[2] = mydot(temp[0, :], temp[2, :]) + innerProducts[3] = mydot(temp[1, :], temp[1, :]) + innerProducts[4] = mydot(temp[1, :], temp[2, :]) + innerProducts[5] = mydot(temp[2, :], temp[2, :]) + return vol + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef inline REAL_t simplexVolumeGradientsProducts2D(const REAL_t[:, ::1] simplex, + REAL_t[::1] innerProducts, + REAL_t[:, ::1] gradients): + # innerProducts needs to bed of size 6 + # temp needs to bed of size 3x2 + cdef: + REAL_t vol, f = 1 + INDEX_t j + + # Calculate volume + for j in range(2): + gradients[0, 1-j] = f*(simplex[2, j]-simplex[1, j]) + gradients[1, 1-j] = f*(simplex[0, j]-simplex[2, j]) + gradients[2, 1-j] = f*(simplex[1, j]-simplex[0, j]) + f = -1 + vol = volume2Dnew(gradients[1:, :]) + # inner product of barycentric gradients + innerProducts[0] = mydot(gradients[0, :], gradients[0, :]) + innerProducts[1] = mydot(gradients[0, :], gradients[1, :]) + innerProducts[2] = mydot(gradients[0, :], gradients[2, :]) + innerProducts[3] = mydot(gradients[1, :], gradients[1, :]) + innerProducts[4] = mydot(gradients[1, :], gradients[2, :]) + innerProducts[5] = mydot(gradients[2, :], gradients[2, :]) + return vol + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef inline REAL_t mydot_rot2D(const REAL_t[::1] a, const REAL_t[::1] b): + return -a[0]*b[1]+a[1]*b[0] + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef inline void coeffProducts2D(const REAL_t[:, ::1] simplex, + vectorFunction coeff, + REAL_t[::1] innerProducts, + REAL_t[:, ::1] temp): + # innerProducts needs to bed of size 3 + # temp needs to bed of size 4x2 + cdef: + INDEX_t i, j + REAL_t fac = 1./3. + for j in range(2): + temp[0, j] = 0. 
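The inner products of the (rotated) edge vectors computed in simplexVolumeAndProducts2D above are exactly what the P1 stiffness kernels need: with e_i the edge opposite vertex i, the barycentric gradients satisfy grad lambda_i = rot(e_i)/(2 vol), so the local stiffness entries are A_ij = vol * grad_i . grad_j = (e_i . e_j)/(4 vol); the 1/(4 vol) scaling is assumed to be applied in the generated stiffness kernels, which are not part of this hunk. A NumPy check on the unit triangle:

import numpy as np

simplex = np.array([[0., 0.], [1., 0.], [0., 1.]])
e = np.array([simplex[2] - simplex[1],   # edge opposite vertex 0
              simplex[0] - simplex[2],   # edge opposite vertex 1
              simplex[1] - simplex[0]])  # edge opposite vertex 2
vol = 0.5 * abs(e[1, 0] * e[2, 1] - e[1, 1] * e[2, 0])
A = e @ e.T / (4. * vol)                 # local P1 stiffness matrix
assert np.allclose(A.sum(axis=0), 0.)    # rows sum to zero since sum_i e_i = 0
assert np.allclose(A, [[1., -.5, -.5], [-.5, .5, 0.], [-.5, 0., .5]])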
+ for i in range(3): + for j in range(2): + temp[0, j] += simplex[i, j] + for j in range(2): + temp[0, j] *= fac + coeff.eval(temp[0, :], temp[3, :]) + + # Calculate volume + for j in range(2): + temp[0, j] = simplex[2, j]-simplex[1, j] + temp[1, j] = simplex[0, j]-simplex[2, j] + temp[2, j] = simplex[1, j]-simplex[0, j] + # inner product of coeffVec with barycentric gradients + innerProducts[0] = mydot_rot2D(temp[3, :], temp[0, :]) + innerProducts[1] = mydot_rot2D(temp[3, :], temp[1, :]) + innerProducts[2] = mydot_rot2D(temp[3, :], temp[2, :]) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef inline REAL_t simplexVolumeAndProducts3D(const REAL_t[:, ::1] simplex, + REAL_t[::1] innerProducts, + REAL_t[:, ::1] temp): + # innerProducts needs to bed of size 10 + # temp needs to bed of size 10x3 + cdef: + REAL_t vol + INDEX_t j + + # Calculate volume + for j in range(3): + temp[0, j] = simplex[1, j]-simplex[0, j] # v01 + temp[1, j] = simplex[2, j]-simplex[0, j] # v02 + temp[2, j] = simplex[3, j]-simplex[0, j] # v03 + temp[3, j] = simplex[2, j]-simplex[1, j] # v12 + temp[4, j] = simplex[3, j]-simplex[1, j] # v13 + temp[5, j] = simplex[2, j]-simplex[3, j] # v32 + vol = volume3Dnew(temp[:3, :], temp[6, :]) + + # v12 x v13 + vectorProduct(temp[3, :], temp[4, :], temp[6, :]) + # v02 x v32 + vectorProduct(temp[1, :], temp[5, :], temp[7, :]) + # v01 x v03 + vectorProduct(temp[0, :], temp[2, :], temp[8, :]) + # v12 x v02 + vectorProduct(temp[3, :], temp[1, :], temp[9, :]) + # inner product of barycentric gradients + innerProducts[0] = mydot(temp[6, :], temp[6, :]) + innerProducts[1] = mydot(temp[6, :], temp[7, :]) + innerProducts[2] = mydot(temp[6, :], temp[8, :]) + innerProducts[3] = mydot(temp[6, :], temp[9, :]) + innerProducts[4] = mydot(temp[7, :], temp[7, :]) + innerProducts[5] = mydot(temp[7, :], temp[8, :]) + innerProducts[6] = mydot(temp[7, :], temp[9, :]) + innerProducts[7] = mydot(temp[8, :], temp[8, :]) + innerProducts[8] = mydot(temp[8, :], temp[9, :]) + innerProducts[9] = mydot(temp[9, :], temp[9, :]) + return vol + + +cdef class mass_1d(local_matrix_t): + cdef: + REAL_t[:, ::1] temp + + def __init__(self): + self.temp = uninitialized((0, 1), dtype=REAL) + local_matrix_t.__init__(self, 1) + + +cdef class drift_1d(local_matrix_t): + cdef: + REAL_t[:, ::1] temp + REAL_t[::1] innerProducts + + def __init__(self): + self.temp = uninitialized((2, 1), dtype=REAL) + self.innerProducts = uninitialized((2), dtype=REAL) + local_matrix_t.__init__(self, 1) + + +cdef class drift_1d_P1(drift_1d): + cdef: + vectorFunction coeff + + def __init__(self, vectorFunction coeff): + drift_1d.__init__(self) + self.coeff = coeff + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t fac = 0.5 + + coeffProducts1D(simplex, self.coeff, self.innerProducts, self.temp) + + contrib[0] = self.innerProducts[0]*fac + contrib[1] = self.innerProducts[1]*fac + contrib[2] = self.innerProducts[0]*fac + contrib[3] = self.innerProducts[1]*fac + + +cdef class stiffness_1d_sym(local_matrix_t): + cdef: + REAL_t[:, ::1] temp + REAL_t[::1] innerProducts + + def __init__(self): + self.innerProducts = uninitialized((0), dtype=REAL) + self.temp = uninitialized((0, 1), dtype=REAL) + local_matrix_t.__init__(self, 1) + + +cdef class mass_2d(local_matrix_t): + cdef: + REAL_t[:, ::1] temp + REAL_t[::1] 
+cdef class mass_2d(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+        REAL_t[::1] innerProducts
+
+    def __init__(self):
+        self.temp = uninitialized((3, 2), dtype=REAL)
+        self.innerProducts = uninitialized((6), dtype=REAL)
+        local_matrix_t.__init__(self, 2)
+
+
+cdef class drift_2d(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+        REAL_t[::1] innerProducts
+
+    def __init__(self):
+        self.temp = uninitialized((4, 2), dtype=REAL)
+        self.innerProducts = uninitialized((3), dtype=REAL)
+        local_matrix_t.__init__(self, 2)
+
+
+cdef class drift_2d_P1(drift_2d):
+    cdef:
+        vectorFunction coeff
+
+    def __init__(self, vectorFunction coeff):
+        drift_2d.__init__(self)
+        self.coeff = coeff
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    @cython.wraparound(False)
+    cdef inline void eval(self,
+                          const REAL_t[:, ::1] simplex,
+                          REAL_t[::1] contrib):
+        cdef:
+            REAL_t fac = 1./6.
+
+        coeffProducts2D(simplex, self.coeff, self.innerProducts, self.temp)
+
+        contrib[0] = self.innerProducts[0]*fac
+        contrib[1] = self.innerProducts[1]*fac
+        contrib[2] = self.innerProducts[2]*fac
+        contrib[3] = self.innerProducts[0]*fac
+        contrib[4] = self.innerProducts[1]*fac
+        contrib[5] = self.innerProducts[2]*fac
+        contrib[6] = self.innerProducts[0]*fac
+        contrib[7] = self.innerProducts[1]*fac
+        contrib[8] = self.innerProducts[2]*fac
+
+
+cdef class stiffness_2d_sym(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+        REAL_t[::1] innerProducts
+
+    def __init__(self):
+        self.innerProducts = uninitialized((6), dtype=REAL)
+        self.temp = uninitialized((3, 2), dtype=REAL)
+        local_matrix_t.__init__(self, 2)
+
+
+cdef class curlcurl_2d_sym(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+        REAL_t[::1] innerProducts
+
+    def __init__(self):
+        self.innerProducts = uninitialized((6), dtype=REAL)
+        self.temp = uninitialized((3, 2), dtype=REAL)
+        local_matrix_t.__init__(self, 2)
+
+
+cdef class mass_3d(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+
+    def __init__(self):
+        self.temp = uninitialized((4, 3), dtype=REAL)
+        local_matrix_t.__init__(self, 3)
+
+
+cdef class stiffness_3d_sym(local_matrix_t):
+    cdef:
+        REAL_t[:, ::1] temp
+        REAL_t[::1] innerProducts
+
+    def __init__(self):
+        self.innerProducts = uninitialized((10), dtype=REAL)
+        self.temp = uninitialized((10, 3), dtype=REAL)
+        local_matrix_t.__init__(self, 3)
+
+
+# cdef class generic_matrix(local_matrix_t):
+#     cdef:
+#         REAL_t[::1] entries
+
+#     def __init__(self, REAL_t[::1] entries):
+#         self.entries = entries
+
+#     @cython.initializedcheck(False)
+#     @cython.boundscheck(False)
+#     @cython.wraparound(False)
+#     cdef inline void eval(self,
+#                           REAL_t[:, ::1] local_vertices,
+#                           REAL_t[::1] contrib,
+#                           REAL_t[:, ::1] span):
+#         cdef:
+#             REAL_t vol
+#             INDEX_t k, j
+
+#         # Calculate volume
+#         for k in range(local_vertices.shape[0]-1):
+#             for j in range(local_vertices.shape[1]):
+#                 span[k, j] = local_vertices[k+1, j]-local_vertices[0, j]
+#         # TODO: Fix this
+#         vol = volume2Dnew(span)
+
+#         for k in range(self.entries.shape[0]):
+#             contrib[k] = vol*self.entries[k]
+
+
+######################################################################
+# Local mass matrices for submanifolds in 1d, 2d
+
+cdef class mass_0d_in_1d_sym_P1(mass_1d):
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    @cython.wraparound(False)
+    cdef inline void eval(self,
+                          const REAL_t[:, ::1] simplex,
+                          REAL_t[::1] contrib):
+        contrib[0] = 1.0
+
+
+cdef class mass_1d_in_2d_sym_P1(mass_2d):
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    @cython.wraparound(False)
+    cdef inline void eval(self,
+ const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.0/6.0 + + vol *= simplexVolume1Din2D(simplex, self.temp) + + contrib[0] = contrib[2] = 2.0*vol + contrib[1] = vol + + +cdef class mass_2d_in_3d_sym_P1(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0833333333333333 + + vol *= simplexVolume2Din3D(simplex, self.temp) + + contrib[0] = 2*vol + contrib[1] = vol + contrib[2] = vol + contrib[3] = 2*vol + contrib[4] = vol + contrib[5] = 2*vol + + + +###################################################################### +# Anisotropic local mass matrices in 1d, 2d, 3d + +cdef class mass_quadrature_matrix(local_matrix_t): + cdef: + function diffusivity + simplexQuadratureRule qr + REAL_t[:, ::1] PHI + REAL_t[::1] funVals + REAL_t[:, ::1] temp + + def __init__(self, function diffusivity, DoFMap DoFMap, simplexQuadratureRule qr): + cdef: + INDEX_t I, k + self.diffusivity = diffusivity + self.qr = qr + + # evaluate local shape functions on quadrature nodes + self.PHI = uninitialized((DoFMap.dofs_per_element, qr.num_nodes), dtype=REAL) + for I in range(DoFMap.dofs_per_element): + for k in range(qr.num_nodes): + self.PHI[I, k] = DoFMap.localShapeFunctions[I](np.ascontiguousarray(qr.nodes[:, k])) + + self.funVals = uninitialized((qr.num_nodes), dtype=REAL) + self.temp = uninitialized((10, DoFMap.dim), dtype=REAL) + + +cdef class stiffness_quadrature_matrix(mass_quadrature_matrix): + cdef: + REAL_t[::1] innerProducts + + def __init__(self, function diffusivity, simplexQuadratureRule qr): + from . DoFMaps import P1_DoFMap + from . mesh import meshNd + fakeMesh = meshNd(uninitialized((0, self.dim), dtype=REAL), + uninitialized((0, self.dim+1), dtype=INDEX)) + dm = P1_DoFMap(fakeMesh) + super(stiffness_quadrature_matrix, self).__init__(diffusivity, dm, qr) + self.innerProducts = uninitialized((((self.dim+1)*(self.dim+2))//2), dtype=REAL) + + +cdef class mass_1d_sym_scalar_anisotropic(mass_quadrature_matrix): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol + INDEX_t p, I, J, k + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + vol = simplexVolume1D(simplex, self.temp) + + p = 0 + for I in range(self.PHI.shape[0]): + for J in range(I, self.PHI.shape[0]): + contrib[p] = 0. + for k in range(self.qr.num_nodes): + contrib[p] += vol * self.qr.weights[k] * self.funVals[k] * self.PHI[I, k] * self.PHI[J, k] + p += 1 + + +cdef class mass_2d_sym_scalar_anisotropic(mass_quadrature_matrix): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol + INDEX_t p, I, J, k + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + vol = simplexVolume2D(simplex, self.temp) + + p = 0 + for I in range(self.PHI.shape[0]): + for J in range(I, self.PHI.shape[0]): + contrib[p] = 0. 
+ for k in range(self.qr.num_nodes): + contrib[p] += vol * self.qr.weights[k] * self.funVals[k] * self.PHI[I, k] * self.PHI[J, k] + p += 1 + + +cdef class mass_3d_sym_scalar_anisotropic(mass_quadrature_matrix): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol + INDEX_t p, I, J, k + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + vol = simplexVolume3D(simplex, self.temp) + + p = 0 + for I in range(self.PHI.shape[0]): + for J in range(I, self.PHI.shape[0]): + contrib[p] = 0. + for k in range(self.qr.num_nodes): + contrib[p] += vol * self.qr.weights[k] * self.funVals[k] * self.PHI[I, k] * self.PHI[J, k] + p += 1 + + +###################################################################### +# Local stiffness matrices in 1d, 2d, 3d + +cdef class stiffness_1d_in_2d_sym_P1(stiffness_2d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.0/6.0 + + vol /= simplexVolume1Din2D(simplex, self.temp) + + contrib[0] = contrib[2] = vol + contrib[1] = -vol + + +cdef class stiffness_2d_sym_anisotropic_P1(stiffness_2d_sym): + cdef: + function diffusivity, diff00, diff01, diff11 + REAL_t[::1] mean, temp2 + public REAL_t[:, ::1] K + BOOL_t diffTensor + + def __init__(self, diffusivity): + super(stiffness_2d_sym_anisotropic_P1, self).__init__() + if isinstance(diffusivity, function): + self.diffusivity = diffusivity + self.diffTensor = False + elif len(diffusivity) == 3: + self.diff00, self.diff01, self.diff11 = diffusivity[0], diffusivity[1], diffusivity[2] + self.temp2 = uninitialized((2), dtype=REAL) + self.K = uninitialized((2, 2), dtype=REAL) + self.diffTensor = True + else: + raise NotImplementedError() + self.mean = uninitialized((2), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.25 + INDEX_t k, j, p + + # Calculate gradient matrix + for j in range(2): + self.mean[j] = (simplex[0, j] + + simplex[1, j] + + simplex[2, j])/3.0 + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + + if self.diffTensor: + # need to take into account rotation matrix, that's why + # the entries are in a weird order + self.K[0, 0] = self.diff11(self.mean) + self.K[0, 1] = self.K[1, 0] = -self.diff01(self.mean) + self.K[1, 1] = self.diff00(self.mean) + + p = 0 + for j in range(3): + matvec(self.K, self.temp[j, :], self.temp2) + for k in range(j, 3): + contrib[p] = mydot(self.temp2, self.temp[k, :])*vol + p += 1 + else: + vol *= self.diffusivity(self.mean) + + p = 0 + for j in range(3): + for k in range(j, 3): + contrib[p] = self.innerProducts[p]*vol + p += 1 + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void matvec(const REAL_t[:, ::1] A, const REAL_t[::1] x, REAL_t[::1] y): + cdef INDEX_t i, j + for i in range(A.shape[0]): + y[i] = 0. 
+        for j in range(A.shape[1]):
+            y[i] += A[i, j]*x[j]
+
+
+cdef class stiffness_2d_sym_anisotropic2_P1(stiffness_2d_sym):
+    cdef:
+        REAL_t alpha, beta, theta
+        REAL_t[:, ::1] diffusivity
+        REAL_t[::1] temp2
+
+    def __init__(self, REAL_t alpha, REAL_t beta, REAL_t theta):
+        self.alpha = alpha
+        self.beta = beta
+        self.theta = theta
+        # TODO:
+        # need to take into account rotation matrix, that's why
+        # the entries should be in a weird order, see above
+        Q = np.array([[cos(theta), -sin(theta)],
+                      [sin(theta), cos(theta)]],
+                     dtype=REAL)
+        D = np.array([[alpha, 0.],
+                      [0., beta]],
+                     dtype=REAL)
+        self.diffusivity = np.dot(Q, np.dot(D, Q.T))
+        self.temp2 = uninitialized((2), dtype=REAL)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    @cython.wraparound(False)
+    cdef inline void eval(self,
+                          const REAL_t[:, ::1] simplex,
+                          REAL_t[::1] contrib):
+        # temp needs to be of size 3x2
+        cdef:
+            REAL_t vol = 0.25
+            INDEX_t k, j, p
+
+        # Calculate gradient matrix
+        vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp)
+
+        p = 0
+        for j in range(3):
+            matvec(self.diffusivity, self.temp[j, :], self.temp2)
+            for k in range(j, 3):
+                contrib[p] = mydot(self.temp2, self.temp[k, :])*vol
+                p += 1
+
+
+cdef class mass_1d_in_2d_sym_P2(mass_2d):
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    @cython.wraparound(False)
+    cdef inline void eval(self,
+                          const REAL_t[:, ::1] simplex,
+                          REAL_t[::1] contrib):
+        # needs no temp memory beyond what mass_2d allocates
+        cdef:
+            REAL_t vol = 1./30.
+        vol *= simplexVolume1Din2D(simplex, self.temp)
+
+        contrib[0] = contrib[3] = 4.0*vol
+        contrib[1] = -vol
+        contrib[2] = contrib[4] = 2.0*vol
+        contrib[5] = 16.0*vol
+
+
+include "mass_1D_P0.pxi"
+include "mass_2D_P0.pxi"
+include "mass_3D_P0.pxi"
+
+include "mass_1D_P0_P1.pxi"
+include "mass_2D_P0_P1.pxi"
+include "mass_3D_P0_P1.pxi"
+
+include "mass_1D_P1.pxi"
+include "mass_2D_P1.pxi"
+include "mass_3D_P1.pxi"
+include "stiffness_1D_P1.pxi"
+include "stiffness_2D_P1.pxi"
+include "stiffness_3D_P1.pxi"
+include "scalar_coefficient_stiffness_1D_P1.pxi"
+include "scalar_coefficient_stiffness_2D_P1.pxi"
+include "scalar_coefficient_stiffness_3D_P1.pxi"
+
+include "mass_1D_P2.pxi"
+include "mass_2D_P2.pxi"
+include "mass_3D_P2.pxi"
+include "stiffness_1D_P2.pxi"
+include "stiffness_2D_P2.pxi"
+include "stiffness_3D_P2.pxi"
+include "scalar_coefficient_stiffness_1D_P2.pxi"
+include "scalar_coefficient_stiffness_2D_P2.pxi"
+include "scalar_coefficient_stiffness_3D_P2.pxi"
+
+include "mass_1D_P3.pxi"
+include "mass_2D_P3.pxi"
+include "mass_3D_P3.pxi"
+include "stiffness_1D_P3.pxi"
+include "stiffness_2D_P3.pxi"
+include "stiffness_3D_P3.pxi"
+
+
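+# Illustrative usage sketch for the assemble* drivers below; `simpleSquare`
+# and the exact import paths are assumptions about the surrounding package,
+# not taken from this file:
+#
+#     from PyNucleus_fem import simpleSquare
+#     from PyNucleus_fem.DoFMaps import P1_DoFMap
+#     mesh = simpleSquare()        # hypothetical coarse 2d mesh
+#     dm = P1_DoFMap(mesh)
+#     M = assembleMass(dm)         # CSR mass matrix
+#     K = assembleStiffness(dm)    # CSR stiffness matrix
+#
+# When boundary_data is passed, a (matrix, rhs_contribution) pair is
+# returned instead, see assembleMatrix.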
+def assembleMass(DoFMap dm,
+                 vector_t boundary_data=None,
+                 vector_t rhs_contribution=None,
+                 LinearOperator A=None,
+                 INDEX_t start_idx=-1,
+                 INDEX_t end_idx=-1,
+                 BOOL_t sss_format=False,
+                 BOOL_t reorder=False,
+                 INDEX_t[::1] cellIndices=None):
+    cdef:
+        INDEX_t dim = dm.mesh.dim
+        local_matrix_t local_matrix
+    if isinstance(dm, P0_DoFMap):
+        if dim == 1:
+            local_matrix = mass_1d_sym_P0()
+        elif dim == 2:
+            local_matrix = mass_2d_sym_P0()
+        elif dim == 3:
+            local_matrix = mass_3d_sym_P0()
+        else:
+            raise NotImplementedError()
+    elif isinstance(dm, P1_DoFMap):
+        if dim == 1:
+            local_matrix = mass_1d_sym_P1()
+        elif dim == 2:
+            local_matrix = mass_2d_sym_P1()
+        elif dim == 3:
+            local_matrix = mass_3d_sym_P1()
+        else:
+            raise NotImplementedError()
+    elif isinstance(dm, P2_DoFMap):
+        if dim == 1:
+            local_matrix = mass_1d_sym_P2()
+        elif dim == 2:
+            local_matrix = mass_2d_sym_P2()
+        elif dim == 3:
+            local_matrix = mass_3d_sym_P2()
+        else:
+            raise NotImplementedError()
+    elif isinstance(dm, P3_DoFMap):
+        if dim == 1:
+            local_matrix = mass_1d_sym_P3()
+        elif dim == 2:
+            local_matrix = mass_2d_sym_P3()
+        elif dim == 3:
+            local_matrix = mass_3d_sym_P3()
+        else:
+            raise NotImplementedError()
+    else:
+        raise NotImplementedError(dm)
+    return assembleMatrix(dm.mesh,
+                          dm,
+                          local_matrix,
+                          boundary_data,
+                          rhs_contribution,
+                          A,
+                          start_idx,
+                          end_idx,
+                          sss_format,
+                          reorder,
+                          cellIndices=cellIndices)
+
+
+def getSurfaceDoFMap(meshBase mesh,
+                     meshBase surface,
+                     DoFMap volumeDoFMap,
+                     INDEX_t[::1] boundaryCells=None):
+    cdef:
+        DoFMap dmS
+        INDEX_t[:, ::1] v2d
+        INDEX_t cellNo, localVertexNo, vertexNo, k
+
+    if isinstance(volumeDoFMap, P0_DoFMap):
+        assert boundaryCells is not None
+        dmS = P0_DoFMap(surface, NO_BOUNDARY)
+        for cellNo in range(surface.num_cells):
+            dmS.dofs[cellNo, 0] = volumeDoFMap.dofs[boundaryCells[cellNo], 0]
+        return dmS
+    elif isinstance(volumeDoFMap, P1_DoFMap):
+        dmS = P1_DoFMap(surface, NO_BOUNDARY)
+    elif isinstance(volumeDoFMap, P2_DoFMap):
+        dmS = P2_DoFMap(surface, NO_BOUNDARY)
+    elif isinstance(volumeDoFMap, P3_DoFMap):
+        dmS = P3_DoFMap(surface, NO_BOUNDARY)
+    else:
+        raise NotImplementedError()
+
+    assert volumeDoFMap.dofs_per_edge == 0
+    assert volumeDoFMap.dofs_per_face == 0
+
+    dmS.num_dofs = volumeDoFMap.num_dofs
+
+    v2d = uninitialized((mesh.num_vertices, volumeDoFMap.dofs_per_vertex), dtype=INDEX)
+    volumeDoFMap.getVertexDoFs(v2d)
+
+    for cellNo in range(surface.num_cells):
+        for localVertexNo in range(surface.cells.shape[1]):
+            vertexNo = surface.cells[cellNo, localVertexNo]
+            for k in range(dmS.dofs_per_vertex):
+                dmS.dofs[cellNo, localVertexNo*dmS.dofs_per_vertex+k] = v2d[vertexNo, k]
+    return dmS
+
+
+def assembleSurfaceMass(meshBase mesh,
+                        meshBase surface,
+                        DoFMap volumeDoFMap,
+                        LinearOperator A=None,
+                        BOOL_t sss_format=False,
+                        BOOL_t reorder=False,
+                        BOOL_t compress=False):
+    cdef:
+        INDEX_t dim = mesh.dim
+        local_matrix_t local_matrix
+        DoFMap dmS
+
+    if isinstance(volumeDoFMap, P1_DoFMap):
+        if dim == 1:
+            local_matrix = mass_0d_in_1d_sym_P1()
+        elif dim == 2:
+            local_matrix = mass_1d_in_2d_sym_P1()
+        elif dim == 3:
+            local_matrix = mass_2d_in_3d_sym_P1()
+        else:
+            raise NotImplementedError()
+    elif isinstance(volumeDoFMap, P2_DoFMap):
+        if dim == 2:
+            local_matrix = mass_1d_in_2d_sym_P2()
+        else:
+            raise NotImplementedError()
+    else:
+        raise NotImplementedError()
+
+    dmS = getSurfaceDoFMap(mesh, surface, volumeDoFMap)
+
+    if A is None:
+        A = volumeDoFMap.buildSparsityPattern(mesh.cells,
+                                              0,
+                                              mesh.num_cells,
+                                              symmetric=sss_format,
+                                              reorder=reorder)
+
+    A = assembleMatrix(surface,
+                       dmS,
+                       local_matrix,
+                       A=A,
+                       sss_format=sss_format,
+                       reorder=reorder)
+
+    if compress:
+        A.eliminate_zeros()
+
+    return A
+
+
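+# Note: assembleMassNonSym assembles the rectangular mass matrix
+# M_ij = int_Omega phi_i psi_j dx between two DoF maps on the same mesh,
+# with rows indexed by DoFMap1 and columns by DoFMap2, e.g. to test a P0
+# field against P1 basis functions. A hedged sketch, assuming dm0 and dm1
+# are P0 and P1 DoF maps on the same mesh:
+#
+#     M = assembleMassNonSym(mesh, dm0, dm1)  # dm0.num_dofs x dm1.num_dofs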
+def assembleMassNonSym(meshBase mesh,
+                       DoFMap DoFMap1,
+                       DoFMap DoFMap2,
+                       LinearOperator A=None,
+                       INDEX_t start_idx=-1,
+                       INDEX_t end_idx=-1):
+    cdef:
+        INDEX_t dim = mesh.dim
+        local_matrix_t local_matrix
+        BOOL_t symLocalMatrix
+    assert DoFMap1.mesh == DoFMap2.mesh
+    if isinstance(DoFMap1, P0_DoFMap):
+        if isinstance(DoFMap2, P0_DoFMap):
+            symLocalMatrix = True
+            if dim == 1:
+                local_matrix = mass_1d_sym_P0()
+            elif dim == 2:
+                local_matrix = mass_2d_sym_P0()
+            elif dim == 3:
+                local_matrix = mass_3d_sym_P0()
+            else:
+                raise NotImplementedError()
+        elif isinstance(DoFMap2, P1_DoFMap):
+            symLocalMatrix = False
+            if dim == 1:
+                local_matrix = mass_1d_nonsym_P0_P1()
+            elif dim == 2:
+                local_matrix = mass_2d_nonsym_P0_P1()
+            elif dim == 3:
+                local_matrix = mass_3d_nonsym_P0_P1()
+            else:
+                raise NotImplementedError()
+        else:
+            raise NotImplementedError()
+    elif isinstance(DoFMap1, P1_DoFMap):
+        if isinstance(DoFMap2, P1_DoFMap):
+            symLocalMatrix = True
+            if dim == 1:
+                local_matrix = mass_1d_sym_P1()
+            elif dim == 2:
+                local_matrix = mass_2d_sym_P1()
+            elif dim == 3:
+                local_matrix = mass_3d_sym_P1()
+            else:
+                raise NotImplementedError()
+        else:
+            raise NotImplementedError()
+    # elif isinstance(DoFMap1, P1_DoFMap):
+    #     if isinstance(DoFMap2, P0_DoFMap):
+    #         if dim == 1:
+    #             local_matrix = mass_1d_nonsym_P0_P1()
+    #         elif dim == 2:
+    #             local_matrix = mass_2d_nonsym_P0_P1()
+    #         else:
+    #             raise NotImplementedError()
+    #     else:
+    #         raise NotImplementedError()
+    else:
+        raise NotImplementedError()
+    return assembleNonSymMatrix_CSR(mesh,
+                                    local_matrix,
+                                    DoFMap1,
+                                    DoFMap2,
+                                    A,
+                                    start_idx,
+                                    end_idx,
+                                    cellIndices=None,
+                                    symLocalMatrix=symLocalMatrix)
+
+
+def assembleDrift(meshBase mesh,
+                  DoFMap DoFMap,
+                  vectorFunction coeff,
+                  LinearOperator A=None,
+                  INDEX_t start_idx=-1,
+                  INDEX_t end_idx=-1,
+                  INDEX_t[::1] cellIndices=None):
+    cdef:
+        INDEX_t dim = mesh.dim
+        local_matrix_t local_matrix
+    if isinstance(DoFMap, P1_DoFMap):
+        if dim == 1:
+            local_matrix = drift_1d_P1(coeff)
+        elif dim == 2:
+            local_matrix = drift_2d_P1(coeff)
+        else:
+            raise NotImplementedError()
+    else:
+        raise NotImplementedError()
+    return assembleNonSymMatrix_CSR(mesh,
+                                    local_matrix,
+                                    DoFMap,
+                                    DoFMap,
+                                    A,
+                                    start_idx,
+                                    end_idx,
+                                    cellIndices=cellIndices)
+
+
+def assembleStiffness(DoFMap dm,
+                      vector_t boundary_data=None,
+                      vector_t rhs_contribution=None,
+                      LinearOperator A=None,
+                      INDEX_t start_idx=-1,
+                      INDEX_t end_idx=-1,
+                      BOOL_t sss_format=False,
+                      BOOL_t reorder=False,
+                      function diffusivity=None,
+                      INDEX_t[::1] cellIndices=None,
+                      DoFMap dm2=None):
+    cdef:
+        INDEX_t dim = dm.mesh.dim
+        local_matrix_t local_matrix
+    if diffusivity is None:
+        if isinstance(dm, P1_DoFMap):
+            if dim == 1:
+                local_matrix = stiffness_1d_sym_P1()
+            elif dim == 2:
+                local_matrix = stiffness_2d_sym_P1()
+            elif dim == 3:
+                local_matrix = stiffness_3d_sym_P1()
+            else:
+                raise NotImplementedError()
+        elif isinstance(dm, P2_DoFMap):
+            if dim == 1:
+                local_matrix = stiffness_1d_sym_P2()
+            elif dim == 2:
+                local_matrix = stiffness_2d_sym_P2()
+            elif dim == 3:
+                local_matrix = stiffness_3d_sym_P2()
+            else:
+                raise NotImplementedError()
+        elif isinstance(dm, P3_DoFMap):
+            if dim == 1:
+                local_matrix = stiffness_1d_sym_P3()
+            elif dim == 2:
+                local_matrix = stiffness_2d_sym_P3()
+            elif dim == 3:
+                local_matrix = stiffness_3d_sym_P3()
+            else:
+                raise NotImplementedError()
+        else:
+            raise NotImplementedError()
+    else:
+        if isinstance(dm, P1_DoFMap):
+            if dim == 1:
+                local_matrix = scalar_coefficient_stiffness_1d_sym_P1(diffusivity)
+            elif dim == 2:
+                local_matrix = scalar_coefficient_stiffness_2d_sym_P1(diffusivity)
+            elif dim == 3:
+                local_matrix = scalar_coefficient_stiffness_3d_sym_P1(diffusivity)
+            else:
+                raise NotImplementedError()
+        elif isinstance(dm, P2_DoFMap):
+            if dim == 1:
+                local_matrix = scalar_coefficient_stiffness_1d_sym_P2(diffusivity)
+            elif dim == 2:
+                local_matrix = scalar_coefficient_stiffness_2d_sym_P2(diffusivity)
+            elif dim == 3:
+                local_matrix = scalar_coefficient_stiffness_3d_sym_P2(diffusivity)
+            else:
+                raise NotImplementedError()
+        else:
+            raise NotImplementedError()
+    if dm2 is None:
+        return assembleMatrix(dm.mesh,
+                              dm,
+                              local_matrix,
+
boundary_data, + rhs_contribution, + A, + start_idx, + end_idx, + sss_format, + reorder, + cellIndices=cellIndices) + else: + return assembleNonSymMatrix_CSR(dm.mesh, + local_matrix, + dm, + dm2, + A, + start_idx, + end_idx, + cellIndices=cellIndices, + symLocalMatrix=True) + + + + +def assembleMatrix(meshBase mesh, + DoFMap DoFMap, + local_matrix_t local_matrix, + vector_t boundary_data=None, + vector_t rhs_contribution=None, + LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + BOOL_t sss_format=False, + BOOL_t reorder=False, + INDEX_t[::1] cellIndices=None): + if A is not None: + sss_format = isinstance(A, SSS_LinearOperator) + reorder = False + if boundary_data is not None and rhs_contribution is None: + rhs_contribution = np.zeros((DoFMap.num_dofs), dtype=REAL) + if sss_format: + return assembleSymMatrix_SSS(mesh, + local_matrix, DoFMap, + boundary_data, + rhs_contribution, + A, + start_idx, end_idx, + reorder=reorder), rhs_contribution + else: + return assembleSymMatrix_CSR(mesh, + local_matrix, DoFMap, + boundary_data, + rhs_contribution, + A, + start_idx, end_idx, + reorder=reorder, + cellIndices=cellIndices), rhs_contribution + else: + if sss_format: + return assembleSymMatrix_SSS(mesh, + local_matrix, DoFMap, + boundary_data, rhs_contribution, + A, + start_idx, end_idx, + reorder=reorder) + else: + return assembleSymMatrix_CSR(mesh, + local_matrix, DoFMap, + boundary_data, rhs_contribution, + A, + start_idx, end_idx, + reorder=reorder, + cellIndices=cellIndices) + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef assembleSymMatrix_CSR(meshBase mesh, + local_matrix_t local_matrix, + DoFMap DoFMap, + vector_t boundary_data=None, + vector_t rhs_contribution=None, + LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + BOOL_t reorder=False, + INDEX_t[::1] cellIndices=None): + cdef: + INDEX_t i, j, I, J, k, s + REAL_t[:, ::1] local_vertices = uninitialized((mesh.manifold_dim+1, + mesh.dim), dtype=REAL) + # local matrix entries + REAL_t[::1] local_contrib = uninitialized((DoFMap.dofs_per_element * + (DoFMap.dofs_per_element+1))//2, + dtype=REAL) + + if start_idx == -1: + start_idx = 0 + if end_idx == -1: + end_idx = mesh.num_cells + + if A is None: + A = DoFMap.buildSparsityPattern(mesh.cells, + start_idx, + end_idx, + reorder=reorder) + + if boundary_data.shape[0] == 0: + if cellIndices is None: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + if local_matrix.needsCellInfo: + local_matrix.setCell(mesh.cells[i, :]) + + # Get symmetric local matrix + local_matrix.eval(local_vertices, local_contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + if I < 0: + s += DoFMap.dofs_per_element-j + continue + for k in range(j, DoFMap.dofs_per_element): + J = DoFMap.cell2dof(i, k) + if J < 0: + s += 1 + continue + if I == J: + A.addToEntry(I, I, local_contrib[s]) + else: + A.addToEntry(I, J, local_contrib[s]) + A.addToEntry(J, I, local_contrib[s]) + s += 1 + else: + for i in cellIndices: + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Get symmetric local matrix + local_matrix.eval(local_vertices, local_contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + if I < 0: + s += DoFMap.dofs_per_element-j + continue + for k in range(j, DoFMap.dofs_per_element): + J = DoFMap.cell2dof(i, k) + if J < 0: + s += 1 + continue + if I == J: + 
A.addToEntry(I, I, local_contrib[s]) + else: + A.addToEntry(I, J, local_contrib[s]) + A.addToEntry(J, I, local_contrib[s]) + s += 1 + else: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Get symmetric local matrix + local_matrix.eval(local_vertices, local_contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + for k in range(j, DoFMap.dofs_per_element): + J = DoFMap.cell2dof(i, k) + # write this in a better way + if I >= 0: + if J >= 0: + if I == J: + A.addToEntry(I, I, local_contrib[s]) + else: + A.addToEntry(I, J, local_contrib[s]) + A.addToEntry(J, I, local_contrib[s]) + else: + rhs_contribution[I] -= local_contrib[s]*boundary_data[-J-1] + else: + if J >= 0: + rhs_contribution[J] -= local_contrib[s]*boundary_data[-I-1] + s += 1 + return A + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef assembleSymMatrix_SSS(meshBase mesh, + local_matrix_t local_matrix, + DoFMap DoFMap, + vector_t boundary_data=None, + vector_t rhs_contribution=None, + LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + BOOL_t reorder=False): + cdef: + INDEX_t i, j, I, J, k, s + REAL_t[:, ::1] local_vertices = uninitialized((mesh.manifold_dim+1, + mesh.dim), dtype=REAL) + # local matrix entries + REAL_t[::1] local_contrib = uninitialized((DoFMap.dofs_per_element * + (DoFMap.dofs_per_element+1))//2, + dtype=REAL) + + if start_idx == -1: + start_idx = 0 + if end_idx == -1: + end_idx = mesh.num_cells + + if A is None: + A = DoFMap.buildSparsityPattern(mesh.cells, + start_idx, + end_idx, + symmetric=True, + reorder=reorder) + if boundary_data.shape[0] == 0: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Get symmetric local matrix + local_matrix.eval(local_vertices, local_contrib) + + s = 0 + # enter the data into SSS matrix + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + if I < 0: + s += DoFMap.dofs_per_element-j + continue + A.addToEntry(I, I, local_contrib[s]) + s += 1 + for k in range(j+1, DoFMap.dofs_per_element): + J = DoFMap.cell2dof(i, k) + if J < 0: + s += 1 + continue + if I < J: + A.addToEntry(J, I, local_contrib[s]) + else: + A.addToEntry(I, J, local_contrib[s]) + s += 1 + else: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Get symmetric local matrix + local_matrix.eval(local_vertices, local_contrib) + + s = 0 + # enter the data into SSS matrix + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + for k in range(j, DoFMap.dofs_per_element): + J = DoFMap.cell2dof(i, k) + # write this in a better way + if I >= 0: + if J >= 0: + if I == J: + A.addToEntry(I, I, local_contrib[s]) + else: + if I < J: + A.addToEntry(J, I, local_contrib[s]) + else: + A.addToEntry(I, J, local_contrib[s]) + else: + rhs_contribution[I] -= local_contrib[s]*boundary_data[-J-1] + else: + if J >= 0: + rhs_contribution[J] -= local_contrib[s]*boundary_data[-I-1] + s += 1 + return A + + +@cython.boundscheck(False) +@cython.wraparound(False) +cdef assembleNonSymMatrix_CSR(meshBase mesh, + local_matrix_t local_matrix, + DoFMap DoFMap1, + DoFMap DoFMap2, + CSR_LinearOperator A=None, + INDEX_t start_idx=-1, + INDEX_t end_idx=-1, + INDEX_t[::1] cellIndices=None, + BOOL_t symLocalMatrix=False): + cdef: + INDEX_t i, j, I, J, k, s + REAL_t[:, ::1] simplex = uninitialized((mesh.manifold_dim+1, + mesh.dim), dtype=REAL) + # local matrix 
entries + REAL_t[::1] contrib = uninitialized((DoFMap1.dofs_per_element * + DoFMap2.dofs_per_element), + dtype=REAL) + REAL_t[::1] contribSym + + if start_idx == -1: + start_idx = 0 + if end_idx == -1: + end_idx = mesh.num_cells + if symLocalMatrix: + contribSym = uninitialized((DoFMap1.dofs_per_element * + DoFMap2.dofs_per_element), + dtype=REAL) + + if A is None: + A = DoFMap1.buildNonSymmetricSparsityPattern(mesh.cells, + DoFMap2, + start_idx, + end_idx) + + if local_matrix.additiveAssembly: + if cellIndices is None: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, simplex) + + if local_matrix.needsCellInfo: + local_matrix.setCell(mesh.cells[i, :]) + + # Evaluate nonsymmetric local matrix + if symLocalMatrix: + local_matrix.eval(simplex, contribSym) + s = 0 + for j in range(DoFMap1.dofs_per_element): + for k in range(j, DoFMap2.dofs_per_element): + contrib[j*DoFMap1.dofs_per_element+k] = contribSym[s] + contrib[k*DoFMap1.dofs_per_element+j] = contribSym[s] + s += 1 + else: + local_matrix.eval(simplex, contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap1.dofs_per_element): + I = DoFMap1.cell2dof(i, j) + if I < 0: + s += DoFMap2.dofs_per_element + continue + for k in range(DoFMap2.dofs_per_element): + J = DoFMap2.cell2dof(i, k) + if J < 0: + s += 1 + continue + A.addToEntry(I, J, contrib[s]) + s += 1 + else: + for i in cellIndices: + # Get local vertices + mesh.getSimplex(i, simplex) + + if local_matrix.needsCellInfo: + local_matrix.setCell(mesh.cells[i, :]) + + # Evaluate nonsymmetric local matrix + if symLocalMatrix: + local_matrix.eval(simplex, contribSym) + s = 0 + for j in range(DoFMap1.dofs_per_element): + for k in range(j, DoFMap2.dofs_per_element): + contrib[j*DoFMap1.dofs_per_element+k] = contribSym[s] + contrib[k*DoFMap1.dofs_per_element+j] = contribSym[s] + s += 1 + else: + local_matrix.eval(simplex, contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap1.dofs_per_element): + I = DoFMap1.cell2dof(i, j) + if I < 0: + s += DoFMap2.dofs_per_element + continue + for k in range(DoFMap2.dofs_per_element): + J = DoFMap2.cell2dof(i, k) + if J < 0: + s += 1 + continue + A.addToEntry(I, J, contrib[s]) + s += 1 + else: + if cellIndices is None: + for i in range(start_idx, end_idx): + # Get local vertices + mesh.getSimplex(i, simplex) + + if local_matrix.needsCellInfo: + local_matrix.setCell(mesh.cells[i, :]) + + # Evaluate nonsymmetric local matrix + if symLocalMatrix: + local_matrix.eval(simplex, contribSym) + s = 0 + for j in range(DoFMap1.dofs_per_element): + for k in range(j, DoFMap2.dofs_per_element): + contrib[j*DoFMap1.dofs_per_element+k] = contribSym[s] + contrib[k*DoFMap1.dofs_per_element+j] = contribSym[s] + s += 1 + else: + local_matrix.eval(simplex, contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap1.dofs_per_element): + I = DoFMap1.cell2dof(i, j) + if I < 0: + s += DoFMap2.dofs_per_element + continue + for k in range(DoFMap2.dofs_per_element): + J = DoFMap2.cell2dof(i, k) + if J < 0: + s += 1 + continue + A.setEntry(I, J, contrib[s]) + s += 1 + else: + for i in cellIndices: + # Get local vertices + mesh.getSimplex(i, simplex) + + if local_matrix.needsCellInfo: + local_matrix.setCell(mesh.cells[i, :]) + + # Evaluate nonsymmetric local matrix + if symLocalMatrix: + local_matrix.eval(simplex, contribSym) + s = 0 + for j in range(DoFMap1.dofs_per_element): + for k in range(j, DoFMap2.dofs_per_element): + contrib[j*DoFMap1.dofs_per_element+k] = contribSym[s] + 
contrib[k*DoFMap1.dofs_per_element+j] = contribSym[s] + s += 1 + else: + local_matrix.eval(simplex, contrib) + + s = 0 + # enter the data into CSR matrix + for j in range(DoFMap1.dofs_per_element): + I = DoFMap1.cell2dof(i, j) + if I < 0: + s += DoFMap2.dofs_per_element + continue + for k in range(DoFMap2.dofs_per_element): + J = DoFMap2.cell2dof(i, k) + if J < 0: + s += 1 + continue + A.setEntry(I, J, contrib[s]) + s += 1 + return A + + +ctypedef fused FUNCTION_t: + function + vectorFunction + + +@cython.boundscheck(False) +@cython.cdivision(True) +def assembleRHS(FUNCTION_t fun, DoFMap dm, + simplexQuadratureRule qr=None): + cdef: + meshBase mesh = dm.mesh + INDEX_t dim = mesh.dim + INDEX_t dimManifold = mesh.manifold_dim + INDEX_t num_vertices = dimManifold+1 + INDEX_t num_quad_nodes + REAL_t[:, ::1] PHI + REAL_t[:, :, ::1] PHIVector + REAL_t[::1] weights + INDEX_t i, k, j, l, I + fe_vector dataVec + vector_t data + REAL_t vol + REAL_t[:, ::1] span = uninitialized((mesh.manifold_dim, mesh.dim), dtype=REAL) + REAL_t[:, ::1] simplex = uninitialized((mesh.manifold_dim+1, mesh.dim), + dtype=REAL) + volume_t volume + vectorShapeFunction phi + REAL_t[::1] fvals + REAL_t[:, ::1] fvalsVector + + if qr is None: + if dim == dimManifold: + if dimManifold == 1: + if isinstance(dm, P0_DoFMap): + qr = Gauss1D(order=3) + elif isinstance(dm, P1_DoFMap): + qr = Gauss1D(order=3) + elif isinstance(dm, P2_DoFMap): + qr = Gauss1D(order=5) + volume = volume1Dnew + elif dimManifold == 2: + if isinstance(dm, P0_DoFMap): + qr = Gauss2D(order=2) + elif isinstance(dm, P1_DoFMap): + qr = Gauss2D(order=2) + elif isinstance(dm, P2_DoFMap): + qr = Gauss2D(order=5) + volume = volume2Dnew + elif dimManifold == 3: + if isinstance(dm, P1_DoFMap): + qr = Gauss3D(order=3) + elif isinstance(dm, P2_DoFMap): + qr = Gauss3D(order=3) + volume = volume3D + else: + raise NotImplementedError() + if qr is None: + qr = simplexXiaoGimbutas(2*dm.polynomialOrder+2, dim, dimManifold) + volume = qr.volume + else: + volume = qr.volume + + dataVec = dm.zeros() + data = dataVec + weights = qr.weights + num_quad_nodes = qr.num_nodes + + if FUNCTION_t is function: + # evaluate local shape functions on quadrature nodes + PHI = uninitialized((dm.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(dm.dofs_per_element): + for j in range(qr.num_nodes): + PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(qr.nodes[:, j])) + + fvals = uninitialized((num_quad_nodes), dtype=REAL) + + for i in range(mesh.num_cells): + # Get local vertices + mesh.getSimplex(i, simplex) + + # Calculate volume + for k in range(num_vertices-1): + for j in range(dim): + span[k, j] = simplex[k+1, j]-simplex[0, j] + vol = volume(span) + + # Get function values at quadrature nodes + qr.evalFun(fun, simplex, fvals) + + # Put everything together + for k in range(dm.dofs_per_element): + I = dm.cell2dof(i, k) + if I < 0: + continue + for j in range(num_quad_nodes): + data[I] += vol*weights[j]*fvals[j]*PHI[k, j] + else: + PHIVector = uninitialized((dm.dofs_per_element, qr.num_nodes, dim), dtype=REAL) + innerProducts = uninitialized((6), dtype=REAL) + gradients = uninitialized((3, 2), dtype=REAL) + + fvalsVector = uninitialized((num_quad_nodes, dim), dtype=REAL) + + for i in range(mesh.num_cells): + # Get local vertices + mesh.getSimplex(i, simplex) + + # Calculate volume + vol = simplexVolumeGradientsProducts2D(simplex, innerProducts, gradients) + + # evaluate local shape functions on quadrature nodes + for k in range(dm.dofs_per_element): + phi = 
dm.localShapeFunctions[k] + phi.setCell(mesh.cells[i, :]) + for j in range(num_quad_nodes): + phi.eval(np.ascontiguousarray(qr.nodes[:, j]), gradients, PHIVector[k, j, :]) + + # Get function values at quadrature nodes + qr.evalVectorFun(fun, simplex, fvalsVector) + + # Put everything together + for k in range(dm.dofs_per_element): + I = dm.cell2dof(i, k) + if I < 0: + continue + for j in range(num_quad_nodes): + for l in range(dim): + data[I] += vol*weights[j]*fvalsVector[j, l]*PHIVector[k, j, l] + return dataVec + + +@cython.boundscheck(False) +@cython.cdivision(True) +def assembleRHScomplex(complexFunction fun, DoFMap dm, + simplexQuadratureRule qr=None): + cdef: + meshBase mesh = dm.mesh + INDEX_t dim = mesh.dim + INDEX_t dimManifold = mesh.manifold_dim + INDEX_t num_vertices = dimManifold+1 + INDEX_t num_quad_nodes + REAL_t[:, ::1] PHI + REAL_t[::1] weights + INDEX_t i, k, j, I + complex_fe_vector dataVec + complex_vector_t data + REAL_t vol + REAL_t[:, ::1] span = uninitialized((mesh.manifold_dim, mesh.dim), dtype=REAL) + REAL_t[:, ::1] simplex = uninitialized((mesh.manifold_dim+1, mesh.dim), + dtype=REAL) + volume_t volume + COMPLEX_t[::1] fvals + + if qr is None: + if dim == dimManifold: + if dimManifold == 1: + if isinstance(dm, P0_DoFMap): + qr = Gauss1D(order=3) + elif isinstance(dm, P1_DoFMap): + qr = Gauss1D(order=3) + elif isinstance(dm, P2_DoFMap): + qr = Gauss1D(order=5) + volume = volume1Dnew + elif dimManifold == 2: + if isinstance(dm, P0_DoFMap): + qr = Gauss2D(order=2) + elif isinstance(dm, P1_DoFMap): + qr = Gauss2D(order=2) + elif isinstance(dm, P2_DoFMap): + qr = Gauss2D(order=5) + volume = volume2Dnew + elif dimManifold == 3: + if isinstance(dm, P1_DoFMap): + qr = Gauss3D(order=3) + elif isinstance(dm, P2_DoFMap): + qr = Gauss3D(order=3) + volume = volume3D + else: + raise NotImplementedError() + if qr is None: + qr = simplexXiaoGimbutas(2*dm.polynomialOrder+2, dim, dimManifold) + volume = qr.volume + else: + volume = qr.volume + + # evaluate local shape functions on quadrature nodes + PHI = uninitialized((dm.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(dm.dofs_per_element): + for j in range(qr.num_nodes): + PHI[i, j] = dm.localShapeFunctions[i](np.ascontiguousarray(qr.nodes[:, j])) + weights = qr.weights + + num_quad_nodes = qr.num_nodes + + dataVec = dm.zeros(dtype=COMPLEX) + data = dataVec + fvals = uninitialized((num_quad_nodes), dtype=COMPLEX) + + for i in range(mesh.num_cells): + # Get local vertices + mesh.getSimplex(i, simplex) + + # Calculate volume + for k in range(num_vertices-1): + for j in range(dim): + span[k, j] = simplex[k+1, j]-simplex[0, j] + vol = volume(span) + + # Get function values at quadrature nodes + qr.evalComplexFun(fun, simplex, fvals) + + # Put everything together + for k in range(dm.dofs_per_element): + I = dm.cell2dof(i, k) + if I < 0: + continue + for j in range(num_quad_nodes): + data[I] = data[I] + vol*weights[j]*fvals[j]*PHI[k, j] + return dataVec + + + +cdef class multi_function: + cdef: + public INDEX_t numInputs, numOutputs + + def __init__(self, numInputs, numOutputs): + self.numInputs = numInputs + self.numOutputs = numOutputs + + def __call__(self, REAL_t[::1] x, REAL_t[::1] y): + return self.eval(x, y) + + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y): + pass + + +cdef class power(multi_function): + cdef: + REAL_t k + + def __init__(self, k=2.): + self.k = k + multi_function.__init__(self, 1, 1) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] 
y): + cdef: + REAL_t u + u = x[0] + y[0] = u**self.k + + +cdef class gray_scott(multi_function): + cdef: + REAL_t F, k + + def __init__(self, F=0.025, k=0.06): + self.F = F + self.k = k + multi_function.__init__(self, 2, 2) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline void eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t u, v + u = x[0] + v = x[1] + y[0] = -u*v**2 + self.F*(1.-u) + y[1] = u*v**2 - (self.F+self.k)*v + + +cdef class gray_scott_gradient(multi_function): + cdef: + REAL_t F, k + + def __init__(self, F=0.025, k=0.06): + self.F = F + self.k = k + multi_function.__init__(self, 4, 2) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline void eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef REAL_t u, v, unew, vnew + u = x[0] + v = x[1] + unew = x[2] + vnew = x[3] + y[0] = -unew*v**2 - 2.*u*v*vnew - self.F*unew + y[1] = unew*v**2 + 2.*u*v*vnew - (self.k+self.F)*vnew + + +cdef class brusselator(multi_function): + cdef: + REAL_t B, Q + + def __init__(self, B=0.025, Q=0.06): + self.B = B + self.Q = Q + multi_function.__init__(self, 2, 2) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline void eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t u, v, z + u = x[0] + v = x[1] + z = self.B*u + self.Q**2*v + self.B/self.Q*u**2 + 2.*self.Q*u*v + u**2*v + y[0] = -u + z + y[1] = -z + + +cdef class CahnHilliard(multi_function): + def __init__(self): + multi_function.__init__(self, 1, 1) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t u + u = x[0] + y[0] = u**3-u + + +cdef class CahnHilliard2(multi_function): + def __init__(self): + multi_function.__init__(self, 1, 1) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t u + u = x[0] + y[0] = 0.25*(1.-u**2)**2 + + +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def assembleNonlinearity(meshBase mesh, multi_function fun, DoFMap DoFMap, list U): + cdef: + INDEX_t dim = mesh.dim + INDEX_t dimManifold = mesh.manifold_dim + INDEX_t num_vertices = dimManifold+1 + INDEX_t num_quad_nodes + REAL_t[:, ::1] PHI + REAL_t[::1] weights + INDEX_t num_cells = mesh.cells.shape[0] + INDEX_t i, k, j, m, I + # np.ndarray[REAL_t, ndim=1] data_mem + REAL_t[::1] data + REAL_t vol + REAL_t[:, ::1] span = uninitialized((num_vertices-1, dim), dtype=REAL) + REAL_t[:, ::1] local_vertices = uninitialized((num_vertices, dim), + dtype=REAL) + volume_t volume + REAL_t[:, ::1] fvals, fvals2 + REAL_t[:, ::1] u + REAL_t[::1] UU + + if dimManifold == 1: + qr = Gauss1D(order=3) + volume = volume1Dnew + elif dimManifold == 2: + if isinstance(DoFMap, P1_DoFMap): + qr = Gauss2D(order=2) + elif isinstance(DoFMap, P2_DoFMap): + qr = Gauss2D(order=5) + else: + raise NotImplementedError() + volume = volume2Dnew + elif dimManifold == 3: + if isinstance(DoFMap, P1_DoFMap): + qr = Gauss3D(order=3) + elif isinstance(DoFMap, P2_DoFMap): + qr = Gauss3D(order=3) + else: + raise NotImplementedError() + volume = volume3D + else: + raise NotImplementedError() + + assert len(U) == fun.numInputs + + # evaluate local shape functions on quadrature nodes + PHI = uninitialized((DoFMap.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(DoFMap.dofs_per_element): + for j in range(qr.num_nodes): + PHI[i, j] = DoFMap.localShapeFunctions[i](np.ascontiguousarray(qr.nodes[:, j])) + weights = qr.weights + # quad_points = 
qr.nodes + + num_quad_nodes = qr.num_nodes + + u = uninitialized((DoFMap.num_dofs, len(U)), dtype=REAL) + for j in range(len(U)): + UU = U[j] + for i in range(DoFMap.num_dofs): + u[i, j] = UU[i] + + dataList = [np.zeros((DoFMap.num_dofs), dtype=REAL) for _ in range(fun.numOutputs)] + # data = data_mem + fvals = uninitialized((num_quad_nodes, fun.numInputs), dtype=REAL) + fvals2 = uninitialized((num_quad_nodes, fun.numOutputs), dtype=REAL) + + for i in range(num_cells): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Calculate volume + for k in range(num_vertices-1): + for j in range(dim): + span[k, j] = local_vertices[k+1, j]-local_vertices[0, j] + vol = volume(span) + + fvals[:] = 0. + for m in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, m) + if I >= 0: + for k in range(num_quad_nodes): + for j in range(fun.numInputs): + # u = U[j] + fvals[k, j] += u[I, j]*PHI[m, k] + for k in range(num_quad_nodes): + fun.eval(fvals[k, :], fvals2[k, :]) + + # Put everything together + for m in range(fun.numOutputs): + data = dataList[m] + for k in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, k) + if I >= 0: + for j in range(num_quad_nodes): + data[I] += vol*weights[j]*fvals2[j, m]*PHI[k, j] + # data[m*DoFMap.num_dofs+I] += vol*weights[j]*fvals2[j, m]*PHI[k, j] + return dataList + # return data_mem + + +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.cdivision(True) +def assembleRHSfromFEfunction(meshBase mesh, + vector_t u, + DoFMap DoFMap, + DoFMap target, + simplexQuadratureRule qr=None): + cdef: + INDEX_t dim = mesh.dim + INDEX_t dimManifold = mesh.manifold_dim + INDEX_t num_vertices = dimManifold+1 + INDEX_t num_quad_nodes + REAL_t[:, ::1] PHI, PHItarget + REAL_t[::1] weights + INDEX_t num_cells = mesh.cells.shape[0] + INDEX_t i, k, j, m, I + REAL_t vol + REAL_t[:, ::1] span = uninitialized((num_vertices-1, dim), dtype=REAL) + REAL_t[:, ::1] local_vertices = uninitialized((num_vertices, dim), + dtype=REAL) + volume_t volume + REAL_t[::1] fvals + vector_t b + + assert DoFMap.mesh.num_vertices == target.mesh.num_vertices, "DoFmap and target have different meshes" + assert DoFMap.mesh.num_cells == target.mesh.num_cells, "DoFmap and target have different meshes" + assert u.shape[0] == DoFMap.num_dofs, "u and DoFMap have different number of DoFs: {} != {}".format(u.shape[0], DoFMap.num_dofs) + + if qr is None: + if dimManifold == 1: + qr = Gauss1D(order=3) + volume = volume1Dnew + elif dimManifold == 2: + qr = Gauss2D(order=2) + volume = volume2Dnew + elif dimManifold == 3: + qr = Gauss3D(order=3) + volume = volume3D + else: + raise NotImplementedError() + else: + volume = qr.volume + + # evaluate local shape functions on quadrature nodes + PHI = uninitialized((DoFMap.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(DoFMap.dofs_per_element): + for j in range(qr.num_nodes): + PHI[i, j] = DoFMap.localShapeFunctions[i](np.ascontiguousarray(qr.nodes[:, j])) + weights = qr.weights + num_quad_nodes = qr.num_nodes + + # evaluate local shape functions on quadrature nodes + PHItarget = uninitialized((target.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(target.dofs_per_element): + for j in range(qr.num_nodes): + PHItarget[i, j] = target.localShapeFunctions[i](np.ascontiguousarray(qr.nodes[:, j])) + + fvals = uninitialized((num_quad_nodes), dtype=REAL) + + b = np.zeros((target.num_dofs), dtype=REAL) + + for i in range(num_cells): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Calculate volume + for k in 
range(num_vertices-1):
+            for j in range(dim):
+                span[k, j] = local_vertices[k+1, j]-local_vertices[0, j]
+        vol = volume(span)
+
+        # get u at quadrature nodes
+        fvals[:] = 0.
+        for m in range(DoFMap.dofs_per_element):
+            I = DoFMap.cell2dof(i, m)
+            if I >= 0:
+                for k in range(num_quad_nodes):
+                    fvals[k] += u[I]*PHI[m, k]
+
+        # Integrate against basis functions in target DoFMap
+        for m in range(target.dofs_per_element):
+            I = target.cell2dof(i, m)
+            if I >= 0:
+                for k in range(num_quad_nodes):
+                    b[I] += vol*weights[k]*fvals[k]*PHItarget[m, k]
+    return np.array(b, copy=False)
+
+
+@cython.boundscheck(False)
+@cython.initializedcheck(False)
+@cython.wraparound(False)
+@cython.cdivision(True)
+def assembleJumpMatrix(meshBase mesh, P0_DoFMap dm):
+    cdef:
+        sparsityPattern sPat = sparsityPattern(dm.num_dofs)
+        cells_t cells = mesh.cells
+        INDEX_t dim = mesh.dim
+        dict lookup
+        ENCODE_t hv = 0
+        INDEX_t hv1 = 0
+        tuple hvFace
+        INDEX_t cellNo, otherCellNo, vertexNo, I, J
+        REAL_t[:, ::1] edgeSimplex = uninitialized((2, 2), dtype=REAL)
+        REAL_t[:, ::1] faceSimplex = uninitialized((3, 3), dtype=REAL)
+        REAL_t[:, ::1] temp = uninitialized((4, 3), dtype=REAL)
+        REAL_t vol2
+        CSR_LinearOperator A
+        simplexMapper sM
+
+    if dim == 1:
+        lookup = {}
+        for cellNo in range(mesh.num_cells):
+            for vertexNo in range(dim+1):
+                #vertex = sm.getVertexInCell(cellNo, vertexNo)
+                vertex = cells[cellNo, vertexNo]
+                try:
+                    otherCellNo = lookup.pop(vertex)
+                    I = dm.cell2dof(cellNo, 0)
+                    J = dm.cell2dof(otherCellNo, 0)
+                    sPat.add(I, I)
+                    sPat.add(J, J)
+                    sPat.add(I, J)
+                    sPat.add(J, I)
+                except KeyError:
+                    lookup[vertex] = cellNo
+        indptr, indices = sPat.freeze()
+        del sPat
+        nnz = indptr[dm.num_dofs]
+        data = np.zeros((nnz), dtype=REAL)
+        A = CSR_LinearOperator(indices, indptr, data)
+        lookup = {}
+        for cellNo in range(mesh.num_cells):
+            for vertexNo in range(dim+1):
+                #vertex = sm.getVertexInCell(cellNo, vertexNo)
+                vertex = mesh.cells[cellNo, vertexNo]
+                try:
+                    otherCellNo, vol2 = lookup.pop(vertex)
+                    I = dm.cell2dof(cellNo, 0)
+                    J = dm.cell2dof(otherCellNo, 0)
+                    A.addToEntry(I, I, vol2)
+                    A.addToEntry(J, J, vol2)
+                    A.addToEntry(I, J, -vol2)
+                    A.addToEntry(J, I, -vol2)
+                except KeyError:
+                    vol2 = 1.
+ lookup[vertex] = cellNo, vol2 + elif dim == 2: + sM = mesh.simplexMapper + lookup = {} + for cellNo in range(mesh.num_cells): + sM.startLoopOverCellEdges(cells[cellNo, :]) + while sM.loopOverCellEdgesEncoded(&hv): + try: + otherCellNo = lookup.pop(hv) + I = dm.cell2dof(cellNo, 0) + J = dm.cell2dof(otherCellNo, 0) + sPat.add(I, I) + sPat.add(J, J) + sPat.add(I, J) + sPat.add(J, I) + except KeyError: + lookup[hv] = cellNo + indptr, indices = sPat.freeze() + del sPat + nnz = indptr[dm.num_dofs] + data = np.zeros((nnz), dtype=REAL) + A = CSR_LinearOperator(indices, indptr, data) + lookup = {} + for cellNo in range(mesh.num_cells): + sM.startLoopOverCellEdges(cells[cellNo, :]) + while sM.loopOverCellEdgesEncoded(&hv): + try: + otherCellNo, vol2 = lookup.pop(hv) + I = dm.cell2dof(cellNo, 0) + J = dm.cell2dof(otherCellNo, 0) + A.addToEntry(I, I, vol2) + A.addToEntry(J, J, vol2) + A.addToEntry(I, J, -vol2) + A.addToEntry(J, I, -vol2) + except KeyError: + sM.getEncodedEdgeSimplex(hv, edgeSimplex) + vol2 = simplexVolume1Din2D(edgeSimplex, temp)**2 + lookup[hv] = cellNo, vol2 + elif dim == 3: + sM = mesh.simplexMapper + lookup = {} + for cellNo in range(mesh.num_cells): + sM.startLoopOverCellFaces(cells[cellNo, :]) + while sM.loopOverCellFacesEncoded(&hv1, &hv): + hvFace = (hv1, hv) + try: + otherCellNo = lookup.pop(hvFace) + I = dm.cell2dof(cellNo, 0) + J = dm.cell2dof(otherCellNo, 0) + sPat.add(I, I) + sPat.add(J, J) + sPat.add(I, J) + sPat.add(J, I) + except KeyError: + lookup[hvFace] = cellNo + indptr, indices = sPat.freeze() + del sPat + nnz = indptr[dm.num_dofs] + data = np.zeros((nnz), dtype=REAL) + A = CSR_LinearOperator(indices, indptr, data) + lookup = {} + for cellNo in range(mesh.num_cells): + sM.startLoopOverCellFaces(cells[cellNo, :]) + while sM.loopOverCellFacesEncoded(&hv1, &hv): + hvFace = (hv1, hv) + try: + otherCellNo, vol2 = lookup.pop(hvFace) + I = dm.cell2dof(cellNo, 0) + J = dm.cell2dof(otherCellNo, 0) + A.addToEntry(I, I, vol2) + A.addToEntry(J, J, vol2) + A.addToEntry(I, J, -vol2) + A.addToEntry(J, I, -vol2) + except KeyError: + sM.getEncodedFaceSimplex(hvFace, faceSimplex) + vol2 = simplexVolume2Din3D(faceSimplex, temp)**2 + lookup[hvFace] = cellNo, vol2 + else: + raise NotImplementedError() + return A + + +cdef class matrixFreeOperator(LinearOperator): + cdef: + meshBase mesh + DoFMap dm + local_matrix_t local_matrix + + def __init__(self, meshBase mesh, DoFMap dm, local_matrix_t local_matrix): + self.mesh = mesh + self.dm = dm + self.local_matrix = local_matrix + LinearOperator.__init__(self, + dm.num_dofs, + dm.num_dofs) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t i + for i in range(self.dm.num_dofs): + y[i] = 0. 
+ self.matvec_no_overwrite(x, y) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t i, s, j, k, I, J + REAL_t[:, ::1] simplex = uninitialized((self.mesh.dim+1, self.mesh.manifold_dim), dtype=REAL) + REAL_t[::1] local_contrib = uninitialized((self.dm.dofs_per_element * + (self.dm.dofs_per_element+1))//2, + dtype=REAL) + + for i in range(self.mesh.num_cells): + # Get simplex + self.mesh.getSimplex(i, simplex) + self.local_matrix.eval(simplex, local_contrib) + s = 0 + for j in range(self.dm.dofs_per_element): + I = self.dm.cell2dof(i, j) + if I < 0: + s += self.dm.dofs_per_element-j + continue + for k in range(j, self.dm.dofs_per_element): + J = self.dm.cell2dof(i, k) + if J < 0: + s += 1 + continue + if I == J: + y[I] += local_contrib[s] * x[I] + else: + y[I] += local_contrib[s] * x[J] + y[J] += local_contrib[s] * x[I] + s += 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def get_diagonal(self): + cdef: + INDEX_t i, s, j, k, I, J + REAL_t[:, ::1] simplex = uninitialized((self.mesh.dim+1, self.mesh.manifold_dim), dtype=REAL) + REAL_t[::1] local_contrib = uninitialized((self.dm.dofs_per_element * + (self.dm.dofs_per_element+1))//2, + dtype=REAL) + REAL_t[::1] d = np.zeros((self.dm.num_dofs), dtype=REAL) + + for i in range(self.mesh.num_cells): + # Get simplex + self.mesh.getSimplex(i, simplex) + self.local_matrix.eval(simplex, local_contrib) + s = 0 + for j in range(self.dm.dofs_per_element): + I = self.dm.cell2dof(i, j) + if I < 0: + s += self.dm.dofs_per_element-j + continue + for k in range(j, self.dm.dofs_per_element): + J = self.dm.cell2dof(i, k) + if J < 0: + s += 1 + continue + if I == J: + d[I] += local_contrib[s] + s += 1 + return np.array(d, copy=False) + + diagonal = property(fget=get_diagonal) diff --git a/fem/PyNucleus_fem/functions.pxd b/fem/PyNucleus_fem/functions.pxd new file mode 100644 index 0000000..da9378b --- /dev/null +++ b/fem/PyNucleus_fem/functions.pxd @@ -0,0 +1,34 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport REAL_t, INDEX_t, COMPLEX_t, BOOL_t +from numpy cimport uint8_t + + + +cdef class function: + cdef REAL_t eval(self, REAL_t[::1] x) + + +cdef class constant(function): + cdef: + public REAL_t value + + +ctypedef REAL_t(*volume_t)(REAL_t[:, ::1]) + + +cdef class complexFunction: + cdef COMPLEX_t eval(self, REAL_t[::1] x) + + +cdef class vectorFunction: + cdef: + list components + INDEX_t rows + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] vals) diff --git a/fem/PyNucleus_fem/functions.pyx b/fem/PyNucleus_fem/functions.pyx new file mode 100644 index 0000000..ff1dc12 --- /dev/null +++ b/fem/PyNucleus_fem/functions.pyx @@ -0,0 +1,2075 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. 
Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan2, + M_PI as pi, pow, exp, floor, log2) +import numpy as np +cimport numpy as np +cimport cython + +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, ENCODE +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, ENCODE_t +from PyNucleus_base import uninitialized + +from . quadrature cimport sphericalQuadRule, sphericalQuadRule1D, sphericalQuadRule2D + + +cdef class function: + def __call__(self, REAL_t[::1] x): + return self.eval(x) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + pass + + def __add__(self, function other): + if isinstance(self, mulFunction): + if isinstance(other, mulFunction): + return sumFunction(self.f, self.fac, other.f, other.fac) + else: + return sumFunction(self.f, self.fac, other, 1.) + else: + if isinstance(other, mulFunction): + return sumFunction(self, 1., other.f, other.fac) + else: + return sumFunction(self, 1., other, 1.) + + def __sub__(self, function other): + if isinstance(self, mulFunction): + if isinstance(other, mulFunction): + return sumFunction(self.f, self.fac, other.f, -other.fac) + else: + return sumFunction(self.f, self.fac, other, -1.) + else: + if isinstance(other, mulFunction): + return sumFunction(self, 1., other.f, -other.fac) + else: + return sumFunction(self, 1., other, -1.) + + def __mul__(first, second): + if isinstance(first, function) and isinstance(second, function): + return prodFunction(first, second) + elif isinstance(first, function): + return mulFunction(first, second) + elif isinstance(second, function): + return mulFunction(second, first) + else: + return NotImplemented + + def __neg__(self): + if isinstance(self, mulFunction): + return mulFunction(self.f, -self.fac) + elif isinstance(self, sumFunction): + return sumFunction(self.f1, -self.fac1, self.f2, -self.fac2) + else: + return mulFunction(self, -1.0) + + def __repr__(self): + return '%s' % (self.__class__.__name__) + + +cdef class sumFunction(function): + cdef: + public function f1, f2 + public REAL_t fac1, fac2 + + def __init__(self, function f1, REAL_t fac1, function f2, REAL_t fac2): + self.f1 = f1 + self.fac1 = fac1 + self.f2 = f2 + self.fac2 = fac2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + return self.fac1*self.f1.eval(x)+self.fac2*self.f2.eval(x) + + def __repr__(self): + return '{}*{}+{}*{}'.format(self.fac1, self.f1, self.fac2, self.f2) + + +cdef class mulFunction(function): + cdef: + public function f + public REAL_t fac + + def __init__(self, function f, REAL_t fac): + self.f = f + self.fac = fac + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + return self.fac*self.f.eval(x) + + def __repr__(self): + return '{}*{}'.format(self.fac, self.f) + + +cdef class prodFunction(function): + cdef: + public function f1, f2 + + def __init__(self, function f1, function f2): + self.f1 = f1 + self.f2 = f2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + return self.f1.eval(x)*self.f2.eval(x) + + def __repr__(self): + return '{}*{}'.format(self.f1, self.f2) + + +# FIX: This doesn't outperform the version without memory +# Maybe I should create a binary tree on 
x, given that +# I know the interval of values +cdef class _memoized_sin: + cdef: + dict memory + int hit, miss + + def __init__(self): + self.memory = dict() + self.hit = 0 + self.miss = 0 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t x): + cdef REAL_t val + try: + val = self.memory[x] + self.hit += 1 + return val + except KeyError: + self.miss += 1 + val = sin(x) + self.memory[x] = val + return val + + def stats(self): + print(len(self.memory), self.hit, self.miss) + + +cdef _memoized_sin memoized_sin = _memoized_sin() + + +cdef class Lambda(function): + cdef: + object fun + + def __init__(self, fun): + self.fun = fun + + cdef inline REAL_t eval(self, REAL_t[::1] x): + return self.fun(x) + + +cdef class constant(function): + def __init__(self, REAL_t value): + self.value = value + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return self.value + + def __repr__(self): + return '{}'.format(self.value) + + +cdef class _rhsFunSin1D(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return pi**2.0*sin(pi*x[0]) + + +cdef class _solSin1D(function): + cdef: + REAL_t k + + def __init__(self, INDEX_t k=1): + self.k = k*pi + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return sin(self.k*x[0]) + + +cdef class _cos1D(function): + cdef: + REAL_t k + + def __init__(self, INDEX_t k=1): + self.k = k*pi + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return cos(self.k*x[0]) + + +cdef class _rhsFunSin2D(function): + cdef: + REAL_t k, l, fac + + def __init__(self, INDEX_t k=1, INDEX_t l=1): + self.k = k*pi + self.l = l*pi + self.fac = self.k**2 + self.l**2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return self.fac * sin(self.k*x[0])*sin(self.l*x[1]) + + +cdef class _cos2D(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return cos(pi*x[0])*cos(pi*x[1]) + + +cdef class _rhsCos2D(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return 2.0*pi**2*cos(pi*x[0])*cos(pi*x[1]) + + +cdef class _grad_cos2d_n(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if x[0] == 1.0: + return -sin(1.0)*cos(x[1]) + elif x[1] == 1.0: + return -sin(1.0)*cos(x[0]) + + +cdef class _solSin2D(function): + cdef: + REAL_t k, l + + def __init__(self, INDEX_t k=1, INDEX_t l=1): + self.k = k*pi + self.l = l*pi + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return sin(self.k*x[0])*sin(self.l*x[1]) + + +cdef class _rhsFunSin3D(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return 3.0*pi**2.0*sin(pi*x[0])*sin(pi*x[1])*sin(pi*x[2]) + + +cdef class _rhsFunSin3D_memoized(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return 3.0*pi**2.0*memoized_sin.eval(pi*x[0])*memoized_sin.eval(pi*x[1])*memoized_sin.eval(pi*x[2]) + + +cdef class _solSin3D(function): + cdef: + REAL_t k, l, m + + def __init__(self, INDEX_t k=1, INDEX_t l=1, INDEX_t m=1): + self.k = k*pi + self.l = l*pi + self.m = 
m*pi
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, REAL_t[::1] x):
+        return sin(self.k*x[0])*sin(self.l*x[1])*sin(self.m*x[2])
+
+
+cdef class _rhsBoundaryLayer2D(function):
+    cdef:
+        public REAL_t radius, c
+
+    def __init__(self, REAL_t radius=0.25, REAL_t c=100.0):
+        self.radius = radius
+        self.c = c
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, REAL_t[::1] x):
+        cdef:
+            REAL_t r, z
+        r = sqrt((x[0]-0.5)**2.0 + (x[1]-0.5)**2.0)
+        z = r**2.0 - self.radius**2.0
+        return -4.0*self.c/cosh(self.c*z)**2.0 + 8.0*self.c**2.0*r**2.0*sinh(self.c*z)/cosh(self.c*z)**3.0
+
+
+cdef class _solBoundaryLayer2D(function):
+    cdef:
+        public REAL_t radius, c
+
+    def __init__(self, REAL_t radius=0.25, REAL_t c=100.0):
+        self.radius = radius
+        self.c = c
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, REAL_t[::1] x):
+        cdef:
+            REAL_t r, z
+        r = sqrt((x[0]-0.5)**2.0 + (x[1]-0.5)**2.0)
+        z = r**2.0 - self.radius**2.0
+        return tanh(self.c*z)-1.0
+
+
+cdef class _solCornerSingularity2D(function):
+    cdef REAL_t twoThirds
+
+    def __init__(self):
+        self.twoThirds = 2.0/3.0
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, const REAL_t[::1] x):
+        cdef:
+            REAL_t y0, y1, r, theta
+        # singular solution u = r^(2/3) * sin(2*theta/3) around the corner at (1, 1)
+        y0, y1 = x[1]-1, -x[0]+1
+        r = sqrt(y0**2.0 + y1**2.0)
+        theta = atan2(y1, y0)
+        if theta < 0:
+            theta += 2.0*pi
+        return r**self.twoThirds*sin(self.twoThirds*theta)
+
+
+cdef class rhsBoundarySingularity2D(function):
+    cdef REAL_t alpha
+
+    def __init__(self, REAL_t alpha):
+        self.alpha = alpha
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, const REAL_t[::1] x):
+        if x[0] > 0:
+            return self.alpha*(1.-self.alpha)*x[0]**(self.alpha-2.)
+        else:
+            return 1000.
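+
+# Sanity check (illustrative sketch, not part of the library): away from the
+# boundary x[0] = 0, rhsBoundarySingularity2D returns -u'' for the manufactured
+# solution u(x) = x[0]**alpha (see solBoundarySingularity2D below), since
+# -d^2/dx^2 x**alpha = alpha*(1-alpha)*x**(alpha-2); the constant 1000 is only
+# a cap at the singular boundary. This can be verified symbolically:
+#
+#     import sympy as sp
+#     x, alpha = sp.symbols('x alpha', positive=True)
+#     u = x**alpha
+#     assert sp.simplify(-sp.diff(u, x, 2) - alpha*(1 - alpha)*x**(alpha - 2)) == 0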
+ + +cdef class solBoundarySingularity2D(function): + cdef REAL_t alpha + + def __init__(self, REAL_t alpha): + self.alpha = alpha + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, const REAL_t[::1] x): + return x[0]**self.alpha + + +cdef class _rhsFichera(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return -0.75*pow((x[0]-1.0)**2.0+(x[1]-1.0)**2.0+(x[2]-1.0)**2.0, -0.75) + + +cdef class _solFichera(function): + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return pow((x[0]-1.0)**2.0+(x[1]-1.0)**2.0+(x[2]-1.0)**2.0, 0.25) + + +cdef class rhsFunCos1DHeat(function): + cdef REAL_t t + + def __init__(self, REAL_t t): + function.__init__(self) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (cos(self.t)+pi**2.0*sin(self.t))*cos(pi*x[0]) + + +cdef class rhsFunSource1D(function): + cdef REAL_t a, b + + def __init__(self, REAL_t a, REAL_t b): + function.__init__(self) + self.a = a + self.b = b + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (self.a <= x[0]) and (x[0] < self.b) + + +cdef class solCos1DHeat(function): + cdef REAL_t t + + def __init__(self, REAL_t t): + function.__init__(self) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return sin(self.t)*cos(pi*x[0]) + + +cdef class rhsFunCos2DHeat(function): + cdef REAL_t t + + def __init__(self, REAL_t t): + function.__init__(self) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (cos(self.t)+2.0*pi**2.0*sin(self.t))*cos(pi*x[0])*cos(pi*x[1]) + + +cdef class rhsFunCos2DNonlinear(function): + cdef REAL_t t, k + + def __init__(self, REAL_t t, REAL_t k=2.): + function.__init__(self) + self.t = t + self.k = k + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return ((cos(self.t) + + 2.0*pi**2.0*sin(self.t))*cos(pi*x[0])*cos(pi*x[1]) - + (sin(self.t)*cos(pi*x[0])*cos(pi*x[1]))**self.k) + + +cdef class rhsFunCos2DNonlinear_U(function): + cdef REAL_t t, k + + def __init__(self, REAL_t t, REAL_t k=2.): + function.__init__(self) + self.t = t + self.k = k + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return ((cos(self.t) + + 2.0*pi**2.0*sin(self.t))*cos(pi*x[0])*cos(pi*x[1]) + + ((cos(self.t)*cos(pi*x[0])*cos(pi*x[1]))**self.k - + (sin(self.t)*cos(pi*x[0])*cos(pi*x[1]))**self.k)) + + +cdef class rhsFunCos2DNonlinear_V(function): + cdef REAL_t t, k + + def __init__(self, REAL_t t, REAL_t k=2.): + function.__init__(self) + self.t = t + self.k = k + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return ((-sin(self.t) + + 2.0*pi**2.0*cos(self.t))*cos(pi*x[0])*cos(pi*x[1]) + + ((sin(self.t)*cos(pi*x[0])*cos(pi*x[1]))**self.k - + (cos(self.t)*cos(pi*x[0])*cos(pi*x[1]))**self.k)) + + +cdef class solCos2DHeat(function): + cdef REAL_t t + + def __init__(self, REAL_t t): + function.__init__(self) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return sin(self.t)*cos(pi*x[0])*cos(pi*x[1]) + + +cdef class rhsFunSource2D(function): + cdef 
REAL_t[::1] a + cdef REAL_t r2 + + def __init__(self, REAL_t[::1] a, REAL_t r): + function.__init__(self) + self.a = a + self.r2 = r**2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (x[0]-self.a[0])**2+(x[1]-self.a[1])**2 < self.r2 + + +cdef class rhsTestGrayScott2D_U(function): + cdef REAL_t k, F, Du, Dv, t + + def __init__(self, REAL_t k, REAL_t F, REAL_t Du, REAL_t Dv, REAL_t t): + function.__init__(self) + self.k = k + self.F = F + self.Du = Du + self.Dv = Dv + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t u, v + u = sin(self.t)*cos(pi*x[0])*cos(pi*x[1]) + v = cos(self.t)*cos(pi*x[0])*cos(pi*x[1]) + return v+2*pi**2*self.Du*u+u*v**2-self.F*(1-u) + + +cdef class rhsTestGrayScott2D_V(function): + cdef REAL_t k, F, Du, Dv, t + + def __init__(self, REAL_t k, REAL_t F, REAL_t Du, REAL_t Dv, REAL_t t): + function.__init__(self) + self.k = k + self.F = F + self.Du = Du + self.Dv = Dv + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t u, v + u = sin(self.t)*cos(pi*x[0])*cos(pi*x[1]) + v = cos(self.t)*cos(pi*x[0])*cos(pi*x[1]) + return -u+2*pi**2*self.Dv*v-u*v**2+(self.k+self.F)*v + + +cdef class solFractional(function): + cdef REAL_t s, fac, radius2 + cdef INDEX_t dim + + def __init__(self, REAL_t s, INDEX_t dim, REAL_t radius=1.0): + function.__init__(self) + from scipy.special import gamma + self.s = s + self.dim = dim + self.radius2 = radius**2 + self.fac = 2.**(-2.*s)*gamma(dim/2.)/gamma((dim+2.*s)/2.)/gamma(1.+s) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r2 = 0. + cdef INDEX_t i + for i in range(self.dim): + r2 += x[i]**2 + if r2 <= self.radius2: + return self.fac*(1.-r2/self.radius2)**self.s + else: + return 0. + + +from scipy.special import eval_jacobi as jacobi + + +cdef class rhsFractional1D(function): + cdef: + public REAL_t s + REAL_t fac + public INDEX_t n + + def __init__(self, REAL_t s, INDEX_t n): + from scipy.special import gamma + function.__init__(self) + self.s = s + self.n = n + self.fac = 2.**(2.*s)*gamma(0.5+s+n)*gamma(1.+s+n)/gamma(1.+n)/gamma(0.5+n) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r2 = 0. + r2 = x[0]**2 + if r2 <= 1.: + return self.fac * jacobi(self.n, self.s, -0.5, 2.*r2-1.) + else: + return 0. + + +cdef class solFractional1D(function): + cdef: + public REAL_t s + public INDEX_t n + + def __init__(self, REAL_t s, INDEX_t n): + function.__init__(self) + self.s = s + self.n = n + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r2 = 0. + r2 = x[0]**2 + if r2 <= 1.: + return (1.-r2)**self.s * jacobi(self.n, self.s, -0.5, 2.*r2-1.) + else: + return 0. 
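+
+# Usage sketch (illustrative only): rhsFractional1D and solFractional1D are
+# built as a manufactured-solution pair for the fractional Laplacian on (-1, 1)
+# with homogeneous exterior condition, i.e. applying (-Delta)^s to
+# solFractional1D is intended to give rhsFractional1D; both vanish outside the
+# unit interval. Once this module is compiled, they can be evaluated pointwise
+# like any other `function` instance:
+#
+#     import numpy as np
+#     s, n = 0.75, 2
+#     u = solFractional1D(s, n)
+#     f = rhsFractional1D(s, n)
+#     x = np.array([0.3], dtype=np.float64)
+#     u(x), f(x)          # both are 0. for |x[0]| > 1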
+ + +cdef class rhsFractional2D(function): + cdef: + public REAL_t s + public REAL_t angular_shift + public INDEX_t l + public INDEX_t n + REAL_t fac + + def __init__(self, REAL_t s, INDEX_t l, INDEX_t n, REAL_t angular_shift=0.): + function.__init__(self) + from scipy.special import gamma + self.s = s + self.l = l + self.n = n + self.angular_shift = angular_shift + self.fac = 2.**(2.*s)*gamma(1.+s+n)*gamma(1.+l+s+n)/gamma(1+n)/gamma(1.+l+n) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r2 = 0., theta = atan2(x[1], x[0]) + r2 = x[0]**2+x[1]**2 + if r2 <= 1.: + return self.fac*r2**(0.5*self.l)*cos(self.l*(theta+self.angular_shift))*jacobi(self.n, self.s, self.l, 2.*r2-1.) + else: + return 0. + + +cdef class solFractional2D(function): + cdef: + public REAL_t s + public REAL_t angular_shift + public INDEX_t l + public INDEX_t n + + def __init__(self, REAL_t s, INDEX_t l, INDEX_t n, REAL_t angular_shift=0.): + function.__init__(self) + self.s = s + self.l = l + self.n = n + self.angular_shift = angular_shift + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r2 = 0., theta = atan2(x[1], x[0]) + r2 = x[0]**2+x[1]**2 + if r2 <= 1.: + return (1.-r2)**self.s*r2**(0.5*self.l)*cos(self.l*(theta+self.angular_shift))*jacobi(self.n, self.s, self.l, 2.*r2-1.) + else: + return 0. + + +cdef class rhsFractional2Dcombination(function): + cdef list functions + + def __init__(self, REAL_t s, params): + function.__init__(self) + self.functions = [rhsFractional2D(s, **p) for p in params] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t val = 0. + INDEX_t i + function f + for i in range(len(self.functions)): + f = self.functions[i] + val += f.eval(x) + return val + + +cdef class solFractional2Dcombination(function): + cdef list functions + + def __init__(self, REAL_t s, params): + function.__init__(self) + self.functions = [solFractional2D(s, **p) for p in params] + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t val = 0. 
+ INDEX_t i + function f + for i in range(len(self.functions)): + f = self.functions[i] + val += f.eval(x) + return val + + + +cdef class rhsTestFractional_U(function): + cdef REAL_t t + cdef function sol + + def __init__(self, REAL_t s, INDEX_t dim, REAL_t t, REAL_t radius=1.0): + function.__init__(self) + self.sol = solFractional(s, dim, radius) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t u = self.sol.eval(x) + return cos(self.t)*u + (cos(self.t)**2-sin(self.t)**2)*u**2 + sin(self.t) + + +cdef class rhsTestFractional_V(function): + cdef REAL_t t + cdef function sol + + def __init__(self, REAL_t s, INDEX_t dim, REAL_t t, REAL_t radius=1.0): + function.__init__(self) + self.sol = solFractional(s, dim, radius) + self.t = t + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t u = self.sol.eval(x) + return -sin(self.t)*u + (-cos(self.t)**2+sin(self.t)**2)*u**2 + cos(self.t) + + +cdef class rhsFractionalBrusselator_U(function): + cdef REAL_t t, B, Q, eta, radius2s + cdef function solU, solV + + def __init__(self, REAL_t s1, REAL_t s2, + REAL_t B, REAL_t Q, REAL_t eta, + INDEX_t dim, REAL_t t, REAL_t radius=1.0): + function.__init__(self) + self.solU = solFractional(s1, dim, radius) + self.solV = solFractional(s2, dim, radius) + self.B = B + self.Q = Q + self.eta = eta + self.t = t + self.radius2s = radius**(2.*s1) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t u0 = self.solU.eval(x)*self.eta + REAL_t v0 = self.solV.eval(x)/self.eta + REAL_t s = sin(self.t) + REAL_t c = cos(2.*self.t) + REAL_t u = u0*s + REAL_t v = v0*c + return (cos(self.t)*u0) + s*self.eta/self.radius2s - ((self.B-1.)*u + self.Q**2*v + self.B/self.Q*u**2 + 2.*self.Q*u*v + u**2*v) + + +cdef class rhsFractionalBrusselator_V(function): + cdef REAL_t t, B, Q, eta, radius2s + cdef function solU, solV + + def __init__(self, REAL_t s1, REAL_t s2, + REAL_t B, REAL_t Q, REAL_t eta, + INDEX_t dim, REAL_t t, REAL_t radius=1.0): + function.__init__(self) + self.solU = solFractional(s1, dim, radius) + self.solV = solFractional(s2, dim, radius) + self.B = B + self.Q = Q + self.eta = eta + self.t = t + self.radius2s = radius**(2.*s2) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t u0 = self.solU.eval(x)*self.eta + REAL_t v0 = self.solV.eval(x)/self.eta + REAL_t s = sin(self.t) + REAL_t c = cos(2.*self.t) + REAL_t u = u0*s + REAL_t v = v0*c + return self.eta**2*(-2.*sin(2.*self.t)*v0) + c/self.eta/self.radius2s + (self.B*u + self.Q**2*v + self.B/self.Q*u**2 + 2.*self.Q*u*v + u**2*v) + + +cdef class simpleAnisotropy(function): + cdef REAL_t epsilon + + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if x[0] < 0.5: + return 1.0 + else: + return self.epsilon + + +cdef class simpleAnisotropy2(function): + cdef REAL_t epsilon + + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if (x[0] < 0.5) == (x[1] < 0.5): + return 1.0 + else: + return self.epsilon + + +cdef class inclusions(function): + cdef REAL_t epsilon + + def __init__(self, epsilon=0.1): + self.epsilon = epsilon + + @cython.wraparound(False) + 
@cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if (x[0] % 0.4 > 0.2) and (x[1] % 0.4 > 0.2): + return self.epsilon + else: + return 1.0 + + +cdef class inclusionsHong(function): + cdef REAL_t epsilon + + def __init__(self, epsilon=0.1): + self.epsilon = epsilon/2. + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if ((x[0]+1.+self.epsilon)**2+x[1]**2 < 1.) or ((x[0]-1.-self.epsilon)**2+x[1]**2 < 1.): + return 0.1 + else: + return 1.0 + + +cdef inline REAL_t segmentRadius(REAL_t theta, REAL_t R, REAL_t theta1, REAL_t theta2, INDEX_t k): + cdef: + INDEX_t n + REAL_t Rmid, thetamid + n = (k*(theta-theta1)/(theta2-theta1)) + theta1, theta2 = theta1+n*(theta2-theta1)/k, theta1+(n+1)*(theta2-theta1)/k + Rmid = R*cos((theta2-theta1)/2.0) + thetamid = (theta1+theta2)/2.0 + return Rmid/cos(theta-thetamid) + + +cdef class motorPermeability(function): + cdef: + REAL_t thetaRotor, thetaRotor2, thetaStator, thetaCoil, epsilon + REAL_t rRotorIn, rRotorOut, rStatorIn, rStatorOut, rCoilIn, rCoilOut + INDEX_t nRotorIn, nRotorOut, nStatorIn, nStatorOut + + def __init__(self, + epsilon=1.0/5200.0, + thetaRotor=pi/12.0, + thetaCoil=pi/32.0, + rRotorIn=0.375, + rRotorOut=0.5, + rStatorIn=0.875, + rStatorOut=0.52, + rCoilIn=0.8, + rCoilOut=0.55, + nRotorOut=4, + nRotorIn=8, + nStatorOut=4, + nStatorIn=8): + self.epsilon = epsilon + self.thetaRotor = thetaRotor + self.thetaCoil = thetaCoil + self.rRotorIn = rRotorIn + self.rRotorOut = rRotorOut + self.rStatorIn = rStatorIn + self.rStatorOut = rStatorOut + self.rCoilIn = rCoilIn + self.rCoilOut = rCoilOut + # self.thetaRotor2 = np.arctan2(rRotorOut*sin(thetaRotor),sqrt(rRotorIn**2-rRotorOut**2*sin(thetaRotor)**2)) + # self.thetaStator = np.arctan2(rStatorOut*sin(thetaRotor),sqrt(rStatorIn**2-rStatorOut**2*sin(thetaRotor)**2)) + self.thetaRotor2 = atan2(rRotorOut*sin(thetaRotor), sqrt(rRotorIn**2-rRotorOut**2*sin(thetaRotor)**2)) + self.thetaStator = atan2(rStatorOut*sin(thetaRotor), sqrt(rStatorIn**2-rStatorOut**2*sin(thetaRotor)**2)) + self.nRotorIn = nRotorIn + self.nRotorOut = nRotorOut + self.nStatorIn = nStatorIn + self.nStatorOut = nStatorOut + + cdef inline BOOL_t inRotor(self, REAL_t[::1] x): + cdef: + REAL_t r, theta, eps = 1e-6 + INDEX_t k + r = sqrt(x[0]**2.0+x[1]**2.0) + # theta = np.arctan2(x[1], x[0]) + theta = atan2(x[1], x[0]) + k = ((theta+pi/4.0) // (pi/2.0)) + theta = abs(theta - (k * pi/2.0)) + if self.thetaRotor2 < theta: + return r < segmentRadius(theta, self.rRotorIn, self.thetaRotor2, pi/2-self.thetaRotor2, self.nRotorIn)-eps + if theta < self.thetaRotor: + return r < segmentRadius(theta, self.rRotorOut, -self.thetaRotor, self.thetaRotor, self.nRotorOut)-eps + y = r*sin(theta) + return y < self.rRotorOut*sin(self.thetaRotor)-eps + + cdef inline BOOL_t inStator(self, REAL_t[::1] x): + cdef: + REAL_t r, theta, eps = 1e-6 + INDEX_t k + + r = sqrt(x[0]**2.0+x[1]**2.0) + # theta = np.arctan2(x[1], x[0]) + theta = atan2(x[1], x[0]) + k = (theta // (pi/3.0)) + theta = abs(theta - pi/6.0 - k * pi/3.0) + if theta > self.thetaRotor: + return r > segmentRadius(theta, self.rStatorIn, self.thetaStator, pi/3.0-self.thetaStator, self.nStatorIn)+eps + if theta < self.thetaStator: + return r > segmentRadius(theta, self.rStatorOut, -self.thetaRotor, self.thetaRotor, self.nStatorOut)+eps + + y = r*sin(theta) + if y < self.rStatorOut*sin(self.thetaRotor)-eps: + return r > segmentRadius(theta, self.rStatorOut, -self.thetaRotor, self.thetaRotor, self.nStatorOut)+eps + else: 
+ return r > segmentRadius(theta, self.rStatorIn, self.thetaStator, pi/3.0-self.thetaStator, self.nStatorIn)+eps + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if self.inRotor(x): + return self.epsilon + if self.inStator(x): + return self.epsilon + return 1.0 + + +cdef class _rhsMotor(function): + cdef: + REAL_t thetaRotor, thetaCoil + REAL_t rRotorIn, rRotorOut, rStratorIn, rStratorOut, rCoilIn, rCoilOut + + def __init__(self, + thetaRotor=pi/12.0, + thetaCoil=pi/24.0, + rRotorIn=0.375, + rRotorOut=0.5, + rStratorIn=0.875, + rStratorOut=0.52, + rCoilIn=0.8, + rCoilOut=0.55): + self.thetaRotor = thetaRotor + self.thetaCoil = thetaCoil + self.rRotorIn = rRotorIn + self.rRotorOut = rRotorOut + self.rStratorIn = rStratorIn + self.rStratorOut = rStratorOut + self.rCoilIn = rCoilIn + self.rCoilOut = rCoilOut + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef REAL_t r, theta, s + r = sqrt(x[0]**2.0+x[1]**2.0) + # theta = atan2(x[1], x[0]) + if x[0] < 0: + # theta = np.arctan2(x[1], -x[0]) + theta = atan2(x[1], -x[0]) + s = np.sign(theta) + theta = abs(theta) + else: + # theta = np.arctan2(x[1], x[0]) + theta = atan2(x[1], x[0]) + s = -np.sign(theta) + theta = abs(theta) + if theta >= self.thetaCoil and theta <= self.thetaRotor: + if r > self.rCoilOut and r < self.rCoilIn: + # return 1.0*s + return 0. + elif theta >= 3.0*self.thetaRotor and theta <= 3.0*self.thetaRotor+self.thetaCoil: + if r > self.rCoilOut and r < self.rCoilIn: + # return s + return 0. + elif theta >= pi/2.0-self.thetaRotor-self.thetaCoil and theta <= pi/2.0-self.thetaRotor: + if r > self.rCoilOut and r < self.rCoilIn: + return 1.0*s + return 0. + + +cdef class rhsMotor(function): + cdef: + list coilPairOn + REAL_t dist1, dist2, rCoilOut, rCoilIn + + def __init__(self, coilPairOn=[0, 1, 2]): + self.coilPairOn = coilPairOn + self.dist1 = 0.16 + self.dist2 = 0.25 + self.rCoilIn = 0.8 + self.rCoilOut = 0.55 + + cdef inline REAL_t eval(self, REAL_t[::1] z): + cdef: + REAL_t r, theta, x, y + INDEX_t k + + r = sqrt(z[0]**2.0+z[1]**2.0) + # theta = np.arctan2(z[1], z[0]) + theta = atan2(z[1], z[0]) + k = (theta // (pi/3.0)) + if k not in self.coilPairOn and k+3 not in self.coilPairOn: + return 0. + theta -= pi/6.0 + k * pi/3.0 + x, y = r*cos(theta), r*sin(theta) + if self.dist1 < y < self.dist2 and self.rCoilOut < x < self.rCoilIn: + return 1.0 + elif self.dist1 < -y < self.dist2 and self.rCoilOut < x < self.rCoilIn: + return -1.0 + else: + return 0. 
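+
+# Usage sketch (illustrative only): rhsMotor switches individual coil pairs of
+# the motor geometry on and off. Each entry of coilPairOn in {0, 1, 2} activates
+# one pair of opposing 60-degree sectors (k and k+3), with source +1 in one
+# winding of a coil, -1 in the other, and 0 everywhere else:
+#
+#     import numpy as np
+#     rhs = rhsMotor(coilPairOn=[0])              # only the first coil pair on
+#     z = np.array([0.7, 0.2], dtype=np.float64)
+#     rhs(z)                                      # one of -1.0, 0.0, 1.0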
+ + +cpdef function rhsHr(REAL_t r, INDEX_t dim, REAL_t scaling=1.): + if r == 0.5: + return constant(scaling) + else: + if dim == 1: + return rhsHr1D(r, scaling) + if dim == 2: + return rhsHr2D(r, scaling) + if dim == 3: + return rhsHr3D(r, scaling) + else: + raise NotImplementedError() + + +cdef class rhsHr1D(function): + cdef REAL_t beta, scaling + + def __init__(self, REAL_t r, REAL_t scaling=1.): + self.beta = r-0.5 + self.scaling = scaling + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return self.scaling*pow(x[0]*(1.-x[0]), self.beta) + + +cdef class rhsHr2D(function): + cdef REAL_t beta, scaling + + def __init__(self, REAL_t r, REAL_t scaling=1.): + self.beta = r-0.5 + self.scaling = scaling + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (self.scaling * + pow(x[0]*(1.-x[0]), self.beta) * + pow(x[1]*(1.-x[1]), self.beta)) + + +cdef class rhsHr3D(function): + cdef REAL_t beta, scaling + + def __init__(self, REAL_t r, REAL_t scaling=1.): + self.beta = r-0.5 + self.scaling = scaling + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (self.scaling * + pow(x[0]*(1.-x[0]), self.beta) * + pow(x[1]*(1.-x[1]), self.beta) * + pow(x[2]*(1.-x[2]), self.beta)) + + +cdef class rhsHr2Ddisk(function): + cdef REAL_t beta, scaling + + def __init__(self, REAL_t r, REAL_t scaling=1.): + self.beta = r-0.5 + self.scaling = scaling + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return (self.scaling * + pow(min(1.-pow(x[0], 2)-pow(x[1], 2), 1.), self.beta)) + + +cdef class logDiffusion1D(function): + cdef: + REAL_t[::1] c + + def __init__(self, REAL_t[::1] c): + self.c = c + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t S + INDEX_t i + S = 0. + for i in range(1, self.c.shape[0]+1): + S += self.c[i-1]*sin(i*pi*x[0]) + return exp(S) + + +cdef class logDiffusion2D(function): + cdef: + REAL_t[:, ::1] c + + def __init__(self, REAL_t[:, ::1] c): + self.c = c + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t S, sx + INDEX_t i, j + S = 0. + for i in range(1, self.c.shape[0]+1): + sx = sin(i*pi*x[0]) + for j in range(1, self.c.shape[1]+1): + S += self.c[i-1, j-1] * sx*sin(j*pi*x[1]) + return exp(S) + + +cdef class fractalDiffusivity(function): + cdef: + REAL_t maxVal, offset + + def __init__(self, REAL_t maxVal, REAL_t offset): + self.maxVal = maxVal + self.offset = offset + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t a = self.maxVal + INDEX_t i + for i in range(x.shape[0]): + if x[i] > 0.: + a = min(2**(-floor(log2(x[i]+self.offset))), a) + return a + + +cdef class expDiffusivity(function): + cdef: + REAL_t growth, frequency + + def __init__(self, REAL_t growth, REAL_t frequency): + self.growth = growth + self.frequency = frequency + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t r2 = 0. 
+            INDEX_t i
+        for i in range(x.shape[0]):
+            r2 += x[i]**2
+        return exp(self.growth*r2)*(2+cos(self.frequency*r2))
+
+
+######################################################################
+# eigenfunctions for Laplacian on unit disc
+
+# jn(n, x) is the Bessel function of the first kind from the C math library
+cdef extern from "<math.h>" nogil:
+    double jn(int n, double x)
+
+from scipy.special import jn_zeros
+jv = jn
+
+cdef inline REAL_t norm(const REAL_t[::1] a):
+    cdef REAL_t n = 0.
+    cdef INDEX_t i
+    for i in range(a.shape[0]):
+        n += a[i]*a[i]
+    return sqrt(n)
+
+
+cdef class eigfun_disc(function):
+    cdef:
+        INDEX_t k, l
+        REAL_t a_lk, C
+    def __init__(self, k, l):
+        function.__init__(self)
+        self.k = k
+        self.l = l
+        if l == 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = 1.0/(sqrt(pi)*jv(l+1, self.a_lk))
+        elif l > 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk))
+        else:
+            l = -l
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk))
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, REAL_t[::1] x):
+        cdef INDEX_t l
+        if self.l == 0:
+            return self.C * jv(self.l, self.a_lk*norm(x))
+        elif self.l > 0:
+            return self.C * jv(self.l, self.a_lk*norm(x)) * cos(self.l*atan2(x[1], x[0]))
+        else:
+            l = -self.l
+            return self.C * jv(l, self.a_lk*norm(x)) * sin(l*atan2(x[1], x[0]))
+
+
+cdef class eigfun_disc_deriv_x(function):
+    cdef:
+        INDEX_t k, l
+        REAL_t a_lk, C
+    def __init__(self, k, l):
+        function.__init__(self)
+        self.k = k
+        self.l = l
+        if l == 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = 1.0/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+        elif l > 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+        else:
+            l = -l
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef inline REAL_t eval(self, REAL_t[::1] x):
+        cdef:
+            INDEX_t l
+            REAL_t theta, r, jm, jp
+        theta = atan2(x[1], x[0])
+        r = norm(x)
+        if self.l == 0:
+            jm = jv(self.l-1, self.a_lk*r)
+            jp = jv(self.l+1, self.a_lk*r)
+            return self.C * (jm-jp) * cos(theta)
+        elif self.l > 0:
+            jm = jv(self.l-1, self.a_lk*r)
+            jp = jv(self.l+1, self.a_lk*r)
+            return self.C * ((jm-jp) * cos(self.l*theta) * cos(theta) +
+                             (jm+jp) * sin(self.l*theta) * sin(theta))
+        else:
+            l = -self.l
+            jm = jv(l-1, self.a_lk*r)
+            jp = jv(l+1, self.a_lk*r)
+            return self.C * ((jm-jp) * sin(l*theta) * cos(theta) -
+                             (jm+jp) * cos(l*theta) * sin(theta))
+
+
+cdef class eigfun_disc_deriv_y(function):
+    cdef:
+        INDEX_t k, l
+        REAL_t a_lk, C
+    def __init__(self, k, l):
+        function.__init__(self)
+        self.k = k
+        self.l = l
+        if l == 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = 1.0/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+        elif l > 0:
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+        else:
+            l = -l
+            self.a_lk = jn_zeros(l, k+1)[k]
+            self.C = sqrt(2)/(sqrt(pi)*jv(l+1, self.a_lk)) * self.a_lk/2.
+ + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t l + REAL_t theta, r, jm, jp + theta = atan2(x[1], x[0]) + r = norm(x) + if self.l == 0: + jm = jv(self.l-1, self.a_lk*r) + jp = jv(self.l+1, self.a_lk*r) + return self.C * (jm-jp) * cos(theta) + elif self.l > 0: + jm = jv(self.l-1, self.a_lk*r) + jp = jv(self.l+1, self.a_lk*r) + return self.C * ((jm-jp) * cos(self.l*theta) * sin(theta) - + (jm+jp) * sin(self.l*theta) * cos(theta)) + else: + l = -self.l + jm = jv(l-1, self.a_lk*r) + jp = jv(l+1, self.a_lk*r) + return self.C * ((jm-jp) * sin(l*theta) * sin(theta) - + (jm+jp) * cos(l*theta) * cos(theta)) + + +cdef class radialIndicator(function): + cdef: + REAL_t radius + BOOL_t centerIsOrigin + REAL_t[::1] center + + def __init__(self, REAL_t radius, REAL_t[::1] center=None): + self.radius = radius**2 + if center is None: + self.centerIsOrigin = True + else: + self.centerIsOrigin = False + self.center = center + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t r = 0. + INDEX_t i + if self.centerIsOrigin: + for i in range(x.shape[0]): + r += x[i]*x[i] + else: + for i in range(x.shape[0]): + r += (x[i]-self.center[i])*(x[i]-self.center[i]) + return r <= self.radius + + +cdef class squareIndicator(function): + cdef REAL_t[::1] a, b + + def __init__(self, REAL_t[::1] a, REAL_t[::1] b): + self.a = a + self.b = b + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i + for i in range(x.shape[0]): + if (x[i] < self.a[i]) or (x[i] > self.b[i]): + return False + return True + + +cdef class proj(function): + cdef: + function f + REAL_t a, b + function lower, upper + BOOL_t lowerFun, upperFun + + def __init__(self, function f, tuple bounds): + self.f = f + self.lowerFun = isinstance(bounds[0], function) + if self.lowerFun: + self.lower = bounds[0] + else: + self.a = bounds[0] + self.upperFun = isinstance(bounds[1], function) + if self.upperFun: + self.upper = bounds[1] + else: + self.b = bounds[1] + + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t a, b + if self.lowerFun: + a = self.lower(x) + else: + a = self.a + if self.upperFun: + b = self.upper(x) + else: + b = self.b + return max(a, min(b, self.f.eval(x))) + + +cdef class coordinate(function): + cdef: + INDEX_t i + + def __init__(self, INDEX_t i): + self.i = i + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + return x[self.i] + + +cdef class indicatorFunctor(function): + cdef: + function indicator + function f + + def __init__(self, function f, function indicator): + self.f = f + self.indicator = indicator + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + if self.indicator(x) > 1e-9: + return self.f(x) + else: + return 0. 
+ + def __repr__(self): + return '({} if {}>0)'.format(self.f, self.indicator) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline REAL_t evalPL1D(REAL_t[:, ::1] simplex, REAL_t[::1] uloc, REAL_t[::1] x): + cdef: + REAL_t l + l = (x[0]-simplex[1, 0]) / (simplex[0, 0]-simplex[1, 0]) + return l*uloc[0] + (1.-l)*uloc[1] + + +cdef class lookupFunction1D(function): + cdef: + REAL_t[:, ::1] coords + REAL_t[::1] vals + list tree + INDEX_t dim + REAL_t[:, ::1] simplex + REAL_t[::1] uloc + + def __init__(self, REAL_t[:, ::1] coords, REAL_t[::1] vals): + self.coords = coords + self.dim = coords.shape[1] + self.vals = vals + from scipy.spatial import cKDTree + self.tree = [cKDTree(coords)] + self.simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + self.uloc = uninitialized((self.dim+1), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j + INDEX_t[::1] idx + + idx = self.tree[0].query(x, self.dim+1)[1].astype(INDEX) + for i in range(self.dim+1): + for j in range(self.dim): + self.simplex[i, j] = self.coords[idx[i], j] + for i in range(self.dim+1): + self.uloc[i] = self.vals[idx[i]] + return evalPL1D(self.simplex, self.uloc, x) + + +cdef class lookupFunctionTensor1DNew(function): + cdef: + REAL_t[:, ::1] coordsX + REAL_t[::1] vals + REAL_t[:, ::1] simplex + REAL_t[::1] uloc + INDEX_t N + + def __init__(self, REAL_t[:, ::1] coordsX, REAL_t[::1] vals, INDEX_t N): + self.coordsX = coordsX + self.vals = vals + self.simplex = uninitialized((2, 1), dtype=REAL) + self.uloc = uninitialized((2), dtype=REAL) + self.N = N + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, idxX + idxX = (self.N*x[0]) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX+i, 0] + for i in range(2): + self.uloc[i] = self.vals[idxX+i] + return evalPL1D(self.simplex, self.uloc, x) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline REAL_t evalPLTensor2D(REAL_t[:, ::1] simplex, REAL_t[:, ::1] uloc, REAL_t[::1] x): + cdef: + REAL_t lX, lY + lX = (x[0]-simplex[1, 0]) / (simplex[0, 0]-simplex[1, 0]) + lY = (x[1]-simplex[1, 1]) / (simplex[0, 1]-simplex[1, 1]) + return lX*lY*uloc[0, 0] + (1.-lX)*lY*uloc[1, 0] + lX*(1.-lY)*uloc[0, 1] + (1.-lX)*(1.-lY)*uloc[1, 1] + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline REAL_t evalPLTensor3D(REAL_t[:, ::1] simplex, REAL_t[:, :, ::1] uloc, REAL_t[::1] x): + cdef: + REAL_t lX, lY, lZ + lX = (x[0]-simplex[1, 0]) / (simplex[0, 0]-simplex[1, 0]) + lY = (x[1]-simplex[1, 1]) / (simplex[0, 1]-simplex[1, 1]) + lZ = (x[2]-simplex[1, 2]) / (simplex[0, 2]-simplex[1, 2]) + return (lX*lY*lZ*uloc[0, 0, 0] + (1.-lX)*lY*lZ*uloc[1, 0, 0] + + lX*(1.-lY)*lZ*uloc[0, 1, 0] + (1.-lX)*(1.-lY)*lZ*uloc[1, 1, 0] + + lX*lY*(1.-lZ)*uloc[0, 0, 1] + (1.-lX)*lY*(1.-lZ)*uloc[1, 0, 1] + + lX*(1.-lY)*(1.-lZ)*uloc[0, 1, 1] + (1.-lX)*(1.-lY)*(1.-lZ)*uloc[1, 1, 1]) + + +cdef class lookupFunctionTensor2D(function): + cdef: + REAL_t[:, ::1] coordsX, coordsY + REAL_t[:, ::1] vals + list trees + REAL_t[:, ::1] simplex + REAL_t[:, ::1] uloc + REAL_t[::1] q + + def __init__(self, REAL_t[:, ::1] coordsX, REAL_t[:, ::1] coordsY, REAL_t[:, ::1] vals): + self.coordsX = coordsX + self.coordsY = coordsY + self.vals = vals + from 
scipy.spatial import cKDTree + self.trees = [cKDTree(coordsX), cKDTree(coordsY)] + self.simplex = uninitialized((2, 2), dtype=REAL) + self.uloc = uninitialized((2, 2), dtype=REAL) + self.q = uninitialized((1), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j + INDEX_t[::1] idxX, idxY + self.q[0] = x[0] + idxX = self.trees[0].query(self.q, 2)[1].astype(INDEX) + self.q[0] = x[1] + idxY = self.trees[1].query(self.q, 2)[1].astype(INDEX) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX[i], 0] + self.simplex[i, 1] = self.coordsY[idxY[i], 0] + for i in range(2): + for j in range(2): + self.uloc[i, j] = self.vals[idxX[i], idxY[j]] + return evalPLTensor2D(self.simplex, self.uloc, x) + + +cdef class lookupFunctionTensor2DNew(function): + cdef: + REAL_t[:, ::1] coordsX, coordsY + REAL_t[:, ::1] vals + REAL_t[:, ::1] simplex + REAL_t[:, ::1] uloc + INDEX_t N + + def __init__(self, REAL_t[:, ::1] coordsX, REAL_t[:, ::1] coordsY, REAL_t[:, ::1] vals, INDEX_t N): + self.coordsX = coordsX + self.coordsY = coordsY + self.vals = vals + self.simplex = uninitialized((2, 2), dtype=REAL) + self.uloc = uninitialized((2, 2), dtype=REAL) + self.N = N + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j, idxX, idxY + idxX = (self.N*x[0]) + idxY = (self.N*x[1]) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX+i, 0] + self.simplex[i, 1] = self.coordsY[idxY+i, 0] + for i in range(2): + for j in range(2): + self.uloc[i, j] = self.vals[idxX+i, idxY+j] + return evalPLTensor2D(self.simplex, self.uloc, x) + # assert np.isfinite(res), (np.array(x), idxX, idxY, np.array(self.simplex), np.array(self.uloc), self.N) + # return res + + +cdef class lookupFunctionTensor2DNewSym(function): + cdef: + REAL_t[::1] coordsX, coordsY + REAL_t[:, ::1] vals + REAL_t[:, ::1] simplex + REAL_t[:, ::1] uloc + REAL_t[::1] xTemp + INDEX_t N + + def __init__(self, + REAL_t[::1] coordsX, + REAL_t[::1] coordsY, + REAL_t[:, ::1] vals): + self.coordsX = coordsX + self.coordsY = coordsY + self.vals = vals + self.simplex = uninitialized((2, 2), dtype=REAL) + self.uloc = uninitialized((2, 2), dtype=REAL) + self.N = 2*vals.shape[0]-1 + assert self.coordsX.shape[0] == self.N + assert self.coordsY.shape[0] == self.N + self.xTemp = uninitialized((2), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j, idxX, idxY, I, J + + for i in range(2): + if x[i] > 0.5: + self.xTemp[i] = 1.0-x[i] + else: + self.xTemp[i] = x[i] + idxX = (self.N*self.xTemp[0]) + idxY = (self.N*self.xTemp[1]) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX+i] + self.simplex[i, 1] = self.coordsY[idxY+i] + if (idxX+1 < self.vals.shape[0]) and (idxY+1 < self.vals.shape[1]): + for i in range(2): + for j in range(2): + self.uloc[i, j] = self.vals[idxX+i, idxY+j] + else: + for i in range(2): + if idxX+i >= self.vals.shape[0]: + I = self.vals.shape[0]-2 + else: + I = idxX+i + for j in range(2): + if idxY+j >= self.vals.shape[1]: + J = self.vals.shape[1]-2 + else: + J = idxY+j + self.uloc[i, j] = self.vals[I, J] + return evalPLTensor2D(self.simplex, self.uloc, self.xTemp) + + +cdef class lookupFunctionTensor3D(function): + cdef: + REAL_t[:, ::1] coordsX, coordsY, coordsZ + REAL_t[:, :, ::1] vals + list trees + REAL_t[:, ::1] simplex + REAL_t[:, :, ::1] uloc + REAL_t[::1] q + + def __init__(self, + 
REAL_t[:, ::1] coordsX, + REAL_t[:, ::1] coordsY, + REAL_t[:, ::1] coordsZ, + REAL_t[:, :, ::1] vals): + self.coordsX = coordsX + self.coordsY = coordsY + self.coordsZ = coordsZ + self.vals = vals + from scipy.spatial import cKDTree + self.trees = [cKDTree(coordsX), cKDTree(coordsY), cKDTree(coordsZ)] + self.simplex = uninitialized((2, 3), dtype=REAL) + self.uloc = uninitialized((2, 2, 2), dtype=REAL) + self.q = uninitialized((1), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j, k + INDEX_t[::1] idxX, idxY, idxZ + self.q[0] = x[0] + idxX = self.trees[0].query(self.q, 2)[1].astype(INDEX) + self.q[0] = x[1] + idxY = self.trees[1].query(self.q, 2)[1].astype(INDEX) + self.q[0] = x[2] + idxZ = self.trees[2].query(self.q, 2)[1].astype(INDEX) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX[i], 0] + self.simplex[i, 1] = self.coordsY[idxY[i], 0] + self.simplex[i, 2] = self.coordsZ[idxZ[i], 0] + for i in range(2): + for j in range(2): + for k in range(2): + self.uloc[i, j, k] = self.vals[idxX[i], idxY[j], idxZ[k]] + return evalPLTensor3D(self.simplex, self.uloc, x) + + +cdef class lookupFunctionTensor3DNew(function): + cdef: + REAL_t[:, ::1] coordsX, coordsY, coordsZ + REAL_t[:, :, ::1] vals + REAL_t[:, ::1] simplex + REAL_t[:, :, ::1] uloc + INDEX_t N + + def __init__(self, + REAL_t[:, ::1] coordsX, + REAL_t[:, ::1] coordsY, + REAL_t[:, ::1] coordsZ, + REAL_t[:, :, ::1] vals, + INDEX_t N): + self.coordsX = coordsX + self.coordsY = coordsY + self.coordsZ = coordsZ + self.vals = vals + self.simplex = uninitialized((2, 3), dtype=REAL) + self.uloc = uninitialized((2, 2, 2), dtype=REAL) + self.N = N + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j, k, idxX, idxY, idxZ + idxX = (self.N*x[0]) + idxY = (self.N*x[1]) + idxZ = (self.N*x[2]) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX+i, 0] + self.simplex[i, 1] = self.coordsY[idxY+i, 0] + self.simplex[i, 2] = self.coordsZ[idxZ+i, 0] + for i in range(2): + for j in range(2): + for k in range(2): + self.uloc[i, j, k] = self.vals[idxX+i, idxY+j, idxZ+k] + return evalPLTensor3D(self.simplex, self.uloc, x) + + +cdef class lookupFunctionTensor3DNewSym(function): + cdef: + REAL_t[::1] coordsX, coordsY, coordsZ + REAL_t[:, :, ::1] vals + REAL_t[:, ::1] simplex + REAL_t[:, :, ::1] uloc + REAL_t[::1] xTemp + INDEX_t N + + def __init__(self, + REAL_t[::1] coordsX, + REAL_t[::1] coordsY, + REAL_t[::1] coordsZ, + REAL_t[:, :, ::1] vals): + self.coordsX = coordsX + self.coordsY = coordsY + self.coordsZ = coordsZ + self.vals = vals + self.simplex = uninitialized((2, 3), dtype=REAL) + self.uloc = uninitialized((2, 2, 2), dtype=REAL) + self.N = 2*vals.shape[0]-1 + assert self.coordsX.shape[0] == self.N + assert self.coordsY.shape[0] == self.N + assert self.coordsZ.shape[0] == self.N + self.xTemp = uninitialized((3), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + INDEX_t i, j, k, idxX, idxY, idxZ, I, J, K + + for i in range(3): + if x[i] > 0.5: + self.xTemp[i] = 1.0-x[i] + else: + self.xTemp[i] = x[i] + idxX = (self.N*self.xTemp[0]) + idxY = (self.N*self.xTemp[1]) + idxZ = (self.N*self.xTemp[2]) + for i in range(2): + self.simplex[i, 0] = self.coordsX[idxX+i] + self.simplex[i, 1] = self.coordsY[idxY+i] + self.simplex[i, 2] = self.coordsZ[idxZ+i] + if (idxX+1 < self.vals.shape[0]) and (idxY+1 < 
self.vals.shape[1]) and (idxZ+1 < self.vals.shape[2]): + for i in range(2): + for j in range(2): + for k in range(2): + self.uloc[i, j, k] = self.vals[idxX+i, idxY+j, idxZ+k] + else: + for i in range(2): + if idxX+i >= self.vals.shape[0]: + I = self.vals.shape[0]-2 + else: + I = idxX+i + for j in range(2): + if idxY+j >= self.vals.shape[1]: + J = self.vals.shape[1]-2 + else: + J = idxY+j + for k in range(2): + if idxZ+k >= self.vals.shape[2]: + K = self.vals.shape[2]-2 + else: + K = idxZ+k + self.uloc[i, j, k] = self.vals[I, J, K] + return evalPLTensor3D(self.simplex, self.uloc, self.xTemp) + + +cdef class sphericalIntegral(function): + cdef: + sphericalQuadRule qr + function f + REAL_t[::1] y + INDEX_t dim + + def __init__(self, function f, INDEX_t dim, REAL_t radius, INDEX_t numQuadNodes): + self.f = f + self.dim = dim + if self.dim == 1: + self.qr = sphericalQuadRule1D(radius) + elif self.dim == 2: + self.qr = sphericalQuadRule2D(radius, numQuadNodes) + else: + raise NotImplementedError() + self.y = uninitialized((self.dim), dtype=REAL) + + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t I = 0. + INDEX_t k, j + for k in range(self.qr.num_nodes): + for j in range(self.dim): + self.y[j] = x[j]+self.qr.vertexOffsets[k, j] + I += self.qr.weights[k]*self.f.eval(self.y) + return I + + +cdef class complexFunction: + def __call__(self, REAL_t[::1] x): + return self.eval(x) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef COMPLEX_t eval(self, REAL_t[::1] x): + pass + + def __add__(self, complexFunction other): + if isinstance(self, complexMulFunction): + if isinstance(other, complexMulFunction): + return complexSumFunction(self.f, self.fac, other.f, other.fac) + else: + return complexSumFunction(self.f, self.fac, other, 1.) + else: + if isinstance(other, complexMulFunction): + return complexSumFunction(self, 1., other.f, other.fac) + else: + return complexSumFunction(self, 1., other, 1.) + + def __sub__(self, complexFunction other): + if isinstance(self, complexMulFunction): + if isinstance(other, complexMulFunction): + return complexSumFunction(self.f, self.fac, other.f, -other.fac) + else: + return complexSumFunction(self.f, self.fac, other, -1.) + else: + if isinstance(other, complexMulFunction): + return complexSumFunction(self, 1., other.f, -other.fac) + else: + return complexSumFunction(self, 1., other, -1.) 
+ + def __mul__(first, second): + if isinstance(first, complexFunction): + return complexMulFunction(first, second) + elif isinstance(second, complexFunction): + return complexMulFunction(second, first) + else: + return NotImplemented + + def __neg__(self): + if isinstance(self, complexMulFunction): + return complexMulFunction(self.f, -self.fac) + elif isinstance(self, complexSumFunction): + return complexSumFunction(self.f1, -self.fac1, self.f2, -self.fac2) + else: + return complexMulFunction(self, -1.0) + + def __repr__(self): + return '%s' % (self.__class__.__name__) + + +cdef class complexSumFunction(complexFunction): + cdef: + public complexFunction f1, f2 + public COMPLEX_t fac1, fac2 + + def __init__(self, complexFunction f1, COMPLEX_t fac1, complexFunction f2, COMPLEX_t fac2): + self.f1 = f1 + self.fac1 = fac1 + self.f2 = f2 + self.fac2 = fac2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef COMPLEX_t eval(self, REAL_t[::1] x): + return self.fac1*self.f1.eval(x)+self.fac2*self.f2.eval(x) + + +cdef class complexMulFunction(complexFunction): + cdef: + public complexFunction f + public COMPLEX_t fac + + def __init__(self, complexFunction f, COMPLEX_t fac): + self.f = f + self.fac = fac + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef COMPLEX_t eval(self, REAL_t[::1] x): + return self.fac*self.f.eval(x) + + +cdef class wrapRealToComplexFunction(complexFunction): + cdef: + function fun + + def __init__(self, function fun): + self.fun = fun + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef COMPLEX_t eval(self, REAL_t[::1] x): + return self.fun(x) + + +cdef class complexLambda(complexFunction): + cdef: + object fun + + def __init__(self, fun): + self.fun = fun + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline COMPLEX_t eval(self, REAL_t[::1] x): + return self.fun(x) + + +cdef class real(function): + cdef: + complexFunction fun + + def __init__(self, complexFunction fun): + self.fun = fun + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + return self.fun.eval(x).real + + +cdef class imag(function): + cdef: + complexFunction fun + + def __init__(self, complexFunction fun): + self.fun = fun + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x): + return self.fun.eval(x).imag + + + +cdef class waveFunction(complexFunction): + cdef: + REAL_t[::1] waveVector + + def __init__(self, REAL_t[::1] waveVector): + self.waveVector = waveVector + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef COMPLEX_t eval(self, REAL_t[::1] x): + cdef: + REAL_t s = 0. 
+ INDEX_t i + for i in range(x.shape[0]): + s = s+self.waveVector[i]*x[i] + return cos(s)+1j*sin(s) + + +cdef class vectorFunction: + def __init__(self, list components): + self.rows = len(components) + self.components = components + + def __call__(self, REAL_t[::1] x): + vals = uninitialized((self.rows), dtype=REAL) + self.eval(x, vals) + return vals + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, REAL_t[::1] x, REAL_t[::1] vals): + cdef: + INDEX_t i + function f + for i in range(self.rows): + f = self.components[i] + vals[i] = f.eval(x) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, ','.join([f.__repr__() for f in self.components])) + + def __getitem__(self, i): + return self.components[i] diff --git a/fem/PyNucleus_fem/mass_1D_P0.pxi b/fem/PyNucleus_fem/mass_1D_P0.pxi new file mode 100644 index 0000000..21d2af3 --- /dev/null +++ b/fem/PyNucleus_fem/mass_1D_P0.pxi @@ -0,0 +1,22 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_1d_sym_P0(mass_1d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_1d_sym_P0 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + + vol *= simplexVolume1D(simplex, self.temp) + + contrib[0] = vol diff --git a/fem/PyNucleus_fem/mass_1D_P0_P1.pxi b/fem/PyNucleus_fem/mass_1D_P0_P1.pxi new file mode 100644 index 0000000..24966a1 --- /dev/null +++ b/fem/PyNucleus_fem/mass_1D_P0_P1.pxi @@ -0,0 +1,23 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_1d_nonsym_P0_P1(mass_1d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_1d_nonsym_P0_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.500000000000000 + + vol *= simplexVolume1D(simplex, self.temp) + + contrib[0] = vol + contrib[1] = vol diff --git a/fem/PyNucleus_fem/mass_1D_P1.pxi b/fem/PyNucleus_fem/mass_1D_P1.pxi new file mode 100644 index 0000000..84cd2db --- /dev/null +++ b/fem/PyNucleus_fem/mass_1D_P1.pxi @@ -0,0 +1,24 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class mass_1d_sym_P1(mass_1d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_1d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.166666666666667 + + vol *= simplexVolume1D(simplex, self.temp) + + contrib[0] = 2*vol + contrib[1] = vol + contrib[2] = 2*vol diff --git a/fem/PyNucleus_fem/mass_1D_P2.pxi b/fem/PyNucleus_fem/mass_1D_P2.pxi new file mode 100644 index 0000000..4250489 --- /dev/null +++ b/fem/PyNucleus_fem/mass_1D_P2.pxi @@ -0,0 +1,27 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_1d_sym_P2(mass_1d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_1d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0333333333333333 + + vol *= simplexVolume1D(simplex, self.temp) + + contrib[0] = 4*vol + contrib[1] = -vol + contrib[2] = 2*vol + contrib[3] = 4*vol + contrib[4] = 2*vol + contrib[5] = 16*vol diff --git a/fem/PyNucleus_fem/mass_1D_P3.pxi b/fem/PyNucleus_fem/mass_1D_P3.pxi new file mode 100644 index 0000000..67d1f5d --- /dev/null +++ b/fem/PyNucleus_fem/mass_1D_P3.pxi @@ -0,0 +1,31 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_1d_sym_P3(mass_1d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_1d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.000595238095238095 + + vol *= simplexVolume1D(simplex, self.temp) + + contrib[0] = 128*vol + contrib[1] = 19*vol + contrib[2] = 99*vol + contrib[3] = -36*vol + contrib[4] = 128*vol + contrib[5] = -36*vol + contrib[6] = 99*vol + contrib[7] = 648*vol + contrib[8] = -81*vol + contrib[9] = 648*vol diff --git a/fem/PyNucleus_fem/mass_2D_P0.pxi b/fem/PyNucleus_fem/mass_2D_P0.pxi new file mode 100644 index 0000000..8982032 --- /dev/null +++ b/fem/PyNucleus_fem/mass_2D_P0.pxi @@ -0,0 +1,22 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class mass_2d_sym_P0(mass_2d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_2d_sym_P0 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + + vol *= simplexVolume2D(simplex, self.temp) + + contrib[0] = vol diff --git a/fem/PyNucleus_fem/mass_2D_P0_P1.pxi b/fem/PyNucleus_fem/mass_2D_P0_P1.pxi new file mode 100644 index 0000000..dd0c596 --- /dev/null +++ b/fem/PyNucleus_fem/mass_2D_P0_P1.pxi @@ -0,0 +1,24 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_2d_nonsym_P0_P1(mass_2d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_2d_nonsym_P0_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.333333333333333 + + vol *= simplexVolume2D(simplex, self.temp) + + contrib[0] = vol + contrib[1] = vol + contrib[2] = vol diff --git a/fem/PyNucleus_fem/mass_2D_P1.pxi b/fem/PyNucleus_fem/mass_2D_P1.pxi new file mode 100644 index 0000000..e93b6b0 --- /dev/null +++ b/fem/PyNucleus_fem/mass_2D_P1.pxi @@ -0,0 +1,27 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_2d_sym_P1(mass_2d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_2d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0833333333333333 + + vol *= simplexVolume2D(simplex, self.temp) + + contrib[0] = 2*vol + contrib[1] = vol + contrib[2] = vol + contrib[3] = 2*vol + contrib[4] = vol + contrib[5] = 2*vol diff --git a/fem/PyNucleus_fem/mass_2D_P2.pxi b/fem/PyNucleus_fem/mass_2D_P2.pxi new file mode 100644 index 0000000..9e7da02 --- /dev/null +++ b/fem/PyNucleus_fem/mass_2D_P2.pxi @@ -0,0 +1,42 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class mass_2d_sym_P2(mass_2d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_2d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.00555555555555556 + + vol *= simplexVolume2D(simplex, self.temp) + + contrib[0] = 6*vol + contrib[1] = -vol + contrib[2] = -vol + contrib[3] = 0 + contrib[4] = -4*vol + contrib[5] = 0 + contrib[6] = 6*vol + contrib[7] = -vol + contrib[8] = 0 + contrib[9] = 0 + contrib[10] = -4*vol + contrib[11] = 6*vol + contrib[12] = -4*vol + contrib[13] = 0 + contrib[14] = 0 + contrib[15] = 32*vol + contrib[16] = 16*vol + contrib[17] = 16*vol + contrib[18] = 32*vol + contrib[19] = 16*vol + contrib[20] = 32*vol diff --git a/fem/PyNucleus_fem/mass_2D_P3.pxi b/fem/PyNucleus_fem/mass_2D_P3.pxi new file mode 100644 index 0000000..b4cfebf --- /dev/null +++ b/fem/PyNucleus_fem/mass_2D_P3.pxi @@ -0,0 +1,76 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_2d_sym_P3(mass_2d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_2d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.000148809523809524 + + vol *= simplexVolume2D(simplex, self.temp) + + contrib[0] = 76*vol + contrib[1] = 11*vol + contrib[2] = 11*vol + contrib[3] = 18*vol + contrib[4] = 0 + contrib[5] = 27*vol + contrib[6] = 27*vol + contrib[7] = 0 + contrib[8] = 18*vol + contrib[9] = 36*vol + contrib[10] = 76*vol + contrib[11] = 11*vol + contrib[12] = 0 + contrib[13] = 18*vol + contrib[14] = 18*vol + contrib[15] = 0 + contrib[16] = 27*vol + contrib[17] = 27*vol + contrib[18] = 36*vol + contrib[19] = 76*vol + contrib[20] = 27*vol + contrib[21] = 27*vol + contrib[22] = 0 + contrib[23] = 18*vol + contrib[24] = 18*vol + contrib[25] = 0 + contrib[26] = 36*vol + contrib[27] = 540*vol + contrib[28] = -189*vol + contrib[29] = -135*vol + contrib[30] = -54*vol + contrib[31] = -135*vol + contrib[32] = 270*vol + contrib[33] = 162*vol + contrib[34] = 540*vol + contrib[35] = 270*vol + contrib[36] = -135*vol + contrib[37] = -54*vol + contrib[38] = -135*vol + contrib[39] = 162*vol + contrib[40] = 540*vol + contrib[41] = -189*vol + contrib[42] = -135*vol + contrib[43] = -54*vol + contrib[44] = 162*vol + contrib[45] = 540*vol + contrib[46] = 270*vol + contrib[47] = -135*vol + contrib[48] = 162*vol + contrib[49] = 540*vol + contrib[50] = -189*vol + contrib[51] = 162*vol + contrib[52] = 540*vol + contrib[53] = 162*vol + contrib[54] = 1944*vol diff --git a/fem/PyNucleus_fem/mass_3D_P0.pxi b/fem/PyNucleus_fem/mass_3D_P0.pxi new file mode 100644 index 0000000..7086f1d --- /dev/null +++ b/fem/PyNucleus_fem/mass_3D_P0.pxi @@ -0,0 +1,22 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
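The packed length of these symmetric kernels is n*(n+1)/2 for n local degrees of freedom (6 entries for the P1 triangle, 21 for P2, 55 for P3, matching the tables above), which indicates row-major upper-triangle storage; the P1 values [2, 1, 1, 2, 1, 2] indeed reassemble to the classical matrix (area/12)*[[2, 1, 1], [1, 2, 1], [1, 1, 2]]. A small helper, written here only for illustration, makes the layout explicit.

    import numpy as np

    def unpack_sym(contrib, n):
        """Expand row-major upper-triangle packing into a full n x n matrix."""
        A = np.empty((n, n))
        k = 0
        for i in range(n):
            for j in range(i, n):
                A[i, j] = A[j, i] = contrib[k]
                k += 1
        return A

    vol = 1./12.   # P1 triangle prefactor, for unit element area
    print(unpack_sym([2*vol, vol, vol, 2*vol, vol, 2*vol], 3))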
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_3d_sym_P0(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_3d_sym_P0 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + + vol *= simplexVolume3D(simplex, self.temp) + + contrib[0] = vol diff --git a/fem/PyNucleus_fem/mass_3D_P0_P1.pxi b/fem/PyNucleus_fem/mass_3D_P0_P1.pxi new file mode 100644 index 0000000..1553c95 --- /dev/null +++ b/fem/PyNucleus_fem/mass_3D_P0_P1.pxi @@ -0,0 +1,25 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_3d_nonsym_P0_P1(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_3d_nonsym_P0_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.250000000000000 + + vol *= simplexVolume3D(simplex, self.temp) + + contrib[0] = vol + contrib[1] = vol + contrib[2] = vol + contrib[3] = vol diff --git a/fem/PyNucleus_fem/mass_3D_P1.pxi b/fem/PyNucleus_fem/mass_3D_P1.pxi new file mode 100644 index 0000000..daff7f5 --- /dev/null +++ b/fem/PyNucleus_fem/mass_3D_P1.pxi @@ -0,0 +1,31 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class mass_3d_sym_P1(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_3d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0500000000000000 + + vol *= simplexVolume3D(simplex, self.temp) + + contrib[0] = 2*vol + contrib[1] = vol + contrib[2] = vol + contrib[3] = vol + contrib[4] = 2*vol + contrib[5] = vol + contrib[6] = vol + contrib[7] = 2*vol + contrib[8] = vol + contrib[9] = 2*vol diff --git a/fem/PyNucleus_fem/mass_3D_P2.pxi b/fem/PyNucleus_fem/mass_3D_P2.pxi new file mode 100644 index 0000000..d2f22a4 --- /dev/null +++ b/fem/PyNucleus_fem/mass_3D_P2.pxi @@ -0,0 +1,76 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class mass_3d_sym_P2(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_3d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.00238095238095238 + + vol *= simplexVolume3D(simplex, self.temp) + + contrib[0] = 6*vol + contrib[1] = vol + contrib[2] = vol + contrib[3] = vol + contrib[4] = -4*vol + contrib[5] = -6*vol + contrib[6] = -4*vol + contrib[7] = -4*vol + contrib[8] = -6*vol + contrib[9] = -6*vol + contrib[10] = 6*vol + contrib[11] = vol + contrib[12] = vol + contrib[13] = -4*vol + contrib[14] = -4*vol + contrib[15] = -6*vol + contrib[16] = -6*vol + contrib[17] = -4*vol + contrib[18] = -6*vol + contrib[19] = 6*vol + contrib[20] = vol + contrib[21] = -6*vol + contrib[22] = -4*vol + contrib[23] = -4*vol + contrib[24] = -6*vol + contrib[25] = -6*vol + contrib[26] = -4*vol + contrib[27] = 6*vol + contrib[28] = -6*vol + contrib[29] = -6*vol + contrib[30] = -6*vol + contrib[31] = -4*vol + contrib[32] = -4*vol + contrib[33] = -4*vol + contrib[34] = 32*vol + contrib[35] = 16*vol + contrib[36] = 16*vol + contrib[37] = 16*vol + contrib[38] = 16*vol + contrib[39] = 8*vol + contrib[40] = 32*vol + contrib[41] = 16*vol + contrib[42] = 8*vol + contrib[43] = 16*vol + contrib[44] = 16*vol + contrib[45] = 32*vol + contrib[46] = 16*vol + contrib[47] = 8*vol + contrib[48] = 16*vol + contrib[49] = 32*vol + contrib[50] = 16*vol + contrib[51] = 16*vol + contrib[52] = 32*vol + contrib[53] = 16*vol + contrib[54] = 32*vol diff --git a/fem/PyNucleus_fem/mass_3D_P3.pxi b/fem/PyNucleus_fem/mass_3D_P3.pxi new file mode 100644 index 0000000..2951a85 --- /dev/null +++ b/fem/PyNucleus_fem/mass_3D_P3.pxi @@ -0,0 +1,231 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class mass_3d_sym_P3(mass_3d): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(mass_3d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.000223214285714286 + + vol *= simplexVolume3D(simplex, self.temp) + + contrib[0] = 16*vol + contrib[1] = 2*vol + contrib[2] = 2*vol + contrib[3] = 2*vol + contrib[4] = -12*vol + contrib[5] = 6*vol + contrib[6] = 3*vol + contrib[7] = 3*vol + contrib[8] = 6*vol + contrib[9] = -12*vol + contrib[10] = -12*vol + contrib[11] = 6*vol + contrib[12] = 3*vol + contrib[13] = 3*vol + contrib[14] = 3*vol + contrib[15] = 3*vol + contrib[16] = 18*vol + contrib[17] = 18*vol + contrib[18] = 36*vol + contrib[19] = 18*vol + contrib[20] = 16*vol + contrib[21] = 2*vol + contrib[22] = 2*vol + contrib[23] = 6*vol + contrib[24] = -12*vol + contrib[25] = -12*vol + contrib[26] = 6*vol + contrib[27] = 3*vol + contrib[28] = 3*vol + contrib[29] = 3*vol + contrib[30] = 3*vol + contrib[31] = -12*vol + contrib[32] = 6*vol + contrib[33] = 3*vol + contrib[34] = 3*vol + contrib[35] = 18*vol + contrib[36] = 18*vol + contrib[37] = 18*vol + contrib[38] = 36*vol + contrib[39] = 16*vol + contrib[40] = 2*vol + contrib[41] = 3*vol + contrib[42] = 3*vol + contrib[43] = 6*vol + contrib[44] = -12*vol + contrib[45] = -12*vol + contrib[46] = 6*vol + contrib[47] = 3*vol + contrib[48] = 3*vol + contrib[49] = 3*vol + contrib[50] = 3*vol + contrib[51] = -12*vol + contrib[52] = 6*vol + contrib[53] = 18*vol + contrib[54] = 36*vol + contrib[55] = 18*vol + contrib[56] = 18*vol + contrib[57] = 16*vol + contrib[58] = 3*vol + contrib[59] = 3*vol + contrib[60] = 3*vol + contrib[61] = 3*vol + contrib[62] = 3*vol + contrib[63] = 3*vol + contrib[64] = 6*vol + contrib[65] = -12*vol + contrib[66] = 6*vol + contrib[67] = -12*vol + contrib[68] = 6*vol + contrib[69] = -12*vol + contrib[70] = 36*vol + contrib[71] = 18*vol + contrib[72] = 18*vol + contrib[73] = 18*vol + contrib[74] = 108*vol + contrib[75] = -54*vol + contrib[76] = -27*vol + contrib[77] = 0 + contrib[78] = -27*vol + contrib[79] = 54*vol + contrib[80] = 54*vol + contrib[81] = -27*vol + contrib[82] = -27*vol + contrib[83] = 0 + contrib[84] = 0 + contrib[85] = 0 + contrib[86] = 0 + contrib[87] = 0 + contrib[88] = -54*vol + contrib[89] = 0 + contrib[90] = 108*vol + contrib[91] = 54*vol + contrib[92] = -27*vol + contrib[93] = 0 + contrib[94] = -27*vol + contrib[95] = -27*vol + contrib[96] = 0 + contrib[97] = 54*vol + contrib[98] = -27*vol + contrib[99] = 0 + contrib[100] = 0 + contrib[101] = 0 + contrib[102] = 0 + contrib[103] = 0 + contrib[104] = -54*vol + contrib[105] = 108*vol + contrib[106] = -54*vol + contrib[107] = -27*vol + contrib[108] = 0 + contrib[109] = 0 + contrib[110] = 0 + contrib[111] = 54*vol + contrib[112] = -27*vol + contrib[113] = -27*vol + contrib[114] = 0 + contrib[115] = 0 + contrib[116] = 0 + contrib[117] = 0 + contrib[118] = -54*vol + contrib[119] = 108*vol + contrib[120] = 54*vol + contrib[121] = -27*vol + contrib[122] = 0 + contrib[123] = 0 + contrib[124] = -27*vol + contrib[125] = 0 + contrib[126] = 54*vol + contrib[127] = -27*vol + contrib[128] = 0 + contrib[129] = -54*vol + contrib[130] = 0 + contrib[131] = 0 + contrib[132] = 108*vol + contrib[133] = -54*vol + contrib[134] = -27*vol + contrib[135] = 0 + contrib[136] = 0 + contrib[137] = 0 + contrib[138] = 54*vol + contrib[139] = -27*vol + contrib[140] = 
0 + contrib[141] = -54*vol + contrib[142] = 0 + contrib[143] = 0 + contrib[144] = 108*vol + contrib[145] = 54*vol + contrib[146] = -27*vol + contrib[147] = 0 + contrib[148] = 0 + contrib[149] = -27*vol + contrib[150] = 0 + contrib[151] = 0 + contrib[152] = 0 + contrib[153] = -54*vol + contrib[154] = 0 + contrib[155] = 108*vol + contrib[156] = -54*vol + contrib[157] = 0 + contrib[158] = -27*vol + contrib[159] = 0 + contrib[160] = -27*vol + contrib[161] = 0 + contrib[162] = 0 + contrib[163] = -54*vol + contrib[164] = 0 + contrib[165] = 108*vol + contrib[166] = -27*vol + contrib[167] = 54*vol + contrib[168] = -27*vol + contrib[169] = 54*vol + contrib[170] = -54*vol + contrib[171] = 0 + contrib[172] = 0 + contrib[173] = 0 + contrib[174] = 108*vol + contrib[175] = -54*vol + contrib[176] = 0 + contrib[177] = -27*vol + contrib[178] = 0 + contrib[179] = 0 + contrib[180] = 0 + contrib[181] = -54*vol + contrib[182] = 108*vol + contrib[183] = -27*vol + contrib[184] = 54*vol + contrib[185] = -54*vol + contrib[186] = 0 + contrib[187] = 0 + contrib[188] = 0 + contrib[189] = 108*vol + contrib[190] = -54*vol + contrib[191] = 0 + contrib[192] = -54*vol + contrib[193] = 0 + contrib[194] = 0 + contrib[195] = 108*vol + contrib[196] = -54*vol + contrib[197] = 0 + contrib[198] = 0 + contrib[199] = 0 + contrib[200] = 432*vol + contrib[201] = 216*vol + contrib[202] = 216*vol + contrib[203] = 216*vol + contrib[204] = 432*vol + contrib[205] = 216*vol + contrib[206] = 216*vol + contrib[207] = 432*vol + contrib[208] = 216*vol + contrib[209] = 432*vol diff --git a/fem/PyNucleus_fem/mesh.py b/fem/PyNucleus_fem/mesh.py new file mode 100644 index 0000000..befd040 --- /dev/null +++ b/fem/PyNucleus_fem/mesh.py @@ -0,0 +1,3346 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +import numpy as np +import numpy.linalg as la +from PyNucleus_base.factory import factory +from PyNucleus_base.myTypes import INDEX, REAL, TAG +from PyNucleus_base.linear_operators import sparseGraph +from PyNucleus_base import uninitialized, uninitialized_like +from . meshCy import (meshBase, + boundaryVertices, + boundaryEdges, + boundaryFaces, + boundaryVerticesFromBoundaryEdges, + boundaryEdgesFromBoundaryFaces, + radialMeshTransformation) +from . 
meshPartitioning import (metisMeshPartitioner, + regularMeshPartitioner, + PartitionerException) +import logging + +LOGGER = logging.getLogger(__name__) + + +# PHYSICAL is the physical boundary of the entire domain +PHYSICAL = TAG(0) +# INTERIOR_NONOVERLAPPING are the interior boundaries of +# non-overlapping subdomains +INTERIOR_NONOVERLAPPING = TAG(-1) +# INTERIOR is the interior boundary of overlapping subdomains +INTERIOR = TAG(-2) +# don't use any boundary +NO_BOUNDARY = np.iinfo(TAG).min + +# Types of boundary conditions +DIRICHLET = 0 +NEUMANN = 1 +HOMOGENEOUS_DIRICHLET = 2 +HOMOGENEOUS_NEUMANN = 3 +NORM = 4 + +boundaryConditions = {DIRICHLET: 'Dirichlet', + NEUMANN: 'Neumann', + HOMOGENEOUS_DIRICHLET: 'homogeneous Dirichlet', + HOMOGENEOUS_NEUMANN: 'homogeneous Neumann'} + + +class meshFactory(factory): + def __init__(self): + super(meshFactory, self).__init__() + self.dims = {} + + def register(self, name, classType, dim, params={}, aliases=[]): + super(meshFactory, self).register(name, classType, params, aliases) + name = self.getCanonicalName(name) + self.dims[name] = dim + + def build(self, name, noRef=0, hTarget=None, surface=False, **kwargs): + if isinstance(name, meshNd): + return name + + mesh = super(meshFactory, self).build(name, **kwargs) + if surface: + mesh = mesh.get_surface_mesh() + mesh.removeUnusedVertices() + from . import P1_DoFMap + dmTest = P1_DoFMap(mesh, PHYSICAL) + while dmTest.num_dofs == 0: + mesh = mesh.refine() + dmTest = P1_DoFMap(mesh, PHYSICAL) + if hTarget is None: + for _ in range(noRef): + mesh = mesh.refine() + else: + assert hTarget > 0 + while mesh.h > hTarget: + mesh = mesh.refine() + + return mesh + + def getDim(self, name): + name = self.getCanonicalName(name) + if name in self.aliases: + name = self.aliases[name][1] + return self.dims[name] + + +def pacman(h=0.1, **kwargs): + from . 
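A hypothetical usage sketch for meshFactory; the registration name and parameters are illustrative, and the keyword handling of the inherited build() is assumed from the base factory class rather than shown in this patch.

    from PyNucleus_fem.mesh import meshFactory, uniformSquare

    meshFac = meshFactory()
    meshFac.register('unitSquare', uniformSquare, 2, params={'N': 2})
    mesh = meshFac.build('unitSquare', hTarget=0.1)  # refine while mesh.h > 0.1
    print(meshFac.getDim('unitSquare'))              # 2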
meshConstruction import (circularSegment, + line) + theta = np.pi/5 + center = np.array([0., 0.]) + bottom = np.array([1., 0.]) + top = np.array([np.cos(theta), np.sin(theta)]) + + numPointsPerUnitLength = int(np.ceil(1/h)) + + domain = (circularSegment(center, 1., theta, 2*np.pi, numPointsPerUnitLength) + + line(bottom, center)+ + line(center, top)) + + mesh = domain.mesh(max_volume=h**2, min_angle=30, **kwargs) + return mesh + + +def uniformSquare(N, M=None, ax=0, ay=0, bx=1, by=1, crossed=False): + if M is None: + M = N + assert N >= 2 + assert M >= 2 + x, y = np.meshgrid(np.linspace(ax, bx, N), + np.linspace(ay, by, M)) + vertices = [np.array([xx, yy]) for xx, yy in + zip(x.flatten(), y.flatten())] + cells = [] + if not crossed: + for i in range(M-1): + for j in range(N-1): + # bottom right element + el = (i*N+j, i*N+j+1, (i+1)*N+j+1) + cells.append(el) + # top left element + el = (i*N+j, (i+1)*N+j+1, (i+1)*N+j) + cells.append(el) + else: + for i in range(M-1): + for j in range(N-1): + if i % 2 == 0: + if j % 2 == 0: + # bottom right element + el = (i*N+j, i*N+j+1, (i+1)*N+j+1) + cells.append(el) + # top left element + el = (i*N+j, (i+1)*N+j+1, (i+1)*N+j) + else: + # bottom left element + el = (i*N+j, i*N+j+1, (i+1)*N+j) + cells.append(el) + # top right element + el = (i*N+j+1, (i+1)*N+j+1, (i+1)*N+j) + else: + if j % 2 == 1: + # bottom right element + el = (i*N+j, i*N+j+1, (i+1)*N+j+1) + cells.append(el) + # top left element + el = (i*N+j, (i+1)*N+j+1, (i+1)*N+j) + else: + # bottom left element + el = (i*N+j, i*N+j+1, (i+1)*N+j) + cells.append(el) + # top right element + el = (i*N+j+1, (i+1)*N+j+1, (i+1)*N+j) + cells.append(el) + + return mesh2d(np.array(vertices, dtype=REAL), + np.array(cells, dtype=INDEX)) + + +def simpleSquare(): + return uniformSquare(2) + + +def crossSquare(): + return uniformSquare(3, crossed=True) + + +def gradedSquare(factor=0.6): + from . 
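A quick check of the triangulation sizes produced by uniformSquare: an N x M vertex grid yields 2*(N-1)*(M-1) triangles, in both the regular and crossed layouts.

    from PyNucleus_fem.mesh import uniformSquare

    mesh = uniformSquare(3)                     # 3 x 3 vertex grid
    print(mesh.num_vertices, mesh.num_cells)    # expected: 9 8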
meshCy import gradedHypercubeTransformer + mesh = mesh2d(np.array([[0., 0.], + [1., 0.], + [0., 1.], + [1., 1.]], dtype=REAL), + np.array([[0, 1, 3], + [3, 2, 0]], dtype=INDEX)) + mesh.setMeshTransformation(gradedHypercubeTransformer(factor)) + mesh = mesh.refine() + return mesh + + +def simpleInterval(a=0., b=1., numCells=1): + vertices = np.zeros((numCells+1, 1), dtype=REAL) + cells = np.zeros((numCells, 2), dtype=INDEX) + for i in range(numCells): + vertices[i, 0] = a+(b-a)*(i/numCells) + cells[i, 0] = i + cells[i, 1] = i+1 + vertices[-1, 0] = b + return mesh1d(vertices, cells) + + +def disconnectedInterval(sep=0.1): + vertices = np.array([(0, ), + (0.5-sep/2, ), + (0.5+sep/2, ), + (1., )], dtype=REAL) + cells = np.array([(0, 1), (2, 3)], dtype=INDEX) + return mesh1d(vertices, cells) + + +def getNodes(a, b, horizon, h, strictInteraction=True): + diam = b-a + k = INDEX(diam/h) + if k*h < diam: + k += 1 + nodes = np.linspace(a, b, k+1, dtype=REAL) + hInterior = nodes[1]-nodes[0] + k = INDEX(horizon/hInterior) + if k*hInterior < horizon-1e-8: + k += 1 + if not strictInteraction: + horizon = k*hInterior + nodes = np.hstack((np.linspace(a-horizon, a, k+1, dtype=REAL)[:-1], + nodes, + np.linspace(b, b+horizon, k+1, dtype=REAL)[1:])) + return nodes + + +def intervalWithInteraction(a, b, horizon, h=None, strictInteraction=True): + if h is None: + h = horizon + nodes = getNodes(a, b, horizon, h, strictInteraction) + vertices = nodes[:, np.newaxis] + num_vertices = vertices.shape[0] + cells = uninitialized((num_vertices-1, 2), dtype=INDEX) + cells[:, 0] = np.arange(0, num_vertices-1, dtype=INDEX) + cells[:, 1] = np.arange(1, num_vertices, dtype=INDEX) + return mesh1d(vertices, cells) + + +def doubleIntervalWithInteractions(a=0., b=1., c=2., + horizon1=0.1, horizon2=0.2, + h=None): + + def getNumCells(l, r): + eps = 1e-5 + return int(np.ceil((r-l-eps)/h)) + + assert horizon2 >= horizon1 + assert horizon1 >= 0 + if h is None: + if horizon1 > 0: + h = horizon1 + elif horizon2 > 0: + h = horizon2 + else: + h = 0.5 + else: + if horizon1 > 0: + h = min([h, horizon1, horizon2]) + elif horizon2 > 0: + h = min([h, horizon2]) + + nodes = [] + if horizon1 > 0: + nodes.append(a-horizon1) + nodes.append(a) + if horizon2 > 0: + nodes.append(b-horizon2) + if horizon1 != horizon2: + nodes.append(b-horizon1) + nodes.append(b) + if horizon2 > 0: + if horizon1 != horizon2: + nodes.append(b+horizon1) + nodes.append(b+horizon2) + nodes.append(c) + if horizon2 > 0: + nodes.append(c+horizon2) + vertices = [] + i = 0 + k = getNumCells(nodes[i], nodes[i+1]) + vertices.append(np.linspace(nodes[i], nodes[i+1], k+1)) + for i in range(1, len(nodes)-1): + k = getNumCells(nodes[i], nodes[i+1]) + vertices.append(np.linspace(nodes[i], nodes[i+1], k+1)[1:]) + vertices = np.hstack(vertices) + vertices = vertices[:, np.newaxis] + num_vertices = vertices.shape[0] + cells = uninitialized((num_vertices-1, 2), dtype=INDEX) + cells[:, 0] = np.arange(0, num_vertices-1, dtype=INDEX) + cells[:, 1] = np.arange(1, num_vertices, dtype=INDEX) + return mesh1d(vertices, cells) + + +def squareWithInteractions(ax, ay, bx, by, + horizon, + h=None, + uniform=False, + strictInteraction=True, + innerRadius=-1, + preserveLinesHorizontal=[], + preserveLinesVertical=[], + **kwargs): + if h is None: + h = horizon + if innerRadius > 0: + uniform = False + if not uniform: + from . 
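A worked example of the getNodes logic above: for a = 0, b = 1, horizon = 0.25, h = 0.1, the interior splits into 10 cells of width 0.1, and each interaction layer gets k = ceil(horizon/hInterior) = 3 cells, so the mesh should end up with 17 vertices and 16 cells.

    from PyNucleus_fem.mesh import intervalWithInteraction

    mesh = intervalWithInteraction(0., 1., horizon=0.25, h=0.1)
    print(mesh.num_vertices, mesh.num_cells)   # expected: 17 16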
meshConstruction import (circularSegment, + line, + transformationRestriction) + if h is None: + h = horizon + bottomLeft = np.array([ax, ay]) + center = np.array([(ax+bx)/2, (ay+by)/2]) + + numPointsPerUnitLength = int(np.ceil(1/h)) + + assert len(preserveLinesVertical) == 0 or len(preserveLinesHorizontal) == 0 + if len(preserveLinesVertical)+len(preserveLinesHorizontal) > 0: + preserve = preserveLinesVertical+preserveLinesHorizontal + + c1 = circularSegment(bottomLeft, horizon, np.pi, 3/2*np.pi, numPointsPerUnitLength) + + x1 = preserve[0] + c2 = line((ax, ay), (x1, ay)) + for k in range(len(preserve)-1): + x1 = preserve[k] + x2 = preserve[k+1] + c2 = c2+line((x1, ay), (x2, ay)) + x2 = preserve[-1] + c2 = c2+line((x2, ay), (bx, ay)) + c1 = c1 + (c2+(0., -horizon)) + else: + c1 = circularSegment(bottomLeft, horizon, np.pi, 3/2*np.pi, numPointsPerUnitLength) + c2 = line((ax, ay), (bx, ay)) + c1 = c1 + (c2+(0., -horizon)) + c3 = line((ax, ay), (ax, ay-horizon)) + c4 = line((ax, ay), (ax-horizon, ay)) + c = c1+c2+c3+c4 + + frame = (c + (c*(center, np.pi/2)) + (c*(center, np.pi)) + (c*(center, -np.pi/2))) + + if len(preserveLinesVertical) > 0: + d = line((0, ay-horizon), (0, ay)) + x1 = preserve[0] + d = d + line((0, ay), (0, x1)) + for k in range(len(preserve)-1): + x1 = preserve[k] + x2 = preserve[k+1] + d += line((0, x1), (0, x2)) + x2 = preserve[-1] + d = d + line((0, x2), (0, by)) + d = d + line((0, by), (0, by+horizon)) + for x in preserveLinesVertical: + assert ax <= x <= bx + frame += (d+(x, 0.)) + if len(preserveLinesHorizontal) > 0: + d = line((ax-horizon, 0), (ax, 0))+line((ax, 0), (bx, 0))+line((bx, 0), (bx+horizon, 0)) + + d = line((ax-horizon, 0), (ax, 0)) + x1 = preserve[0] + d = d + line((ax, 0), (x1, 0)) + for k in range(len(preserve)-1): + x1 = preserve[k] + x2 = preserve[k+1] + d += line((x1, 0), (x2, 0)) + x2 = preserve[-1] + d = d + line((x2, 0), (bx, 0)) + d = d + line((bx, 0), (bx+horizon, 0)) + + for y in preserveLinesHorizontal: + assert ay <= y <= by + frame += (d+(0, y)) + + if innerRadius > 0: + frame += transformationRestriction(circularSegment(center, innerRadius, 0, 2*np.pi, numPointsPerUnitLength), center-(innerRadius, innerRadius), center+(innerRadius, innerRadius)) + mesh = frame.mesh(max_volume=h**2, min_angle=30, **kwargs) + else: + frame.holes.append(center) + mesh = frame.mesh(max_volume=h**2, min_angle=30, **kwargs) + + eps = 1e-10 + N1 = np.logical_and(np.absolute(mesh.vertices_as_array[:, 0]-ax) < eps, + np.logical_and(mesh.vertices_as_array[:, 1] >= ay-eps, + mesh.vertices_as_array[:, 1] <= by+eps)).sum() + N2 = np.logical_and(np.absolute(mesh.vertices_as_array[:, 0]-bx) < eps, + np.logical_and(mesh.vertices_as_array[:, 1] >= ay-eps, + mesh.vertices_as_array[:, 1] <= by+eps)).sum() + M1 = np.logical_and(np.absolute(mesh.vertices_as_array[:, 1]-ay) < eps, + np.logical_and(mesh.vertices_as_array[:, 0] >= ax-eps, + mesh.vertices_as_array[:, 0] <= bx+eps)).sum() + M2 = np.logical_and(np.absolute(mesh.vertices_as_array[:, 1]-by) < eps, + np.logical_and(mesh.vertices_as_array[:, 0] >= ax-eps, + mesh.vertices_as_array[:, 0] <= bx+eps)).sum() + assert N1 == N2 + assert M1 == M2 + mesh2 = uniformSquare(N=N1, M=M1, ax=ax, ay=ay, bx=bx, by=by) + mesh = snapMeshes(mesh, mesh2) + + location = uninitialized((mesh.num_vertices), dtype=INDEX) + eps = 1e-9 + for x in preserveLinesVertical: + for vertexNo in range(mesh.num_vertices): + if mesh.vertices[vertexNo, 0] < x-eps: + location[vertexNo] = 0 + elif mesh.vertices[vertexNo, 0] > x+eps: + location[vertexNo] = 2 + 
else: + location[vertexNo] = 1 + for cellNo in range(mesh.num_cells): + cellLoc = set() + for vertexNo in range(mesh.dim+1): + cellLoc.add(location[mesh.cells[cellNo, vertexNo]]) + assert max(cellLoc)-min(cellLoc) <= 1, (mesh.vertices_as_array[mesh.cells_as_array[cellNo, :], :], cellLoc) + for y in preserveLinesHorizontal: + for vertexNo in range(mesh.num_vertices): + if mesh.vertices[vertexNo, 1] < y-eps: + location[vertexNo] = 0 + elif mesh.vertices[vertexNo, 1] > y+eps: + location[vertexNo] = 2 + else: + location[vertexNo] = 1 + for cellNo in range(mesh.num_cells): + cellLoc = set() + for vertexNo in range(mesh.dim+1): + cellLoc.add(location[mesh.cells[cellNo, vertexNo]]) + assert max(cellLoc)-min(cellLoc) <= 1, mesh.vertices_as_array[mesh.cells_as_array[cellNo, :], :] + else: + x = getNodes(ax, bx, horizon, h, strictInteraction) + y = getNodes(ay, by, horizon, h, strictInteraction) + M = x.shape[0] + N = y.shape[0] + x, y = np.meshgrid(x, y) + vertices = [np.array([xx, yy]) for xx, yy in + zip(x.flatten(), y.flatten())] + cells = [] + for i in range(M-1): + for j in range(N-1): + # bottom right element + el = (i*N+j, i*N+j+1, (i+1)*N+j+1) + cells.append(el) + # top left element + el = (i*N+j, (i+1)*N+j+1, (i+1)*N+j) + cells.append(el) + mesh = mesh2d(np.array(vertices, dtype=REAL), + np.array(cells, dtype=INDEX)) + return mesh + + +def doubleSquareWithInteractions(ax=0., ay=0., bx=1., by=1., cx=2., cy=1., + horizon1=0.1, horizon2=0.2, + h=None, + returnSketch=False, + **kwargs): + from . meshConstruction import (circularSegment, + line, + polygon, + transformationRestriction) + assert horizon2 >= horizon1 + assert horizon1 >= 0 + if h is None: + if horizon1 > 0: + h = horizon1 + elif horizon2 > 0: + h = horizon2 + else: + h = 0.5 + else: + if horizon1 > 0: + h = min([h, horizon1, horizon2]) + elif horizon2 > 0: + h = min([h, horizon2]) + + bottomLeft = np.array([ax, ay]) + bottomMid = np.array([bx, ay]) + bottomRight = np.array([cx, ay]) + topLeft = np.array([ax, by]) + topMid = np.array([bx, by]) + topRight = np.array([cx, by]) + + centerLeft = np.array([(ax+bx)/2, (ay+by)/2]) + centerRight = np.array([(bx+cx)/2, (ay+cy)/2]) + + for k in range(10): + numPointsPerUnitLength = int(np.ceil(1/(h*0.8**(k/2)))) + + if horizon2 > 0: + magicAngle = 0.5*np.pi-np.arcsin(horizon1/horizon2) + magicLen = horizon2*np.cos(0.5*np.pi-magicAngle) + + # the four/six inner squares + inner = polygon([bottomLeft, bottomMid-(horizon2, 0), + topMid-(horizon2, 0), topLeft], num_points_per_unit_len=numPointsPerUnitLength) + if horizon1 < horizon2: + inner += polygon([bottomMid-(horizon2, 0), bottomMid-(horizon1, 0), + topMid-(horizon1, 0), topMid-(horizon2, 0)], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + inner += polygon([bottomMid-(horizon1, 0), bottomMid, + topMid, topMid-(horizon1, 0)], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + inner += polygon([bottomMid, bottomMid+(horizon1, 0), + topMid+(horizon1, 0), topMid], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + inner += polygon([bottomMid+(horizon1, 0), bottomMid+(horizon2, 0), + topMid+(horizon2, 0), topMid+(horizon1, 0)], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + else: + inner += polygon([bottomMid-(horizon2, 0), bottomMid, + topMid, topMid-(horizon2, 0)], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + inner += polygon([bottomMid, bottomMid+(horizon2, 0), + topMid+(horizon2, 0), topMid], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + 
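A minimal usage sketch for squareWithInteractions, taking the uniform tensor-grid branch above; all parameter values are illustrative.

    from PyNucleus_fem.mesh import squareWithInteractions

    # unit square plus an interaction collar of width 0.25 on all sides
    mesh = squareWithInteractions(0., 0., 1., 1., horizon=0.25, h=0.1,
                                  uniform=True)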
inner += polygon([bottomMid+(horizon2, 0), bottomRight, + topRight, topMid+(horizon2, 0)], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + else: + inner = polygon([bottomLeft, bottomMid, topMid, topLeft], num_points_per_unit_len=numPointsPerUnitLength) + inner += polygon([bottomMid, bottomRight, topRight, topMid], doClose=False, num_points_per_unit_len=numPointsPerUnitLength) + + mesh = inner.mesh(h=h*0.8**(k/2), **kwargs) + frame = inner + + if horizon2 > 0: + # interaction domain for right domain + d1 = (line(bottomMid, bottomRight)+(0, -horizon2) + circularSegment(bottomRight, horizon2, 1.5*np.pi, 2*np.pi, numPointsPerUnitLength)) + d2 = (line(bottomRight, topRight)+(horizon2, 0) + circularSegment(topRight, horizon2, 0, 0.5*np.pi, numPointsPerUnitLength)) + d3 = ((line(topRight, topMid)+(0, horizon2)) + + transformationRestriction(circularSegment(topMid, horizon2, 0.5*np.pi, 0.5*np.pi+magicAngle, numPointsPerUnitLength), topMid+(-horizon2, horizon1+1e-9), topMid+(0, horizon2)) + + transformationRestriction(circularSegment(topMid, horizon2, 0.5*np.pi + magicAngle, np.pi, numPointsPerUnitLength), topMid+(-horizon2, 0), topMid+(-magicLen-1e-9, horizon1))) + d4 = (transformationRestriction(circularSegment(bottomMid, horizon2, np.pi, np.pi + (0.5*np.pi-magicAngle), numPointsPerUnitLength), bottomMid+(-horizon2, -horizon1+1e-9), bottomMid+(-magicLen, 0)) + + transformationRestriction(circularSegment(bottomMid, horizon2, np.pi + (0.5*np.pi-magicAngle), 1.5*np.pi, numPointsPerUnitLength), bottomMid+(-horizon2, -horizon2), bottomMid+(0, -horizon1-1e-9))) + outer = d1+d2+d3+d4 + + # two right corners + c6 = line(bottomRight, bottomRight-(0, horizon2)) + line(bottomRight, bottomRight+(horizon2, 0)) + c6 = c6 + (c6*(centerRight, 0.5*np.pi)) + outer += c6 + + # the two mid corners + c7 = line(topMid+(0, horizon2), topMid+(0, horizon1)) + line(topMid+(0, horizon1), topMid) + c8 = line(bottomMid, bottomMid-(0, horizon1)) + line(bottomMid-(0, horizon1), bottomMid-(0, horizon2)) + outer += c7+c8 + + if horizon1 > 0: + # interaction domain for left domain + e1 = circularSegment(topMid, horizon1, 0, 0.5*np.pi, num_points_per_unit_len=numPointsPerUnitLength) + e2 = (line(topMid, topMid-(magicLen, 0)) + (0, horizon1)) + (line(topMid-(magicLen, 0), topLeft) + (0, horizon1)) + e3 = circularSegment(topLeft, horizon1, 0.5*np.pi, np.pi, num_points_per_unit_len=numPointsPerUnitLength) + e4 = line(topLeft, bottomLeft)+(-horizon1, 0) + e5 = circularSegment(bottomLeft, horizon1, np.pi, 1.5*np.pi, num_points_per_unit_len=numPointsPerUnitLength) + e6 = (line(bottomLeft, bottomMid-(magicLen, 0))+(0, -horizon1)) + (line(bottomMid-(magicLen, 0), bottomMid)+(0, -horizon1)) + e7 = circularSegment(bottomMid, horizon1, 1.5*np.pi, 2*np.pi, num_points_per_unit_len=numPointsPerUnitLength) + outer += e1+e2+e3+e4+e5+e6+e7 + + # preserve right angles near corners + if horizon1 > 0: + # two left corners + c5 = line(topLeft, topLeft+(0, horizon1))+line(topLeft, topLeft-(horizon1, 0)) + c5 = c5 + (c5*(centerLeft, 0.5*np.pi)) + outer += c5 + + frame = inner+outer + mesh = frame.mesh(h=h*0.8**(k/2), **kwargs) + + if mesh.h <= h: + if returnSketch: + return mesh, frame + else: + return mesh + if returnSketch: + return mesh, frame + else: + return mesh + + +def discWithInteraction(radius, horizon, h=0.25): + if horizon > 0: + outerRadius = radius + horizon + if h > horizon: + LOGGER.warn("h = {} > horizon = {}. 
Using h=horizon instead.".format(h, horizon)) + h = horizon + return circleWithInnerRadius(int(2*np.pi*outerRadius/h), + radius=outerRadius, + innerRadius=radius, + max_volume=h**2) + else: + return circle(int(2*np.pi*radius/h), + radius=radius, + max_volume=h**2) + + +def discWithIslands(horizon=0., radius=1., islandOffCenter=0.35, islandDiam=0.5): + from . meshConstruction import circle, rectangle + numPointsPerLength = 4 + assert islandOffCenter > islandDiam/2 + assert np.sqrt(2)*(islandOffCenter+islandDiam/2) < radius + assert horizon >= 0. + c = circle((0,0), radius, num_points_per_unit_len=numPointsPerLength) + if horizon > 0: + c += circle((0,0), radius+horizon, num_points_per_unit_len=numPointsPerLength) + island = rectangle((-islandDiam/2, -islandDiam/2), (islandDiam/2, islandDiam/2)) + c += (island+(islandOffCenter, islandOffCenter)) + c += (island+(-islandOffCenter, islandOffCenter)) + c += (island+(islandOffCenter, -islandOffCenter)) + c += (island+(-islandOffCenter, -islandOffCenter)) + mesh = c.mesh(min_angle=30) + return mesh + + +def simpleBox(): + vertices = np.array([(0, 0, 0), + (1, 0, 0), + (1, 1, 0), + (0, 1, 0), + (0, 0, 1), + (1, 0, 1), + (1, 1, 1), + (0, 1, 1)], dtype=REAL) + cells = np.array([(0, 1, 6, 5), + (0, 1, 2, 6), + (0, 4, 5, 6), + (0, 4, 6, 7), + (0, 2, 3, 6), + (0, 3, 7, 6)], dtype=INDEX) + return mesh3d(vertices, cells) + + +def gradedBox(factor=0.6): + from . meshCy import gradedHypercubeTransformer + mesh = simpleBox() + mesh.setMeshTransformation(gradedHypercubeTransformer(factor)) + mesh = mesh.refine() + return mesh + + +def standardSimplex(d): + vertices = np.zeros((d+1, d), dtype=REAL) + cells = np.zeros((1, d+1), dtype=INDEX) + for i in range(d): + vertices[i+1, i] = 1. + cells[0, i+1] = i+1 + if d == 1: + return mesh1d(vertices, cells) + elif d == 2: + return mesh2d(vertices, cells) + elif d == 3: + return mesh3d(vertices, cells) + else: + raise NotImplementedError() + + +def standardSimplex2D(): + return standardSimplex(2) + + +def standardSimplex3D(): + return standardSimplex(3) + + +def simpleFicheraCube(): + vertices = np.array([(0, 0, 0), + (1, 0, 0), + (1, 1, 0), + (0, 1, 0), + (0, 0, 1), + (1, 0, 1), + (1, 1, 1), + (0, 1, 1), + # + (2, 0, 0), + (2, 1, 0), + (2, 0, 1), + (2, 1, 1), + # + (0, 0, 2), + (1, 0, 2), + (1, 1, 2), + (0, 1, 2), + # + (0, 2, 0), + (1, 2, 0), + (2, 2, 0), + (2, 2, 1), + (1, 2, 1), + (0, 2, 1), + (2, 2, 2), + (1, 2, 2), + (0, 2, 2), + (2, 1, 2)], dtype=REAL) + + def boxCells(a, b, c, d, e, f, g, h): + return np.array([(a, b, g, f), + (a, b, c, g), + (a, e, f, g), + (a, e, g, h), + (a, c, d, g), + (a, d, h, g)], dtype=INDEX) + + cells = np.vstack((boxCells(0, 1, 2, 3, 4, 5, 6, 7), + boxCells(1, 8, 9, 2, 5, 10, 11, 6), + boxCells(4, 5, 6, 7, 12, 13, 14, 15), + boxCells(3, 2, 17, 16, 7, 6, 20, 21), + boxCells(2, 9, 18, 17, 6, 11, 19, 20), + boxCells(7, 6, 20, 21, 15, 14, 23, 24), + boxCells(6, 11, 19, 20, 14, 25, 22, 23))) + return mesh3d(vertices, cells) + + +def simpleLshape(): + vertices = np.array([(0, 0), # 0 + (1, 0), # 1 + (2, 0), # 2 + (2, 1), # 3 + (1, 1), # 4 + (0, 1), # 5 + (0, 2), # 6 + (1, 2)], dtype=REAL) # 7 + + cells = np.array([(0, 1, 4), (0, 4, 5), (1, 2, 3), + (1, 3, 4), (5, 4, 7), (5, 7, 6)], dtype=INDEX) + return mesh2d(vertices, cells) + + +def disconnectedDomain(sep=0.1): + vertices = np.array([(0, 0), + (1, 0), + (1, 0.5-sep/2), + (0, 0.5-sep/2), + (0, 0.5+sep/2), + (1, 0.5+sep/2), + (1, 1), + (0, 1)], dtype=REAL) + + cells = np.array([(0, 1, 2), (0, 2, 3), + (4, 5, 6), (4, 6, 7)], dtype=INDEX) + 
return mesh2d(vertices, cells) + + +def Lshape(d): + from mshr import Rectangle, generate_mesh + from dolfin import Point + domain = (Rectangle(Point(0, 0), Point(2, 2)) + - Rectangle(Point(1, 1), Point(2, 2))) + mesh = generate_mesh(domain, d) + vertices = [x for x in mesh.coordinates()] + cells = mesh.cells() + return mesh2d(vertices, cells) + + +def circle(n, radius=1., returnFacets=False, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + if 'min_angle' not in kwargs: + kwargs['min_angle'] = 30 + + points = [] + facets = [] + for i in range(n): + points.append((radius*np.cos(i*2*np.pi/n), radius*np.sin(i*2*np.pi/n))) + for i in range(1, n): + facets.append((i-1, i)) + facets.append((n-1, 0)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + # Make sure that one node is on the origin. + # Otherwise the radialMeshTransformation does weird stuff + k = np.linalg.norm(mesh.vertices_as_array, axis=1).argmin() + mesh.vertices[k, :] = 0. + mesh.resetMeshInfo() + assert mesh.delta < 10. + from . meshCy import radialMeshTransformer + mesh.setMeshTransformation(radialMeshTransformer()) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def circleWithInnerRadius(n, radius=2., innerRadius=1., returnFacets=False, **kwargs): + from meshpy.triangle import MeshInfo, build + from . import mesh2d + mesh_info = MeshInfo() + + if 'min_angle' not in kwargs: + kwargs['min_angle'] = 30 + + points = [] + facets = [] + for i in range(n): + points.append((radius*np.cos(i*2*np.pi/n), + radius*np.sin(i*2*np.pi/n))) + for i in range(1, n): + facets.append((i-1, i)) + facets.append((n-1, 0)) + + nInner = int(round(n*innerRadius/radius)) + + for i in range(nInner): + points.append((innerRadius*np.cos(i*2*np.pi/nInner), + innerRadius*np.sin(i*2*np.pi/nInner))) + for i in range(1, nInner): + facets.append((n+i-1, n+i)) + facets.append((n-1+nInner, n)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + # Make sure that one node is on the origin. + # Otherwise the radialMeshTransformation does weird stuff + k = np.linalg.norm(mesh.vertices_as_array, axis=1).argmin() + mesh.vertices[k, :] = 0. + mesh.resetMeshInfo() + assert mesh.delta < 10. + from . 
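Note that circle() snaps the vertex closest to the origin exactly onto it before installing the radial mesh transformation, so that the transformation has a fixed center to work from. A usage sketch with illustrative parameters; max_volume is forwarded to meshpy's build as in the function body.

    from PyNucleus_fem.mesh import circle

    mesh = circle(64, radius=1., max_volume=0.01)
    mesh = mesh.refine()   # new boundary vertices are pushed back onto the circle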
meshCy import radialMeshTransformer + mesh.setMeshTransformation(radialMeshTransformer()) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def gradedIntervals(intervals, h): + numIntervals = len(intervals) + + intervals = list(sorted(intervals, key=lambda int: int[0])) + + intervalSizes = [] + Ms = np.zeros((2*len(intervals)), dtype=INDEX) + for intNo, interval in enumerate(intervals): + mu1 = interval[2] + mu2 = interval[3] + if mu1 is None: + if mu2 is None: + raise NotImplementedError() + else: + radius = interval[1]-interval[0] + Ms[2*intNo] = 0 + Ms[2*intNo+1] = max(int(np.ceil(1/(1-(1-h/radius)**(1/mu2)))), 1) + else: + if mu2 is None: + radius = interval[1]-interval[0] + Ms[2*intNo] = max(int(np.ceil(1/(1-(1-h/radius)**(1/mu1)))), 1) + Ms[2*intNo+1] = 0 + else: + radius = 0.5*(interval[1]-interval[0]) + Ms[2*intNo] = max(int(np.ceil(1/(1-(1-h/radius)**(1/mu1)))), 1) + Ms[2*intNo+1] = max(int(np.ceil(1/(1-(1-h/radius)**(1/mu2)))), 1) + points = np.zeros((Ms.sum()+1, 1), dtype=REAL) + + for intNo, interval in enumerate(intervals): + mu1 = interval[2] + mu2 = interval[3] + M1 = Ms[2*intNo] + M2 = Ms[2*intNo+1] + if M1 > 0 and M2 > 0: + radius = 0.5*(interval[1]-interval[0]) + center = 0.5*(interval[0]+interval[1]) + else: + radius = interval[1]-interval[0] + if M1 == 0: + center = interval[0] + else: + center = interval[1] + + indexCenter = Ms[:2*intNo+1].sum() + points[indexCenter, 0] = center + M = Ms[2*intNo] + for j in range(1, M+1): + points[indexCenter-j, 0] = center - radius*(1 - (1-j/M)**mu1) + M = Ms[2*intNo+1] + for j in range(1, M+1): + points[indexCenter+j, 0] = center + radius*(1 - (1-j/M)**mu2) + + cells = np.empty((Ms.sum(), 2), dtype=INDEX) + cells[:, 0] = np.arange(0, Ms.sum(), dtype=INDEX) + cells[:, 1] = np.arange(1, Ms.sum()+1, dtype=INDEX) + + mesh = mesh1d(points, cells) + from . 
meshCy import multiIntervalMeshTransformer + mesh.setMeshTransformation(multiIntervalMeshTransformer(intervals)) + return mesh + + +def graded_interval(h, mu=2., mu2=None, radius=1.): + if mu2 is None: + mu2 = mu + intervals = [(-radius, radius, mu, mu2)] + return gradedIntervals(intervals, h) + + +def double_graded_interval(h, mu_ll=2., mu_rr=2., mu_lr=None, mu_rl=None, a=-1., b=1.): + if mu_lr is None: + mu_lr = mu_ll + if mu_rl is None: + mu_rl = mu_rr + intervals = [(a, 0., mu_ll, mu_lr), (0., b, mu_rl, mu_rr)] + return gradedIntervals(intervals, h) + + +def double_graded_interval_with_interaction(horizon, h=None, mu_ll=2., mu_rr=2., mu_lr=None, mu_rl=None, a=-1., b=1.): + if h is None: + h = horizon/2 + else: + h = min(horizon/2, h) + if mu_lr is None: + mu_lr = mu_ll + if mu_rl is None: + mu_rl = mu_rr + intervals = [(a-horizon, a, None, mu_ll), (a, 0., mu_ll, mu_lr), (0., b, mu_rl, mu_rr), (b, b+horizon, mu_rr, None)] + return gradedIntervals(intervals, h) + + +def graded_circle(M, mu=2., radius=1., returnFacets=False, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + points = [] + facets = [] + + points.append((0, 0)) + rold = 0 + for j in range(1, M+1): + rj = radius*(1 - (1-j/M)**mu) + hj = rj-rold + n = int(np.floor(2*np.pi*rj/hj)) + for i in range(n): + points.append((rj*np.cos(i*2*np.pi/n), rj*np.sin(i*2*np.pi/n))) + rold = rj + for i in range(len(points)-n+1, len(points)): + facets.append((i-1, i)) + facets.append((len(points)-1, len(points)-n)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def double_graded_circle(M, + muInterior=2., muExterior=2., + rInterior=1., rExterior=2., + returnFacets=False, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + points = [] + facets = [] + + points.append((0, 0)) + rold = 0 + for j in range(1, M+1): + rj = rInterior*(1 - (1-j/M)**muInterior) + # print(rj) + hj = rj-rold + n = int(np.floor(2*np.pi*rj/hj)) + for i in range(n): + points.append((rj*np.cos(i*2*np.pi/n), rj*np.sin(i*2*np.pi/n))) + rold = rj + for i in range(len(points)-n+1, len(points)): + facets.append((i-1, i)) + facets.append((len(points)-1, len(points)-n)) + + # rold = rInterior + # M = int(((rExterior-rInterior)/rInterior)*M) + for j in range(1, M+1): + rj = rInterior + (rExterior-rInterior)*(j/M)**muExterior + # print(rj) + hj = rj-rold + n = int(np.floor(2*np.pi*rj/hj)) + for i in range(n): + points.append((rj*np.cos(i*2*np.pi/n), rj*np.sin(i*2*np.pi/n))) + rold = rj + for i in range(len(points)-n+1, len(points)): + facets.append((i-1, i)) + facets.append((len(points)-1, len(points)-n)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def cutoutCircle(n, radius=1., cutoutAngle=np.pi/2., + returnFacets=False, minAngle=30, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + n = n-1 + + points = [(0., 0.)] + facets = [] + for i in range(n+1): + points.append((radius*np.cos(i*(2*np.pi-cutoutAngle)/n), + radius*np.sin(i*(2*np.pi-cutoutAngle)/n))) + for i in 
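The cell count per graded side in gradedIntervals solves radius*(1 - (1 - 1/M)**mu) = h for M, so the coarsest cell (adjacent to the grading center) has size at most h, while cells shrink like radius/M**mu toward the interval endpoint. A numerical check of that formula:

    import numpy as np

    radius, mu, h = 1.0, 2.0, 0.1
    M = max(int(np.ceil(1/(1 - (1 - h/radius)**(1/mu)))), 1)
    j = np.arange(M + 1)
    r = radius*(1 - (1 - j/M)**mu)   # distances of the grid points from the center
    print(M)                         # 20
    print(r[1] - r[0])               # 0.0975: coarsest cell, <= h
    print(r[-1] - r[-2])             # 0.0025 = radius/M**mu: finest cell at the end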
range(1, n+2): + facets.append((i-1, i)) + facets.append((n+1, 0)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, min_angle=minAngle, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def twinCircle(n, radius=1., sep=0.1, **kwargs): + mesh = circle(n, radius, **kwargs) + vertices = np.vstack((mesh.vertices, mesh.vertices)) + cells = np.vstack((mesh.cells, mesh.cells)) + vertices[:mesh.num_vertices, 0] += radius+sep/2 + vertices[mesh.num_vertices:, 0] -= radius+sep/2 + cells[mesh.num_cells:, :] += mesh.num_vertices + return mesh2d(vertices, cells) + + +def dumbbell(n=8, radius=1., barAngle=np.pi/4, barLength=3, + returnFacets=False, minAngle=30, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + points = [] + facets = [] + for i in range(n): + points.append((-barLength/2 + + radius*np.cos(barAngle/2+i*(2*np.pi-barAngle)/(n-1)), + radius*np.sin(barAngle/2+i*(2*np.pi-barAngle)/(n-1)))) + for i in range(n): + points.append((barLength/2 + + radius*np.cos(np.pi+barAngle/2+i*(2*np.pi-barAngle)/(n-1)), + radius*np.sin(np.pi+barAngle/2+i*(2*np.pi-barAngle)/(n-1)))) + for i in range(1, 2*n): + facets.append((i-1, i)) + facets.append((2*n-1, 0)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, min_angle=minAngle, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def wrench(n=8, radius=0.17, radius2=0.3, barLength=2, returnFacets=False, minAngle=30, **kwargs): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + points = [] + facets = [] + n = 2 + for i in range(n+1): + points.append((barLength + + radius*np.cos(i*(np.pi/2)/n), + radius*np.sin(i*(np.pi/2)/n))) + n = 3 + for i in range(n+1): + points.append((-radius2 + + radius2*np.cos(i*np.pi/n), + radius+radius2*np.sin(i*np.pi/n))) + + r = np.sqrt((1.5*radius2)**2 + radius**2) + th = np.arctan2(radius, 1.5*radius2) + n = 1 + for i in range(n+1): + points.append((-2.5*radius2+r*np.cos(th-th*i/n), + r*np.sin(th-th*i/n))) + + for p in reversed(points[1:-1]): + q = p[0], -p[1] + points.append(q) + + for i in range(1, len(points)): + facets.append((i-1, i)) + facets.append((len(points)-1, 0)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, min_angle=minAngle, **kwargs) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def rectangle(nx, ny, bx=1., by=1., ax=0., ay=0., **kwargs): + from . 
meshConstruction import rectangle + frame = rectangle((ax, ay), (bx, by), num_points=[nx+1, ny+1, nx+1, ny+1]) + mesh = frame.mesh(**kwargs) + return mesh + + +def Hshape(a=1., b=1., c=0.3, h=0.2, returnFacets=False): + from meshpy.triangle import MeshInfo, build + mesh_info = MeshInfo() + + points = [(0., 0.), (a, 0.), (a, b), (a+c, b), (a+c, 0.), (a+c+a, 0.), + (a+c+a, b+b+h), (a+c, b+b+h), (a+c, b+h), (a, b+h), + (a, b+b+h), (0, b+b+h)] + facets = [] + for i in range(1, len(points)): + facets.append((i-1, i)) + facets.append((len(points)-1, 0)) + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info, min_angle=30) + mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + if returnFacets: + return mesh, np.array(points), np.array(facets) + else: + return mesh + + +def ball2(radius=1.): + from meshpy.tet import MeshInfo, build + mesh_info = MeshInfo() + + points = [(radius, 0, 0), (0, radius, 0), (-radius, 0, 0), (0, -radius, 0), + (0, 0, radius), (0, 0, -radius)] + facets = [(0, 1, 4), (1, 2, 4), (2, 3, 4), (3, 0, 4), + (1, 0, 5), (2, 1, 5), (3, 2, 5), (0, 3, 5)] + + mesh_info.set_points(points) + mesh_info.set_facets(facets) + mesh_meshpy = build(mesh_info) + mesh = mesh3d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + return mesh + + +def ball(radius=1., points=4, radial_subdiv=None, **kwargs): + """ + Build mesh for 3D ball as surface of revolution. + points determines the number of points on the curve. + radial_subdiv determines the number of steps in the rotation. + """ + from meshpy.tet import MeshInfo, build, Options + from meshpy.geometry import generate_surface_of_revolution, EXT_OPEN, GeometryBuilder + from meshpy.geometry import make_ball + + if radial_subdiv is None: + radial_subdiv = 2*points+2 + + dphi = np.pi/points + + def truncate(r): + if abs(r) < 1e-10: + return 0 + else: + return r + + rz = [(truncate(radius*np.sin(i*dphi)), radius*np.cos(i*dphi)) for i in range(points+1)] + + geob = GeometryBuilder() + geob.add_geometry(*generate_surface_of_revolution(rz, + closure=EXT_OPEN, + radial_subdiv=radial_subdiv)) + # geob.add_geometry(*make_ball(radius, radial_subdiv)) + mesh_info = MeshInfo() + geob.set(mesh_info) + mesh_meshpy = build(mesh_info, **kwargs) # , options=Options(switches='pq1.2/10') + mesh = mesh3d(np.array(mesh_meshpy.points, dtype=REAL), + np.array(mesh_meshpy.elements, dtype=INDEX)) + return mesh + + +def ballNd(dim, radius, h): + if dim == 1: + mesh = simpleInterval(-radius, radius) + while mesh.h > h: + mesh, lookup = mesh.refine(returnLookup=True) + radialMeshTransformation(mesh, lookup) + return mesh + elif dim == 2: + return circle(int(np.ceil(2*np.pi*radius/h)), radius, max_volume=0.5*h**2) + elif dim == 3: + mesh = ball(radius) + while mesh.h > h: + mesh, lookup = mesh.refine(returnLookup=True) + radialMeshTransformation(mesh, lookup) + return mesh + else: + raise NotImplementedError() + + +def gradeMesh(mesh, grading): + vertices = mesh.vertices_as_array + norms = np.linalg.norm(vertices, axis=1) + for i in range(vertices.shape[0]): + n = norms[i] + if n > 0: + vertices[i, :] *= grading(n)/n + mesh.resetMeshInfo() + + +def gradeUniformBall(mesh, + muInterior=2., muExterior=2., + rInterior=1., rExterior=None, rExteriorInitial=None): + if rExteriorInitial is None: + rExteriorInitial = np.linalg.norm(mesh.vertices, axis=1).max() + assert rInterior < rExteriorInitial + if rExterior is None: + rExterior = rExteriorInitial 
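gradeMesh rescales every vertex radially, mapping radius r to grading(r). An illustrative example with a quadratic grading that pulls vertices toward the origin; the callable is arbitrary, and any monotone grading with grading(0) = 0 behaves analogously.

    from PyNucleus_fem.mesh import circle, gradeMesh

    mesh = circle(64, radius=1., max_volume=0.01)
    gradeMesh(mesh, lambda r: r**2)   # a vertex at radius r moves to radius r**2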
+ + def grading(r): + if r <= rInterior: + return rInterior*(1-(1-r/rInterior)**muInterior) + else: + return rInterior + (rExterior-rInterior)*((r-rInterior)/(rExteriorInitial-rInterior))**muExterior + + gradeMesh(mesh, grading) + + +class meshNd(meshBase): + def __init__(self, vertices, cells): + super(meshNd, self).__init__(vertices, cells) + + def __getstate__(self): + if hasattr(self, '_boundaryVertices'): + boundaryVertices = self.boundaryVertices + boundaryVertexTags = self.boundaryVertexTags + else: + boundaryVertices = None + boundaryVertexTags = None + if hasattr(self, '_boundaryEdges'): + boundaryEdges = self.boundaryEdges + boundaryEdgeTags = self.boundaryEdgeTags + else: + boundaryEdges = None + boundaryEdgeTags = None + if hasattr(self, '_boundaryFaces'): + boundaryFaces = self.boundaryFaces + boundaryFaceTags = self.boundaryFaceTags + else: + boundaryFaces = None + boundaryFaceTags = None + return (super(meshNd, self).__getstate__(), + boundaryVertices, boundaryVertexTags, + boundaryEdges, boundaryEdgeTags, + boundaryFaces, boundaryFaceTags) + + def __setstate__(self, state): + super(meshNd, self).__setstate__(state[0]) + if state[1] is not None: + self._boundaryVertices = state[1] + self._boundaryVertexTags = state[2] + if state[3] is not None: + self._boundaryEdges = state[3] + self._boundaryEdgeTags = state[4] + if state[5] is not None: + self._boundaryFaces = state[5] + self._boundaryFaceTags = state[6] + + def get_boundary_vertices(self): + if not hasattr(self, '_boundaryVertices'): + if self.manifold_dim >= 2: + self._boundaryVertices = boundaryVerticesFromBoundaryEdges(self.boundaryEdges) + else: + self._boundaryVertices = boundaryVertices(self.cells) + return self._boundaryVertices + else: + return self._boundaryVertices + + def set_boundary_vertices(self, value): + self._boundaryVertices = value + + boundaryVertices = property(fget=get_boundary_vertices, + fset=set_boundary_vertices) + + def get_boundary_edges(self): + if not hasattr(self, '_boundaryEdges'): + if self.manifold_dim == 1: + self._boundaryEdges = uninitialized((0, 2), dtype=INDEX) + elif self.manifold_dim == 2: + self._boundaryEdges = boundaryEdges(self.cells) + elif self.manifold_dim == 3: + self._boundaryEdges = boundaryEdgesFromBoundaryFaces(self.boundaryFaces) + return self._boundaryEdges + else: + return self._boundaryEdges + + def set_boundary_edges(self, value): + assert value.shape[1] == 2 + assert value.dtype == INDEX + self._boundaryEdges = value + + boundaryEdges = property(fget=get_boundary_edges, + fset=set_boundary_edges) + + def get_boundary_faces(self): + if not hasattr(self, '_boundaryFaces'): + if self.dim <= 2: + self._boundaryFaces = uninitialized((0, 3), dtype=INDEX) + elif self.dim == 3: + self._boundaryFaces = boundaryFaces(self.cells) + return self._boundaryFaces + else: + return self._boundaryFaces + + def set_boundary_faces(self, value): + assert value.shape[1] == 3 + self._boundaryFaces = value + + boundaryFaces = property(fget=get_boundary_faces, + fset=set_boundary_faces) + + def get_boundary_cells(self): + if not hasattr(self, '_boundaryCells'): + if self.manifold_dim == 2: + self._boundaryEdges, self._boundaryCells = boundaryEdges(self.cells, returnBoundaryCells=True) + else: + raise NotImplementedError() + return self._boundaryCells + + def set_boundary_cells(self, value): + assert value.ndim == 1 + self._boundaryCells = value + + boundaryCells = property(fget=get_boundary_cells, + fset=set_boundary_cells) + + def get_interiorVertices(self): + if not hasattr(self, 
'_interiorVertices'): + temp = np.ones(self.vertices.shape[0], dtype=np.bool) + temp[self.boundaryVertices] = 0 + self._interiorVertices = temp.nonzero()[0] + return self._interiorVertices + else: + return self._interiorVertices + + def getInteriorVerticesByTag(self, tag=None): + if not isinstance(tag, list) and tag == NO_BOUNDARY: + return np.arange(self.num_vertices, dtype=INDEX) + else: + bv = self.getBoundaryVerticesByTag(tag) + idx = np.ones(self.num_vertices, dtype=np.bool) + idx[bv] = False + return np.nonzero(idx)[0].astype(INDEX) + + def get_diam(self): + from numpy.linalg import norm + vertices = self.vertices_as_array + return norm(vertices.max(axis=0)-vertices.min(axis=0), 2) + + interiorVertices = property(fget=get_interiorVertices) + diam = property(fget=get_diam) + + def copy(self): + newMesh = super(meshNd, self).copy() + if hasattr(self, '_boundaryVertices'): + newMesh._boundaryVertices = self._boundaryVertices.copy() + if hasattr(self, '_boundaryVertexTags'): + newMesh._boundaryVertexTags = self._boundaryVertexTags.copy() + if hasattr(self, '_boundaryEdges'): + newMesh._boundaryEdges = self._boundaryEdges.copy() + if hasattr(self, '_boundaryEdgeTags'): + newMesh._boundaryEdgeTags = self._boundaryEdgeTags.copy() + if hasattr(self, '_boundaryFaces'): + newMesh._boundaryFaces = self._boundaryFaces.copy() + if hasattr(self, '_boundaryFaceTags'): + newMesh._boundaryFaceTags = self._boundaryFaceTags.copy() + return newMesh + + def __repr__(self): + return ('{} with {:,} vertices ' + + 'and {:,} cells').format(self.__class__.__name__, + self.num_vertices, + self.num_cells) + + def get_boundary_vertex_tags(self): + if not hasattr(self, '_boundaryVertexTags'): + self._boundaryVertexTags = PHYSICAL*np.zeros((self.boundaryVertices.shape[0]), + dtype=TAG) + return self._boundaryVertexTags + + def set_boundary_vertex_tags(self, value): + assert value.shape[0] == self.boundaryVertices.shape[0] + assert value.dtype == TAG + self._boundaryVertexTags = value + + boundaryVertexTags = property(fset=set_boundary_vertex_tags, + fget=get_boundary_vertex_tags) + + def tagBoundaryVertices(self, tagFunc): + boundaryVertexTags = uninitialized((self.boundaryVertices.shape[0]), + dtype=TAG) + for i, j in enumerate(self.boundaryVertices): + v = self.vertices[j, :] + boundaryVertexTags[i] = tagFunc(v) + self.boundaryVertexTags = boundaryVertexTags + + def replaceBoundaryVertexTags(self, tagFunc, tagsToReplace=set()): + boundaryVertexTags = uninitialized((self.boundaryVertices.shape[0]), + dtype=TAG) + for i, j in enumerate(self.boundaryVertices): + if self.boundaryVertexTags[i] in tagsToReplace: + v = self.vertices[j, :] + boundaryVertexTags[i] = tagFunc(v) + else: + boundaryVertexTags[i] = self.boundaryVertexTags[i] + self.boundaryVertexTags = boundaryVertexTags + + def getBoundaryVerticesByTag(self, tag=None, sorted=False): + if tag is None: + bv = self.boundaryVertices + elif isinstance(tag, list) and tag[0] is None: + bv = self.boundaryVertices + elif isinstance(tag, list): + idx = (self.boundaryVertexTags == tag[0]) + for t in tag[1:]: + idx = np.logical_or(idx, (self.boundaryVertexTags == t)) + bv = self.boundaryVertices[idx] + else: + bv = self.boundaryVertices[self.boundaryVertexTags == tag] + if sorted: + bv.sort() + return bv + + def get_boundary_edge_tags(self): + if not hasattr(self, '_boundaryEdgeTags'): + self._boundaryEdgeTags = PHYSICAL*np.ones(self.boundaryEdges.shape[0], + dtype=TAG) + return self._boundaryEdgeTags + + def set_boundary_edge_tags(self, value): + assert value.shape[0] 
== self.boundaryEdges.shape[0] + self._boundaryEdgeTags = value + + boundaryEdgeTags = property(fset=set_boundary_edge_tags, + fget=get_boundary_edge_tags) + + def tagBoundaryEdges(self, tagFunc): + boundaryEdgeTags = uninitialized(self.boundaryEdges.shape[0], + dtype=TAG) + for i in range(self.boundaryEdges.shape[0]): + e = self.boundaryEdges[i, :] + v0 = self.vertices[e[0]] + v1 = self.vertices[e[1]] + boundaryEdgeTags[i] = tagFunc(v0, v1) + self.boundaryEdgeTags = boundaryEdgeTags + + def replaceBoundaryEdgeTags(self, tagFunc, tagsToReplace=set()): + boundaryEdgeTags = uninitialized((self.boundaryEdges.shape[0]), + dtype=TAG) + for i in range(self.boundaryEdges.shape[0]): + if self.boundaryEdgeTags[i] in tagsToReplace: + e = self.boundaryEdges[i, :] + v0 = self.vertices[e[0]] + v1 = self.vertices[e[1]] + boundaryEdgeTags[i] = tagFunc(v0, v1) + else: + boundaryEdgeTags[i] = self.boundaryEdgeTags[i] + self.boundaryEdgeTags = boundaryEdgeTags + + def getBoundaryEdgesByTag(self, tag=None, returnBoundaryCells=False): + if tag is None: + if not returnBoundaryCells: + return self.boundaryEdges + else: + assert self.dim == 2 + return self.boundaryEdges, self.boundaryCells + else: + if not type(tag) is list: + tag = [tag] + idx = (self.boundaryEdgeTags == tag[0]) + for t in tag[1:]: + idx = np.logical_or(idx, (self.boundaryEdgeTags == t)) + if not returnBoundaryCells: + return self.boundaryEdges[idx, :] + else: + return self.boundaryEdges[idx, :], self.boundaryCells[idx] + + def get_boundary_face_tags(self): + if not hasattr(self, '_boundaryFaceTags'): + self._boundaryFaceTags = PHYSICAL*np.ones(self.boundaryFaces.shape[0], + dtype=TAG) + return self._boundaryFaceTags + + def set_boundary_face_tags(self, value): + assert value.shape[0] == self.boundaryFaces.shape[0] + self._boundaryFaceTags = value + + boundaryFaceTags = property(fset=set_boundary_face_tags, + fget=get_boundary_face_tags) + + def tagBoundaryFaces(self, tagFunc): + boundaryFaceTags = uninitialized(self.boundaryFaces.shape[0], + dtype=TAG) + for i in range(self.boundaryFaces.shape[0]): + f = self.boundaryFaces[i, :] + v0 = self.vertices[f[0]] + v1 = self.vertices[f[1]] + v2 = self.vertices[f[2]] + boundaryFaceTags[i] = tagFunc(v0, v1, v2) + self.boundaryFaceTags = boundaryFaceTags + + def getBoundaryFacesByTag(self, tag=None): + if tag is None: + return self.boundaryFaces + elif type(tag) is list: + idx = (self.boundaryFaceTags == tag[0]) + for t in tag[1:]: + idx = np.logical_or(idx, (self.boundaryFaceTags == t)) + return self.boundaryFaces[idx] + else: + return self.boundaryFaces[self.boundaryFaceTags == tag] + + def HDF5write(self, node): + COMPRESSION = 'gzip' + node.create_dataset('vertices', data=self.vertices, + compression=COMPRESSION) + node.create_dataset('cells', data=self.cells, + compression=COMPRESSION) + if hasattr(self, '_boundaryVertices'): + node.create_dataset('boundaryVertices', + data=self.boundaryVertices, + compression=COMPRESSION) + if hasattr(self, '_boundaryVertexTags'): + node.create_dataset('boundaryVertexTags', + data=self.boundaryVertexTags, + compression=COMPRESSION) + if hasattr(self, '_boundaryEdges'): + node.create_dataset('boundaryEdges', + data=self.boundaryEdges, + compression=COMPRESSION) + if hasattr(self, '_boundaryEdgeTags'): + node.create_dataset('boundaryEdgeTags', + data=self.boundaryEdgeTags, + compression=COMPRESSION) + if hasattr(self, '_boundaryFaces'): + node.create_dataset('boundaryFaces', + data=self.boundaryFaces, + compression=COMPRESSION) + if hasattr(self, 
'_boundaryFaceTags'): + node.create_dataset('boundaryFaceTags', + data=self.boundaryFaceTags, + compression=COMPRESSION) + node.attrs['dim'] = self.dim + + @staticmethod + def HDF5read(node): + dim = node.attrs['dim'] + vertices = np.array(node['vertices'], dtype=REAL) + cells = np.array(node['cells'], dtype=INDEX) + if dim == 1: + mesh = mesh1d(vertices, cells) + elif dim == 2: + mesh = mesh2d(vertices, cells) + elif dim == 3: + mesh = mesh3d(vertices, cells) + if 'boundaryVertices' in node: + mesh.boundaryVertices = np.array(node['boundaryVertices'], + dtype=INDEX) + if 'boundaryVertexTags' in node: + mesh.boundaryVertexTags = np.array(node['boundaryVertexTags'], + dtype=TAG) + if 'boundaryEdges' in node: + mesh.boundaryEdges = np.array(node['boundaryEdges'], + dtype=INDEX) + if 'boundaryEdgeTags' in node: + mesh.boundaryEdgeTags = np.array(node['boundaryEdgeTags'], + dtype=TAG) + if 'boundaryFaces' in node: + mesh.boundaryFaces = np.array(node['boundaryFaces'], + dtype=INDEX) + if 'boundaryFaceTags' in node: + mesh.boundaryFaceTags = np.array(node['boundaryFaceTags'], + dtype=TAG) + return mesh + + def exportVTK(self, filename, cell_data=None): + import meshio + if self.dim == 1: + cell_type = 'line' + elif self.dim == 2: + cell_type = 'triangle' + elif self.dim == 3: + cell_type = 'tetra' + else: + raise NotImplementedError() + vertices = np.zeros((self.num_vertices, 3), dtype=REAL) + vertices[:, 3-self.dim:] = self.vertices_as_array + meshio.write(filename, + meshio.Mesh(vertices, + {cell_type: self.cells_as_array}, + cell_data=cell_data), + file_format='vtk') + + def exportSolutionVTK(self, x, DoFMap, filename, labels='solution', cell_data=None): + import meshio + if not isinstance(x, (list, tuple)): + x = [x] + labels = [labels] + sols = [] + for xx in x: + sol, _ = DoFMap.linearPart(self, xx) + v2d = uninitialized((self.num_vertices, 1), dtype=INDEX) + DoFMap.getVertexDoFs(v2d) + sol2 = uninitialized((self.num_vertices)) + for i in range(self.num_vertices): + sol2[i] = sol[v2d[i, 0]] + sols.append(sol2) + if self.dim == 1: + cell_type = 'line' + elif self.dim == 2: + cell_type = 'triangle' + elif self.dim == 3: + cell_type = 'tetra' + else: + raise NotImplementedError() + vertices = np.zeros((self.num_vertices, 3), dtype=REAL) + vertices[:, 3-self.dim:] = self.vertices_as_array + meshio.write(filename, + meshio.Mesh(vertices, + {cell_type: self.cells_as_array}, + point_data={label: np.array(sol) for label, sol in zip(labels, sols)}, + cell_data=cell_data,), + file_format='vtk') + + @staticmethod + def readMesh(filename, file_format=None): + import meshio + mesh = meshio.read(filename, file_format) + vertices = mesh.points.astype(REAL) + dim = vertices.shape[1] + assert len(mesh.cells) + cell_type = mesh.cells[0].type + if cell_type == 'line': + dim = 1 + meshType = mesh1d + elif cell_type == 'triangle': + dim = 2 + meshType = mesh2d + elif cell_type == 'tetra': + dim = 3 + meshType = mesh3d + else: + raise NotImplementedError() + vertices = np.ascontiguousarray(vertices[:, :dim]) + cells = mesh.cells[0].data.astype(INDEX) + return meshType(vertices, cells) + + def getPartitions(self, numPartitions, partitioner='metis', partitionerParams={}): + # partition mesh cells + if partitioner == 'regular': + mP = regularMeshPartitioner(self) + defaultParams = {'partitionedDimensions': self.dim} + if 'regular' in partitionerParams: + defaultParams.update(partitionerParams['regular']) + part, actualNumPartitions = mP.partitionCells(numPartitions, + 
partitionedDimensions=defaultParams['partitionedDimensions'])
+        elif partitioner == 'metis':
+            mP = metisMeshPartitioner(self)
+            defaultParams = {'partition_weights': None}
+            if 'metis' in partitionerParams:
+                defaultParams.update(partitionerParams['metis'])
+            part, actualNumPartitions = mP.partitionCells(numPartitions,
+                                                          partition_weights=defaultParams['partition_weights'])
+        else:
+            raise NotImplementedError()
+        if actualNumPartitions != numPartitions:
+            raise PartitionerException('Partitioner returned {} partitions instead of {}.'.format(actualNumPartitions,
+                                                                                                  numPartitions))
+        return part
+
+    def getCuthillMckeeVertexOrder(self):
+        from PyNucleus_base.linear_operators import sparseGraph
+        from PyNucleus_base.sparseGraph import cuthill_mckee
+        from . import P1_DoFMap
+        dm = P1_DoFMap(self, -10)
+        A = dm.buildSparsityPattern(self.cells)
+        graph = sparseGraph(A.indices, A.indptr, A.shape[0], A.shape[1])
+        idx = uninitialized((dm.num_dofs), dtype=INDEX)
+        cuthill_mckee(graph, idx)
+        return idx
+
+    def global_h(self, comm):
+        h = self.h
+        if comm is None:
+            return h
+        else:
+            return comm.allreduce(h, op=MPI.MAX)
+
+    def global_hmin(self, comm):
+        hmin = self.hmin
+        if comm is None:
+            return hmin
+        else:
+            return comm.allreduce(hmin, op=MPI.MIN)
+
+    def global_volume(self, comm):
+        vol = self.volume
+        if comm is None:
+            return vol
+        else:
+            return comm.allreduce(vol, op=MPI.SUM)
+
+    def global_diam(self, comm):
+        if comm is None:
+            # diam is a property, not a method
+            return self.diam
+        from numpy.linalg import norm
+        m = self.vertices.min(axis=0)
+        M = self.vertices.max(axis=0)
+        # in-place reductions take MPI.IN_PLACE as the send buffer
+        comm.Allreduce(MPI.IN_PLACE, m, op=MPI.MIN)
+        comm.Allreduce(MPI.IN_PLACE, M, op=MPI.MAX)
+        return norm(M-m, 2)
+
+    def get_surface(self):
+        if self.dim == 1:
+            return 1.0
+        else:
+            return self.get_surface_mesh().volume
+
+    surface = property(fget=get_surface)
+
+    def get_surface_mesh(self, tag=None):
+        if self.dim == 1:
+            bv = self.getBoundaryVerticesByTag(tag)
+            cells = uninitialized((len(bv), 1), dtype=INDEX)
+            cells[:, 0] = bv
+            surface = mesh0d(self.vertices, cells)
+        elif self.dim == 2:
+            surface = mesh1d(self.vertices, self.getBoundaryEdgesByTag(tag))
+        elif self.dim == 3:
+            surface = mesh2d(self.vertices, self.getBoundaryFacesByTag(tag))
+        else:
+            raise NotImplementedError()
+        surface.setMeshTransformation(self.transformer)
+        return surface
+
+    def reorderVertices(self, idx):
+        invidx = uninitialized_like(idx)
+        invidx[idx] = np.arange(self.num_vertices, dtype=INDEX)
+        self.vertices = self.vertices_as_array[idx, :]
+        if hasattr(self, '_boundaryVertices'):
+            self._boundaryVertices = invidx[self._boundaryVertices].astype(INDEX)
+        if hasattr(self, '_boundaryEdges'):
+            self._boundaryEdges = invidx[self._boundaryEdges].astype(INDEX)
+        if hasattr(self, '_boundaryFaces'):
+            # reorder the faces here, not the edges a second time
+            self._boundaryFaces = invidx[self._boundaryFaces].astype(INDEX)
+        self.cells = invidx[self.cells_as_array[:, :]].astype(INDEX)
+
+
+class mesh0d(meshNd):
+    pass
+
+
+class mesh1d(meshNd):
+    def plot(self, boundary=None, info=False):
+        import matplotlib.pyplot as plt
+        X = np.array([v[0] for v in self.vertices])
+        if self.vertices.shape[1] == 1:
+            Y = np.zeros_like(X)
+            lenX = X.max()-X.min()
+            plt.xlim([X.min()-lenX*0.1, X.max()+lenX*0.1])
+            plt.plot(X, Y, 'o-', zorder=1)
+        else:
+            v = self.vertices_as_array
+            c = self.cells_as_array
+            plt.plot([v[c[:, 0], 0],
+                      v[c[:, 1], 0]],
+                     [v[c[:, 0], 1],
+                      v[c[:, 1], 1]],
+                     c='k')
+            plt.scatter(self.vertices_as_array[:, 0], self.vertices_as_array[:, 1])
+            lenX = v[:, 0].max()-v[:, 0].min()
+            plt.xlim([v[:,
0].max()+lenX*0.1]) + lenY = v[:, 1].max()-v[:, 1].min() + plt.ylim([v[:, 1].min()-lenY*0.1, v[:, 1].max()+lenY*0.1]) + plt.axis('equal') + if info: + tags = set(self.boundaryEdgeTags) + tags = tags.union(self.boundaryVertexTags) + cm = plt.get_cmap('gist_rainbow') + num_colors = len(tags) + colors = {tag: cm(i/num_colors) for i, tag in enumerate(tags)} + for i, c in enumerate(self.cells): + midpoint = (self.vertices_as_array[c[0]] + + self.vertices_as_array[c[1]])/2 + plt.text(midpoint[0], 0, str(i), style='italic') + for i, v in enumerate(self.vertices_as_array): + plt.text(v, 0, i) + for vno, tag in zip(self.boundaryVertices, + self.boundaryVertexTags): + v = self.vertices_as_array[vno, :] + plt.text(v[0], 0, tag, horizontalalignment='right', + verticalalignment='top', color=colors[tag]) + for i, (e, tag) in enumerate(zip(self.boundaryEdges, + self.boundaryEdgeTags)): + v = (self.vertices_as_array[e[0]]+self.vertices_as_array[e[1]])/2 + plt.text(v[0], 0, tag, color=colors[tag]) + + def plotPrepocess(self, x, DoFMap): + from . DoFMaps import P0_DoFMap + if not isinstance(DoFMap, P0_DoFMap): + positions = uninitialized((DoFMap.num_dofs+DoFMap.num_boundary_dofs), dtype=REAL) + dof2pos = np.full((DoFMap.num_boundary_dofs), dtype=INDEX, fill_value=-1) + bDoF = DoFMap.num_dofs + simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + for cellNo in range(self.num_cells): + self.getSimplex_py(cellNo, simplex) + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + pos = np.dot(DoFMap.nodes[i, :], simplex) + if dof >= 0: + positions[dof] = pos[0] + else: + p = dof2pos[-dof-1] + if p == -1: + p = dof2pos[-dof-1] = bDoF + bDoF += 1 + positions[p] = pos[0] + if x.ndim == 1: + xx = np.zeros((DoFMap.num_dofs+DoFMap.num_boundary_dofs), dtype=REAL) + xx[:DoFMap.num_dofs] = x + else: + xx = np.zeros((x.shape[0], self.num_vertices), dtype=REAL) + xx[:, :DoFMap.num_dofs] = x + else: + positions = uninitialized((2*(DoFMap.num_dofs+DoFMap.num_boundary_dofs)), dtype=REAL) + dof2pos = np.full((DoFMap.num_boundary_dofs), dtype=INDEX, fill_value=-1) + bDoF = DoFMap.num_dofs + simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + for cellNo in range(self.num_cells): + self.getSimplex_py(cellNo, simplex) + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + if dof >= 0: + positions[2*dof] = min(simplex[0, 0], simplex[1, 0])+1e-9 + positions[2*dof+1] = max(simplex[0, 0], simplex[1, 0])-1e-9 + else: + p = dof2pos[-dof-1] + if p == -1: + p = dof2pos[-dof-1] = bDoF + bDoF += 1 + positions[2*p] = min(simplex[0, 0], simplex[1, 0])+1e-9 + positions[2*p+1] = max(simplex[0, 0], simplex[1, 0])-1e-9 + if x.ndim == 1: + xx = np.zeros((2*(DoFMap.num_dofs+DoFMap.num_boundary_dofs)), dtype=REAL) + xx[:2*DoFMap.num_dofs-1:2] = x + xx[1:2*DoFMap.num_dofs:2] = x + else: + xx = np.zeros((2*x.shape[0], self.num_vertices), dtype=REAL) + xx[:, :2*DoFMap.num_dofs-1:2] = x + xx[:, 1:2*DoFMap.num_dofs:2] = x + positions = np.concatenate((positions, self.vertices_as_array[:, 0])) + if x.ndim == 1: + shape = (self.num_vertices, ) + else: + shape = (x.shape[0], self.num_vertices) + xx = np.hstack((xx, np.full(shape, fill_value=np.nan, dtype=REAL))) + idx = np.argsort(positions) + positions = positions[idx] + if x.ndim == 1: + xx = xx[idx] + else: + xx = xx[:, idx] + return positions, xx + + def plotFunction(self, x, DoFMap=None, tag=0, flat=False, yvals=None, fig=None, ax=None, update=None, **kwargs): + import matplotlib.pyplot as plt + if fig is None: + fig = plt.gcf() + 
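+        # Dispatch note: with a DoFMap, nodal positions are reconstructed via
+        # plotPrepocess; without one, the layout of x is inferred below from its
+        # length (per-cell values, interior-only values, or per-vertex values).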
if ax is None: + ax = fig.gca() + if DoFMap: + positions, sol = self.plotPrepocess(x, DoFMap) + else: + if x.shape[0] == self.num_cells: + from . DoFMaps import P0_DoFMap + dm = P0_DoFMap(self) + positions, sol = self.plotPrepocess(x, dm) + elif x.shape[0] < self.num_vertices: + positions = self.vertices_as_array[:, 0] + sol = np.zeros((self.num_vertices)) + sol[self.getInteriorVerticesByTag(tag)] = x + else: + positions = self.vertices_as_array[:, 0] + sol = x + idx = np.argsort(positions) + positions = positions[idx] + sol = sol[idx] + + if sol.ndim == 1: + if update is None: + return ax.plot(positions, sol, **kwargs)[0] + else: + update.set_data(positions, sol) + else: + from matplotlib import cm + assert yvals is not None + X, Y = np.meshgrid(positions, yvals) + if flat: + ax.pcolor(X, Y, + sol, cmap=cm.jet, + **kwargs) + else: + fig = plt.gcf() + fig.delaxes(fig.gca()) + ax = fig.add_subplot(projection='3d') + ax.plot_surface(X, Y, sol, cmap=cm.jet, **kwargs) + + def plotDoFMap(self, DoFMap, printDoFIndices=True): + "Plot the DoF numbers on the mesh." + import matplotlib.pyplot as plt + from matplotlib import rc_context + self.plot() + pos = DoFMap.getDoFCoordinates() + if printDoFIndices: + with rc_context({'text.usetex': False}): + for dof in range(DoFMap.num_dofs): + plt.text(pos[dof, 0], 0, str(dof)) + else: + plt.scatter(pos[:, 0], np.zeros((pos.shape[0])), marker='x', s=60) + + def plotMeshOverlap(self, overlap): + "Plot a single mesh overlap." + from . meshOverlaps import meshOverlap + assert isinstance(overlap, meshOverlap) + import matplotlib.pyplot as plt + # self.plot(boundary=True) + self.plot(boundary=False) + for i in range(overlap.num_vertices): + v = self.cells[overlap.vertices[i, 0], overlap.vertices[i, 1]] + plt.text(self.vertices[v, 0], self.vertices[v, 1], str(i)) + for i in range(overlap.num_cells): + cellNo = overlap.cells[i] + simplex = self.vertices[self.cells[cellNo, :], :] + XY = simplex.mean(axis=0) + plt.text(XY[0], 0, str(i)) + + def plotOverlapManager(self, overlap): + "Plot all mesh overlaps in an overlap manager." + from . meshOverlaps import overlapManager + assert isinstance(overlap, overlapManager) + import matplotlib.pyplot as plt + self.plot() + x = np.zeros((self.num_cells), dtype=REAL) + for subdomain in overlap.overlaps: + for cellNo in overlap.overlaps[subdomain].cells: + x[cellNo] += 1 + for cellNo in range(self.num_cells): + plt.text(self.vertices[self.cells[cellNo, :], 0].mean(), 0, str(x[cellNo])) + plt.axis('equal') + + def plotAlgebraicOverlap(self, DoFMap, overlap): + "Plot a single algebraic overlap." + from . algebraicOverlaps import algebraicOverlap + assert isinstance(overlap, algebraicOverlap) + import matplotlib.pyplot as plt + self.plot(boundary=True) + dofDict = {} + for i, dof in enumerate(overlap.shared_dofs): + dofDict[dof] = i + for cellNo in range(self.num_cells): + simplex = self.vertices[self.cells[cellNo, :], :] + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + try: + pos = np.dot(DoFMap.nodes[i, :], simplex) + plt.text(pos[0], 0, str(dofDict[dof])) + except: + pass + + def plotAlgebraicOverlapManager(self, DoFMap, overlap): + from . 
algebraicOverlaps import algebraicOverlapManager + assert isinstance(overlap, algebraicOverlapManager) + import matplotlib.pyplot as plt + self.plot(boundary=True) + x = np.zeros((DoFMap.num_dofs), dtype=REAL) + for subdomainNo in overlap.overlaps: + for i, dof in enumerate(overlap.overlaps[subdomainNo].shared_dofs): + x[dof] += 1 + self.plotFunctionDoFMap(DoFMap, x) + + def plotFunctionDoFMap(self, DoFMap, x): + "Display function values for every DoF." + import matplotlib.pyplot as plt + self.plot() + for cellNo in range(self.num_cells): + simplex = self.vertices[self.cells[cellNo, :], :] + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + if dof >= 0: + pos = np.dot(DoFMap.nodes[i, :], simplex) + plt.text(pos[0], 0, '{:.2}'.format(x[dof])) + + def sortVertices(self): + idx = np.argsort(self.vertices_as_array, axis=0).ravel() + self.reorderVertices(idx) + + +class mesh2d(meshNd): + """ + 2D mesh + + Attributes: + vertices + cells + boundaryVertices + boundaryEdges + boundaryVertexTags + boundaryEdgeTags + """ + + def getInteriorMap(self, tag): + """ + Returns a map from the vertex numbers of the mesh + to the interior vertices. + """ + bdofs = self.getBoundaryVerticesByTag(tag) + mapping = -1*np.ones((self.num_vertices), dtype=INDEX) + iV = np.ones(self.num_vertices, dtype=np.bool) + iV[bdofs] = 0 + iV = iV.nonzero()[0] + mapping[iV] = np.arange(len(iV), dtype=INDEX) + return mapping + + def plot(self, boundary=None, info=False, padding=0.1, fill=False, **kwargs): + import matplotlib.pyplot as plt + from matplotlib import rcParams + vertices = self.vertices_as_array + X, Y = vertices[:, 0], vertices[:, 1] + triangles = self.cells + lenX = X.max()-X.min() + lenY = Y.max()-Y.min() + plt.axis('equal') + plt.xlim([X.min()-lenX*padding, X.max()+lenX*padding]) + plt.ylim([Y.min()-lenY*padding, Y.max()+lenY*padding]) + if fill: + plt.tripcolor(X, Y, triangles, np.ones(triangles.shape[0]), 'k-', zorder=1, alpha=0.3 if boundary else 1., **kwargs) + else: + if 'alpha' not in kwargs: + kwargs['alpha'] = 0.3 if boundary else 1. 
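+            # Draw the wireframe; the reduced default alpha keeps the mesh
+            # lines faint when boundary markers are plotted on top of them.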
+ plt.triplot(X, Y, triangles, 'k-', zorder=1, **kwargs) + if boundary: + tags = set(self.boundaryEdgeTags) + tags = tags.union(self.boundaryVertexTags) + cm = plt.get_cmap('gist_rainbow') + num_colors = len(tags) + colors = {tag: cm(i/(num_colors)) for i, tag in enumerate(sorted(tags))} + vertices = self.vertices_as_array + for bv, tag in zip(self.boundaryVertices, self.boundaryVertexTags): + XY = vertices[bv, :] + plt.plot([XY[0]], [XY[1]], '-o', + linewidth=0*rcParams["lines.linewidth"], + markersize=10, + color=colors[tag], + zorder=3) + for be, tag in zip(self.boundaryEdges, self.boundaryEdgeTags): + XY = vertices[be, :] + plt.plot(XY[:, 0], XY[:, 1], 'k-', + linewidth=3*rcParams["lines.linewidth"], + color=colors[tag], + zorder=2) + + if info: + tags = set(self.boundaryEdgeTags) + tags = tags.union(self.boundaryVertexTags) + cm = plt.get_cmap('gist_rainbow') + num_colors = len(tags) + colors = {tag: cm(i/num_colors) for i, tag in enumerate(tags)} + vertices = self.vertices_as_array + for i, c in enumerate(self.cells): + midpoint = (vertices[c[0]] + + vertices[c[1]] + + vertices[c[2]])/3 + plt.text(midpoint[0], midpoint[1], str(i), style='italic') + for i, v in enumerate(vertices): + plt.text(v[0], v[1], i) + for vno, tag in zip(self.boundaryVertices, + self.boundaryVertexTags): + v = self.vertices[vno, :] + plt.text(v[0], v[1], tag, horizontalalignment='right', + verticalalignment='top', color=colors[tag]) + for i, (e, tag) in enumerate(zip(self.boundaryEdges, + self.boundaryEdgeTags)): + v = (vertices[e[0]]+vertices[e[1]])/2 + plt.text(v[0], v[1], tag, color=colors[tag]) + + def plotPrepocess(self, x, DoFMap=None, tag=0): + from . DoFMaps import P1_DoFMap, P0_DoFMap + if DoFMap and isinstance(DoFMap, P0_DoFMap): + if DoFMap.num_dofs < self.num_cells: + from . 
DoFMaps import getSubMapRestrictionProlongation + dm = P0_DoFMap(self, -10) + _, P = getSubMapRestrictionProlongation(dm, DoFMap) + y = P*x + return self.plotPrepocess(y) + else: + return self.plotPrepocess(x) + elif DoFMap and not isinstance(DoFMap, P1_DoFMap): + return self.plotPrepocess(DoFMap.linearPart(self, x)[0]) + elif DoFMap and isinstance(DoFMap, P1_DoFMap): + if DoFMap.num_dofs < self.num_vertices: + y, dm = DoFMap.augmentWithZero(x) + else: + y = x + dm = DoFMap + v2d = uninitialized((self.num_vertices, dm.dofs_per_vertex), dtype=INDEX) + dm.getVertexDoFs(v2d) + z = uninitialized((self.num_vertices), dtype=REAL) + for v in range(self.num_vertices): + z[v] = y[v2d[v, 0]] + return self.plotPrepocess(z) + else: + v = self.vertices_as_array + X, Y = v[:, 0], v[:, 1] + if x.shape[0] == self.num_vertices: + sol = x + elif x.shape[0] == self.num_cells: + sol = x + else: + sol = np.zeros(self.num_vertices) + if DoFMap is not None: + tag = DoFMap.tag + sol[self.getInteriorVerticesByTag(tag)] = x + return X, Y, sol + + def plotFunction(self, x, flat=False, DoFMap=None, tag=0, update=None, contour=False, ax=None, **kwargs): + import matplotlib.pyplot as plt + from matplotlib import cm + X, Y, sol = self.plotPrepocess(x, DoFMap, tag) + if flat: + plt.axis('equal') + if update is None: + try: + cb = plt.gca().collections[-1].colorbar + cb.remove() + except: + pass + update = plt.tripcolor(X, Y, self.cells, sol, cmap=cm.jet, linewidth=0, **kwargs) + plt.colorbar() + if contour: + update2 = plt.tricontour(X, Y, self.cells, sol, colors=['k']) + update = [update, update2] + return update + else: + if contour: + update[0].set_array(sol) + for cp in update[1].collections: + cp.remove() + update[1] = plt.tricontour(X, Y, self.cells, sol, colors=['k']) + else: + update.set_array(sol) + else: + from . DoFMaps import P0_DoFMap + if isinstance(DoFMap, P0_DoFMap): + assert self.num_cells == sol.shape[0] + newVertices = uninitialized(((self.dim+1)*self.num_cells, self.dim), + dtype=REAL) + newCells = uninitialized((self.num_cells, self.dim+1), + dtype=INDEX) + newSol = uninitialized(((self.dim+1)*self.num_cells, ), + dtype=REAL) + k = 0 + for cellNo in range(self.num_cells): + for vertexNo in range(self.dim+1): + vertex = self.cells[cellNo, vertexNo] + for j in range(self.dim): + newVertices[k, j] = self.vertices[vertex, j] + newCells[cellNo, vertexNo] = k + newSol[(self.dim+1)*cellNo+vertexNo] = sol[cellNo] + k += 1 + X, Y = newVertices[:, 0], newVertices[:, 1] + sol = newSol + cells = newCells + else: + cells = self.cells + if ax is None: + fig = plt.gcf() + fig.delaxes(fig.gca()) + ax = fig.add_subplot(projection='3d') + ax.plot_trisurf(X, Y, cells, sol, cmap=cm.jet, linewidth=0, **kwargs) + return ax + + def plotDoFMap(self, DoFMap, printDoFIndices=True): + "Plot the DoF numbers on the mesh." + import matplotlib.pyplot as plt + from matplotlib import rc_context + self.plot(alpha=0.3) + pos = DoFMap.getDoFCoordinates() + if printDoFIndices: + with rc_context({'text.usetex': False}): + for dof in range(DoFMap.num_dofs): + plt.text(pos[dof, 0], pos[dof, 1], str(dof), + horizontalalignment='center', + verticalalignment='center') + else: + plt.scatter(pos[:, 0], pos[:, 1]) + + def plotFunctionDoFMap(self, DoFMap, x): + "Display function values for every DoF." 
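+        # Annotates the current matplotlib axes with x[dof] at the nodal
+        # coordinate of every unknown; boundary DoFs (negative indices) carry
+        # no value and are skipped.
+        # Example (sketch, assuming a DoFMap dm on this mesh and a coefficient
+        # vector x of length dm.num_dofs):
+        #     mesh.plotFunctionDoFMap(dm, x)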
+ import matplotlib.pyplot as plt + self.plot() + for cellNo in range(self.num_cells): + simplex = self.vertices_as_array[self.cells[cellNo, :], :] + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + if dof >= 0: + pos = np.dot(DoFMap.nodes[i, :], simplex) + plt.text(pos[0], pos[1], '{:.2}'.format(x[dof])) + + def plotInterface(self, interface): + "Plot a single mesh interface." + import matplotlib.pyplot as plt + from . meshOverlaps import meshInterface + assert isinstance(interface, meshInterface) + self.plot() + for i in range(interface.num_edges): + cellNo = interface.edges[i, 0] + edgeNo = interface.edges[i, 1] + order = interface.edges[i, 2] + simplex = self.vertices[self.cells[cellNo, :], :] + if edgeNo == 0: + idx = (0, 1) + elif edgeNo == 1: + idx = (1, 2) + else: + idx = (2, 0) + if order != 0: + idx = (idx[1], idx[0]) + XY = simplex[idx, :] + plt.plot(XY[:, 0], XY[:, 1], 'k-', + linewidth=3, + # color=colors[tag], + zorder=2) + plt.text(XY[:, 0].mean(), XY[:, 1].mean(), str(i)) + + def plotMeshOverlap(self, overlap): + "Plot a single mesh overlap." + from . meshOverlaps import meshOverlap + assert isinstance(overlap, meshOverlap) + import matplotlib.pyplot as plt + # self.plot(boundary=True) + self.plot(boundary=False) + for i in range(overlap.num_vertices): + v = self.cells[overlap.vertices[i, 0], overlap.vertices[i, 1]] + plt.text(self.vertices[v, 0], self.vertices[v, 1], str(i)) + for i in range(overlap.num_cells): + cellNo = overlap.cells[i] + simplex = self.vertices_as_array[self.cells[cellNo, :], :] + XY = simplex.mean(axis=0) + plt.text(XY[0], XY[1], str(i)) + plt.title('Overlap of subdomain {} with {}'.format(overlap.mySubdomainNo, overlap.otherSubdomainNo)) + + def plotOverlapManager(self, overlap): + "Plot all mesh overlaps in an overlap manager." + from . meshOverlaps import overlapManager + assert isinstance(overlap, overlapManager) + import matplotlib.pyplot as plt + self.plot() + x = np.zeros((self.num_cells), dtype=REAL) + for subdomain in overlap.overlaps: + for cellNo in overlap.overlaps[subdomain].cells: + x[cellNo] += subdomain+1 + plt.tripcolor(self.vertices[:, 0], self.vertices[:, 1], + self.cells, x) + plt.axis('equal') + + def plotAlgebraicOverlap(self, DoFMap, overlap): + "Plot a single algebraic overlap." + from . algebraicOverlaps import algebraicOverlap + assert isinstance(overlap, algebraicOverlap) + import matplotlib.pyplot as plt + self.plot(boundary=True) + dofDict = {} + for i, dof in enumerate(overlap.shared_dofs): + dofDict[dof] = i + for cellNo in range(self.num_cells): + simplex = self.vertices_as_array[self.cells[cellNo, :], :] + for i in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof_py(cellNo, i) + try: + pos = np.dot(DoFMap.nodes[i, :], simplex) + plt.text(pos[0], pos[1], str(dofDict[dof])) + except: + pass + + def plotAlgebraicOverlapManager(self, DoFMap, overlap): + from . 
algebraicOverlaps import algebraicOverlapManager + assert isinstance(overlap, algebraicOverlapManager) + self.plot(boundary=True) + x = np.zeros((DoFMap.num_dofs), dtype=REAL) + for subdomainNo in overlap.overlaps: + for i, dof in enumerate(overlap.overlaps[subdomainNo].shared_dofs): + x[dof] += 1 + self.plotFunctionDoFMap(DoFMap, x) + + def plotVertexPartitions(self, numPartitions, partitioner='metis', + interior=False, padding=0.1): + import matplotlib.pyplot as plt + if isinstance(partitioner, str): + if partitioner == 'metis': + partitioner = metisMeshPartitioner(self) + elif partitioner == 'regular': + partitioner = regularMeshPartitioner(self) + else: + raise NotImplementedError() + part, numPartitions = partitioner.partitionVertices(numPartitions, + interior) + elif isinstance(partitioner, sparseGraph): + part = np.zeros((partitioner.nnz)) + for p in range(partitioner.num_rows): + for jj in range(partitioner.indptr[p], partitioner.indptr[p+1]): + part[partitioner.indices[jj]] = p + numPartitions = partitioner.shape[0] + else: + raise NotImplementedError() + self.plot() + cm = plt.get_cmap('gist_rainbow') + X, Y = self.vertices[:, 0], self.vertices[:, 1] + lenX = X.max()-X.min() + lenY = Y.max()-Y.min() + plt.axis('equal') + plt.xlim([X.min()-lenX*padding, X.max()+lenX*padding]) + plt.ylim([Y.min()-lenY*padding, Y.max()+lenY*padding]) + if not X.shape[0] == part.shape[0]: + part2 = -np.ones((X.shape[0])) + part2[self.interiorVertices] = part + part = part2 + for i in range(numPartitions): + plt.tricontourf(X, Y, + part == i, + levels=[0.7, 1.1], + colors=[cm(i/numPartitions)]) + + def plotCellPartitions(self, numPartitions, partitioner='metis'): + import matplotlib.pyplot as plt + if isinstance(partitioner, str): + if partitioner == 'metis': + partitioner = metisMeshPartitioner(self) + elif partitioner == 'regular': + partitioner = regularMeshPartitioner(self) + else: + raise NotImplementedError() + part, numPartitions = partitioner.partitionCells(numPartitions) + plt.tripcolor(self.vertices[:, 0], self.vertices[:, 1], + self.cells, part) + plt.triplot(self.vertices[:, 0], self.vertices[:, 1], + self.cells, '-', zorder=1) + + def plotGraph(self, A, dofmap): + from PyNucleus_base.linear_operators import CSR_LinearOperator + import matplotlib.pyplot as plt + assert isinstance(A, CSR_LinearOperator) + dof2vertex = {} + for cellNo in range(self.num_cells): + simplex = self.vertices[self.cells[cellNo, :], :] + coords = dofmap.getNodalCoordinates_py(simplex) + dofs = [] + for j in range(dofmap.dofs_per_element): + dofs.append(dofmap.cell2dof_py(cellNo, j)) + for i, dof1 in enumerate(dofs): + if dof1 < 0: + continue + for j, dof2 in enumerate(dofs): + if dof2 < 0: + continue + if A.getEntry_py(dof1, dof2) != 0.: + if i == j: + plt.plot([coords[i, 0], coords[j, 0]], + [coords[i, 1], coords[j, 1]], + marker='o', + ms=8, + c='r', lw=4) + else: + plt.plot([coords[i, 0], coords[j, 0]], + [coords[i, 1], coords[j, 1]], + c='g', lw=4) + + def sortVertices(self): + idx = np.argsort(self.vertices_as_array.view('d,d'), order=['f1','f0'], axis=0).flat[:self.vertices.shape[0]] + self.reorderVertices(idx) + + +class mesh3d(meshNd): + """ + 3D mesh + + Attributes: + vertices + cells + boundaryVertices + boundaryEdges + boundaryFaces + boundaryVertexTags + boundaryEdgeTags + boundaryFaceTags + """ + + def plot(self): + import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import Axes3D + from itertools import combinations + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + for i in 
range(self.cells.shape[0]): + for j,k in combinations(range(4), 2): + u = self.vertices[self.cells[i, j], :] + v = self.vertices[self.cells[i, k], :] + ax.plot([u[0], v[0]], [u[1], v[1]], [u[2], v[2]], 'k') + + def plot_surface(self, boundary=False): + import matplotlib.pyplot as plt + from mpl_toolkits.mplot3d import Axes3D + from mpl_toolkits.mplot3d.art3d import Poly3DCollection + from matplotlib import rcParams + # from itertools import combinations + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + # for i in range(self.boundaryFaces.shape[0]): + # for j, k in combinations(range(3), 2): + # u = self.vertices[self.boundaryFaces[i, j], :] + # v = self.vertices[self.boundaryFaces[i, k], :] + # ax.plot([u[0], v[0]], [u[1], v[1]], [u[2], v[2]], 'k', zorder=-1) + tags = set(self.boundaryFaceTags) + tags = tags.union(self.boundaryEdgeTags) + tags = tags.union(self.boundaryVertexTags) + cm = plt.get_cmap('gist_rainbow') + num_colors = len(tags) + colors = {tag: cm(i/num_colors) for i, tag in enumerate(tags)} + tri = Poly3DCollection([self.vertices_as_array[self.boundaryFaces[i, :], :] + for i in range(self.boundaryFaces.shape[0])], + facecolors=[colors[t] for t in self.boundaryFaceTags], + edgecolors=(0, 0, 0, 1), lw=1) + ax.add_collection3d(tri) + if boundary: + scatterDict = {} + for bv, tag in zip(self.boundaryVertices, self.boundaryVertexTags): + XY = self.vertices[bv, :] + try: + scatterDict[tag].append(XY) + except KeyError: + scatterDict[tag] = [XY] + for tag in scatterDict: + XY = np.vstack(scatterDict[tag]) + print(XY.shape, colors[tag]) + plt.scatter(XY[:, 0], XY[:, 1], zs=XY[:, 2], + s=100, + c=colors[tag], + zorder=3, + depthshade=False) + # for be, tag in zip(self.boundaryEdges, self.boundaryEdgeTags): + # XY = self.vertices[be, :] + # plt.plot(XY[:, 0], XY[:, 1], 'k-', zs=XY[:, 2], + # linewidth=3*rcParams["lines.linewidth"], + # color=colors[tag], + # zorder=2) + + def plotVTK(self, boundary=False, opacity=1.0): + import vtk + from vtk.util.numpy_support import numpy_to_vtk, numpy_to_vtkIdTypeArray + import matplotlib.pyplot as plt + + points = vtk.vtkPoints() + points.SetData(numpy_to_vtk(self.vertices, deep=1)) + + cm = plt.get_cmap('gist_rainbow') + tags = set(self.boundaryFaceTags) + tags = tags.union(self.boundaryEdgeTags) + tags = tags.union(self.boundaryVertexTags) + num_colors = len(tags) + ccs = {tag: cm(i/num_colors) for i, tag in enumerate(tags)} + + if boundary: + toPlot = [ + (self.boundaryFaces, self.boundaryFaceTags), + (self.boundaryEdges, self.boundaryEdgeTags), + (self.boundaryVertices[:, np.newaxis], self.boundaryVertexTags) + ] + else: + toPlot = [(self.cells, np.zeros((self.num_cells), dtype=TAG))] + + colors = vtk.vtkUnsignedCharArray() + colors.SetName("Colors") + colors.SetNumberOfComponents(3) + colors.SetNumberOfTuples(sum([cells.shape[0] for cells, _ in toPlot])) + myCells = [] + myCellTypes = [] + numCells = 0 + k = 0 + for cells, tags in toPlot: + if cells.shape[1] == 1: + cellType = vtk.VTK_VERTEX + elif cells.shape[1] == 2: + cellType = vtk.VTK_LINE + elif cells.shape[1] == 3: + cellType = vtk.VTK_TRIANGLE + elif cells.shape[1] == 4: + cellType = vtk.VTK_TETRA + else: + raise NotImplementedError() + myCellTypes.append(cellType*np.ones((cells.shape[0]), dtype=np.int)) + myCells.append(np.hstack((cells.shape[1]*np.ones((cells.shape[0], 1), dtype=np.int64), + cells.astype(np.int64))).ravel()) + numCells += cells.shape[0] + for i in range(cells.shape[0]): + c = ccs[tags[i]] + colors.InsertTuple3(k, 255*c[0], 255*c[1], 255*c[2]) + k += 
1 + c3 = np.concatenate(myCells) + c2 = numpy_to_vtkIdTypeArray(c3, deep=1) + c = vtk.vtkCellArray() + c.SetCells(numCells, c2) + + ugrid = vtk.vtkUnstructuredGrid() + cellTypes = np.concatenate(myCellTypes) + ugrid.SetCells(cellTypes, c) + ugrid.SetPoints(points) + ugrid.GetCellData().SetScalars(colors) + + ugridMapper = vtk.vtkDataSetMapper() + ugridMapper.SetInputData(ugrid) + + ugridActor = vtk.vtkActor() + ugridActor.SetMapper(ugridMapper) + if not boundary: + ugridActor.GetProperty().EdgeVisibilityOn() + else: + ugridActor.GetProperty().SetLineWidth(10) + ugridActor.GetProperty().SetPointSize(30) + ugridActor.GetProperty().SetOpacity(opacity) + + return ugridActor + + def plotInterfaceVTK(self, interface): + import vtk + from vtk.util.numpy_support import numpy_to_vtk, numpy_to_vtkIdTypeArray + from . meshOverlaps import sharedMesh, simplexMapper3D + assert isinstance(interface, sharedMesh) + + points = vtk.vtkPoints() + points.SetData(numpy_to_vtk(self.vertices, deep=1)) + + sM = simplexMapper3D(self) + + cellsCells = self.cells[interface.cells, :] + cellsFaces = uninitialized((interface.num_faces, 3), dtype=INDEX) + for i in range(interface.num_faces): + cellsFaces[i, :] = sM.getFaceInCell_py(interface.faces[i, 0], + interface.faces[i, 1]) + cellsEdges = uninitialized((interface.num_edges, 2), dtype=INDEX) + for i in range(interface.num_edges): + cellsEdges[i, :] = sM.getEdgeInCell_py(interface.edges[i, 0], + interface.edges[i, 1]) + cellsVertices = uninitialized((interface.num_vertices, 1), dtype=INDEX) + for i in range(interface.num_vertices): + cellsVertices[i, 0] = sM.getVertexInCell_py(interface.vertices[i, 0], + interface.vertices[i, 1]) + + toPlot = [ + cellsCells, cellsFaces, cellsEdges, cellsVertices + ] + + myCells = [] + myCellTypes = [] + numCells = 0 + for cells in toPlot: + if cells.shape[1] == 1: + cellType = vtk.VTK_VERTEX + elif cells.shape[1] == 2: + cellType = vtk.VTK_LINE + elif cells.shape[1] == 3: + cellType = vtk.VTK_TRIANGLE + elif cells.shape[1] == 4: + cellType = vtk.VTK_TETRA + else: + raise NotImplementedError() + myCellTypes.append(cellType*np.ones((cells.shape[0]), dtype=np.int)) + myCells.append(np.hstack((cells.shape[1]*np.ones((cells.shape[0], 1), dtype=np.int64), + cells.astype(np.int64))).ravel()) + numCells += cells.shape[0] + c3 = np.concatenate(myCells) + c2 = numpy_to_vtkIdTypeArray(c3, deep=1) + c = vtk.vtkCellArray() + c.SetCells(numCells, c2) + + ugrid = vtk.vtkUnstructuredGrid() + cellTypes = np.concatenate(myCellTypes) + ugrid.SetCells(cellTypes, c) + ugrid.SetPoints(points) + + ugridMapper = vtk.vtkDataSetMapper() + ugridMapper.SetInputData(ugrid) + + ugridActor = vtk.vtkActor() + ugridActor.SetMapper(ugridMapper) + ugridActor.GetProperty().SetLineWidth(10) + ugridActor.GetProperty().SetPointSize(30) + + return ugridActor + + def get_surface_mesh(self, tag=None): + return mesh2d(self.vertices, self.getBoundaryFacesByTag(tag)) + + def checkDoFMap(self, DoFMap): + "Plot the DoF numbers on the mesh." 
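+        # Records the nodal coordinate of every DoF and asserts that all cells
+        # sharing a DoF agree on its position; returns the dof -> coordinate map.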
+        recordedDofs = {}
+        for cellNo in range(self.num_cells):
+            simplex = self.vertices[self.cells[cellNo, :], :]
+            for i in range(DoFMap.dofs_per_element):
+                dof = DoFMap.cell2dof_py(cellNo, i)
+                if dof >= 0:
+                    pos = np.dot(DoFMap.nodes[i, :], simplex)
+                    try:
+                        posOld = recordedDofs[dof]
+                        assert np.allclose(pos, posOld)
+                    except KeyError:
+                        recordedDofs[dof] = pos
+        return recordedDofs
+
+    def sortVertices(self):
+        idx = np.argsort(self.vertices_as_array.view('d,d,d'), order=['f2', 'f1', 'f0'], axis=0).flat[:self.vertices.shape[0]]
+        self.reorderVertices(idx)
+
+
+def stitchSubdomains(subdomains, overlapManagers, returnR=False, ncs=None):
+    """
+    Stitch subdomains together.
+    Works for 2D.
+    """
+    vertices = uninitialized((0, subdomains[0].dim), dtype=REAL)
+    cells = uninitialized((0, subdomains[0].dim+1), dtype=INDEX)
+    globalIndices = []
+    numPartitions = len(subdomains)
+    # FIX: If we have real overlap (overlapping elements, not vertices),
+    # I'm adding vertices twice
+    for i in range(numPartitions):
+        if ncs:
+            subdomainVertices = subdomains[i].vertices[:ncs[i][0], :]
+        else:
+            subdomainVertices = subdomains[i].vertices
+        subdomainNumVertices = subdomainVertices.shape[0]
+        # form vector bv:
+        # if a vertex is in a previous subdomain, set that subdomain's number, else -1
+        bv = -1*np.ones(subdomainNumVertices, dtype=INDEX)
+        # loop over all overlaps with subdomains that we already incorporated
+        for j in range(i):
+            bv[np.array(overlapManagers[i][j].overlap2local, dtype=INDEX)] = j
+        # append all new vertices
+        k = len(vertices)
+        nv = (bv == -1)
+        vertices = np.vstack((vertices,
+                              np.compress(nv, subdomainVertices, axis=0)))
+
+        # find new indices after discarding the known vertices
+        globalIndicesSubdomain = uninitialized(subdomainNumVertices,
+                                               dtype=INDEX)
+        globalIndicesSubdomain[nv] = np.arange(k, k+nv.sum())
+
+        for j in np.compress(np.logical_not(nv),
+                             np.arange(subdomainNumVertices)):
+            otherSubdomain = bv[j]
+            # translate to overlap index in domain i
+            m = overlapManagers[i].translate_local_overlap(otherSubdomain,
+                                                           np.array([j]))
+            # translate to local index in domain otherSubdomain
+            m = overlapManagers[otherSubdomain].translate_overlap_local(i, m)
+            # translate to global index
+            globalIndicesSubdomain[j] = globalIndices[otherSubdomain][m]
+        globalIndices.append(globalIndicesSubdomain)
+
+        if ncs:
+            subdomainCells = subdomains[i].cells[:ncs[i][1], :]
+            addCell = np.ones(subdomainCells.shape[0], dtype=bool)
+        else:
+            subdomainCells = subdomains[i].cells
+        # translate cells to new indices:
+        # get the subdomain number for every vertex in every cell
+        ww = np.take(bv, subdomainCells.T)
+        # take cell-wise min
+        cellMinSubdomain = ww.min(axis=0)
+        # take cell-wise max
+        cellMaxSubdomain = ww.max(axis=0)
+        # only take cells that have at least one new vertex,
+        # or that have vertices on different subdomains
+        # FIX: the last condition is not obvious
+        addCell = np.logical_or(cellMinSubdomain == -1,
+                                np.logical_and(cellMinSubdomain > -1,
+                                               cellMinSubdomain < cellMaxSubdomain))
+        s = (addCell.sum(), subdomains[0].dim+1)
+        newcells = np.compress(addCell, subdomainCells, axis=0)
+        newcells = globalIndicesSubdomain[newcells.ravel()].reshape(s)
+        cells = np.vstack((cells, newcells))
+    if subdomains[0].dim == 1:
+        mesh = mesh1d(vertices, cells)
+    elif subdomains[0].dim == 2:
+        mesh =
mesh2d(vertices, cells) + elif subdomains[0].dim == 3: + mesh = mesh3d(vertices, cells) + if returnR: + return (mesh, globalIndices) + else: + return mesh + + +def stitchOverlappingMeshes(meshes, overlapManagers): + dim = meshes[0].dim + global_vertices = uninitialized((0, dim), dtype=REAL) + global_cells = uninitialized((0, dim+1), dtype=INDEX) + global_boundary_vertices = {} + global_boundary_edges = {} + numPartitions = len(meshes) + localCellLookup = {} + globalCellLookup = [] + for mySubdomainNo in range(numPartitions): + translate = -np.ones((meshes[mySubdomainNo].num_vertices), dtype=INDEX) + idx = np.ones((meshes[mySubdomainNo].cells.shape[0]), dtype=np.bool) + lookup = -np.ones((meshes[mySubdomainNo].num_cells), dtype=INDEX) + for otherSubdomainNo in range(mySubdomainNo): + if otherSubdomainNo not in overlapManagers[mySubdomainNo].overlaps: + continue + idx[overlapManagers[mySubdomainNo].overlaps[otherSubdomainNo].cells] = False + for k in range(overlapManagers[mySubdomainNo].overlaps[otherSubdomainNo].cells.shape[0]): + p = overlapManagers[mySubdomainNo].overlaps[otherSubdomainNo].cells[k] + q = overlapManagers[otherSubdomainNo].overlaps[mySubdomainNo].cells[k] + translate[meshes[mySubdomainNo].cells_as_array[p, :]] = meshes[otherSubdomainNo].cells_as_array[q, :] + lookup[p] = globalCellLookup[otherSubdomainNo][q] + # get global vertex indices + numVertices = numVerticesNew = global_vertices.shape[0] + for k in range(meshes[mySubdomainNo].num_vertices): + if translate[k] == -1: + translate[k] = numVerticesNew + numVerticesNew += 1 + # translate vertex indices in cells to global indices + for k in range(meshes[mySubdomainNo].num_cells): + for m in range(dim+1): + meshes[mySubdomainNo].cells[k, m] = translate[meshes[mySubdomainNo].cells[k, m]] + global_vertices = np.vstack((global_vertices, + meshes[mySubdomainNo].vertices_as_array[translate >= numVertices, :])) + num_cells = global_cells.shape[0] + global_cells = np.vstack((global_cells, + meshes[mySubdomainNo].cells_as_array[idx, :])) + + for vertexNo in range(meshes[mySubdomainNo].boundaryVertices.shape[0]): + v = translate[meshes[mySubdomainNo].boundaryVertices[vertexNo]] + try: + global_boundary_vertices[v].append(meshes[mySubdomainNo].boundaryVertexTags[vertexNo]) + except KeyError: + global_boundary_vertices[v] = [meshes[mySubdomainNo].boundaryVertexTags[vertexNo]] + + for edgeNo in range(meshes[mySubdomainNo].boundaryEdges.shape[0]): + e = (translate[meshes[mySubdomainNo].boundaryEdges[edgeNo, 0]], + translate[meshes[mySubdomainNo].boundaryEdges[edgeNo, 1]]) + try: + global_boundary_edges[e].append(meshes[mySubdomainNo].boundaryEdgeTags[edgeNo]) + except KeyError: + global_boundary_edges[e] = [meshes[mySubdomainNo].boundaryEdgeTags[edgeNo]] + + for k in range(meshes[mySubdomainNo].num_cells): + if idx[k]: + localCellLookup[num_cells] = [(mySubdomainNo, k)] + lookup[k] = num_cells + num_cells += 1 + else: + localCellLookup[lookup[k]].append((mySubdomainNo, k)) + globalCellLookup.append(lookup) + if dim == 1: + global_mesh = mesh1d(global_vertices, global_cells) + elif dim == 2: + global_mesh = mesh2d(global_vertices, global_cells) + else: + raise NotImplementedError() + boundaryVertices = uninitialized((len(global_boundary_vertices)), dtype=INDEX) + boundaryVertexTags = uninitialized((len(global_boundary_vertices)), dtype=TAG) + for vertexNo, vertex in enumerate(global_boundary_vertices): + boundaryVertices[vertexNo] = vertex + global_boundary_vertices[vertex] = list(set(global_boundary_vertices[vertex])) + 
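+        # A stitched vertex can inherit different tags from different
+        # subdomains; resolve the conflict by keeping the largest tag.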
boundaryVertexTags[vertexNo] = max(global_boundary_vertices[vertex]) + global_mesh._boundaryVertices = boundaryVertices + global_mesh._boundaryVertexTags = boundaryVertexTags + + boundaryEdges = uninitialized((len(global_boundary_edges), 2), dtype=INDEX) + boundaryEdgeTags = uninitialized((len(global_boundary_edges)), dtype=TAG) + for edgeNo, edge in enumerate(global_boundary_edges): + boundaryEdges[edgeNo, :] = edge + global_boundary_edges[edge] = list(set(global_boundary_edges[edge])) + # assert len(global_boundary_edges[edge]) == 1, global_boundary_edges[edge] + boundaryEdgeTags[edgeNo] = max(global_boundary_edges[edge]) + global_mesh._boundaryEdges = boundaryEdges + global_mesh._boundaryEdgeTags = boundaryEdgeTags + return global_mesh, localCellLookup + + +def stitchNonoverlappingMeshes(meshes, interfaceManagers): + global_vertices = uninitialized((0, meshes[0].dim), dtype=REAL) + global_cells = uninitialized((0, meshes[0].dim+1), dtype=INDEX) + numPartitions = len(meshes) + localCellLookup = {} + global_boundary_vertices = {} + global_boundary_edges = {} + global_boundary_faces = {} + for mySubdomainNo in range(numPartitions): + translate = -np.ones((meshes[mySubdomainNo].num_vertices), dtype=INDEX) + for otherSubdomainNo in range(mySubdomainNo): + if otherSubdomainNo not in interfaceManagers[mySubdomainNo].interfaces: + continue + # idx[interfaceManagers[mySubdomainNo].overlaps[otherSubdomainNo].cells] = False + for k in range(interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].vertices.shape[0]): + cellNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].vertices[k, 0] + vertexNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].vertices[k, 1] + p = meshes[mySubdomainNo].cells[cellNo, vertexNo] + cellNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].vertices[k, 0] + vertexNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].vertices[k, 1] + q = meshes[otherSubdomainNo].cells[cellNo, vertexNo] + translate[p] = q + if meshes[0].dim >= 2: + for k in range(interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].edges.shape[0]): + cellNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].edges[k, 0] + edgeNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].edges[k, 1] + order = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].edges[k, 2] + if edgeNo == 0: + vertexNo1, vertexNo2 = 0, 1 + elif edgeNo == 1: + vertexNo1, vertexNo2 = 1, 2 + elif edgeNo == 2: + vertexNo1, vertexNo2 = 2, 0 + elif edgeNo == 3: + vertexNo1, vertexNo2 = 0, 3 + elif edgeNo == 4: + vertexNo1, vertexNo2 = 1, 3 + else: + vertexNo1, vertexNo2 = 2, 3 + if order == 1: + vertexNo1, vertexNo2 = vertexNo2, vertexNo1 + p1 = meshes[mySubdomainNo].cells[cellNo, vertexNo1] + p2 = meshes[mySubdomainNo].cells[cellNo, vertexNo2] + + cellNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].edges[k, 0] + edgeNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].edges[k, 1] + order = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].edges[k, 2] + if edgeNo == 0: + vertexNo1, vertexNo2 = 0, 1 + elif edgeNo == 1: + vertexNo1, vertexNo2 = 1, 2 + elif edgeNo == 2: + vertexNo1, vertexNo2 = 2, 0 + elif edgeNo == 3: + vertexNo1, vertexNo2 = 0, 3 + elif edgeNo == 4: + vertexNo1, vertexNo2 = 1, 3 + else: + vertexNo1, vertexNo2 = 2, 3 + if order == 1: + vertexNo1, vertexNo2 = vertexNo2, vertexNo1 + q1 = meshes[otherSubdomainNo].cells[cellNo, vertexNo1] + q2 = 
meshes[otherSubdomainNo].cells[cellNo, vertexNo2] + + translate[p1] = q1 + translate[p2] = q2 + # missing faces here + if meshes[0].dim >= 3: + for k in range(interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].faces.shape[0]): + cellNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].faces[k, 0] + faceNo = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].faces[k, 1] + order = interfaceManagers[mySubdomainNo].interfaces[otherSubdomainNo].faces[k, 2] + + if faceNo == 0: + vertexNo1, vertexNo2, vertexNo3 = 0, 2, 1 + # edgeNo1, edgeNo2, edgeNo3 = 2, 1, 0 + elif faceNo == 1: + vertexNo1, vertexNo2, vertexNo3 = 0, 1, 3 + # edgeNo1, edgeNo2, edgeNo3 = 0, 4, 3 + elif faceNo == 2: + vertexNo1, vertexNo2, vertexNo3 = 1, 2, 3 + # edgeNo1, edgeNo2, edgeNo3 = 1, 5, 4 + else: + vertexNo1, vertexNo2, vertexNo3 = 2, 0, 3 + # edgeNo1, edgeNo2, edgeNo3 = 2, 3, 5 + + if order == 1: + vertexNo1, vertexNo2, vertexNo3 = vertexNo2, vertexNo3, vertexNo1 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo2, edgeNo3, edgeNo1 + elif order == 2: + vertexNo1, vertexNo2, vertexNo3 = vertexNo3, vertexNo1, vertexNo2 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo3, edgeNo1, edgeNo2 + elif order == -1: + vertexNo1, vertexNo2, vertexNo3 = vertexNo2, vertexNo1, vertexNo3 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo1, edgeNo3, edgeNo2 + elif order == -2: + vertexNo1, vertexNo2, vertexNo3 = vertexNo1, vertexNo3, vertexNo2 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo3, edgeNo2, edgeNo1 + elif order == -3: + vertexNo1, vertexNo2, vertexNo3 = vertexNo3, vertexNo2, vertexNo1 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo2, edgeNo1, edgeNo3 + + p1 = meshes[mySubdomainNo].cells[cellNo, vertexNo1] + p2 = meshes[mySubdomainNo].cells[cellNo, vertexNo2] + p3 = meshes[mySubdomainNo].cells[cellNo, vertexNo3] + + cellNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].faces[k, 0] + faceNo = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].faces[k, 1] + order = interfaceManagers[otherSubdomainNo].interfaces[mySubdomainNo].faces[k, 2] + + if faceNo == 0: + vertexNo1, vertexNo2, vertexNo3 = 0, 2, 1 + # edgeNo1, edgeNo2, edgeNo3 = 2, 1, 0 + elif faceNo == 1: + vertexNo1, vertexNo2, vertexNo3 = 0, 1, 3 + # edgeNo1, edgeNo2, edgeNo3 = 0, 4, 3 + elif faceNo == 2: + vertexNo1, vertexNo2, vertexNo3 = 1, 2, 3 + # edgeNo1, edgeNo2, edgeNo3 = 1, 5, 4 + else: + vertexNo1, vertexNo2, vertexNo3 = 2, 0, 3 + # edgeNo1, edgeNo2, edgeNo3 = 2, 3, 5 + + if order == 1: + vertexNo1, vertexNo2, vertexNo3 = vertexNo2, vertexNo3, vertexNo1 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo2, edgeNo3, edgeNo1 + elif order == 2: + vertexNo1, vertexNo2, vertexNo3 = vertexNo3, vertexNo1, vertexNo2 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo3, edgeNo1, edgeNo2 + elif order == -1: + vertexNo1, vertexNo2, vertexNo3 = vertexNo2, vertexNo1, vertexNo3 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo1, edgeNo3, edgeNo2 + elif order == -2: + vertexNo1, vertexNo2, vertexNo3 = vertexNo1, vertexNo3, vertexNo2 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo3, edgeNo2, edgeNo1 + elif order == -3: + vertexNo1, vertexNo2, vertexNo3 = vertexNo3, vertexNo2, vertexNo1 + # edgeNo1, edgeNo2, edgeNo3 = edgeNo2, edgeNo1, edgeNo3 + + q1 = meshes[otherSubdomainNo].cells[cellNo, vertexNo1] + q2 = meshes[otherSubdomainNo].cells[cellNo, vertexNo2] + q3 = meshes[otherSubdomainNo].cells[cellNo, vertexNo3] + + translate[p1] = q1 + translate[p2] = q2 + translate[p3] = q3 + + numVertices = numVerticesNew = global_vertices.shape[0] + for k in range(meshes[mySubdomainNo].num_vertices): + if translate[k] == -1: + 
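+            # Vertex was not matched through any interface: it is new to the
+            # global mesh and gets the next free global index.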
translate[k] = numVerticesNew + numVerticesNew += 1 + for k in range(meshes[mySubdomainNo].num_cells): + for m in range(meshes[mySubdomainNo].dim+1): + meshes[mySubdomainNo].cells[k, m] = translate[meshes[mySubdomainNo].cells[k, m]] + global_vertices = np.vstack((global_vertices, + meshes[mySubdomainNo].vertices_as_array[translate >= numVertices, :])) + num_cells = global_cells.shape[0] + global_cells = np.vstack((global_cells, + meshes[mySubdomainNo].cells)) + + # add boundary vertices to global mesh + for boundaryVertexNo in range(meshes[mySubdomainNo].boundaryVertices.shape[0]): + vertexNo = meshes[mySubdomainNo].boundaryVertices[boundaryVertexNo] + v = translate[vertexNo] + try: + global_boundary_vertices[v].append(meshes[mySubdomainNo].boundaryVertexTags[boundaryVertexNo]) + except KeyError: + global_boundary_vertices[v] = [meshes[mySubdomainNo].boundaryVertexTags[boundaryVertexNo]] + + # add boundary edges to global mesh + for edgeNo in range(meshes[mySubdomainNo].boundaryEdges.shape[0]): + e = (translate[meshes[mySubdomainNo].boundaryEdges[edgeNo, 0]], + translate[meshes[mySubdomainNo].boundaryEdges[edgeNo, 1]]) + try: + global_boundary_edges[e].append(meshes[mySubdomainNo].boundaryEdgeTags[edgeNo]) + except KeyError: + global_boundary_edges[e] = [meshes[mySubdomainNo].boundaryEdgeTags[edgeNo]] + + # add boundary faces to global mesh + for faceNo in range(meshes[mySubdomainNo].boundaryFaces.shape[0]): + e = (translate[meshes[mySubdomainNo].boundaryFaces[faceNo, 0]], + translate[meshes[mySubdomainNo].boundaryFaces[faceNo, 1]], + translate[meshes[mySubdomainNo].boundaryFaces[faceNo, 2]]) + try: + global_boundary_faces[e].append(meshes[mySubdomainNo].boundaryFaceTags[faceNo]) + except KeyError: + global_boundary_faces[e] = [meshes[mySubdomainNo].boundaryFaceTags[faceNo]] + + for k in range(meshes[mySubdomainNo].num_cells): + localCellLookup[num_cells] = [(mySubdomainNo, k)] + num_cells += 1 + if meshes[0].dim == 1: + global_mesh = mesh1d(global_vertices, global_cells) + elif meshes[0].dim == 2: + global_mesh = mesh2d(global_vertices, global_cells) + elif meshes[0].dim == 3: + global_mesh = mesh3d(global_vertices, global_cells) + + boundaryVertices = uninitialized((len(global_boundary_vertices)), dtype=INDEX) + boundaryVertexTags = uninitialized((len(global_boundary_vertices)), dtype=TAG) + for vertexNo, vertex in enumerate(global_boundary_vertices): + boundaryVertices[vertexNo] = vertex + global_boundary_vertices[vertex] = list(set(global_boundary_vertices[vertex])) + boundaryVertexTags[vertexNo] = max(global_boundary_vertices[vertex]) + global_mesh._boundaryVertices = boundaryVertices + global_mesh._boundaryVertexTags = boundaryVertexTags + + if meshes[0].dim >= 2: + boundaryEdges = uninitialized((len(global_boundary_edges), 2), dtype=INDEX) + boundaryEdgeTags = uninitialized((len(global_boundary_edges)), dtype=TAG) + for edgeNo, edge in enumerate(global_boundary_edges): + boundaryEdges[edgeNo, :] = edge + global_boundary_edges[edge] = list(set(global_boundary_edges[edge])) + assert len(global_boundary_edges[edge]) == 1, global_boundary_edges[edge] + boundaryEdgeTags[edgeNo] = global_boundary_edges[edge][0] + global_mesh._boundaryEdges = boundaryEdges + global_mesh._boundaryEdgeTags = boundaryEdgeTags + + if meshes[0].dim >= 3: + boundaryFaces = uninitialized((len(global_boundary_faces), 3), dtype=INDEX) + boundaryFaceTags = uninitialized((len(global_boundary_faces)), dtype=TAG) + for faceNo, face in enumerate(global_boundary_faces): + boundaryFaces[faceNo, :] = face + 
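+            # Faces on subdomain interfaces were merged away above, so each
+            # remaining boundary face should carry exactly one tag; the
+            # assertion below checks this.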
global_boundary_faces[face] = list(set(global_boundary_faces[face])) + assert len(global_boundary_faces[face]) == 1, global_boundary_faces[face] + boundaryFaceTags[faceNo] = global_boundary_faces[face][0] + global_mesh._boundaryFaces = boundaryFaces + global_mesh._boundaryFaceTags = boundaryFaceTags + + return global_mesh, localCellLookup + + +def stitchSolutions(global_mesh, DoFMaps, localCellLookup, solutions, tag=0): + from . DoFMaps import getAvailableDoFMaps, str2DoFMap + for element in getAvailableDoFMaps(): + DoFMap = str2DoFMap(element) + if isinstance(DoFMaps[0], DoFMap): + dm_global = DoFMap(global_mesh, tag=tag) + break + else: + raise NotImplementedError(DoFMaps[0]) + x = dm_global.empty(dtype=solutions[0].dtype) + for cellNo in range(global_mesh.num_cells): + for k in range(dm_global.dofs_per_element): + dofGlobal = dm_global.cell2dof_py(cellNo, k) + if dofGlobal >= 0: + for subdomainNo, localCellNo in localCellLookup[cellNo]: + dofLocal = DoFMaps[subdomainNo].cell2dof_py(localCellNo, k) + if dofLocal >= 0: + x[dofGlobal] = solutions[subdomainNo][dofLocal] + return x, dm_global + + +def getMappingToGlobalDoFMap(mesh, meshOverlaps, DoFMap, comm=None, collectRank=0, tag=0): + meshes = comm.gather(mesh, root=collectRank) + overlapManagers = comm.gather(meshOverlaps, root=collectRank) + DoFMaps = comm.gather(DoFMap, root=collectRank) + if comm.rank == collectRank: + from . meshOverlaps import interfaceManager, overlapManager + if isinstance(overlapManagers[0], overlapManager): + mesh_global, localCellLookup = stitchOverlappingMeshes(meshes, overlapManagers) + elif isinstance(overlapManagers[0], interfaceManager): + mesh_global, localCellLookup = stitchNonoverlappingMeshes(meshes, overlapManagers) + else: + raise NotImplementedError() + from . DoFMaps import getAvailableDoFMaps, str2DoFMap + for element in getAvailableDoFMaps(): + DoFMap = str2DoFMap(element) + if isinstance(DoFMaps[0], DoFMap): + dm_global = DoFMap(mesh_global, tag=tag) + break + else: + raise NotImplementedError() + mappings = [uninitialized((dm.num_dofs), dtype=INDEX) for dm in DoFMaps] + for cellNo in range(mesh_global.num_cells): + for k in range(dm_global.dofs_per_element): + dofGlobal = dm_global.cell2dof_py(cellNo, k) + if dofGlobal >= 0: + for subdomainNo, localCellNo in localCellLookup[cellNo]: + dofLocal = DoFMaps[subdomainNo].cell2dof_py(localCellNo, k) + if dofLocal >= 0: + mappings[subdomainNo][dofLocal] = dofGlobal + return mesh_global, dm_global, mappings + else: + return None, None, None + + +def accumulate2global(mesh, meshOverlaps, DoFMap, vec, + comm=None, collectRank=0, tag=0): + """ + Send subdomain meshes and solutions to root node, stitch together + meshes and solution. Assumes that solution is already accumulated. + """ + if comm is not None and comm.size > 1: + meshes = comm.gather(mesh, root=collectRank) + overlapManagers = comm.gather(meshOverlaps, root=collectRank) + if isinstance(vec, list): + assert isinstance(DoFMap, list) and len(vec) == len(DoFMap) + DoFMaps = [] + vecs = [] + for i in range(len(DoFMap)): + DoFMaps.append(comm.gather(DoFMap[i], root=collectRank)) + vecs.append(comm.gather(vec[i], root=collectRank)) + else: + DoFMaps = [comm.gather(DoFMap, root=collectRank)] + vecs = [comm.gather(vec, root=collectRank)] + if comm.rank == collectRank: + from . 
meshOverlaps import interfaceManager, overlapManager + if isinstance(overlapManagers[0], overlapManager): + mesh_global, localCellLookup = stitchOverlappingMeshes(meshes, overlapManagers) + elif isinstance(overlapManagers[0], interfaceManager): + mesh_global, localCellLookup = stitchNonoverlappingMeshes(meshes, overlapManagers) + else: + raise NotImplementedError() + if vec is not None: + global_vecs = [] + global_dms = [] + for dms, vectors in zip(DoFMaps, vecs): + x, dm_global = stitchSolutions(mesh_global, dms, localCellLookup, vectors, tag) + global_vecs.append(x) + global_dms.append(dm_global) + if len(global_vecs) == 1: + x = global_vecs[0] + dm_global = global_dms[0] + else: + x = global_vecs + dm_global = global_dms + else: + x, dm_global = None, None + return mesh_global, x, dm_global + else: + return None, None, None + else: + if len(vec) == 1: + vec = vec[0] + DoFMap = DoFMap[0] + return mesh, vec, DoFMap + + +def getGlobalPartitioning(mesh, meshOverlaps, comm, collectRank=0): + meshes = comm.gather(mesh, root=collectRank) + overlapManagers = comm.gather(meshOverlaps, root=collectRank) + if comm.rank == collectRank: + from . meshOverlaps import interfaceManager, overlapManager + if isinstance(overlapManagers[0], overlapManager): + mesh_global, localCellLookup = stitchOverlappingMeshes(meshes, overlapManagers) + elif isinstance(overlapManagers[0], interfaceManager): + mesh_global, localCellLookup = stitchNonoverlappingMeshes(meshes, overlapManagers) + else: + raise NotImplementedError() + return mesh_global, localCellLookup + else: + return None, None + + +def getSubSolution(new_mesh, dm, x, selectedCells): + from . DoFMaps import getAvailableDoFMaps, str2DoFMap + for element in getAvailableDoFMaps(): + DoFMap = str2DoFMap(element) + if isinstance(dm, DoFMap): + dmSub = DoFMap(new_mesh, tag=-1) + break + else: + raise NotImplementedError() + y = np.zeros((dmSub.num_dofs), dtype=REAL) + for cellSub, cellGlobal in enumerate(selectedCells): + for k in range(dmSub.dofs_per_element): + dofSub = dmSub.cell2dof_py(cellSub, k) + dofGlobal = dm.cell2dof_py(cellGlobal, k) + if dofSub >= 0 and dofGlobal >= 0: + y[dofSub] = x[dofGlobal] + return dmSub, y + + +def getSubMeshSolution(mesh, DoFMap, solution, selectedCells): + from . meshCy import getSubmesh + new_mesh = getSubmesh(mesh, selectedCells) + dmSub, y = getSubSolution(new_mesh, DoFMap, solution, selectedCells) + return new_mesh, y, dmSub + + +def getRestrictionProlongationSubmesh(mesh, selectedCells, dm, dm_trunc): + from PyNucleus_base.linear_operators import CSR_LinearOperator + indptr = np.arange(dm_trunc.num_dofs+1, dtype=INDEX) + indices = np.zeros((dm_trunc.num_dofs), dtype=INDEX) + data = np.ones((dm_trunc.num_dofs), dtype=REAL) + for cell_trunc in range(selectedCells.shape[0]): + cell = selectedCells[cell_trunc] + for i in range(dm.dofs_per_element): + dof = dm.cell2dof_py(cell, i) + dof_trunc = dm_trunc.cell2dof_py(cell_trunc, i) + if dof >= 0 and dof_trunc >= 0: + indices[dof_trunc] = dof + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = dm.num_dofs + P = R.transpose() + return R, P + + +def plotFunctions(mesh, dm, funs, labels=None, fig=None): + from . 
functions import function + if dm.num_dofs > 50000 or mesh.dim >= 3: + return + if fig is None: + import matplotlib.pyplot as plt + fig = plt.gcf() + if labels is None: + labels = ['']*len(funs) + else: + assert len(funs) == len(labels) + for f, l in zip(funs, labels): + if isinstance(f, function): + f = dm.interpolate(f) + mesh.plotFunction(f, DoFMap=dm, label=l) + fig.legend() + + +class plotManager: + def __init__(self, mesh, dm, useSubPlots=False, defaults={}, interfaces=None): + self.mesh = mesh + self.dm = dm + self.plots = [] + self.useSubPlots = useSubPlots + if self.mesh.dim == 2: + self.useSubPlots = True + self.defaults = defaults + self.interfaces = interfaces + self.comm = interfaces.comm if self.interfaces is not None else None + self.prepared = False + + def add(self, x, **kwargs): + assert not self.prepared + self.plots.append([x, kwargs]) + + def preparePlots(self, tag=PHYSICAL): + from . functions import function + solutions = [] + for k in range(len(self.plots)): + if isinstance(self.plots[k][0], function): + self.plots[k][0] = self.dm.interpolate(self.plots[k][0]) + solutions.append(self.plots[k][0]) + (global_mesh, + global_solutions, + global_dm) = accumulate2global(self.mesh, self.interfaces, [self.dm]*len(solutions), + solutions, comm=self.comm, tag=tag) + if self.comm is None or self.comm.rank == 0: + self.mesh = global_mesh + if isinstance(global_solutions, list): + for k in range(len(self.plots)): + self.plots[k][0] = global_solutions[k] + self.dm = global_dm[0] + else: + self.plots[0][0] = global_solutions + self.dm = global_dm + self.prepared = True + + def plot(self, legendOutside=False): + import matplotlib.pyplot as plt + + assert self.comm is None or self.comm.rank == 0 + + if not self.prepared: + self.preparePlots() + + needLegend = False + if not self.useSubPlots: + for x, k in self.plots: + if 'label' in k: + needLegend = True + self.mesh.plotFunction(x, DoFMap=self.dm, **k) + if needLegend: + if legendOutside: + plt.gca().legend(loc='lower left', + bbox_to_anchor=(-0.1, 1.2), + borderaxespad=0) + else: + plt.gca().legend() + else: + numPlots = len(self.plots) + plotsPerDirX = int(np.ceil(np.sqrt(numPlots))) + plotsPerDirY = int(np.ceil(numPlots/plotsPerDirX)) + for k in range(len(self.plots)): + ax = plt.gcf().add_subplot(plotsPerDirX, plotsPerDirY, k+1) + plt.sca(ax) + if k >= numPlots: + plt.gcf().delaxes(ax) + else: + kwargs = self.defaults.copy() + kwargs.update(self.plots[k][1]) + label = kwargs.pop('label', '') + vmin = kwargs.pop('vmin', None) + vmax = kwargs.pop('vmax', None) + x = self.plots[k][0] + self.mesh.plotFunction(x, DoFMap=self.dm, **kwargs) + ax.set_ylim([vmin, vmax]) + ax.set_title(label) + + +def snapMeshes(mesh1, mesh2): + from scipy.spatial import KDTree + from PyNucleus_base import uninitialized + + tree = KDTree(mesh1.vertices) + vertexCount = mesh1.num_vertices + vertexTranslation = -np.ones((mesh2.num_vertices), dtype=INDEX) + + eps = 1e-9 + vertices2 = mesh2.vertices_as_array + verticesToAdd = [] + for vertexNo in range(mesh2.num_vertices): + neighbors = tree.query_ball_point(vertices2[vertexNo, :], eps) + if len(neighbors) == 0: + verticesToAdd.append(vertexNo) + vertexTranslation[vertexNo] = vertexCount + vertexCount += 1 + elif len(neighbors) == 1: + vertexTranslation[vertexNo] = neighbors[0] + else: + raise NotImplementedError() + vertices = np.vstack((mesh1.vertices_as_array, + mesh2.vertices_as_array[verticesToAdd, :])) + translatedCells = uninitialized((mesh2.num_cells, mesh2.manifold_dim+1), dtype=INDEX) + for cellNo 
in range(mesh2.num_cells):
+        for vertexNo in range(mesh2.manifold_dim+1):
+            translatedCells[cellNo, vertexNo] = vertexTranslation[mesh2.cells[cellNo, vertexNo]]
+    cells = np.vstack((mesh1.cells_as_array,
+                       translatedCells))
+    mesh = mesh2d(vertices, cells)
+    if mesh1.transformer is None:
+        mesh.setMeshTransformation(mesh2.transformer)
+    elif mesh2.transformer is None:
+        mesh.setMeshTransformation(mesh1.transformer)
+    else:
+        raise NotImplementedError()
+    return mesh
diff --git a/fem/PyNucleus_fem/meshConstruction.py b/fem/PyNucleus_fem/meshConstruction.py
new file mode 100644
index 0000000..519fbfd
--- /dev/null
+++ b/fem/PyNucleus_fem/meshConstruction.py
@@ -0,0 +1,294 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+import numpy as np
+from PyNucleus_base import INDEX, REAL
+from . import mesh2d
+from meshpy.triangle import MeshInfo, build
+from scipy.spatial import cKDTree
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
+
+class segment:
+    def __init__(self, points, facets, holes=[]):
+        self.points = points
+        self.facets = facets
+        self.holes = holes
+        self.meshTransformations = []
+
+    def __add__(self, other):
+        if isinstance(other, tuple):
+            newPoints = [(other[0]+p[0], other[1]+p[1]) for p in self.points]
+            newHoles = [(other[0]+p[0], other[1]+p[1]) for p in self.holes]
+            newSegment = segment(newPoints, self.facets, newHoles)
+
+            for t in self.meshTransformations:
+                # bind t at definition time; a plain closure would make every
+                # appended transform use the last t of the loop
+                def transform(x1, x2, xNew, t=t):
+                    xTemp = xNew-other
+                    t(x1-other, x2-other, xTemp)
+                    xNew[:] = other+xTemp
+
+                newSegment.meshTransformations.append(transform)
+
+            return newSegment
+        elif isinstance(other, segment):
+            points = self.points+other.points
+            holes = self.holes+other.holes
+            facets = []
+            offset = len(self.points)
+            for f in self.facets:
+                facets.append(f)
+            for f in other.facets:
+                f2 = (f[0]+offset, f[1]+offset)
+                facets.append(f2)
+
+            kd = cKDTree(points)
+            idx = -np.ones((len(points)), dtype=INDEX)
+            idxUnique = -np.ones((len(points)), dtype=INDEX)
+            for t in kd.query_pairs(1e-6):
+                idx[max(t)] = min(t)
+            k = 0
+            for i in range(idx.shape[0]):
+                if idx[i] == -1:
+                    idx[i] = k
+                    idxUnique[k] = i
+                    k += 1
+                else:
+                    idx[i] = idx[idx[i]]
+            idxUnique = idxUnique[:k]
+            points = [points[i] for i in idxUnique]
+            facets = [(idx[f[0]], idx[f[1]]) for f in facets]
+
+            sumSeg = segment(points, facets, holes)
+            sumSeg.meshTransformations = self.meshTransformations+other.meshTransformations
+            return sumSeg
+        else:
+            raise NotImplementedError(other)
+
+    def __mul__(self, other):
+        if isinstance(other, tuple):
+            c = np.array(other[0])
+            angle = other[1]
+            rot = np.array([[np.cos(angle), -np.sin(angle)],
+                            [np.sin(angle), np.cos(angle)]])
+
+            points = [c+rot.dot(p-c) for p in self.points]
+            holes = [c+rot.dot(p-c) for p in self.holes]
+            newSegment = segment(points, self.facets, holes)
+
+            for t in self.meshTransformations:
+                # same early binding of t as in __add__
+                def transform(x1, x2, xNew, t=t):
+                    xTemp = c+rot.T.dot(xNew-c)
+                    t(c+rot.T.dot(x1-c),
+                      c+rot.T.dot(x2-c),
+                      xTemp)
+                    xNew[:] = c+rot.dot(xTemp-c)
+
+                newSegment.meshTransformations.append(transform)
+
+            return newSegment
+        else:
+            raise NotImplementedError()
+
+    def plot(self, 
plotArrows=False):
+        import matplotlib.pyplot as plt
+        plt.scatter([p[0] for p in self.points], [p[1] for p in self.points])
+        for f in self.facets:
+            plt.plot([self.points[f[0]][0], self.points[f[1]][0]],
+                     [self.points[f[0]][1], self.points[f[1]][1]])
+            if plotArrows:
+                plt.arrow(self.points[f[0]][0], self.points[f[0]][1],
+                          0.5*(self.points[f[1]][0]-self.points[f[0]][0]),
+                          0.5*(self.points[f[1]][1]-self.points[f[0]][1]),
+                          head_width=0.05, head_length=0.1)
+
+    def get_num_points(self):
+        return len(self.points)
+
+    def get_num_facets(self):
+        return len(self.facets)
+
+    def get_num_holes(self):
+        return len(self.holes)
+
+    def get_num_mesh_transformations(self):
+        return len(self.meshTransformations)
+
+    num_points = property(fget=get_num_points)
+    num_facets = property(fget=get_num_facets)
+    num_holes = property(fget=get_num_holes)
+    num_mesh_transformations = property(fget=get_num_mesh_transformations)
+
+    def mesh(self, **kwargs):
+        mesh_info = MeshInfo()
+        mesh_info.set_points(self.points)
+        mesh_info.set_facets(self.facets)
+        mesh_info.set_holes(self.holes)
+
+        if 'min_angle' not in kwargs:
+            kwargs['min_angle'] = 30
+
+        if 'h' in kwargs:
+            h = kwargs.pop('h')
+            if 'href' in kwargs:
+                href = kwargs.pop('href')
+                for k in range(href):
+                    fraction = 0.8**k
+                    kwargs['max_volume'] = 0.5 * h**2 * fraction
+                    mesh_meshpy = build(mesh_info, **kwargs)
+                    mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL),
+                                  np.array(mesh_meshpy.elements, dtype=INDEX))
+                    if mesh.h <= h:
+                        break
+                else:
+                    LOGGER.warning("Meshed {} times, but could not achieve h={}. Instead h={}.".format(href, h, mesh.h))
+            else:
+                kwargs['max_volume'] = 0.5 * h**2
+                mesh_meshpy = build(mesh_info, **kwargs)
+                mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL),
+                              np.array(mesh_meshpy.elements, dtype=INDEX))
+        else:
+            mesh_meshpy = build(mesh_info, **kwargs)
+            mesh = mesh2d(np.array(mesh_meshpy.points, dtype=REAL),
+                          np.array(mesh_meshpy.elements, dtype=INDEX))
+        mesh.setMeshTransformation(self.getMeshTransformer())
+        return mesh
+
+    def getMeshTransformer(self):
+        from . meshCy import meshTransformer
+        from . 
meshCy import decode_edge_python + + class myMeshTransformer(meshTransformer): + def __init__(self, meshTransformations): + self.meshTransformations = meshTransformations + + def __call__(self, mesh, lookup): + if len(self.meshTransformations) == 0: + return + for encodeVal in lookup: + e = decode_edge_python(encodeVal) + x1 = mesh.vertices_as_array[e[0], :] + x2 = mesh.vertices_as_array[e[1], :] + vertexNo = lookup[encodeVal] + xNew = mesh.vertices_as_array[vertexNo, :] + for t in self.meshTransformations: + if t(x1, x2, xNew): + break + + return myMeshTransformer(self.meshTransformations) + + +class circularSegment(segment): + def __init__(self, center, radius, start_angle, stop_angle, num_points_per_unit_len=None, num_points=None): + if num_points is None: + num_points = int(np.ceil(radius*(stop_angle-start_angle) * num_points_per_unit_len))+1 + if stop_angle-start_angle < 1e-9: + points = [] + facets = [] + else: + if abs(stop_angle-start_angle-2*np.pi) < 1e-9: + points = [(center[0]+radius*np.cos(theta), + center[1]+radius*np.sin(theta)) for theta in np.linspace(start_angle, stop_angle, num_points-1, endpoint=False)] + facets = [(i, i+1) for i in range(num_points-2)]+[(num_points-2, 0)] + else: + points = [(center[0]+radius*np.cos(theta), + center[1]+radius*np.sin(theta)) for theta in np.linspace(start_angle, stop_angle, num_points)] + facets = [(i, i+1) for i in range(num_points-1)] + self.center = center + self.radius = radius + self.start_angle = start_angle + self.stop_angle = stop_angle + super(circularSegment, self).__init__(points, facets) + self.meshTransformations = [self.meshTransformation] + + def meshTransformation(self, x1, x2, xNew): + rNew = np.linalg.norm(xNew-self.center) + if rNew <= self.radius: + theta = np.arctan2(xNew[1]-self.center[1], + xNew[0]-self.center[0]) + if theta < 0: + theta += 2*np.pi + assert 0 <= theta and theta <= 2*np.pi, (theta, 2*np.pi-theta) + if (self.start_angle <= theta) and (theta <= self.stop_angle): + if np.vdot(x1-self.center, x2-self.center) <= 0.: + return + r1 = np.linalg.norm(x1-self.center) + r2 = np.linalg.norm(x2-self.center) + r = 0.5*r1+0.5*r2 + if r > 2*rNew: + print(r, rNew) + xNew[:] = self.center + (xNew-self.center)*r/rNew + + +class circle(circularSegment): + def __init__(self, center, radius, num_points_per_unit_len=None, num_points=None): + super(circle, self).__init__(center, radius, 0, 2*np.pi, num_points_per_unit_len, num_points) + self.points.append(center) + + +class line(segment): + def __init__(self, start, end, num_points=None, num_points_per_unit_len=None): + length2 = (end[0]-start[0])**2 + (end[1]-start[1])**2 + if num_points_per_unit_len is None and num_points is None: + num_points = 2 + elif num_points_per_unit_len is not None: + length = np.sqrt(length2) + num_points = int(np.ceil(length*num_points_per_unit_len))+1 + if length2 < 1e-9: + points = [] + facets = [] + else: + points = [(start[0]+t*(end[0]-start[0]), + start[1]+t*(end[1]-start[1])) for t in np.linspace(0, 1, num_points)] + facets = [(i, i+1) for i in range(num_points-1)] + super(line, self).__init__(points, facets) + + +def polygon(points, doClose=True, num_points=None, num_points_per_unit_len=None): + if num_points is None: + num_points = [None]*len(points) + elif doClose: + assert len(num_points) == len(points) + else: + assert len(num_points) == len(points)-1 + segments = line(points[0], points[1], num_points=num_points[0], num_points_per_unit_len=num_points_per_unit_len) + for i in range(1, len(points)-1): + segments += line(points[i], 
points[i+1], num_points=num_points[i], num_points_per_unit_len=num_points_per_unit_len)
+    if doClose:
+        segments += line(points[len(points)-1], points[0], num_points=num_points[len(points)-1], num_points_per_unit_len=num_points_per_unit_len)
+    return segments
+
+
+def rectangle(a, b, num_points=None, num_points_per_unit_len=None):
+    assert a[0] < b[0]
+    assert a[1] < b[1]
+    points = [a, (b[0], a[1]), b, (a[0], b[1])]
+    rect = polygon(points, doClose=True, num_points=num_points, num_points_per_unit_len=num_points_per_unit_len)
+
+    def meshTransformation(x1, x2, xNew):
+        eps = 1e-10
+        if ((a[0]-eps <= x1[0] <= b[0]+eps) and (a[1]-eps <= x1[1] <= b[1]+eps) and
+                (a[0]-eps <= x2[0] <= b[0]+eps) and (a[1]-eps <= x2[1] <= b[1]+eps)):
+            xNew[:] = 0.5*(x1+x2)
+            return True
+
+    rect.meshTransformations = [meshTransformation]
+    return rect
+
+
+class transformationRestriction(segment):
+    def __init__(self, seg, p1, p2):
+        super(transformationRestriction, self).__init__(seg.points, seg.facets)
+        for t in seg.meshTransformations:
+            # bind t at definition time; a plain closure would make every
+            # appended transform use the last t of the loop
+            def transform(x1, x2, xNew, t=t):
+                if ((p1[0] <= xNew[0]) and (xNew[0] <= p2[0]) and
+                        (p1[1] <= xNew[1]) and (xNew[1] <= p2[1])):
+                    t(x1, x2, xNew)
+            self.meshTransformations.append(transform)
diff --git a/fem/PyNucleus_fem/meshCy.pxd b/fem/PyNucleus_fem/meshCy.pxd
new file mode 100644
index 0000000..6fc64e8
--- /dev/null
+++ b/fem/PyNucleus_fem/meshCy.pxd
@@ -0,0 +1,122 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t
+from PyNucleus_base.intTuple cimport intTuple
+from PyNucleus_base.tupleDict cimport tupleDictINDEX
+cimport numpy as np
+from . 
simplexMapper cimport simplexMapper, simplexMapper1D, simplexMapper2D, simplexMapper3D + +ctypedef REAL_t[:, ::1] vertices_t +ctypedef INDEX_t[:, ::1] cells_t + + +cdef class meshTransformer: + pass + + +cdef class gradedMeshTransformer(meshTransformer): + cdef: + REAL_t mu, mu2, radius + + +cdef class multiIntervalMeshTransformer(meshTransformer): + cdef: + list intervals + + +cdef class meshBase: + cdef: + public vertices_t vertices + public cells_t cells + readonly INDEX_t num_vertices, num_cells, dim, manifold_dim + REAL_t _h, _delta, _volume, _hmin + REAL_t[::1] _volVector + REAL_t[::1] _hVector + public simplexMapper simplexMapper + public meshTransformer transformer + cdef void computeMeshQuantities(self) + cdef void getSimplex(self, + const INDEX_t cellIdx, + REAL_t[:, ::1] simplex) + cdef BOOL_t vertexInCell(self, REAL_t[::1] vertex, + INDEX_t cellNo, + REAL_t[:, ::1] simplexMem, + REAL_t[::1] baryMem) + + +cdef void decode_edge(ENCODE_t encodeVal, INDEX_t[::1] e) + +cdef void vectorProduct(const REAL_t[::1] v, const REAL_t[::1] w, REAL_t[::1] z) + +cdef REAL_t volume0D(REAL_t[:, ::1] span) +cdef REAL_t volume1D(REAL_t[::1] v0) +cdef REAL_t volume1Dnew(REAL_t[:, ::1] span) +cdef REAL_t volume1D_in_2D(REAL_t[:, ::1] span) +cdef REAL_t volume2D(REAL_t[::1] v0, REAL_t[::1] v1) +cdef REAL_t volume2Dnew(REAL_t[:, ::1] span) +cdef REAL_t volume2D_in_3D(REAL_t[::1] v0, REAL_t[::1] v1) +cdef REAL_t volume3D(REAL_t[:, ::1] span) +cdef REAL_t volume3Dnew(REAL_t[:, ::1] span, REAL_t[::1] temp) +cdef REAL_t volume2D_in_3Dnew(REAL_t[:, ::1] span) + +cdef REAL_t volume0Dsimplex(REAL_t[:, ::1] simplex) +cdef REAL_t volume1Dsimplex(REAL_t[:, ::1] simplex) +cdef REAL_t volume2Dsimplex(REAL_t[:, ::1] simplex) +cdef REAL_t volume1Din2Dsimplex(REAL_t[:, ::1] simplex) + +cdef ENCODE_t encode_edge(INDEX_t[::1] e) +cdef void sortEdge(INDEX_t c0, INDEX_t c1, INDEX_t[::1] e) + +cdef void sortFace(INDEX_t c0, INDEX_t c1, INDEX_t c2, INDEX_t[::1] f) +cdef tuple encode_face(INDEX_t[::1] f) +cdef void decode_face(tuple encodeVal, INDEX_t[::1] f) + + +cdef class faceVals: + cdef: + INDEX_t ** indexL + INDEX_t ** indexR + INDEX_t ** vals + np.uint8_t[::1] counts + np.uint8_t initial_length + np.uint8_t length_inc + np.uint8_t[::1] lengths + INDEX_t num_dofs, nnz + BOOL_t deleteHits + INDEX_t i, jj + cdef inline INDEX_t enterValue(self, const INDEX_t[::1] f, INDEX_t val) + cdef inline INDEX_t getValue(self, const INDEX_t[::1] f) + cdef void startIter(self) + cdef BOOL_t next(self, INDEX_t[::1] f, INDEX_t * val) + + +cdef class cellFinder(object): + cdef: + meshBase mesh + public REAL_t[:, ::1] simplex + public REAL_t[::1] bary + tuple kd + INDEX_t numCandidates + cdef INDEX_t findCell(self, REAL_t[::1] vertex) + + +cdef class cellFinder2: + cdef: + meshBase mesh + REAL_t[::1] diamInv, x_min + public dict lookup + public dict v2c + REAL_t[:, ::1] simplex + REAL_t[::1] bary + INDEX_t[::1] key + intTuple myKey + cdef INDEX_t findCell(self, REAL_t[::1] vertex) + +cdef void getBarycentricCoords1D(REAL_t[:, ::1] simplex, REAL_t[::1] x, REAL_t[::1] bary) +cdef void getBarycentricCoords2D(REAL_t[:, ::1] simplex, REAL_t[::1] x, REAL_t[::1] bary) diff --git a/fem/PyNucleus_fem/meshCy.pyx b/fem/PyNucleus_fem/meshCy.pyx new file mode 100644 index 0000000..e643a62 --- /dev/null +++ b/fem/PyNucleus_fem/meshCy.pyx @@ -0,0 +1,2374 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from libc.math cimport sqrt +from PyNucleus_base import uninitialized +import numpy as np +cimport numpy as np +cimport cython +# from libcpp.unordered_map cimport unordered_map +# from libcpp.map cimport map +from libc.stdlib cimport malloc, realloc, free +from libc.stdlib cimport qsort + +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, TAG +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, TAG_t +from PyNucleus_base.blas cimport mydot +import warnings + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max + + +cdef class meshTransformer: + def __init__(self): + pass + + def __call__(self, meshBase mesh, dict lookup): + raise NotImplementedError() + + +cdef class radialMeshTransformer(meshTransformer): + def __init__(self): + super(radialMeshTransformer, self).__init__() + + def __call__(self, meshBase mesh, dict lookup): + cdef: + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + ENCODE_t encodeVal + INDEX_t vertexNo + REAL_t r1, r2, r, r3 + INDEX_t dim = mesh.dim + REAL_t[:, ::1] vertices = mesh.vertices + for encodeVal in lookup: + decode_edge(encodeVal, e) + vertexNo = lookup[encodeVal] + r1 = 0. + for i in range(dim): + r1 += vertices[e[0], i]**2 + r1 = sqrt(r1) + r2 = 0. + for i in range(dim): + r2 += vertices[e[1], i]**2 + r2 = sqrt(r2) + r = 0.5*r1 + 0.5*r2 + r3 = 0. + for i in range(dim): + r3 += vertices[vertexNo, i]**2 + r3 = sqrt(r3) + for i in range(dim): + mesh.vertices[vertexNo, i] *= r/r3 + + +cdef class gradedMeshTransformer(meshTransformer): + def __init__(self, REAL_t mu=2., mu2=None, REAL_t radius=1.): + super(gradedMeshTransformer, self).__init__() + self.mu = mu + if mu2 is None: + self.mu2 = mu + else: + self.mu2 = mu2 + self.radius = radius + + def __call__(self, meshBase mesh, dict lookup): + cdef: + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + ENCODE_t encodeVal + INDEX_t vertexNo + REAL_t r1, r2, r, r3, x1, x2, x3 + INDEX_t dim = mesh.dim + REAL_t[:, ::1] vertices = mesh.vertices + for encodeVal in lookup: + decode_edge(encodeVal, e) + vertexNo = lookup[encodeVal] + r1 = 0. + for i in range(dim): + r1 += vertices[e[0], i]**2 + r1 = sqrt(r1) + r2 = 0. + for i in range(dim): + r2 += vertices[e[1], i]**2 + r2 = sqrt(r2) + r = 0.5*r1 + 0.5*r2 + r3 = 0. 
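+            # r1 and r2 are the radii of the parent edge's endpoints; below, the
+            # midpoint vertex is placed by averaging in the graded coordinate
+            # x = 1-(1-r/radius)**(1/mu) and mapping back via r = radius*(1-(1-x)**mu),
+            # so the grading towards radius is preserved under refinement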
+            for i in range(dim):
+                r3 += vertices[vertexNo, i]**2
+            r3 = sqrt(r3)
+            if vertices[vertexNo, 0] < 0:
+                x1 = 1-(1-r1/self.radius)**(1/self.mu)
+                x2 = 1-(1-r2/self.radius)**(1/self.mu)
+                x3 = 0.5*x1+0.5*x2
+                r = self.radius*(1-(1-x3)**self.mu)
+            else:
+                x1 = 1-(1-r1/self.radius)**(1/self.mu2)
+                x2 = 1-(1-r2/self.radius)**(1/self.mu2)
+                x3 = 0.5*x1+0.5*x2
+                r = self.radius*(1-(1-x3)**self.mu2)
+            for i in range(dim):
+                mesh.vertices[vertexNo, i] *= r/r3
+
+
+cdef class gradedHypercubeTransformer(meshTransformer):
+    cdef:
+        REAL_t[::1] factor, invFactor
+
+    def __init__(self, factor=0.4):
+        cdef:
+            INDEX_t i
+
+        if isinstance(factor, float):
+            assert 0 < factor
+            self.factor = factor*np.ones((3), dtype=REAL)
+        else:
+            for i in range(factor.shape[0]):
+                assert 0 < factor[i]
+            self.factor = factor
+        self.invFactor = uninitialized((self.factor.shape[0]), dtype=REAL)
+        for i in range(self.factor.shape[0]):
+            self.invFactor[i] = 1./self.factor[i]
+
+    def __call__(self, meshBase mesh, dict lookup):
+        cdef:
+            INDEX_t i, j
+            INDEX_t[::1] e = uninitialized((2), dtype=INDEX)
+            REAL_t v0, v1
+            REAL_t[::1] boundMin = np.inf*np.ones((mesh.dim), dtype=REAL)
+            REAL_t[::1] boundMax = -np.inf*np.ones((mesh.dim), dtype=REAL)
+            REAL_t[::1] boundMaxInv = uninitialized((mesh.dim), dtype=REAL)
+        for j in range(mesh.num_vertices):
+            for i in range(mesh.dim):
+                v0 = mesh.vertices[j, i]
+                boundMin[i] = min(boundMin[i], v0)
+                boundMax[i] = max(boundMax[i], v0)
+        for i in range(mesh.dim):
+            # record the extent before overwriting boundMax with its inverse
+            boundMaxInv[i] = boundMax[i]-boundMin[i]
+            boundMax[i] = 1./(boundMax[i]-boundMin[i])
+        for encodeVal in lookup:
+            decode_edge(encodeVal, e)
+            j = lookup[encodeVal]
+            for i in range(mesh.dim):
+                v0 = (boundMax[i]*(mesh.vertices[e[0], i]-boundMin[i]))**self.invFactor[i]
+                v1 = (boundMax[i]*(mesh.vertices[e[1], i]-boundMin[i]))**self.invFactor[i]
+                mesh.vertices[j, i] = boundMin[i] + boundMaxInv[i]*(0.5*v0 + 0.5*v1)**self.factor[i]
+
+
+cdef class multiIntervalMeshTransformer(meshTransformer):
+    def __init__(self, list intervals):
+        super(multiIntervalMeshTransformer, self).__init__()
+        self.intervals = intervals
+
+    def __call__(self, meshBase mesh, dict lookup):
+        cdef:
+            INDEX_t[::1] e = uninitialized((2), dtype=INDEX)
+            ENCODE_t encodeVal
+            INDEX_t vertexNo
+            REAL_t r1, r2, r, x1, x2, x3, radius
+            REAL_t[:, ::1] vertices = mesh.vertices
+        for encodeVal in lookup:
+            decode_edge(encodeVal, e)
+            vertexNo = lookup[encodeVal]
+            r = mesh.vertices[vertexNo, 0]
+            for interval in self.intervals:
+                a, b, mu1, mu2 = interval
+                r = vertices[vertexNo, 0]
+                if (a < r) and (r <= b):
+                    if mu1 is None:
+                        if mu2 is None:
+                            raise NotImplementedError()
+                        else:
+                            center = a
+                            radius = b-a
+                            r1 = abs(vertices[e[0], 0]-center)
+                            r2 = abs(vertices[e[1], 0]-center)
+                            x1 = 1-(1-r1/radius)**(1/mu2)
+                            x2 = 1-(1-r2/radius)**(1/mu2)
+                            x3 = 0.5*x1+0.5*x2
+                            r = center + radius*(1-(1-x3)**mu2)
+                    else:
+                        if mu2 is None:
+                            center = b
+                            radius = b-a
+                            r1 = abs(vertices[e[0], 0]-center)
+                            r2 = abs(vertices[e[1], 0]-center)
+                            x1 = 1-(1-r1/radius)**(1/mu1)
+                            x2 = 1-(1-r2/radius)**(1/mu1)
+                            x3 = 0.5*x1+0.5*x2
+                            r = center - radius*(1-(1-x3)**mu1)
+                        else:
+                            center = 0.5*(a+b)
+                            radius = 0.5*(b-a)
+                            r1 = abs(vertices[e[0], 0]-center)
+                            r2 = abs(vertices[e[1], 0]-center)
+                            if r < center:
+                                x1 = 1-(1-r1/radius)**(1/mu1)
+                                x2 = 1-(1-r2/radius)**(1/mu1)
+                                x3 = 0.5*x1+0.5*x2
+                                r = center - radius*(1-(1-x3)**mu1)
+                            else:
+                                x1 = 1-(1-r1/radius)**(1/mu2)
+                                x2 = 1-(1-r2/radius)**(1/mu2)
+                                x3 = 0.5*x1+0.5*x2
+                                r = center + radius*(1-(1-x3)**mu2)
+                    break
+            mesh.vertices[vertexNo, 
0] = r
+
+
+def radialMeshTransformation(mesh, dict lookup):
+    cdef:
+        INDEX_t[::1] e = uninitialized((2), dtype=INDEX)
+        ENCODE_t encodeVal
+        INDEX_t vertexNo
+        REAL_t r1, r2, r, r3
+        INDEX_t dim = mesh.dim
+        REAL_t[:, ::1] vertices = mesh.vertices
+    warnings.warn('"radialMeshTransformation" deprecated, use "radialMeshTransformer"', DeprecationWarning)
+    for encodeVal in lookup:
+        decode_edge(encodeVal, e)
+        vertexNo = lookup[encodeVal]
+        r1 = 0.
+        for i in range(dim):
+            r1 += vertices[e[0], i]**2
+        r1 = sqrt(r1)
+        r2 = 0.
+        for i in range(dim):
+            r2 += vertices[e[1], i]**2
+        r2 = sqrt(r2)
+        r = 0.5*r1 + 0.5*r2
+        r3 = 0.
+        for i in range(dim):
+            r3 += vertices[vertexNo, i]**2
+        r3 = sqrt(r3)
+        for i in range(dim):
+            mesh.vertices[vertexNo, i] *= r/r3
+
+
+cdef class meshBase:
+    def __init__(self, vertices_t vertices, cells_t cells):
+        self.vertices = vertices
+        self.cells = cells
+        self.init()
+
+    def __getstate__(self):
+        return (self.vertices_as_array, self.cells_as_array, self.transformer)
+
+    def __setstate__(self, state):
+        self.vertices = state[0]
+        self.cells = state[1]
+        self.init()
+        if state[2] is not None:
+            self.setMeshTransformation(state[2])
+
+    def init(self):
+        self.resetMeshInfo()
+        self.num_vertices = self.vertices.shape[0]
+        self.num_cells = self.cells.shape[0]
+        self.dim = self.vertices.shape[1]
+        self.manifold_dim = self.cells.shape[1]-1
+        if self.dim == 1:
+            self.simplexMapper = simplexMapper1D(self)
+        elif self.dim == 2:
+            self.simplexMapper = simplexMapper2D(self)
+        elif self.dim == 3:
+            self.simplexMapper = simplexMapper3D(self)
+        else:
+            raise NotImplementedError()
+        self.transformer = None
+
+    @property
+    def vertices_as_array(self):
+        return np.array(self.vertices, copy=False)
+
+    @property
+    def cells_as_array(self):
+        return np.array(self.cells, copy=False)
+
+    @property
+    def sizeInBytes(self):
+        s = 0
+        s += self.num_vertices*self.dim*sizeof(REAL_t)
+        s += self.num_cells*(self.manifold_dim+1)*sizeof(INDEX_t)
+        if self._volVector is not None:
+            s += self.num_cells*sizeof(REAL_t)
+        if self._hVector is not None:
+            s += self.num_cells*sizeof(REAL_t)
+        if self.dim >= 1:
+            s += self.boundaryVertices.shape[0]*sizeof(INDEX_t)
+            s += self.boundaryVertices.shape[0]*sizeof(TAG_t)
+        if self.dim >= 2:
+            s += 2*self.boundaryEdges.shape[0]*sizeof(INDEX_t)
+            s += self.boundaryEdgeTags.shape[0]*sizeof(TAG_t)
+        if self.dim >= 3:
+            s += 3*self.boundaryFaces.shape[0]*sizeof(INDEX_t)
+            s += self.boundaryFaceTags.shape[0]*sizeof(TAG_t)
+        return s
+
+    cdef void computeMeshQuantities(self):
+        (self._h, self._delta,
+         self._volume, self._hmin,
+         self._volVector, self._hVector) = hdeltaCy(self)
+
+    def resetMeshInfo(self):
+        self._h, self._delta, self._volume, self._hmin = 0., 0., 0., 0. 
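+        # reset the cached mesh quantities; the h/delta/volume/hmin properties
+        # below recompute them on demand via computeMeshQuantities()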
+ self._volVector = None + self._hVector = None + + @property + def h(self): + if self._h <= 0: + self.computeMeshQuantities() + return self._h + + @property + def delta(self): + if self._delta <= 0: + self.computeMeshQuantities() + return self._delta + + @property + def volume(self): + if self._volume <= 0: + self.computeMeshQuantities() + return self._volume + + @property + def hmin(self): + if self._hmin <= 0: + self.computeMeshQuantities() + return self._hmin + + @property + def volVector(self): + if self._volVector is None: + self.computeMeshQuantities() + return np.array(self._volVector, copy=False) + + @property + def hVector(self): + if self._hVector is None: + self.computeMeshQuantities() + return np.array(self._hVector, copy=False) + + def __eq__(self, meshBase other): + if not self.dim == other.dim: + return False + if not self.manifold_dim == other.manifold_dim: + return False + if not self.num_vertices == other.num_vertices: + return False + if not self.num_cells == other.num_cells: + return False + if self.vertices[0, 0] != other.vertices[0, 0]: + return False + if self.h != other.h: + return False + if self.hmin != other.hmin: + return False + if self.delta != other.delta: + return False + if self.volume != other.volume: + return False + return True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getSimplex(meshBase self, + const INDEX_t cellIdx, + REAL_t[:, ::1] simplex): + cdef: + INDEX_t m, k, l + for m in range(self.cells.shape[1]): + k = self.cells[cellIdx, m] + for l in range(self.vertices.shape[1]): + simplex[m, l] = self.vertices[k, l] + + def getSimplex_py(self, + INDEX_t cellIdx, + REAL_t[:, ::1] simplex): + self.getSimplex(cellIdx, simplex) + + # def refine(self, returnLookup=False, sortRefine=False): + # from . mesh import mesh1d, mesh2d, mesh3d + # cdef: + # INDEX_t[:, ::1] new_boundaryEdges + # TAG_t[::1] new_boundaryEdgeTags + # INDEX_t[::1] new_boundaryVertices + # TAG_t[::1] new_boundaryVertexTags + # INDEX_t i, nv + + # if self.dim == 1: + # vertices, new_cells, lookup = refineCy1D(self.vertices, self.cells) + # newMesh = mesh1d(vertices, new_cells) + # newMesh.boundaryVertices = self.boundaryVertices.copy() + # newMesh.boundaryVertexTags = self.boundaryVertexTags.copy() + # elif self.dim == 2: + # if sortRefine: + # # Refine the mesh by sorting all edges. Seems faster, but + # # ordering of DoFs seems to cause the solution time to go + # # up. + # vertices, new_cells, lookup = refineCy2Dsort(self.vertices, + # self.cells) + # else: + # # Refine the mesh by building a lookup table of all edges. 
+ # vertices, new_cells, lookup = refineCy2DedgeVals(self.vertices, + # self.cells) + # newMesh = mesh2d(vertices, new_cells) + + # new_boundaryEdges = uninitialized((2*self.boundaryEdges.shape[0], 2), dtype=INDEX) + # new_boundaryEdgeTags = uninitialized((2*self.boundaryEdges.shape[0]), dtype=TAG) + # new_boundaryVertices = uninitialized((self.boundaryEdges.shape[0]), dtype=INDEX) + # new_boundaryVertexTags = uninitialized((self.boundaryEdges.shape[0]), dtype=TAG) + # for i in range(self.boundaryEdges.shape[0]): + # e = self.boundaryEdges[i, :] + # t = self.boundaryEdgeTags[i] + # if e[0] < e[1]: + # nv = lookup[encode_edge(e)] + # else: + # e2 = np.array([e[1], e[0]]) + # nv = lookup[encode_edge(e2)] + # new_boundaryEdges[2*i, 0] = e[0] + # new_boundaryEdges[2*i, 1] = nv + # new_boundaryEdges[2*i+1, 0] = nv + # new_boundaryEdges[2*i+1, 1] = e[1] + # new_boundaryEdgeTags[2*i] = t + # new_boundaryEdgeTags[2*i+1] = t + # new_boundaryVertices[i] = nv + # new_boundaryVertexTags[i] = t + # newMesh.boundaryVertices = np.concatenate((self.boundaryVertices, + # new_boundaryVertices)) + # newMesh.boundaryVertexTags = np.concatenate((self.boundaryVertexTags, + # new_boundaryVertexTags)) + # newMesh.boundaryEdges = np.array(new_boundaryEdges, copy=False) + # newMesh.boundaryEdgeTags = np.array(new_boundaryEdgeTags, copy=False) + # elif self.dim == 3: + # vertices, new_cells, lookup = refineCy3DedgeVals(self.vertices, + # self.cells) + # newMesh = mesh3d(vertices, new_cells) + + # (newBV, newBVtags, + # newMesh.boundaryEdges, + # newMesh.boundaryEdgeTags, + # newMesh.boundaryFaces, + # newMesh.boundaryFaceTags) = newBoundaryAndTags3D(lookup, self.boundaryVertices, + # self.boundaryEdges, self.boundaryFaces, + # self.boundaryEdgeTags, self.boundaryFaceTags) + # newMesh.boundaryVertices = np.concatenate((self.boundaryVertices, + # newBV)) + # newMesh.boundaryVertexTags = np.concatenate((self.boundaryVertexTags, + # newBVtags)) + # else: + # raise NotImplementedError() + # if returnLookup: + # return newMesh, lookup + # else: + # return newMesh + + def copy(self): + newVertices = np.array(self.vertices, copy=True) + newCells = np.array(self.cells, copy=True) + newMesh = type(self)(newVertices, newCells) + if self.transformer is not None: + from copy import deepcopy + newMesh.setMeshTransformation(deepcopy(self.transformer)) + return newMesh + + def setMeshTransformation(self, meshTransformer transformer): + self.transformer = transformer + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def refine(meshBase self, BOOL_t returnLookup=False, BOOL_t sortRefine=False): + from . mesh import mesh1d, mesh2d, mesh3d + cdef: + INDEX_t i, nv + TAG_t t + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + INDEX_t[::1] e2 = uninitialized((2), dtype=INDEX) + INDEX_t[::1] new_boundaryVertices + INDEX_t[:, ::1] new_boundaryEdges + TAG_t[::1] new_boundaryVertexTags + TAG_t[::1] new_boundaryEdgeTags + if self.manifold_dim == 1: + vertices, new_cells, lookup = refineCy1D(self.vertices, self.cells) + newMesh = mesh1d(vertices, new_cells) + newMesh.boundaryVertices = self.boundaryVertices.copy() + newMesh.boundaryVertexTags = self.boundaryVertexTags.copy() + elif self.manifold_dim == 2: + if sortRefine: + # Refine the mesh by sorting all edges. Seems faster, but + # ordering of DoFs seems to cause the solution time to go + # up. + vertices, new_cells, lookup = refineCy2Dsort(self.vertices, + self.cells) + else: + # Refine the mesh by building a lookup table of all edges. 
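+                # lookup maps each encoded parent edge to its midpoint vertex; it is
+                # reused below to split the boundary edges and, at the end of refine(),
+                # to drive the mesh transformer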
+ vertices, new_cells, lookup = refineCy2DedgeVals(self.vertices, + self.cells) + newMesh = mesh2d(vertices, new_cells) + + new_boundaryEdges = uninitialized((2*self.boundaryEdges.shape[0], 2), dtype=INDEX) + new_boundaryEdgeTags = uninitialized((2*self.boundaryEdges.shape[0]), dtype=TAG) + new_boundaryVertices = uninitialized((self.boundaryEdges.shape[0]), dtype=INDEX) + new_boundaryVertexTags = uninitialized((self.boundaryEdges.shape[0]), dtype=TAG) + for i in range(self.boundaryEdges.shape[0]): + e[0] = self.boundaryEdges[i, 0] + e[1] = self.boundaryEdges[i, 1] + t = self.boundaryEdgeTags[i] + sortEdge(e[0], e[1], e2) + nv = lookup[encode_edge(e2)] + new_boundaryEdges[2*i, 0] = e[0] + new_boundaryEdges[2*i, 1] = nv + new_boundaryEdges[2*i+1, 0] = nv + new_boundaryEdges[2*i+1, 1] = e[1] + new_boundaryEdgeTags[2*i] = t + new_boundaryEdgeTags[2*i+1] = t + new_boundaryVertices[i] = nv + new_boundaryVertexTags[i] = t + newMesh.boundaryVertices = np.concatenate((self.boundaryVertices, + new_boundaryVertices)) + newMesh.boundaryVertexTags = np.concatenate((self.boundaryVertexTags, + new_boundaryVertexTags)) + newMesh.boundaryEdges = np.array(new_boundaryEdges, copy=False) + newMesh.boundaryEdgeTags = np.array(new_boundaryEdgeTags, copy=False) + elif self.manifold_dim == 3: + vertices, new_cells, lookup = refineCy3DedgeVals(self.vertices, + self.cells) + newMesh = mesh3d(vertices, new_cells) + + (newBV, newBVtags, + newMesh.boundaryEdges, + newMesh.boundaryEdgeTags, + newMesh.boundaryFaces, + newMesh.boundaryFaceTags) = newBoundaryAndTags3D(lookup, self.boundaryVertices, + self.boundaryEdges, self.boundaryFaces, + self.boundaryEdgeTags, self.boundaryFaceTags) + newMesh.boundaryVertices = np.concatenate((self.boundaryVertices, + newBV)) + newMesh.boundaryVertexTags = np.concatenate((self.boundaryVertexTags, + newBVtags)) + else: + raise NotImplementedError() + if self.transformer is not None: + self.transformer(newMesh, lookup) + newMesh.setMeshTransformation(self.transformer) + if returnLookup: + return newMesh, lookup + else: + return newMesh + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def removeUnusedVertices(self): + cdef: + INDEX_t[::1] mapping = -np.ones((self.num_vertices), dtype=INDEX) + INDEX_t manifold_dim = self.manifold_dim + INDEX_t k, i, j, v + REAL_t[:, ::1] vertices = self.vertices + INDEX_t[:, ::1] cells = self.cells + REAL_t[:, ::1] new_vertices + INDEX_t[:, ::1] new_cells + INDEX_t[::1] boundaryVertices, newBoundaryVertices + TAG_t[::1] boundaryVertexTags, newBoundaryVertexTags + INDEX_t[:, ::1] boundaryEdges + INDEX_t[:, ::1] boundaryFaces + k = 0 + for i in range(self.num_cells): + for j in range(self.manifold_dim+1): + v = cells[i, j] + if mapping[v] == -1: + mapping[v] = k + k += 1 + new_vertices = uninitialized((k, self.dim), dtype=REAL) + for i in range(mapping.shape[0]): + k = mapping[i] + if k == -1: + continue + for j in range(self.dim): + new_vertices[k, j] = vertices[i, j] + self.vertices = new_vertices + self.num_vertices = new_vertices.shape[0] + new_cells = uninitialized((self.num_cells, self.manifold_dim+1), dtype=INDEX) + for i in range(self.num_cells): + for j in range(manifold_dim+1): + v = cells[i, j] + new_cells[i, j] = mapping[v] + self.cells = new_cells + self.num_cells = new_cells.shape[0] + + if hasattr(self, '_boundaryVertices'): + numBoundaryVertices = 0 + boundaryVertices = self._boundaryVertices + boundaryVertexTags = self._boundaryVertexTags + for i in range(boundaryVertices.shape[0]): + v = 
boundaryVertices[i] + j = mapping[v] + if j != -1: + numBoundaryVertices += 1 + newBoundaryVertices = uninitialized((numBoundaryVertices), dtype=INDEX) + newBoundaryVertexTags = uninitialized((numBoundaryVertices), dtype=TAG) + k = 0 + for i in range(boundaryVertices.shape[0]): + v = boundaryVertices[i] + j = mapping[v] + if j != -1: + newBoundaryVertices[k] = j + newBoundaryVertexTags[k] = boundaryVertexTags[i] + k += 1 + self._boundaryVertices = np.array(newBoundaryVertices, copy=False) + self._boundaryVertexTags = np.array(newBoundaryVertexTags, copy=False) + + if hasattr(self, '_boundaryEdges'): + boundaryEdges = self._boundaryEdges + for i in range(boundaryEdges.shape[0]): + for j in range(2): + boundaryEdges[i, j] = mapping[boundaryEdges[i, j]] + if hasattr(self, '_boundaryFaces'): + boundaryFaces = self._boundaryFaces + for i in range(boundaryFaces.shape[0]): + for j in range(3): + boundaryFaces[i, j] = mapping[boundaryFaces[i, j]] + if hasattr(self, '_interiorVertices'): + del self._interiorVertices + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def getCellCenters(self): + cdef: + REAL_t[:, ::1] simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + INDEX_t j, k + REAL_t[:, ::1] centers = np.zeros((self.num_cells, self.dim), dtype=REAL) + REAL_t fac = 1./(self.dim+1) + INDEX_t cellNo + + for cellNo in range(self.num_cells): + self.getSimplex(cellNo, simplex) + for j in range(self.dim+1): + for k in range(self.dim): + centers[cellNo, k] += simplex[j, k] + for k in range(self.dim): + centers[cellNo, k] *= fac + return centers + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + def getProjectedCenters(self): + cdef: + REAL_t[:, ::1] simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + INDEX_t j, k + REAL_t[::1] mins = uninitialized((self.dim), dtype=REAL) + REAL_t[::1] maxs = uninitialized((self.dim), dtype=REAL) + REAL_t[:, ::1] centers = np.zeros((self.num_cells, self.dim), dtype=REAL) + INDEX_t cellNo + + for cellNo in range(self.num_cells): + self.getSimplex(cellNo, simplex) + for k in range(self.dim): + mins[k] = simplex[0, k] + maxs[k] = simplex[0, k] + for j in range(1, self.dim+1): + for k in range(self.dim): + mins[k] = min(mins[k], simplex[j, k]) + maxs[k] = max(maxs[k], simplex[j, k]) + for k in range(self.dim): + centers[cellNo, k] = 0.5*(mins[k]+maxs[k]) + return centers + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t vertexInCell(self, REAL_t[::1] vertex, INDEX_t cellNo, REAL_t[:, ::1] simplexMem, REAL_t[::1] baryMem): + cdef: + INDEX_t i + self.getSimplex(cellNo, simplexMem) + if self.dim == 1: + getBarycentricCoords1D(simplexMem, vertex, baryMem) + elif self.dim == 2: + getBarycentricCoords2D(simplexMem, vertex, baryMem) + else: + raise NotImplementedError() + for i in range(self.dim+1): + if baryMem[i] < 0.: + return False + return True + + def vertexInCell_py(self, REAL_t[::1] vertex, INDEX_t cellNo): + simplex = uninitialized((self.dim+1, self.dim), dtype=REAL) + bary = uninitialized((self.dim+1), dtype=REAL) + return self.vertexInCell(vertex, cellNo, simplex, bary) + + def getCellConnectivity(self, INDEX_t common_nodes=-1): + cdef: + list v2c + list c2c + list cellConnectivity + INDEX_t cellNo, cellNo2, vertexNo, vertex + if common_nodes < 0: + common_nodes = 1 + v2c = [] + for vertex in range(self.num_vertices): + v2c.append(set()) + for cellNo in range(self.num_cells): + for vertexNo in 
range(self.cells.shape[1]): + vertex = self.cells[cellNo, vertexNo] + v2c[vertex].add(cellNo) + c2c = [] + for cellNo in range(self.num_cells): + c2c.append({}) + for vertex in range(self.num_vertices): + if len(v2c[vertex]) > 1: + for cellNo in v2c[vertex]: + for cellNo2 in v2c[vertex]: + if cellNo != cellNo2: + try: + c2c[cellNo][cellNo2].add(vertex) + except KeyError: + c2c[cellNo][cellNo2] = set([vertex]) + cellConnectivity = [] + for cellNo in range(self.num_cells): + cellConnectivity.append(set()) + for cellNo in range(self.num_cells): + for cellNo2 in c2c[cellNo]: + if len(c2c[cellNo][cellNo2]) >= common_nodes: + cellConnectivity[cellNo].add(cellNo2) + return cellConnectivity + + +# Encoding, decoding and sorting of edges + +cdef ENCODE_t MAX_VAL_EDGE = (2)**(31) + + +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.wraparound(False) +cdef ENCODE_t encode_edge(const INDEX_t[::1] e): + return MAX_VAL_EDGE*e[0]+e[1] + + +def encode_edge_python(INDEX_t[::1] e): + return encode_edge(e) + + +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.wraparound(False) +cdef void decode_edge(const ENCODE_t encodeVal, INDEX_t[::1] e): + e[0] = encodeVal // MAX_VAL_EDGE + e[1] = encodeVal % MAX_VAL_EDGE + + +def decode_edge_python(ENCODE_t encodeVal): + e = uninitialized((2), dtype=INDEX) + decode_edge(encodeVal, e) + return e + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void sortEdge(const INDEX_t c0, const INDEX_t c1, INDEX_t[::1] e): + if c0 < c1: + e[0], e[1] = c0, c1 + else: + e[0], e[1] = c1, c0 + + +def sortEdge_py(const INDEX_t c0, const INDEX_t c1, INDEX_t[::1] e): + sortEdge(c0, c1, e) + +# Encoding, decoding and sorting of faces + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef tuple encode_face(const INDEX_t[::1] f): + return (f[0], MAX_VAL_EDGE*f[1]+f[2]) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void decode_face(tuple encodeVal, INDEX_t[::1] f): + f[0] = encodeVal[0] + f[1] = encodeVal[1] // MAX_VAL_EDGE + f[2] = encodeVal[1] % MAX_VAL_EDGE + + +def encode_face_python(INDEX_t[::1] f): + return encode_face(f) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void sortFace(const INDEX_t c0, const INDEX_t c1, const INDEX_t c2, + INDEX_t[::1] f): + if c0 < c1: + if c0 < c2: + if c1 < c2: + f[0], f[1], f[2] = c0, c1, c2 + else: + f[0], f[1], f[2] = c0, c2, c1 + else: + f[0], f[1], f[2] = c2, c0, c1 + else: + if c1 < c2: + if c0 < c2: + f[0], f[1], f[2] = c1, c0, c2 + else: + f[0], f[1], f[2] = c1, c2, c0 + else: + f[0], f[1], f[2] = c2, c1, c0 + + +def sortFace_py(const INDEX_t c0, const INDEX_t c1, const INDEX_t c2, INDEX_t[::1] f): + sortFace(c0, c1, c2, f) + + +@cython.cdivision(True) +def refineCy1D(const REAL_t[:, ::1] vertices, + const INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_vertices = vertices.shape[0] + INDEX_t num_cells = cells.shape[0] + INDEX_t c0, c1, i, j, k, nv0, new_num_vertices + np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((2*num_cells, 2), + dtype=INDEX) + REAL_t[:, ::1] new_vertices + INDEX_t[:, ::1] new_cells = new_cells_mem + INDEX_t[::1] e0 = uninitialized((2), dtype=INDEX) + ENCODE_t hv + dict lookup = {} + + new_num_vertices = num_vertices + for i in range(num_cells): + c0, c1 = cells[i, 0], cells[i, 1] + sortEdge(c0, c1, e0) + lookup[encode_edge(e0)] = new_num_vertices + new_num_vertices += 1 + # vertices = 
np.vstack((vertices,
+    #                        uninitialized((new_num_vertices-num_vertices, 1))))
+    new_vertices_mem = uninitialized((new_num_vertices, vertices.shape[1]))
+    new_vertices = new_vertices_mem
+    for i in range(num_vertices):
+        for k in range(vertices.shape[1]):
+            new_vertices[i, k] = vertices[i, k]
+    for hv, j in lookup.iteritems():
+        decode_edge(hv, e0)
+        for k in range(vertices.shape[1]):
+            new_vertices[j, k] = (vertices[e0[0], k] + vertices[e0[1], k])*0.5
+
+    # Add new cells
+    for i in range(num_cells):
+        c0, c1 = cells[i, 0], cells[i, 1]
+        sortEdge(c0, c1, e0)
+        nv0 = lookup[encode_edge(e0)]
+        new_cells[2*i, 0], new_cells[2*i, 1] = c0, nv0
+        new_cells[2*i+1, 0], new_cells[2*i+1, 1] = nv0, c1
+
+    return new_vertices_mem, new_cells_mem, lookup
+
+
+# @cython.initializedcheck(False)
+# @cython.boundscheck(False)
+# @cython.wraparound(False)
+# def refineCy2D(const REAL_t[:, ::1] vertices,
+#                const INDEX_t[:, ::1] cells):
+#     cdef:
+#         INDEX_t num_vertices = vertices.shape[0]
+#         INDEX_t num_cells = cells.shape[0]
+#         INDEX_t c0, c1, c2, i, j, new_num_vertices, k, nv0, nv1, nv2, vno
+#         ENCODE_t hv
+#         np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((4*num_cells, 3),
+#                                                                   dtype=INDEX)
+#         INDEX_t[:, ::1] new_cells = new_cells_mem
+#         REAL_t[:, ::1] new_vertices
+#         np.ndarray[REAL_t, ndim=2] new_vertices_mem
+#         dict lookup = {}
+#         # unordered_map[ENCODE_t, INDEX_t] lookup
+#         # map[ENCODE_t, INDEX_t] lookup
+#         INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX)
+#         INDEX_t[::1] e0 = temp[0, :]
+#         INDEX_t[::1] e1 = temp[1, :]
+#         INDEX_t[::1] e2 = temp[2, :]
+
+#     # Build lookup table
+#     # edge -> midpoint vertex number
+#     new_num_vertices = num_vertices
+#     for i in range(num_cells):
+#         c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2]
+#         sortEdge(c0, c1, e0)
+#         sortEdge(c0, c2, e1)
+#         sortEdge(c1, c2, e2)
+#         for k in range(3):
+#             try:
+#                 lookup[encode_edge(temp[k, :])]
+#             except KeyError:
+#                 lookup[encode_edge(temp[k, :])] = new_num_vertices
+#                 new_num_vertices += 1
+#     new_vertices_mem = uninitialized((new_num_vertices, 2), dtype=REAL)
+#     new_vertices = new_vertices_mem
+#     # copy over old vertices
+#     for i in range(num_vertices):
+#         for k in range(2):
+#             new_vertices[i, k] = vertices[i, k]
+#     # insert new vertices
+#     for hv, j in lookup.iteritems():
+#         decode_edge(hv, e0)
+#         for k in range(2):
+#             new_vertices[j, k] = (vertices[e0[0], k] + vertices[e0[1], k])*0.5
+
+#     # Add new cells
+#     for i in range(num_cells):
+#         c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2]
+#         sortEdge(c0, c1, e0)
+#         sortEdge(c0, c2, e1)
+#         sortEdge(c1, c2, e2)
+#         nv0 = lookup[encode_edge(e0)]
+#         nv1 = lookup[encode_edge(e2)]
+#         nv2 = lookup[encode_edge(e1)]
+#         new_cells[4*i, 0], new_cells[4*i, 1], new_cells[4*i, 2] = c0, nv0, nv2
+#         new_cells[4*i+1, 0], new_cells[4*i+1, 1], new_cells[4*i+1, 2] = c1, nv1, nv0
+#         new_cells[4*i+2, 0], new_cells[4*i+2, 1], new_cells[4*i+2, 2] = c2, nv2, nv1
+#         new_cells[4*i+3, 0], new_cells[4*i+3, 1], new_cells[4*i+3, 2] = nv0, nv1, nv2
+
+#     return new_vertices_mem, new_cells_mem, lookup
+
+
+cdef inline int compareEdges(const void *pa, const void *pb) nogil:
+    cdef:
+        INDEX_t *a
+        INDEX_t *b
+    a = <INDEX_t *> pa
+    b = <INDEX_t *> pb
+    return 2*((a[0] > b[0])-(a[0] < b[0])) + ((a[1] > b[1])-(a[1] < b[1]))
+    # if a[0] < b[0]:
+    #     return -1
+    # elif a[0] > b[0]:
+    #     return 1
+    # else:
+    #     if a[1] < b[1]:
+    #         return -1
+    #     else:
+    #         return 1
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def refineCy2Dsort(const REAL_t[:, ::1] vertices,
+                   const INDEX_t[:, ::1] cells):
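+    """Uniformly refine a 2D mesh by sorting all cell edges.
+
+    Returns (new_vertices, new_cells, lookup), where lookup maps each encoded
+    parent edge to the index of its new midpoint vertex. A usage sketch:
+    vertices, cells, lookup = refineCy2Dsort(mesh.vertices, mesh.cells)
+    """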
cdef: + INDEX_t num_vertices = vertices.shape[0] + INDEX_t num_cells = cells.shape[0] + INDEX_t c0, c1, c2, i, j, new_num_vertices, k, nv0, nv1, nv2 + ENCODE_t hv, hvOld + np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((4*num_cells, 3), + dtype=INDEX) + INDEX_t[:, ::1] new_cells = new_cells_mem + # INDEX_t[:, ::1] cells_mv = cells + REAL_t[:, ::1] new_vertices + dict lookup = {} + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + np.ndarray[REAL_t, ndim=2] new_vertices_mem + np.ndarray[INDEX_t, ndim=2] edges_mem = uninitialized((3*num_cells, 2), + dtype=INDEX) + INDEX_t[:, ::1] edges = edges_mem + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, edges[3*i, :]) + sortEdge(c0, c2, edges[3*i+1, :]) + sortEdge(c1, c2, edges[3*i+2, :]) + qsort(&edges[0, 0], + edges.shape[0], edges.shape[1]*sizeof(INDEX_t), + compareEdges) + new_num_vertices = num_vertices + hvOld = 0 + for i in range(3*num_cells): + hv = encode_edge(edges[i, :]) + if hv != hvOld: + lookup[hv] = new_num_vertices + new_num_vertices += 1 + hvOld = hv + del edges, edges_mem + new_vertices_mem = uninitialized((new_num_vertices, 2), dtype=REAL) + new_vertices = new_vertices_mem + # copy over old vertices + for i in range(num_vertices): + for k in range(2): + new_vertices[i, k] = vertices[i, k] + for hv, j in lookup.iteritems(): + decode_edge(hv, e0) + for k in range(2): + new_vertices[j, k] = (vertices[e0[0], k] + vertices[e0[1], k])*0.5 + + # Add new cells + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c0, c2, e1) + sortEdge(c1, c2, e2) + nv0 = lookup[encode_edge(e0)] + nv1 = lookup[encode_edge(e2)] + nv2 = lookup[encode_edge(e1)] + new_cells[4*i, 0], new_cells[4*i, 1], new_cells[4*i, 2] = c0, nv0, nv2 + new_cells[4*i+1, 0], new_cells[4*i+1, 1], new_cells[4*i+1, 2] = c1, nv1, nv0 + new_cells[4*i+2, 0], new_cells[4*i+2, 1], new_cells[4*i+2, 2] = c2, nv2, nv1 + new_cells[4*i+3, 0], new_cells[4*i+3, 1], new_cells[4*i+3, 2] = nv0, nv1, nv2 + return new_vertices_mem, new_cells_mem, lookup + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def refineCy2DedgeVals(const REAL_t[:, ::1] vertices, + const INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_vertices = vertices.shape[0] + INDEX_t num_cells = cells.shape[0] + INDEX_t c0, c1, c2, i, nv = 0, new_num_vertices, k, nv0, nv1, nv2 + np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((4*num_cells, 3), + dtype=INDEX) + INDEX_t[:, ::1] new_cells = new_cells_mem + REAL_t[:, ::1] new_vertices + INDEX_t dim = vertices.shape[1] + np.ndarray[REAL_t, ndim=2] new_vertices_mem + dict lookup = {} + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + tupleDictINDEX eV = tupleDictINDEX(num_vertices, deleteHits=False) + + # Build lookup table + # edge -> midpoint vertex number + new_num_vertices = num_vertices + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c0, c2, e1) + sortEdge(c1, c2, e2) + for k in range(3): + if eV.enterValue(temp[k, :], new_num_vertices) == new_num_vertices: + new_num_vertices += 1 + new_vertices_mem = uninitialized((new_num_vertices, vertices.shape[1]), dtype=REAL) + new_vertices = new_vertices_mem + # copy over old vertices + for i in 
range(num_vertices): + for k in range(dim): + new_vertices[i, k] = vertices[i, k] + # insert new vertices + eV.startIter() + while eV.next(e0, &nv): + # FIX: Can we get rid of this lookup table and just keep the edgeVals? + lookup[encode_edge(e0)] = nv + for k in range(dim): + new_vertices[nv, k] = (vertices[e0[0], k] + vertices[e0[1], k])*0.5 + + # Add new cells + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c0, c2, e1) + sortEdge(c1, c2, e2) + nv0 = eV.getValue(e0) + nv1 = eV.getValue(e2) + nv2 = eV.getValue(e1) + new_cells[4*i, 0], new_cells[4*i, 1], new_cells[4*i, 2] = c0, nv0, nv2 + new_cells[4*i+1, 0], new_cells[4*i+1, 1], new_cells[4*i+1, 2] = c1, nv1, nv0 + new_cells[4*i+2, 0], new_cells[4*i+2, 1], new_cells[4*i+2, 2] = c2, nv2, nv1 + new_cells[4*i+3, 0], new_cells[4*i+3, 1], new_cells[4*i+3, 2] = nv0, nv1, nv2 + return new_vertices_mem, new_cells_mem, lookup + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def refineCy2Dhash(const REAL_t[:, ::1] vertices, + const INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_vertices = vertices.shape[0] + INDEX_t num_cells = cells.shape[0] + INDEX_t c0, c1, c2, i, nv = 0, new_num_vertices, k, nv0, nv1, nv2 + np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((4*num_cells, 3), + dtype=INDEX) + INDEX_t[:, ::1] new_cells = new_cells_mem + REAL_t[:, ::1] new_vertices + np.ndarray[REAL_t, ndim=2] new_vertices_mem + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + intTuple t, t0, t1, t2 + dict eV = {} + + # Build lookup table + # edge -> midpoint vertex number + new_num_vertices = num_vertices + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + t = intTuple.create(e0) + try: + eV[t] + except KeyError: + eV[t] = new_num_vertices + new_num_vertices += 1 + + sortEdge(c0, c2, e1) + t = intTuple.create(e1) + try: + eV[t] + except KeyError: + eV[t] = new_num_vertices + new_num_vertices += 1 + + sortEdge(c1, c2, e2) + t = intTuple.create(e2) + try: + eV[t] + except KeyError: + eV[t] = new_num_vertices + new_num_vertices += 1 + + new_vertices_mem = uninitialized((new_num_vertices, 2), dtype=REAL) + new_vertices = new_vertices_mem + # copy over old vertices + for i in range(num_vertices): + for k in range(2): + new_vertices[i, k] = vertices[i, k] + # insert new vertices + for t, nv in eV.items(): + # FIX: Can we get rid of this lookup table and just keep the edgeVals? 
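+        # t is the intTuple key of a parent edge; copy its two vertex indices
+        # into e0, then place the new vertex at the edge midpoint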
+ t.get(&e0[0]) + for k in range(2): + new_vertices[nv, k] = (vertices[e0[0], k] + vertices[e0[1], k])*0.5 + + # Add new cells + t0 = intTuple.createNonOwning(e0) + t1 = intTuple.createNonOwning(e1) + t2 = intTuple.createNonOwning(e2) + for i in range(num_cells): + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c0, c2, e1) + sortEdge(c1, c2, e2) + nv0 = eV[t0] + nv1 = eV[t2] + nv2 = eV[t1] + new_cells[4*i, 0], new_cells[4*i, 1], new_cells[4*i, 2] = c0, nv0, nv2 + new_cells[4*i+1, 0], new_cells[4*i+1, 1], new_cells[4*i+1, 2] = c1, nv1, nv0 + new_cells[4*i+2, 0], new_cells[4*i+2, 1], new_cells[4*i+2, 2] = c2, nv2, nv1 + new_cells[4*i+3, 0], new_cells[4*i+3, 1], new_cells[4*i+3, 2] = nv0, nv1, nv2 + return new_vertices_mem, new_cells_mem, eV + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def refineCy3DedgeVals(const REAL_t[:, ::1] vertices, + const INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_vertices = vertices.shape[0] + INDEX_t num_cells = cells.shape[0] + REAL_t[:, ::1] new_vertices + np.ndarray[REAL_t, ndim=2] new_vertices_mem + INDEX_t v0, v1, v2, v3, i, j = 0, m, k, new_num_vertices + INDEX_t v01, v02, v03, v12, v13, v23 + dict lookup + INDEX_t[:, ::1] edges = uninitialized((7, 2), dtype=INDEX) + INDEX_t[::1] e01 = edges[0, :] + INDEX_t[::1] e02 = edges[1, :] + INDEX_t[::1] e03 = edges[2, :] + INDEX_t[::1] e12 = edges[3, :] + INDEX_t[::1] e13 = edges[4, :] + INDEX_t[::1] e23 = edges[5, :] + INDEX_t[::1] e = edges[6, :] + INDEX_t[:, ::1] faceedges = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = faceedges[0, :] + INDEX_t[::1] e1 = faceedges[1, :] + 
INDEX_t[::1] e2 = faceedges[2, :] + INDEX_t[:, ::1] faces = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f012 = faces[0, :] + INDEX_t[::1] f013 = faces[1, :] + INDEX_t[::1] f023 = faces[2, :] + INDEX_t[::1] f123 = faces[3, :] + REAL_t l0123, l0213, l0312 + np.ndarray[INDEX_t, ndim=2] new_cells_mem = uninitialized((8*num_cells, 4), + dtype=INDEX) + REAL_t[:, ::1] temp = uninitialized((3, 3), dtype=REAL) + INDEX_t[:, ::1] new_cells = new_cells_mem + tupleDictINDEX eV = tupleDictINDEX(num_vertices, deleteHits=False) + # Build lookup table + # edge -> midpoint vertex number + lookup = {} + new_num_vertices = num_vertices + for i in range(num_cells): + v0, v1, v2, v3 = (cells[i, 0], cells[i, 1], + cells[i, 2], cells[i, 3]) + # f012, f013, f023, f123 point to faces + sortFace(v0, v1, v2, f012) + sortFace(v0, v1, v3, f013) + sortFace(v0, v2, v3, f023) + sortFace(v1, v2, v3, f123) + for m in range(4): + e0[0], e0[1] = faces[m, 0], faces[m, 1] + e1[0], e1[1] = faces[m, 1], faces[m, 2] + e2[0], e2[1] = faces[m, 0], faces[m, 2] + for k in range(3): + if eV.enterValue(faceedges[k, :], new_num_vertices) == new_num_vertices: + new_num_vertices += 1 + new_vertices_mem = uninitialized((new_num_vertices, 3), dtype=REAL) + new_vertices = new_vertices_mem + # copy over old vertices + for i in range(num_vertices): + for k in range(3): + new_vertices[i, k] = vertices[i, k] + eV.startIter() + while eV.next(e, &j): + lookup[encode_edge(e)] = j + for k in range(3): + new_vertices[j, k] = (vertices[e[0], k] + vertices[e[1], k])*0.5 + + # Add new cells + for i in range(num_cells): + v0, v1, v2, v3 = (cells[i, 0], cells[i, 1], + cells[i, 2], cells[i, 3]) + sortEdge(v0, v1, e01) + sortEdge(v0, v2, e02) + sortEdge(v0, v3, e03) + sortEdge(v1, v2, e12) + sortEdge(v1, v3, e13) + sortEdge(v2, v3, e23) + v01 = eV.getValue(e01) + v02 = eV.getValue(e02) + v03 = eV.getValue(e03) + v12 = eV.getValue(e12) + v13 = eV.getValue(e13) + v23 = eV.getValue(e23) + + # calculate length^2 of diagonals of internal octahedron + for j in range(3): + temp[0, j] = new_vertices[v01, j]-new_vertices[v23, j] + temp[1, j] = new_vertices[v02, j]-new_vertices[v13, j] + temp[2, j] = new_vertices[v03, j]-new_vertices[v12, j] + l0123 = mydot(temp[0, :], temp[0, :]) + l0213 = mydot(temp[1, :], temp[1, :]) + l0312 = mydot(temp[2, :], temp[2, :]) + + # I want the cells always to be oriented in the same way. 
+ # => don't use Bey's algorithm, but shortest interior edge refinement + + # cut off corners + new_cells[8*i, 0], new_cells[8*i, 1], new_cells[8*i, 2], new_cells[8*i, 3] = v0, v01, v02, v03 + new_cells[8*i+1, 0], new_cells[8*i+1, 1], new_cells[8*i+1, 2], new_cells[8*i+1, 3] = v01, v1, v12, v13 + new_cells[8*i+2, 0], new_cells[8*i+2, 1], new_cells[8*i+2, 2], new_cells[8*i+2, 3] = v02, v12, v2, v23 + new_cells[8*i+3, 0], new_cells[8*i+3, 1], new_cells[8*i+3, 2], new_cells[8*i+3, 3] = v03, v13, v23, v3 + + if (l0123 < l0213) and (l0123 < l0312): + # shortest diagonal v01 - v23 + new_cells[8*i+4, 0], new_cells[8*i+4, 1], new_cells[8*i+4, 2], new_cells[8*i+4, 3] = v01, v12, v02, v23 + new_cells[8*i+5, 0], new_cells[8*i+5, 1], new_cells[8*i+5, 2], new_cells[8*i+5, 3] = v01, v23, v03, v13 + new_cells[8*i+6, 0], new_cells[8*i+6, 1], new_cells[8*i+6, 2], new_cells[8*i+6, 3] = v01, v02, v03, v23 + new_cells[8*i+7, 0], new_cells[8*i+7, 1], new_cells[8*i+7, 2], new_cells[8*i+7, 3] = v01, v13, v12, v23 + elif (l0213 < l0312): + # shortest diagonal v02 - v13 + new_cells[8*i+4, 0], new_cells[8*i+4, 1], new_cells[8*i+4, 2], new_cells[8*i+4, 3] = v01, v02, v03, v13 + new_cells[8*i+5, 0], new_cells[8*i+5, 1], new_cells[8*i+5, 2], new_cells[8*i+5, 3] = v01, v12, v02, v13 + new_cells[8*i+6, 0], new_cells[8*i+6, 1], new_cells[8*i+6, 2], new_cells[8*i+6, 3] = v02, v03, v13, v23 + new_cells[8*i+7, 0], new_cells[8*i+7, 1], new_cells[8*i+7, 2], new_cells[8*i+7, 3] = v02, v13, v12, v23 + else: + # shortest diagonal v03 - v12 + new_cells[8*i+4, 0], new_cells[8*i+4, 1], new_cells[8*i+4, 2], new_cells[8*i+4, 3] = v01, v13, v12, v03 + new_cells[8*i+5, 0], new_cells[8*i+5, 1], new_cells[8*i+5, 2], new_cells[8*i+5, 3] = v03, v23, v13, v12 + new_cells[8*i+6, 0], new_cells[8*i+6, 1], new_cells[8*i+6, 2], new_cells[8*i+6, 3] = v03, v23, v12, v02 + new_cells[8*i+7, 0], new_cells[8*i+7, 1], new_cells[8*i+7, 2], new_cells[8*i+7, 3] = v01, v12, v02, v03 + + return new_vertices_mem, new_cells_mem, lookup + + +def newBoundaryAndTags3D(dict lookup, + INDEX_t[::1] boundaryVertices, + INDEX_t[:, ::1] boundaryEdges, + INDEX_t[:, ::1] boundaryFaces, + TAG_t[::1] boundaryEdgeTags, + TAG_t[::1] boundaryFaceTags): + cdef: + INDEX_t i, nv, nv01, nv02, nv12, I + TAG_t t + np.ndarray[INDEX_t, ndim=2] new_boundaryEdges_mem = uninitialized((2*boundaryEdges.shape[0] + + 3*boundaryFaces.shape[0], 2), dtype=INDEX) + np.ndarray[INDEX_t, ndim=1] new_boundaryVertices_mem = uninitialized((boundaryEdges.shape[0]), dtype=INDEX) + np.ndarray[INDEX_t, ndim=2] new_boundaryFaces_mem = uninitialized((4*boundaryFaces.shape[0], 3), dtype=INDEX) + np.ndarray[TAG_t, ndim=1] new_boundaryFaceTags_mem = uninitialized((4*boundaryFaces.shape[0]), dtype=TAG) + np.ndarray[TAG_t, ndim=1] new_boundaryEdgeTags_mem = uninitialized((2*boundaryEdges.shape[0] + + 3*boundaryFaces.shape[0]), dtype=TAG) + np.ndarray[TAG_t, ndim=1] new_boundaryVertexTags_mem = uninitialized((boundaryEdges.shape[0]), dtype=TAG) + INDEX_t[:, ::1] new_boundaryFaces = new_boundaryFaces_mem + INDEX_t[:, ::1] new_boundaryEdges = new_boundaryEdges_mem + INDEX_t[::1] new_boundaryVertices = new_boundaryVertices_mem + TAG_t[::1] new_boundaryFaceTags = new_boundaryFaceTags_mem + TAG_t[::1] new_boundaryEdgeTags = new_boundaryEdgeTags_mem + TAG_t[::1] new_boundaryVertexTags = new_boundaryVertexTags_mem + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + INDEX_t[::1] f = uninitialized((3), dtype=INDEX) + + for i in range(boundaryEdges.shape[0]): + e = boundaryEdges[i, :] + t = boundaryEdgeTags[i] + nv = 
lookup[encode_edge(e)] + new_boundaryEdges[2*i, 0] = e[0] + new_boundaryEdges[2*i, 1] = nv + new_boundaryEdges[2*i+1, 0] = e[1] + new_boundaryEdges[2*i+1, 1] = nv + new_boundaryEdgeTags[2*i] = t + new_boundaryEdgeTags[2*i+1] = t + new_boundaryVertices[i] = nv + new_boundaryVertexTags[i] = t + I = 2*boundaryEdges.shape[0] + for i in range(boundaryFaces.shape[0]): + f = boundaryFaces[i, :] + t = boundaryFaceTags[i] + e[0] = f[0] + e[1] = f[1] + sortEdge(e[0], e[1], e) + nv01 = lookup[encode_edge(e)] + e[0] = f[1] + e[1] = f[2] + sortEdge(e[0], e[1], e) + nv12 = lookup[encode_edge(e)] + e[0] = f[0] + e[1] = f[2] + sortEdge(e[0], e[1], e) + nv02 = lookup[encode_edge(e)] + new_boundaryFaces[4*i, 0] = f[0] + new_boundaryFaces[4*i, 1] = nv01 + new_boundaryFaces[4*i, 2] = nv02 + new_boundaryFaces[4*i+1, 0] = f[1] + new_boundaryFaces[4*i+1, 1] = nv12 + new_boundaryFaces[4*i+1, 2] = nv01 + new_boundaryFaces[4*i+2, 0] = f[2] + new_boundaryFaces[4*i+2, 1] = nv02 + new_boundaryFaces[4*i+2, 2] = nv12 + new_boundaryFaces[4*i+3, 0] = nv01 + new_boundaryFaces[4*i+3, 1] = nv12 + new_boundaryFaces[4*i+3, 2] = nv02 + new_boundaryFaceTags[4*i:4*i+4] = t + nv0, nv1 = (nv01, nv12) if nv01 < nv12 else (nv12, nv01) + new_boundaryEdges[I+3*i, 0] = nv0 + new_boundaryEdges[I+3*i, 1] = nv1 + nv0, nv1 = (nv12, nv02) if nv12 < nv02 else (nv02, nv12) + new_boundaryEdges[I+3*i+1, 0] = nv0 + new_boundaryEdges[I+3*i+1, 1] = nv1 + nv0, nv1 = (nv02, nv01) if nv02 < nv01 else (nv01, nv02) + new_boundaryEdges[I+3*i+2, 0] = nv0 + new_boundaryEdges[I+3*i+2, 1] = nv1 + new_boundaryEdgeTags[I+3*i:I+3*i+3] = t + + return (new_boundaryVertices_mem, new_boundaryVertexTags_mem, + new_boundaryEdges_mem, new_boundaryEdgeTags_mem, + new_boundaryFaces_mem, new_boundaryFaceTags_mem) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef void vectorProduct(const REAL_t[::1] v, const REAL_t[::1] w, + REAL_t[::1] z): + z[0] = v[1]*w[2]-v[2]*w[1] + z[1] = v[2]*w[0]-v[0]*w[2] + z[2] = v[0]*w[1]-v[1]*w[0] + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume0D(REAL_t[:, ::1] span): + return 1. 
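+# The volume* helpers below compute d-dimensional simplex volumes from spanning
+# vectors via the usual determinant/cross-product formulas, e.g. in 3D
+# vol = |span[2] . (span[0] x span[1])| / 6, so the unit reference tetrahedron
+# spanned by (1,0,0), (0,1,0), (0,0,1) yields 1./6.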
+ + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume1D(REAL_t[::1] v0): + cdef REAL_t s = 0.0 + for i in range(v0.shape[0]): + s += v0[i]**2 + return sqrt(s) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume1Dnew(REAL_t[:, ::1] span): + return abs(span[0, 0]) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume1D_in_2D(REAL_t[:, ::1] span): + return sqrt(span[0, 0]**2+span[0, 1]**2) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume2D(REAL_t[::1] v0, REAL_t[::1] v1): + return abs(v0[0]*v1[1]-v1[0]*v0[1])*0.5 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume2D_in_3D(REAL_t[::1] v0, REAL_t[::1] v1): + cdef: + REAL_t temp_mem[3] + REAL_t[::1] temp = temp_mem + vectorProduct(v0, v1, temp) + return sqrt(mydot(temp, temp))*0.5 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume2D_in_3Dnew(REAL_t[:, ::1] span): + cdef: + REAL_t temp_mem[3] + REAL_t[::1] temp = temp_mem + vectorProduct(span[0, :], span[1, :], temp) + return sqrt(mydot(temp, temp))*0.5 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume2Dnew(REAL_t[:, ::1] span): + return abs(span[0, 0]*span[1, 1]-span[0, 1]*span[1, 0])*0.5 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume3D(REAL_t[:, ::1] span): + cdef: + REAL_t temp_mem[3] + REAL_t[::1] temp = temp_mem + vectorProduct(span[0, :], span[1, :], temp) + return abs(mydot(span[2, :], temp))/6.0 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +cdef REAL_t volume3Dnew(REAL_t[:, ::1] span, REAL_t[::1] temp): + vectorProduct(span[0, :], span[1, :], temp) + return abs(mydot(span[2, :], temp))/6.0 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume0Dsimplex(REAL_t[:, ::1] simplex): + return 1. + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume1Dsimplex(REAL_t[:, ::1] simplex): + return abs(simplex[1, 0]-simplex[0, 0]) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume2Dsimplex(REAL_t[:, ::1] simplex): + cdef: + REAL_t v00 = simplex[1, 0]-simplex[0, 0] + REAL_t v01 = simplex[1, 1]-simplex[0, 1] + REAL_t v10 = simplex[2, 0]-simplex[0, 0] + REAL_t v11 = simplex[2, 1]-simplex[0, 1] + return abs(v00*v11-v10*v01)*0.5 + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef REAL_t volume1Din2Dsimplex(REAL_t[:, ::1] simplex): + cdef: + REAL_t v0 = simplex[1, 0]-simplex[0, 0] + REAL_t v1 = simplex[1, 1]-simplex[0, 1] + return sqrt(v0*v0 + v1*v1) + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +@cython.cdivision(True) +def hdeltaCy(meshBase mesh): + cdef: + INDEX_t space_dim = mesh.dim + INDEX_t dim = mesh.manifold_dim + INDEX_t nc = mesh.num_cells + INDEX_t num_vertices = dim+1 + INDEX_t i, j + REAL_t delta = 0, h = 0, hl, he, hS, vol, volS, hmin = 100. 
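+        # (the six temp rows below hold the tetrahedron edge vectors v01..v32;
+        # the lower-dimensional branches only use a subset of them)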
+ REAL_t[:, ::1] local_vertices = uninitialized((num_vertices, space_dim), + dtype=REAL) + INDEX_t num_edges = 6 + REAL_t[:, ::1] temp = uninitialized((num_edges, dim), dtype=REAL) + REAL_t[::1] v01 = temp[0, :] + REAL_t[::1] v02 = temp[1, :] + REAL_t[::1] v03 = temp[2, :] + REAL_t[::1] v12 = temp[3, :] + REAL_t[::1] v13 = temp[4, :] + REAL_t[::1] v32 = temp[5, :] + REAL_t[:, ::1] gradient = uninitialized((num_vertices, space_dim), dtype=REAL) + REAL_t totalVolume = 0.0 + REAL_t[::1] volVec = uninitialized((nc), dtype=REAL) + REAL_t[::1] hVec = uninitialized((nc), dtype=REAL) + if dim == 1 and space_dim == 1: + for i in range(nc): + # Get local vertices + mesh.getSimplex(i, local_vertices) + hl = abs(local_vertices[1, 0]-local_vertices[0, 0]) + h = max(h, hl) + hmin = min(hmin, hl) + totalVolume += hl + volVec[i] = hl + hVec[i] = hl + return h, 1, totalVolume, hmin, volVec, hVec + elif dim == 1 and space_dim == 2: + for i in range(nc): + # Get local vertices + mesh.getSimplex(i, local_vertices) + hl = sqrt((local_vertices[1, 0]-local_vertices[0, 0])**2+(local_vertices[1, 1]-local_vertices[0, 1])**2) + h = max(h, hl) + hmin = min(hmin, hl) + totalVolume += hl + volVec[i] = hl + hVec[i] = hl + return h, 1, totalVolume, hmin, volVec, hVec + elif dim == 2 and space_dim == 2: + for i in range(nc): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Calculate gradient matrix + for j in range(space_dim): + gradient[0, j] = local_vertices[2, j]-local_vertices[1, j] + gradient[1, j] = local_vertices[2, j]-local_vertices[0, j] + gradient[2, j] = local_vertices[1, j]-local_vertices[0, j] + vol = volume2Dnew(gradient[1:, :]) + hl = 0.0 + volS = 0.0 + for j in range(3): + hS = sqrt(mydot(gradient[j, :], gradient[j, :])) + hmin = min(hmin, hS) + hl = max(hl, hS) + volS += hS + delta = max(delta, hl*volS/4.0/vol) + h = max(h, hl) + totalVolume += vol + volVec[i] = vol + hVec[i] = hl + return h, delta, totalVolume, hmin, volVec, hVec + elif dim == 2 and space_dim == 3: + for i in range(nc): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Calculate gradient matrix + for j in range(space_dim): + gradient[0, j] = local_vertices[2, j]-local_vertices[1, j] + gradient[1, j] = local_vertices[2, j]-local_vertices[0, j] + gradient[2, j] = local_vertices[1, j]-local_vertices[0, j] + vol = volume2D_in_3D(gradient[1, :], gradient[2, :]) + # assert vol > 0., "Cell {} volume: 0.>={}".format(i, vol) + hl = 0.0 + volS = 0.0 + for j in range(3): + hS = sqrt(mydot(gradient[j, :], gradient[j, :])) + hmin = min(hmin, hS) + hl = max(hl, hS) + volS += hS + delta = max(delta, hl*volS/4.0/vol) + h = max(h, hl) + totalVolume += vol + volVec[i] = vol + hVec[i] = hl + return h, delta, totalVolume, hmin, volVec, hVec + elif dim == 3 and space_dim == 3: + for i in range(nc): + # Get local vertices + mesh.getSimplex(i, local_vertices) + + # Calculate gradient matrix + for j in range(space_dim): + v01[j] = local_vertices[1, j]-local_vertices[0, j] + v02[j] = local_vertices[2, j]-local_vertices[0, j] + v03[j] = local_vertices[3, j]-local_vertices[0, j] + v12[j] = local_vertices[2, j]-local_vertices[1, j] + v13[j] = local_vertices[3, j]-local_vertices[1, j] + v32[j] = local_vertices[2, j]-local_vertices[3, j] + + vol = volume3D(temp[:3, :]) + # assert vol > 0., "Cell {} volume: 0.>={}".format(i, vol) + hl = 0.0 + for j in range(6): + he = mydot(temp[j, :], temp[j, :]) + hmin = min(hmin, he) + hl = max(hl, he) + hl = sqrt(hl) + volS = volume2D_in_3D(v01, v02)+volume2D_in_3D(v01, v03)+volume2D_in_3D(v02, 
v03)+volume2D_in_3D(v12, v13) + delta = max(delta, hl*volS/6.0/vol) + h = max(h, hl) + totalVolume += vol + volVec[i] = vol + hVec[i] = hl + return h, delta, totalVolume, hmin, volVec, hVec + else: + return None + + +def boundaryVertices(INDEX_t[:, ::1] cells): + cdef: + INDEX_t nc = cells.shape[0] + INDEX_t c0, c1 + set bvertices = set() + assert cells.shape[1] == 2 + + for i in range(nc): + c0, c1 = cells[i, 0], cells[i, 1] + try: + bvertices.remove(c0) + except KeyError: + bvertices.add(c0) + try: + bvertices.remove(c1) + except KeyError: + bvertices.add(c1) + return np.array(list(bvertices), dtype=INDEX) + + +@cython.boundscheck(False) +@cython.initializedcheck(False) +@cython.wraparound(False) +def boundaryEdges(INDEX_t[:, ::1] cells, BOOL_t returnBoundaryCells=False): + cdef: + INDEX_t nc = cells.shape[0] + INDEX_t c0, c1, c2, cellNo, i, k + ENCODE_t hv + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + INDEX_t[:, ::1] bedges_mv + INDEX_t[::1] bcells_mv + dict bedges = dict() + BOOL_t orientation + + if not returnBoundaryCells: + for cellNo in range(nc): + c0, c1, c2 = cells[cellNo, 0], cells[cellNo, 1], cells[cellNo, 2] + sortEdge(c0, c1, e0) + sortEdge(c1, c2, e1) + sortEdge(c2, c0, e2) + for k in range(3): + hv = encode_edge(temp[k, :]) + try: + del bedges[hv] + except KeyError: + bedges[hv] = cells[cellNo, k] == temp[k, 0] + bedges_mv = uninitialized((len(bedges), 2), dtype=INDEX) + + i = 0 + for hv in bedges: + orientation = bedges[hv] + decode_edge(hv, e0) + if orientation: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[0], e0[1] + else: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[1], e0[0] + i += 1 + return np.array(bedges_mv, copy=False) + else: + for cellNo in range(nc): + c0, c1, c2 = cells[cellNo, 0], cells[cellNo, 1], cells[cellNo, 2] + sortEdge(c0, c1, e0) + sortEdge(c1, c2, e1) + sortEdge(c2, c0, e2) + for k in range(3): + hv = encode_edge(temp[k, :]) + try: + del bedges[hv] + except KeyError: + bedges[hv] = (cells[cellNo, k] == temp[k, 0], cellNo) + bedges_mv = uninitialized((len(bedges), 2), dtype=INDEX) + bcells_mv = uninitialized((len(bedges)), dtype=INDEX) + + i = 0 + for hv in bedges: + orientation, cellNo = bedges[hv] + bcells_mv[i] = cellNo + decode_edge(hv, e0) + if orientation: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[0], e0[1] + else: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[1], e0[0] + i += 1 + return np.array(bedges_mv, copy=False), np.array(bcells_mv, copy=False) + + +def boundaryVerticesFromBoundaryEdges(INDEX_t[:, ::1] bedges): + cdef: + set bvertices = set() + INDEX_t i, k + INDEX_t[::1] boundaryVertices + + for i in range(bedges.shape[0]): + bvertices.add(bedges[i, 0]) + bvertices.add(bedges[i, 1]) + boundaryVertices = uninitialized((len(bvertices)), dtype=INDEX) + k = 0 + for i in bvertices: + boundaryVertices[k] = i + k += 1 + assert k == len(bvertices) + return np.array(boundaryVertices, copy=False) + + +def boundaryFaces(INDEX_t[:, ::1] cells): + cdef: + INDEX_t num_cells = cells.shape[0], i, k, j + INDEX_t v0, v1, v2, v3 + INDEX_t[:, ::1] faces = uninitialized((4, 3), dtype=INDEX) + INDEX_t[::1] f012 = faces[0, :] + INDEX_t[::1] f013 = faces[1, :] + INDEX_t[::1] f023 = faces[2, :] + INDEX_t[::1] f123 = faces[3, :] + INDEX_t[::1] f = faces[3, :] + set bfaces = set() + np.ndarray[INDEX_t, ndim=2] bfaces_mem + INDEX_t[:, ::1] bfaces_mv + tuple hv + for i in range(num_cells): + v0, v1, v2, v3 = (cells[i, 0], cells[i, 1], + cells[i, 2], cells[i, 3]) + 
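+        # every interior face is visited exactly twice (once from each adjacent
+        # cell), so the add/remove toggling below leaves exactly the boundary
+        # faces in the set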
sortFace(v0, v1, v2, f012) + sortFace(v0, v1, v3, f013) + sortFace(v0, v2, v3, f023) + sortFace(v1, v2, v3, f123) + for k in range(4): + hv = encode_face(faces[k, :]) + try: + bfaces.remove(hv) + except KeyError: + bfaces.add(hv) + bfaces_mem = uninitialized((len(bfaces), 3), dtype=INDEX) + bfaces_mv = bfaces_mem + for i, hv in enumerate(bfaces): + decode_face(hv, f) + for j in range(3): + bfaces_mv[i, j] = f[j] + return bfaces_mem + + +def boundaryEdgesFromBoundaryFaces(INDEX_t[:, ::1] bfaces): + cdef: + INDEX_t nc = bfaces.shape[0] + INDEX_t c0, c1, c2, i, k + ENCODE_t hv + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + np.ndarray[INDEX_t, ndim=2] bedges_mem + INDEX_t[:, ::1] bedges_mv + + bedges = set() + for i in range(nc): + c0, c1, c2 = bfaces[i, 0], bfaces[i, 1], bfaces[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c0, c2, e1) + sortEdge(c1, c2, e2) + for k in range(3): + bedges.add(encode_edge(temp[k, :])) + bedges_mem = uninitialized((len(bedges), 2), dtype=INDEX) + bedges_mv = bedges_mem + + for i, hv in enumerate(bedges): + decode_edge(hv, e0) + bedges_mv[i, 0], bedges_mv[i, 1] = e0[0], e0[1] + return bedges_mem + + +cdef class faceVals: + def __init__(self, + INDEX_t num_dofs, + np.uint8_t initial_length=0, + np.uint8_t length_inc=3, + BOOL_t deleteHits=True): + cdef: + INDEX_t i + self.num_dofs = num_dofs + self.initial_length = initial_length + self.length_inc = length_inc + self.nnz = 0 + self.counts = np.zeros((num_dofs), dtype=np.uint8) + self.lengths = initial_length*np.ones((num_dofs), dtype=np.uint8) + self.indexL = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *)) + self.indexR = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *)) + self.vals = <INDEX_t **>malloc(num_dofs*sizeof(INDEX_t *)) + # reserve initial memory for array of variable column size + for i in range(num_dofs): + self.indexL[i] = <INDEX_t *>malloc(self.initial_length * + sizeof(INDEX_t)) + self.indexR[i] = <INDEX_t *>malloc(self.initial_length * + sizeof(INDEX_t)) + self.vals[i] = <INDEX_t *>malloc(self.initial_length * + sizeof(INDEX_t)) + self.deleteHits = deleteHits + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef inline INDEX_t enterValue(self, const INDEX_t[::1] f, INDEX_t val): + cdef: + INDEX_t m, n, I = f[0], J = f[1], K = f[2] + for m in range(self.counts[I]): + if self.indexL[I][m] == J and self.indexR[I][m] == K: # J, K is already present + val = self.vals[I][m] + if self.deleteHits: + for n in range(m+1, self.counts[I]): + self.indexL[I][n-1] = self.indexL[I][n] + self.indexR[I][n-1] = self.indexR[I][n] + self.vals[I][n-1] = self.vals[I][n] + self.counts[I] -= 1 + self.nnz -= 1 + return val + else: + # J,K was not present + # Do we need more space? + if self.counts[I] == self.lengths[I]: + self.indexL[I] = <INDEX_t *>realloc(self.indexL[I], + (self.lengths[I] + + self.length_inc) * + sizeof(INDEX_t)) + self.indexR[I] = <INDEX_t *>realloc(self.indexR[I], + (self.lengths[I] + + self.length_inc) * + sizeof(INDEX_t)) + self.vals[I] = <INDEX_t *>realloc(self.vals[I], + (self.lengths[I] + + self.length_inc) * + sizeof(INDEX_t)) + self.lengths[I] += self.length_inc + # where should we insert? 
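+            # rows are kept sorted lexicographically by (J, K): shift the tail
+            # one slot to the right and drop the new entry into the gap, or
+            # append it at the end if no larger entry exists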
+ for m in range(self.counts[I]): + if self.indexL[I][m] > J or (self.indexL[I][m] == J and self.indexR[I][m] > K): + # move previous indices out of the way + for n in range(self.counts[I], m, -1): + self.indexL[I][n] = self.indexL[I][n-1] + self.indexR[I][n] = self.indexR[I][n-1] + self.vals[I][n] = self.vals[I][n-1] + # insert in empty spot + self.indexL[I][m] = J + self.indexR[I][m] = K + self.vals[I][m] = val + break + else: + self.indexL[I][self.counts[I]] = J + self.indexR[I][self.counts[I]] = K + self.vals[I][self.counts[I]] = val + self.counts[I] += 1 + self.nnz += 1 + return val + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef inline INDEX_t getValue(self, const INDEX_t[::1] f): + cdef: + INDEX_t m + for m in range(self.counts[f[0]]): + if self.indexL[f[0]][m] == f[1] and self.indexR[f[0]][m] == f[2]: # J is already present + return self.vals[f[0]][m] + + def __getitem__(self, INDEX_t[::1] face): + return self.getValue(face) + + def __dealloc__(self): + cdef: + INDEX_t i + for i in range(self.num_dofs): + free(self.indexL[i]) + free(self.indexR[i]) + free(self.vals[i]) + free(self.indexL) + free(self.indexR) + free(self.vals) + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef void startIter(self): + self.i = 0 + while self.i < self.num_dofs and self.counts[self.i] == 0: + self.i += 1 + self.jj = 0 + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cdef BOOL_t next(self, INDEX_t[::1] f, INDEX_t * val): + cdef: + INDEX_t i = self.i, jj = self.jj, j, k + if i < self.num_dofs: + j = self.indexL[i][jj] + k = self.indexR[i][jj] + val[0] = self.vals[i][jj] + else: + return False + f[0] = i + f[1] = j + f[2] = k + if jj < self.counts[i]-1: + self.jj += 1 + else: + self.jj = 0 + i += 1 + while i < self.num_dofs and self.counts[i] == 0: + i += 1 + self.i = i + return True + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline BOOL_t inCell1D(REAL_t[:, ::1] simplex, REAL_t[::1] x): + cdef: + REAL_t bary_mem[2] + REAL_t[::1] bary = bary_mem + getBarycentricCoords1D(simplex, x, bary) + return bary[0] >= 0 and bary[1] >= 0 + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline BOOL_t inCell2D(REAL_t[:, ::1] simplex, REAL_t[::1] x): + cdef: + REAL_t bary_mem[3] + REAL_t[::1] bary = bary_mem + getBarycentricCoords2D(simplex, x, bary) + return bary[0] >= 0 and bary[1] >= 0 and bary[2] >= 0 + + +cdef class cellFinder(object): + def __init__(self, meshBase mesh, INDEX_t numCandidates=-1): + cdef: + REAL_t[:, ::1] cellCenters = mesh.getCellCenters() + + from scipy.spatial import cKDTree + self.mesh = mesh + self.kd = (cKDTree(cellCenters), ) + self.simplex = uninitialized((mesh.dim+1, mesh.dim), dtype=REAL) + self.bary = uninitialized((mesh.dim+1), dtype=REAL) + if numCandidates <= 0: + if mesh.dim == 1: + numCandidates = 2 + else: + numCandidates = self.mesh.dim+2 + self.numCandidates = numCandidates + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findCell(self, REAL_t[::1] vertex): + cdef: + INDEX_t[::1] cellIdx + INDEX_t cellNo + cellIdx = self.kd[0].query(vertex, self.numCandidates)[1].astype(INDEX) + for cellNo in cellIdx: + if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary): + return cellNo + return -1 + # raise 
NotImplementedError('Could not find {}'.format(np.array(vertex))) + + +cdef class cellFinder2: + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __init__(self, meshBase mesh): + cdef: + INDEX_t L, j, k, cellNo, vertexNo, vertex + REAL_t h + intTuple t + REAL_t[:, ::1] cellCenters = mesh.getCellCenters() + self.key = uninitialized((mesh.dim), dtype=INDEX) + L = 1 + h = mesh.h + while L*h < 0.5: + L *= 2 + self.x_min = mesh.vertices_as_array.min(axis=0) + x_max = mesh.vertices_as_array.max(axis=0) + self.diamInv = uninitialized((mesh.dim), dtype=REAL) + for j in range(mesh.dim): + self.diamInv[j] = L / (x_max[j]-self.x_min[j]) / 1.01 + self.simplex = uninitialized((mesh.dim+1, mesh.dim), dtype=REAL) + self.bary = uninitialized((mesh.dim+1), dtype=REAL) + self.lookup = {} + for k in range(cellCenters.shape[0]): + for j in range(mesh.dim): + self.key[j] = <INDEX_t>((cellCenters[k, j]-self.x_min[j]) * self.diamInv[j]) + t = intTuple.create(self.key) + try: + self.lookup[t].add(k) + except KeyError: + self.lookup[t] = set([k]) + self.mesh = mesh + + self.v2c = {} + for cellNo in range(mesh.num_cells): + for vertexNo in range(mesh.dim+1): + vertex = mesh.cells[cellNo, vertexNo] + try: + self.v2c[vertex].add(cellNo) + except KeyError: + self.v2c[vertex] = set([cellNo]) + self.myKey = intTuple.createNonOwning(self.key) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findCell(self, REAL_t[::1] vertex): + cdef: + INDEX_t j, cellNo, vertexNo, v + set candidates, toCheck = set() + for j in range(self.mesh.dim): + self.key[j] = <INDEX_t>((vertex[j]-self.x_min[j]) * self.diamInv[j]) + try: + candidates = self.lookup[self.myKey] + except KeyError: + return -1 + + for cellNo in candidates: + if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary): + return cellNo + for cellNo in candidates: + for vertexNo in range(self.mesh.dim+1): + v = self.mesh.cells[cellNo, vertexNo] + toCheck |= self.v2c[v] + toCheck -= candidates + for cellNo in toCheck: + if self.mesh.vertexInCell(vertex, cellNo, self.simplex, self.bary): + return cellNo + return -1 + + def findCell_py(self, REAL_t[::1] vertex): + return self.findCell(vertex) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void getBarycentricCoords1D(REAL_t[:, ::1] simplex, REAL_t[::1] x, REAL_t[::1] bary): + cdef: + REAL_t vol + vol = simplex[0, 0]-simplex[1, 0] + bary[0] = (x[0]-simplex[1, 0])/vol + bary[1] = 1.-bary[0] + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef void getBarycentricCoords2D(REAL_t[:, ::1] simplex, REAL_t[::1] x, REAL_t[::1] bary): + cdef: + REAL_t vol + vol = ((simplex[0, 0]-simplex[1, 0])*(simplex[2, 1]-simplex[1, 1]) - + (simplex[0, 1]-simplex[1, 1])*(simplex[2, 0]-simplex[1, 0])) + bary[0] = ((x[0]-simplex[1, 0])*(simplex[2, 1]-simplex[1, 1]) - + (x[1]-simplex[1, 1])*(simplex[2, 0]-simplex[1, 0]))/vol + bary[1] = ((x[0]-simplex[2, 0])*(simplex[0, 1]-simplex[2, 1]) - + (x[1]-simplex[2, 1])*(simplex[0, 0]-simplex[2, 0]))/vol + bary[2] = 1. 
- bary[0] - bary[1] + + +def getSubmesh2(mesh, INDEX_t[::1] newCellIndices, INDEX_t num_cells=-1): + cdef: + INDEX_t[:, ::1] cells, newCells + INDEX_t j, k, i, m, dim + cells = mesh.cells + dim = mesh.dim + new_mesh = mesh.copy() + + if newCellIndices.shape[0] == cells.shape[0]: + if num_cells <= 0: + num_cells = 0 + for i in newCellIndices: + if i >= 0: + num_cells += 1 + newCells = uninitialized((num_cells, dim+1), dtype=INDEX) + j = 0 + k = 0 + for i in newCellIndices: + if i >= 0: + for m in range(dim+1): + newCells[i, m] = cells[k, m] + j += 1 + k += 1 + assert j == num_cells + else: + newCells = uninitialized((newCellIndices.shape[0], dim+1), dtype=INDEX) + for k, i in enumerate(newCellIndices): + for m in range(dim+1): + newCells[k, m] = cells[i, m] + new_mesh.cells = newCells + new_mesh.init() + new_mesh.removeUnusedVertices() + return new_mesh + + +def getSubmesh(meshBase mesh, INDEX_t[::1] selectedCells): + cdef: + INDEX_t manifold_dim = mesh.manifold_dim + INDEX_t[:, ::1] old_cells = mesh.cells + INDEX_t[:, ::1] new_cells = uninitialized((selectedCells.shape[0], mesh.manifold_dim+1), dtype=INDEX) + INDEX_t i, j, I + dict lookup + INDEX_t[::1] boundaryVertices = mesh.boundaryVertices + TAG_t[::1] boundaryVertexTags = mesh.boundaryVertexTags + INDEX_t[:, ::1] boundaryEdges + TAG_t[::1] boundaryEdgeTags + INDEX_t[:, ::1] boundaryFaces + TAG_t[::1] boundaryFaceTags + INDEX_t hv1 + ENCODE_t hv + INDEX_t e[2] + INDEX_t f[3] + meshBase new_mesh + + from . mesh import mesh1d, mesh2d, mesh3d + + for i in range(selectedCells.shape[0]): + I = selectedCells[i] + for j in range(manifold_dim+1): + new_cells[i, j] = old_cells[I, j] + if mesh.dim == 1: + new_mesh = mesh1d(mesh.vertices.copy(), new_cells) + elif mesh.dim == 2: + new_mesh = mesh2d(mesh.vertices.copy(), new_cells) + elif mesh.dim == 3: + new_mesh = mesh3d(mesh.vertices.copy(), new_cells) + else: + raise NotImplementedError() + + # copy boundary vertex tags + lookup = {} + for i in range(boundaryVertices.shape[0]): + I = boundaryVertices[i] + lookup[I] = boundaryVertexTags[i] + boundaryVertices = new_mesh.boundaryVertices + boundaryVertexTags = new_mesh.boundaryVertexTags + for i in range(boundaryVertices.shape[0]): + try: + boundaryVertexTags[i] = lookup.pop(boundaryVertices[i]) + except KeyError: + pass + new_mesh.boundaryVertexTags = np.array(boundaryVertexTags, copy=False, dtype=TAG) + + if mesh.dim >= 2: + boundaryEdges = mesh.boundaryEdges + boundaryEdgeTags = mesh.boundaryEdgeTags + # copy boundary edge tags + lookup = {} + for i in range(boundaryEdges.shape[0]): + sortEdge(boundaryEdges[i, 0], boundaryEdges[i, 1], e) + hv = encode_edge(e) + lookup[hv] = boundaryEdgeTags[i] + boundaryEdges = new_mesh.boundaryEdges + boundaryEdgeTags = new_mesh.boundaryEdgeTags + for i in range(boundaryEdges.shape[0]): + sortEdge(boundaryEdges[i, 0], boundaryEdges[i, 1], e) + hv = encode_edge(e) + try: + boundaryEdgeTags[i] = lookup.pop(hv) + except KeyError: + pass + new_mesh.boundaryEdgeTags = np.array(boundaryEdgeTags, copy=False, dtype=TAG) + + if mesh.dim >= 3: + boundaryFaces = mesh.boundaryFaces + boundaryFaceTags = mesh.boundaryFaceTags + # copy boundary face tags + lookup = {} + for i in range(boundaryFaces.shape[0]): + sortFace(boundaryFaces[i, 0], boundaryFaces[i, 1], boundaryFaces[i, 2], f) + hv1, hv = encode_face(f) + lookup[hv1, hv] = boundaryFaceTags[i] + boundaryFaces = new_mesh.boundaryFaces + boundaryFaceTags = new_mesh.boundaryFaceTags + for i in range(boundaryFaces.shape[0]): + sortFace(boundaryFaces[i, 0], 
boundaryFaces[i, 1], boundaryFaces[i, 2], f) + hv1, hv = encode_face(f) + try: + boundaryFaceTags[i] = lookup.pop((hv1, hv)) + except KeyError: + pass + new_mesh.boundaryFaceTags = np.array(boundaryFaceTags, copy=False, dtype=TAG) + + # TODO: same for faces + + new_mesh.removeUnusedVertices() + return new_mesh diff --git a/fem/PyNucleus_fem/meshOverlaps.pxd b/fem/PyNucleus_fem/meshOverlaps.pxd new file mode 100644 index 0000000..265fc45 --- /dev/null +++ b/fem/PyNucleus_fem/meshOverlaps.pxd @@ -0,0 +1,47 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t + + +cdef class sharedMesh: + cdef: + public INDEX_t mySubdomainNo, otherSubdomainNo, dim + public INDEX_t[:, ::1] vertices, edges, faces + public INDEX_t[::1] cells + + +cdef class meshInterface(sharedMesh): + pass + + +cdef class sharedMeshManager: + cdef: + public INDEX_t numSubdomains + public dict sharedMeshes + public list requests + public MPI.Comm comm + public dict _rank2subdomain + public dict _subdomain2rank + cdef inline INDEX_t rank2subdomain(self, INDEX_t rank) + cdef inline INDEX_t subdomain2rank(self, INDEX_t subdomainNo) + + +cdef class interfaceManager(sharedMeshManager): + pass + + +cdef class meshOverlap(sharedMesh): + pass + +cdef class overlapManager(sharedMeshManager): + pass diff --git a/fem/PyNucleus_fem/meshOverlaps.pyx b/fem/PyNucleus_fem/meshOverlaps.pyx new file mode 100644 index 0000000..eb8d1a1 --- /dev/null +++ b/fem/PyNucleus_fem/meshOverlaps.pyx @@ -0,0 +1,2188 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, TAG, BOOL +from PyNucleus_base import uninitialized, uninitialized_like +from PyNucleus_base.tupleDict cimport tupleDictINDEX +from . meshCy cimport (meshBase, + decode_edge, encode_edge, + encode_face, decode_face, + sortEdge, sortFace, + faceVals) +from . DoFMaps cimport DoFMap, P0_DoFMap, P1_DoFMap, P2_DoFMap +from . mesh import mesh1d, mesh2d, mesh3d +from . mesh import INTERIOR_NONOVERLAPPING, INTERIOR, NO_BOUNDARY +from . meshPartitioning import PartitionerException +from . algebraicOverlaps cimport (algebraicOverlapManager, + algebraicOverlap, + algebraicOverlapPersistent, + algebraicOverlapOneSidedPut, + algebraicOverlapOneSidedGet, + algebraicOverlapOneSidedPutLockAll) +from . boundaryLayerCy import boundaryLayer +from . 
simplexMapper cimport simplexMapper, simplexMapper2D, simplexMapper3D +from PyNucleus_base.linear_operators import (LinearOperator_wrapper, + diagonalOperator) +from copy import deepcopy +import numpy as np +cimport numpy as np +from numpy.linalg import norm +cimport cython + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI + +from warnings import warn + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def boundary1D(meshBase mesh): + cdef: + INDEX_t[:, ::1] cells = mesh.cells + INDEX_t nc = mesh.num_cells + INDEX_t i, j, k, m + np.ndarray[INDEX_t, ndim=2] bvertices_mem + INDEX_t[:, ::1] bvertices_mv + dict added_vertices + + added_vertices = dict() + for i in range(nc): + for j in range(2): + if cells[i, j] not in added_vertices: + added_vertices[cells[i, j]] = (i, j) + else: + del added_vertices[cells[i, j]] + bvertices_mem = uninitialized((len(added_vertices), 2), dtype=INDEX) + bvertices_mv = bvertices_mem + m = 0 + for k, (i, j) in added_vertices.items(): + bvertices_mv[m, 0] = i + bvertices_mv[m, 1] = j + m += 1 + return bvertices_mem + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def boundary2D(meshBase mesh, BOOL_t assumeConnected=True): + cdef: + simplexMapper sM = mesh.simplexMapper + INDEX_t[:, ::1] cells = mesh.cells + INDEX_t nc = mesh.num_cells + INDEX_t i = 0, k, l + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + np.ndarray[INDEX_t, ndim=2] bvertices_mem + np.ndarray[INDEX_t, ndim=2] bedges_mem + INDEX_t[:, ::1] bvertices_mv + INDEX_t[:, ::1] bedges_mv + tupleDictINDEX eV = tupleDictINDEX(mesh.num_vertices, deleteHits=True) + set added_vertices + + for i in range(nc): + sM.startLoopOverCellEdges(cells[i, :]) + while sM.loopOverCellEdges(e): + eV.enterValue(e, i) + bedges_mem = uninitialized((eV.nnz, 2), dtype=INDEX) + bedges_mv = bedges_mem + + if assumeConnected: + bvertices_mem = uninitialized((eV.nnz, 2), dtype=INDEX) + bvertices_mv = bvertices_mem + + k = 0 + l = 0 + added_vertices = set() + eV.startIter() + while eV.next(e, &i): + bedges_mv[k, 0] = i + bedges_mv[k, 1] = sM.findEdgeInCell(i, e) + if e[0] not in added_vertices: + bvertices_mv[l, 0] = i + bvertices_mv[l, 1] = sM.findVertexInCell(i, e[0]) + added_vertices.add(e[0]) + l += 1 + if e[1] not in added_vertices: + bvertices_mv[l, 0] = i + bvertices_mv[l, 1] = sM.findVertexInCell(i, e[1]) + added_vertices.add(e[1]) + l += 1 + k += 1 + if not l == eV.nnz: + raise PartitionerException('Domain with a hole') + else: + k = 0 + l = 0 + added_vertices = set() + eV.startIter() + while eV.next(e, &i): + bedges_mv[k, 0] = i + bedges_mv[k, 1] = sM.findEdgeInCell(i, e) + if e[0] not in added_vertices: + added_vertices.add(e[0]) + l += 1 + if e[1] not in added_vertices: + added_vertices.add(e[1]) + l += 1 + k += 1 + + bvertices_mem = uninitialized((l, 2), dtype=INDEX) + bvertices_mv = bvertices_mem + + l = 0 + added_vertices = set() + eV.startIter() + while eV.next(e, &i): + if e[0] not in added_vertices: + bvertices_mv[l, 0] = i + bvertices_mv[l, 1] = sM.findVertexInCell(i, e[0]) + added_vertices.add(e[0]) + l += 1 + if e[1] not in added_vertices: + bvertices_mv[l, 0] = i + bvertices_mv[l, 1] = sM.findVertexInCell(i, e[1]) + added_vertices.add(e[1]) + l += 1 + return bvertices_mem, bedges_mem + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def boundary3D(meshBase mesh, BOOL_t assumeConnected=True): + cdef: + simplexMapper3D sM = 
mesh.simplexMapper + INDEX_t[:, ::1] cells = mesh.cells + INDEX_t nc = mesh.num_cells + INDEX_t cellNo = 0, vertexNo, k, l + INDEX_t[::1] e = uninitialized((2), dtype=INDEX) + INDEX_t[::1] f = uninitialized((3), dtype=INDEX) + np.ndarray[INDEX_t, ndim=2] bvertices_mem + np.ndarray[INDEX_t, ndim=2] bedges_mem + np.ndarray[INDEX_t, ndim=2] bfaces_mem + INDEX_t[:, ::1] bvertices_mv + INDEX_t[:, ::1] bedges_mv + INDEX_t[:, ::1] bfaces_mv + tupleDictINDEX eV = tupleDictINDEX(mesh.num_vertices, deleteHits=False) + faceVals fV = faceVals(mesh.num_vertices, deleteHits=True) + set added_vertices + + # Get all boundary faces + for cellNo in range(nc): + sM.startLoopOverCellFaces(cells[cellNo, :]) + while sM.loopOverCellFaces(f): + fV.enterValue(f, cellNo) + + # Get all boundary edges + fV.startIter() + while fV.next(f, &cellNo): + sM.startLoopOverFaceEdges(f) + while sM.loopOverFaceEdges(e): + eV.enterValue(e, cellNo) + + bfaces_mem = uninitialized((fV.nnz, 2), dtype=INDEX) + bfaces_mv = bfaces_mem + bedges_mem = uninitialized((eV.nnz, 2), dtype=INDEX) + bedges_mv = bedges_mem + + k = 0 + l = 0 + added_vertices = set() + fV.startIter() + while fV.next(f, &cellNo): + bfaces_mv[k, 0] = cellNo + bfaces_mv[k, 1] = sM.findFaceInCell(cellNo, f) + for vertexNo in f: + if vertexNo not in added_vertices: + added_vertices.add(vertexNo) + l += 1 + k += 1 + + bvertices_mem = uninitialized((l, 2), dtype=INDEX) + bvertices_mv = bvertices_mem + + l = 0 + added_vertices = set() + fV.startIter() + while fV.next(f, &cellNo): + for vertexNo in f: + if vertexNo not in added_vertices: + bvertices_mv[l, 0] = cellNo + bvertices_mv[l, 1] = sM.findVertexInCell(cellNo, vertexNo) + added_vertices.add(vertexNo) + l += 1 + + if assumeConnected and not l == 2+eV.nnz-fV.nnz: + warn('Domain with a hole') + + k = 0 + eV.startIter() + while eV.next(e, &cellNo): + bedges_mv[k, 0] = cellNo + bedges_mv[k, 1] = sM.findEdgeInCell(cellNo, e) + k += 1 + return bvertices_mem, bedges_mem, bfaces_mem + + +cdef class dofChecker: + cdef void add(self, INDEX_t dof): + pass + + cdef np.ndarray[INDEX_t, ndim=1] getDoFs(self): + pass + + +cdef class dofCheckerSet(dofChecker): + cdef: + set dofs + list orderedDoFs + + def __init__(self): + self.dofs = set() + self.orderedDoFs = list() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void add(self, INDEX_t dof): + if dof >= 0 and not dof in self.dofs: + self.dofs.add(dof) + self.orderedDoFs.append(dof) + + cdef np.ndarray[INDEX_t, ndim=1] getDoFs(self): + return np.array(self.orderedDoFs, dtype=INDEX) + + +cdef class dofCheckerArray(dofChecker): + cdef: + BOOL_t[::1] dofs + list orderedDoFs + + def __init__(self, INDEX_t num_dofs): + self.dofs = np.zeros((num_dofs), dtype=BOOL) + self.orderedDoFs = list() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void add(self, INDEX_t dof): + if dof >= 0 and not self.dofs[dof]: + self.dofs[dof] = True + self.orderedDoFs.append(dof) + + cdef np.ndarray[INDEX_t, ndim=1] getDoFs(self): + return np.array(self.orderedDoFs, dtype=INDEX) + + +cdef class sharedMesh: + def __init__(self, + INDEX_t[:, ::1] vertices, + INDEX_t[:, ::1] edges, + INDEX_t[:, ::1] faces, + INDEX_t[::1] cells, + INDEX_t mySubdomainNo, + INDEX_t otherSubdomainNo, + INDEX_t dim): + self.vertices = vertices + self.edges = edges + self.faces = faces + self.cells = cells + self.mySubdomainNo = mySubdomainNo + self.otherSubdomainNo = otherSubdomainNo + self.dim = dim + + def get_num_vertices(self): 
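+        # each row of self.vertices is a (cellNo, localVertexNo) reference into
+        # the subdomain mesh, so the row count equals the number of shared vertices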
+ return self.vertices.shape[0] + + def get_num_edges(self): + return self.edges.shape[0] + + def get_num_faces(self): + return self.faces.shape[0] + + def get_num_cells(self): + return self.cells.shape[0] + + num_vertices = property(fget=get_num_vertices) + num_edges = property(fget=get_num_edges) + num_faces = property(fget=get_num_faces) + num_cells = property(fget=get_num_cells) + + def __repr__(self): + return ('Mesh interface of domain {} with {}: ' + + '{} vertices, {} edges, {} faces, {} cells').format(self.mySubdomainNo, + self.otherSubdomainNo, + self.num_vertices, + self.num_edges, + self.num_faces, + self.num_cells) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + def refine(self, mesh=None): + cdef: + INDEX_t[:, ::1] cells + simplexMapper3D sM + INDEX_t i, j, k, cellNo, edgeNo, faceNo, cellNo1, cellNo2, cellNo3, order + INDEX_t C0, C1, C2 + INDEX_t subcellsPerRef + INDEX_t middleCellNo, middleFaceNo, middleOrder + INDEX_t[:, ::1] edges + INDEX_t[:, ::1] faces + INDEX_t[::1] face = uninitialized((3), dtype=INDEX) + INDEX_t[::1] Face = uninitialized((3), dtype=INDEX) + INDEX_t[::1] perm = uninitialized((3), dtype=INDEX) + INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX) + + if self.dim == 1: + subcellsPerRef = 2 + elif self.dim == 2: + subcellsPerRef = 4 + elif self.dim == 3: + subcellsPerRef = 8 + else: + raise NotImplementedError() + new_cells = uninitialized((subcellsPerRef*self.num_cells), dtype=INDEX) + for i in range(self.num_cells): + for k in range(subcellsPerRef): + new_cells[subcellsPerRef*i+k] = subcellsPerRef*self.cells[i]+k + self.cells = new_cells + + if self.dim == 1: + for i in range(self.num_vertices): + self.vertices[i, 0] = 2*self.vertices[i, 0] + self.vertices[i, 1] + elif self.dim == 2: + for i in range(self.num_vertices): + self.vertices[i, 0] = 4*self.vertices[i, 0] + self.vertices[i, 1] + self.vertices[i, 1] = 0 + edges = uninitialized((2*self.num_edges, 3), dtype=INDEX) + for i in range(self.num_edges): + cellNo = self.edges[i, 0] + edgeNo = self.edges[i, 1] + order = self.edges[i, 2] + if order == 0: + edges[2*i, 0] = 4*cellNo + edgeNo + edges[2*i, 1] = 0 + edges[2*i, 2] = order + edges[2*i+1, 0] = 4*cellNo + (edgeNo+1) % 3 + edges[2*i+1, 1] = 2 + edges[2*i+1, 2] = order + else: + edges[2*i+1, 0] = 4*cellNo + edgeNo + edges[2*i+1, 1] = 0 + edges[2*i+1, 2] = order + edges[2*i, 0] = 4*cellNo + (edgeNo+1) % 3 + edges[2*i, 1] = 2 + edges[2*i, 2] = order + self.edges = edges + elif self.dim == 3: + sM = mesh.simplexMapper + if self.num_vertices+self.num_edges+self.num_faces == 0: + return + cells = mesh.cells + for i in range(self.num_vertices): + self.vertices[i, 0] = 8*self.vertices[i, 0] + self.vertices[i, 1] + edges = uninitialized((2*self.num_edges, 3), + dtype=INDEX) + for i in range(self.num_edges): + cellNo = self.edges[i, 0] + edgeNo = self.edges[i, 1] + order = self.edges[i, 2] + if order == 0: + if edgeNo < 3: + edges[2*i, 0] = 8*cellNo + edgeNo + edges[2*i, 1] = edgeNo + edges[2*i+1, 0] = 8*cellNo + (edgeNo+1)%3 + edges[2*i+1, 1] = edgeNo + else: + edges[2*i, 0] = 8*cellNo + edgeNo-3 + edges[2*i, 1] = edgeNo + edges[2*i+1, 0] = 8*cellNo + 3 + edges[2*i+1, 1] = edgeNo + else: + if edgeNo < 3: + edges[2*i+1, 0] = 8*cellNo + edgeNo + edges[2*i+1, 1] = edgeNo + edges[2*i, 0] = 8*cellNo + (edgeNo+1)%3 + edges[2*i, 1] = edgeNo + else: + edges[2*i+1, 0] = 8*cellNo + edgeNo-3 + edges[2*i+1, 1] = edgeNo + edges[2*i, 0] = 8*cellNo + 3 + edges[2*i, 1] = edgeNo + 
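+                        # both half-edges inherit the parent edge's orientation flag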
edges[2*i:2*i+2, 2] = order + faces = uninitialized((4*self.num_faces, 3), dtype=INDEX) + + for i in range(self.num_faces): + cellNo = self.faces[i, 0] + faceNo = self.faces[i, 1] + order = self.faces[i, 2] + # find cellNo and faceNo for middle face + sM.getFaceVerticesLocal(faceNo, order, faceVertexIndices) + cellNo1, cellNo2, cellNo3 = faceVertexIndices[0], faceVertexIndices[1], faceVertexIndices[2] + if faceNo == 0: + C0, C1, C2 = cells[8*cellNo, 2], cells[8*cellNo+1, 2], cells[8*cellNo, 1] + elif faceNo == 1: + C0, C1, C2 = cells[8*cellNo, 1], cells[8*cellNo+1, 3], cells[8*cellNo, 3] + elif faceNo == 2: + C0, C1, C2 = cells[8*cellNo+1, 2], cells[8*cellNo+2, 3], cells[8*cellNo+1, 3] + else: + C0, C1, C2 = cells[8*cellNo, 2], cells[8*cellNo, 3], cells[8*cellNo+2, 3] + faceSet = set([C0, C1, C2]) + sortFace(C0, C1, C2, face) + for middleCellNo in range(8*cellNo+4, 8*cellNo+8): + if faceSet <= set(cells[middleCellNo, :]): + middleFaceNo = sM.findFaceInCell(middleCellNo, face) + break + else: + raise Exception() + + # face without rotation + sM.getFaceVerticesGlobal(middleCellNo, middleFaceNo, 0, face) + + # face with rotation + Face[0], Face[1], Face[2] = ((set(cells[8*cellNo+cellNo1, :]) & set(cells[8*cellNo+cellNo2, :])).pop(), + (set(cells[8*cellNo+cellNo2, :]) & set(cells[8*cellNo+cellNo3, :])).pop(), + (set(cells[8*cellNo+cellNo3, :]) & set(cells[8*cellNo+cellNo1, :])).pop()) + + for j in range(3): + for k in range(3): + if face[k] == Face[j]: + perm[j] = k + break + + if perm[0] == 0 and perm[1] == 1 and perm[2] == 2: + middleOrder = 0 + elif perm[0] == 1 and perm[1] == 2 and perm[2] == 0: + middleOrder = 1 + elif perm[0] == 2 and perm[1] == 0 and perm[2] == 1: + middleOrder = 2 + elif perm[0] == 1 and perm[1] == 0 and perm[2] == 2: + middleOrder = -1 + elif perm[0] == 0 and perm[1] == 2 and perm[2] == 1: + middleOrder = -2 + else: + middleOrder = -3 + + faces[4*i, 0] = 8*cellNo+cellNo1 + faces[4*i, 1] = faceNo + faces[4*i+1, 0] = 8*cellNo+cellNo2 + faces[4*i+1, 1] = faceNo + faces[4*i+2, 0] = 8*cellNo+cellNo3 + faces[4*i+2, 1] = faceNo + faces[4*i:4*i+3, 2] = order + faces[4*i+3, 0] = middleCellNo + faces[4*i+3, 1] = middleFaceNo + faces[4*i+3, 2] = middleOrder + self.edges = edges + self.faces = faces + else: + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getDoFs(self, meshBase mesh, DoFMap dm, comm, overlapType='standard', INDEX_t numSharedVecs=1, + BOOL_t allowInteriorBoundary=False): + cdef: + simplexMapper sM + INDEX_t i, j, k, dof, dofOverlap, cellNo, cellNoOverlap, vertexNo, edgeNo, faceNo, edgeOrder, faceOrder + INDEX_t vertices_per_element, edges_per_element, edgeOffset, faceOffset + INDEX_t dofs_per_vertex, dofs_per_edge, dofs_per_face, dofs_per_cell + INDEX_t[::1] edgeVertexIndices = uninitialized((2), dtype=INDEX) + INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX) + INDEX_t[::1] faceEdgeIndices = uninitialized((3), dtype=INDEX) + INDEX_t[::1] faceEdgeOrders = uninitialized((3), dtype=INDEX) + DoFMap overlapDM + dofChecker dofCheck + INDEX_t[::1] overlapCells = self.cells + INDEX_t[::1] dofs + + dofs_per_vertex = dm.dofs_per_vertex + dofs_per_edge = dm.dofs_per_edge + dofs_per_face = dm.dofs_per_face + dofs_per_cell = dm.dofs_per_cell + dofs_per_element = dm.dofs_per_element + + vertices_per_element = mesh.dim+1 + if self.dim == 1: + edges_per_element = 0 + elif self.dim == 2: + edges_per_element = 3 + elif self.dim == 3: + edges_per_element = 6 + else: + raise 
NotImplementedError() + sM = mesh.simplexMapper + + if self.num_cells > 0.01*mesh.num_cells: + dofCheck = dofCheckerArray(dm.num_dofs) + else: + dofCheck = dofCheckerSet() + if dofs_per_vertex > 0: + # DoFs associated with cross vertices + for i in range(self.num_vertices): + cellNo = self.vertices[i, 0] + vertexNo = self.vertices[i, 1] + for k in range(dofs_per_vertex): + dof = dm.cell2dof(cellNo, dofs_per_vertex*vertexNo+k) + dofCheck.add(dof) + if dofs_per_vertex+dofs_per_edge > 0: + edgeOffset = dofs_per_vertex*vertices_per_element + # DoFs associated with edges and vertices on these edges + for i in range(self.num_edges): + cellNo = self.edges[i, 0] + edgeNo = self.edges[i, 1] + edgeOrder = self.edges[i, 2] + # dofs on vertices on shared edges + sM.getEdgeVerticesLocal(edgeNo, edgeOrder, edgeVertexIndices) + for j in range(2): + for k in range(dofs_per_vertex): + dof = dm.cell2dof(cellNo, dofs_per_vertex*edgeVertexIndices[j]+k) + dofCheck.add(dof) + # dofs on shared edges + if edgeOrder == 0: + for k in range(dofs_per_edge): + dof = dm.cell2dof(cellNo, + edgeOffset + + dofs_per_edge*edgeNo+k) + dofCheck.add(dof) + else: + for k in range(dofs_per_edge-1, -1, -1): + dof = dm.cell2dof(cellNo, + edgeOffset + + dofs_per_edge*edgeNo+k) + dofCheck.add(dof) + if dofs_per_vertex+dofs_per_edge+dofs_per_face > 0: + # DoFs associated with faces and edges and vertices on these faces + assert dofs_per_face <= 1 + edgeOffset = dofs_per_vertex*vertices_per_element + faceOffset = dofs_per_vertex*vertices_per_element + edges_per_element*dofs_per_edge + for i in range(self.num_faces): + cellNo = self.faces[i, 0] + faceNo = self.faces[i, 1] + faceOrder = self.faces[i, 2] + # dofs on shared vertices + sM.getFaceVerticesLocal(faceNo, faceOrder, faceVertexIndices) + for j in range(3): + vertexNo = faceVertexIndices[j] + for k in range(dofs_per_vertex): + dof = dm.cell2dof(cellNo, dofs_per_vertex*vertexNo+k) + dofCheck.add(dof) + # dofs on shared edges + sM.getFaceEdgesLocal(faceNo, faceOrder, faceEdgeIndices, faceEdgeOrders) + for j in range(3): + edgeNo = faceEdgeIndices[j] + edgeOrder = faceEdgeOrders[j] + if edgeOrder == 0: + for k in range(dofs_per_edge): + dof = dm.cell2dof(cellNo, + edgeOffset + + dofs_per_edge*edgeNo+k) + dofCheck.add(dof) + else: + for k in range(dofs_per_edge-1, -1, -1): + dof = dm.cell2dof(cellNo, + edgeOffset + + dofs_per_edge*edgeNo+k) + dofCheck.add(dof) + for k in range(dofs_per_face): + dof = dm.cell2dof(cellNo, + faceOffset + + dofs_per_face*faceNo+k) + dofCheck.add(dof) + + if self.num_cells > 0: + if self.num_cells < mesh.num_cells: + # TODO: This is not efficient! + # build a fake mesh over the overlap + # Why do we need to do this? Can't we just use the given DoFMap? 
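+                # the vertex coordinates of this throwaway overlap mesh do not
+                # matter here (hence the uninitialized array): the mesh only
+                # provides the cell topology needed to instantiate a DoFMap of
+                # the same type and enumerate the overlap's local DoF pattern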
+ cells = mesh.cells_as_array[self.cells, :] + vertices = uninitialized((cells.max()+1, self.dim), dtype=REAL) + if self.dim == 1: + overlap_mesh = mesh1d(vertices, cells) + elif self.dim == 2: + overlap_mesh = mesh2d(vertices, cells) + elif self.dim == 3: + overlap_mesh = mesh3d(vertices, cells) + else: + raise NotImplementedError() + else: + overlap_mesh = mesh + # build a DoFMap only over the overlapping region + if allowInteriorBoundary: + overlapDM = type(dm)(overlap_mesh, NO_BOUNDARY) + else: + overlapDM = type(dm)(overlap_mesh) + + for cellNoOverlap in range(self.num_cells): + cellNo = overlapCells[cellNoOverlap] + for k in range(dofs_per_element): + dofOverlap = overlapDM.cell2dof(cellNoOverlap, k) + if dofOverlap >= 0: + dof = dm.cell2dof(cellNo, k) + dofCheck.add(dof) + + dofs = dofCheck.getDoFs() + if overlapType == 'standard': + return algebraicOverlap(dm.num_dofs, + dofs, + self.mySubdomainNo, self.otherSubdomainNo, + comm, numSharedVecs) + elif overlapType == 'persistent': + return algebraicOverlapPersistent(dm.num_dofs, + dofs, + self.mySubdomainNo, + self.otherSubdomainNo, + comm, numSharedVecs) + elif overlapType == 'oneSidedGet': + return algebraicOverlapOneSidedGet(dm.num_dofs, + dofs, + self.mySubdomainNo, + self.otherSubdomainNo, + comm, numSharedVecs) + elif overlapType == 'oneSidedPut': + return algebraicOverlapOneSidedPut(dm.num_dofs, + dofs, + self.mySubdomainNo, + self.otherSubdomainNo, + comm, numSharedVecs) + elif overlapType == 'oneSidedPutLockAll': + return algebraicOverlapOneSidedPutLockAll(dm.num_dofs, + dofs, + self.mySubdomainNo, + self.otherSubdomainNo, + comm, numSharedVecs) + else: + raise NotImplementedError() + + def validate(self, meshBase mesh): + cdef: + simplexMapper sM + list vertices = [] + INDEX_t i, j, cellNo, vertexNo, edgeNo, faceNo, order + INDEX_t[::1] edgeVertexIndices = uninitialized((2), dtype=INDEX) + INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX) + + sM = mesh.simplexMapper + + offsets = [] + # DoFs associated with cross vertices + for i in range(self.num_vertices): + cellNo = self.vertices[i, 0] + vertexNo = self.vertices[i, 1] + vertices.append(mesh.vertices[mesh.cells[cellNo, vertexNo], :]) + offsets.append(len(vertices)) + # DoFs associated with edges and vertices on these edges + for i in range(self.num_edges): + cellNo = self.edges[i, 0] + edgeNo = self.edges[i, 1] + order = self.edges[i, 2] + sM.getEdgeVerticesLocal(edgeNo, order, edgeVertexIndices) + for j in range(2): + vertices.append(mesh.vertices[mesh.cells[cellNo, edgeVertexIndices[j]], :]) + offsets.append(len(vertices)) + # DoFs associated with faces and edges and vertices on these faces + for i in range(self.num_faces): + cellNo = self.faces[i, 0] + faceNo = self.faces[i, 1] + order = self.faces[i, 2] + sM.getFaceVerticesLocal(faceNo, order, faceVertexIndices) + for j in range(3): + vertices.append(mesh.vertices[mesh.cells[cellNo, faceVertexIndices[j]], :]) + offsets.append(len(vertices)) + for i in range(self.num_cells): + cellNo = self.cells[i] + for j in range(self.dim+1): + vertices.append(mesh.vertices[mesh.cells[cellNo, j], :]) + offsets.append(len(vertices)) + myVertices = np.array(vertices, dtype=REAL) + return myVertices, np.array(offsets, dtype=INDEX) + + def __getstate__(self): + return (np.array(self.cells), np.array(self.faces), np.array(self.edges), np.array(self.vertices), self.mySubdomainNo, self.otherSubdomainNo, self.dim) + + def __setstate__(self, state): + self.cells = state[0] + self.faces = state[1] + self.edges = state[2] + 
self.vertices = state[3] + self.mySubdomainNo = state[4] + self.otherSubdomainNo = state[5] + self.dim = state[6] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def sendPartition(self, MPI.Comm comm, INDEX_t[::1] part): + cdef: + INDEX_t i, cellNo + INDEX_t[::1] vertexPart = uninitialized((self.num_vertices), dtype=INDEX) + INDEX_t[::1] edgePart = uninitialized((self.num_edges), dtype=INDEX) + INDEX_t[::1] facePart = uninitialized((self.num_faces), dtype=INDEX) + list requests = [] + for i in range(self.num_vertices): + cellNo = self.vertices[i, 0] + vertexPart[i] = part[cellNo] + if self.num_vertices > 0: + requests.append(comm.Isend(vertexPart, dest=self.otherSubdomainNo, tag=23)) + for i in range(self.num_edges): + cellNo = self.edges[i, 0] + edgePart[i] = part[cellNo] + if self.num_edges > 0: + requests.append(comm.Isend(edgePart, dest=self.otherSubdomainNo, tag=24)) + for i in range(self.num_faces): + cellNo = self.faces[i, 0] + facePart[i] = part[cellNo] + if self.num_faces > 0: + requests.append(comm.Isend(facePart, dest=self.otherSubdomainNo, tag=25)) + return requests + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def recvPartition(self, MPI.Comm comm): + cdef: + INDEX_t[::1] vertexPart = uninitialized((self.num_vertices), dtype=INDEX) + INDEX_t[::1] edgePart = uninitialized((self.num_edges), dtype=INDEX) + INDEX_t[::1] facePart = uninitialized((self.num_faces), dtype=INDEX) + list requests = [] + if self.num_vertices > 0: + requests.append(comm.Irecv(vertexPart, source=self.otherSubdomainNo, tag=23)) + if self.num_edges > 0: + requests.append(comm.Irecv(edgePart, source=self.otherSubdomainNo, tag=24)) + if self.num_faces > 0: + requests.append(comm.Irecv(facePart, source=self.otherSubdomainNo, tag=25)) + return requests, vertexPart, edgePart, facePart + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def shrink(self, REAL_t[::1] indicator, INDEX_t[::1] newCellIndices): + # indicator vector wrt pre-shrink mesh + # newCellIndices translate from pre-shrink to post-shrink cells + cdef: + INDEX_t i + list cells + # for i in range(self.vertices.shape[0]): + # self.vertices[i, 0] = newCellIndices[self.vertices[i, 0]] + # for i in range(self.edges.shape[0]): + # self.edges[i, 0] = newCellIndices[self.edges[i, 0]] + # for i in range(self.faces.shape[0]): + # self.faces[i, 0] = newCellIndices[self.faces[i, 0]] + self.vertices = uninitialized((0, 2), dtype=INDEX) + self.edges = uninitialized((0, 3), dtype=INDEX) + self.faces = uninitialized((0, 3), dtype=INDEX) + cells = [] + assert indicator.shape[0] == self.num_cells + for i in range(self.num_cells): + if indicator[i] > 1e-4: + cells.append(newCellIndices[self.cells[i]]) + self.cells = uninitialized((len(cells)), dtype=INDEX) + for i in range(len(cells)): + self.cells[i] = cells[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getAllSharedVertices(self, meshBase mesh): + """Returns a list of shared vertices, i.e. 
vertices that also lie on shared faces and edges."""
+        cdef:
+            simplexMapper sM
+            set sharedVerticesSet
+            list sharedVertices
+            INDEX_t i, j, cellNo, vertexNo, edgeNo, faceNo, order, sVertexNo
+            INDEX_t[::1] edgeVertexIndices = uninitialized((2), dtype=INDEX)
+            INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX)
+
+        sM = mesh.simplexMapper
+        sharedVerticesSet = set()
+        sharedVertices = []
+        for i in range(self.num_vertices):
+            cellNo = self.vertices[i, 0]
+            vertexNo = self.vertices[i, 1]
+            sVertexNo = mesh.cells[cellNo, vertexNo]
+            if sVertexNo not in sharedVerticesSet:
+                sharedVerticesSet.add(sVertexNo)
+                sharedVertices.append((cellNo, vertexNo))
+        for i in range(self.num_edges):
+            cellNo = self.edges[i, 0]
+            edgeNo = self.edges[i, 1]
+            order = self.edges[i, 2]
+            sM.getEdgeVerticesLocal(edgeNo, order, edgeVertexIndices)
+            for j in range(2):
+                sVertexNo = mesh.cells[cellNo, edgeVertexIndices[j]]
+                if sVertexNo not in sharedVerticesSet:
+                    sharedVerticesSet.add(sVertexNo)
+                    sharedVertices.append((cellNo, edgeVertexIndices[j]))
+        for i in range(self.num_faces):
+            cellNo = self.faces[i, 0]
+            faceNo = self.faces[i, 1]
+            order = self.faces[i, 2]
+            sM.getFaceVerticesLocal(faceNo, order, faceVertexIndices)
+            for j in range(3):
+                sVertexNo = mesh.cells[cellNo, faceVertexIndices[j]]
+                if sVertexNo not in sharedVerticesSet:
+                    sharedVerticesSet.add(sVertexNo)
+                    sharedVertices.append((cellNo, faceVertexIndices[j]))
+        return np.array(sharedVertices, dtype=INDEX)
+
+
+cdef class meshInterface(sharedMesh):
+    def __init__(self,
+                 INDEX_t[:, ::1] vertices,
+                 INDEX_t[:, ::1] edges,
+                 INDEX_t[:, ::1] faces,
+                 INDEX_t mySubdomainNo,
+                 INDEX_t otherSubdomainNo,
+                 INDEX_t dim):
+        super(meshInterface, self).__init__(vertices, edges, faces, uninitialized((0), dtype=INDEX), mySubdomainNo, otherSubdomainNo, dim)
+
+
+cdef class sharedMeshManager:
+    def __init__(self, MPI.Comm comm):
+        assert comm is not None
+        self.comm = comm
+        self.numSubdomains = comm.size
+        self.sharedMeshes = {}
+        self.requests = []
+        self._rank2subdomain = {rank: rank for rank in range(self.numSubdomains)}
+        self._subdomain2rank = {subdomainNo: subdomainNo for subdomainNo in range(self.numSubdomains)}
+
+    cdef inline INDEX_t rank2subdomain(self, INDEX_t rank):
+        return self._rank2subdomain[rank]
+
+    cdef inline INDEX_t subdomain2rank(self, INDEX_t subdomainNo):
+        return self._subdomain2rank[subdomainNo]
+
+    def refine(self, mesh=None):
+        cdef:
+            INDEX_t subdomainNo
+        for subdomainNo in self.sharedMeshes:
+            self.sharedMeshes[subdomainNo].refine(mesh)
+
+    def getDoFs(self, meshBase mesh, DoFMap DoFMap, overlapType='standard', INDEX_t numSharedVecs=1,
+                BOOL_t allowInteriorBoundary=False,
+                BOOL_t useRequests=True, BOOL_t waitRequests=True,
+                splitSharedMeshManager splitManager=None):
+        cdef:
+            INDEX_t rank, subdomainNo, memSize, totalMemSize
+            algebraicOverlap ov
+            algebraicOverlapManager OM
+            INDEX_t i
+            INDEX_t[::1] counts, displ, myMemOffsets, otherMemOffsets
+            BOOL_t useOneSided = overlapType in ('oneSidedGet', 'oneSidedPut', 'oneSidedPutLockAll')
+            list managers
+            sharedMeshManager manager
+            INDEX_t k
+            INDEX_t[::1] memSizePerManager
+            MPI.Win window = None
+
+        OM = algebraicOverlapManager(self.numSubdomains, DoFMap.num_dofs, self.comm)
+        OM.type = overlapType
+        if splitManager is None:
+            managers = [self]
+        else:
+            managers = splitManager.managers
+        memSizePerManager = uninitialized((len(managers)), dtype=INDEX)
+        totalMemSize = 0
+        for k, manager in enumerate(managers):
+            memSize = 0
+            for rank in 
sorted(manager.sharedMeshes): + subdomainNo = manager.rank2subdomain(rank) + ov = manager.sharedMeshes[rank].getDoFs(mesh, DoFMap, manager.comm, overlapType, numSharedVecs, allowInteriorBoundary) + if ov.num_shared_dofs > 0: + OM.overlaps[subdomainNo] = ov + memSize += ov.num_shared_dofs + memSizePerManager[k] = memSize + totalMemSize += memSize + + OM.exchangeIn = np.zeros((numSharedVecs, totalMemSize), dtype=REAL) + OM.exchangeOut = np.zeros((numSharedVecs, totalMemSize), dtype=REAL) + + for k, manager in enumerate(managers): + memSize = 0 + if useOneSided: + window = MPI.Win.Allocate(MPI.REAL.size*numSharedVecs*memSizePerManager[k], comm=manager.comm) + if overlapType == 'oneSidedPutLockAll': + window.Lock_all(MPI.MODE_NOCHECK) + for rank in sorted(manager.sharedMeshes): + subdomainNo = manager.rank2subdomain(rank) + if subdomainNo not in OM.overlaps: + continue + ov = OM.overlaps[subdomainNo] + ov.setMemory(OM.exchangeIn, OM.exchangeOut, memSize, memSizePerManager[k]) + memSize += ov.num_shared_dofs + if useOneSided: + ov.setWindow(window) + if useRequests: + req1, req2 = ov.exchangeMemOffsets(self.comm, tag=2012) + manager.requests.append(req1) + manager.requests.append(req2) + if useRequests and waitRequests and len(manager.requests) > 0: + MPI.Request.Waitall(manager.requests) + manager.requests = [] + + if not useRequests and useOneSided: + myMemOffsets = uninitialized((len(manager.sharedMeshes)), dtype=INDEX) + otherMemOffsets = uninitialized((len(manager.sharedMeshes)), dtype=INDEX) + displ = np.zeros((manager.comm.size), dtype=INDEX) + counts = np.zeros((manager.comm.size), dtype=INDEX) + for i, rank in enumerate(sorted(manager.sharedMeshes)): + counts[rank] = 1 + displ[rank] = i + subdomainNo = manager.rank2subdomain(rank) + myMemOffsets[i] = OM.overlaps[subdomainNo].memOffset + manager.comm.Alltoallv([myMemOffsets, (counts, displ)], + [otherMemOffsets, (counts, displ)]) + for i, rank in enumerate(sorted(manager.sharedMeshes)): + subdomainNo = manager.rank2subdomain(rank) + OM.overlaps[subdomainNo].memOffsetOther = uninitialized((1), dtype=INDEX) + OM.overlaps[subdomainNo].memOffsetOther[0] = otherMemOffsets[i] + return OM + + def __repr__(self): + s = '' + for subdomainNo in self.sharedMeshes: + s += self.sharedMeshes[subdomainNo].__repr__() + '\n' + return s + + def __getstate__(self): + return (self.numSubdomains, self.sharedMeshes) + + def __setstate__(self, state): + self.numSubdomains = state[0] + self.sharedMeshes = state[1] + + def copy(self): + newManager = sharedMeshManager(self.comm) + for subdomainNo in self.sharedMeshes: + newManager.sharedMeshes[subdomainNo] = deepcopy(self.sharedMeshes[subdomainNo]) + return newManager + + def exchangePartitioning(self, MPI.Comm comm, INDEX_t[::1] part): + requests = [] + interfacePart = {subdomainNo: {'vertex': None, 'edge': None, 'face': None} for subdomainNo in self.sharedMeshes} + + for subdomainNo in self.sharedMeshes: + requests += self.sharedMeshes[subdomainNo].sendPartition(comm, part) + for subdomainNo in self.sharedMeshes: + (req, + interfacePart[subdomainNo]['vertex'], + interfacePart[subdomainNo]['edge'], + interfacePart[subdomainNo]['face']) = self.sharedMeshes[subdomainNo].recvPartition(comm) + requests += req + MPI.Request.Waitall(requests) + return interfacePart + + def shrink(self, dict localIndicators, INDEX_t[::1] newCellIndices): + toDelete = [] + for subdomainNo in self.sharedMeshes: + try: + self.sharedMeshes[subdomainNo].shrink(localIndicators[subdomainNo], newCellIndices) + except KeyError: + 
toDelete.append(subdomainNo) + for subdomainNo in toDelete: + del self.sharedMeshes[subdomainNo] + + def validate(self, mesh, comm, label='Mesh interface'): + requests = [] + myVertices = {} + offsets = {} + validationPassed = True + comm.Barrier() + for subdomainNo in self.sharedMeshes: + myVertices[subdomainNo], offsets[subdomainNo] = self.sharedMeshes[subdomainNo].validate(mesh) + if comm.rank > subdomainNo: + requests.append(comm.Isend(offsets[subdomainNo], dest=subdomainNo, tag=2)) + requests.append(comm.Isend(myVertices[subdomainNo], dest=subdomainNo, tag=1)) + for subdomainNo in self.sharedMeshes: + if comm.rank < subdomainNo: + otherOffsets = uninitialized((4), dtype=INDEX) + comm.Recv(otherOffsets, source=subdomainNo, tag=2) + otherVertices = uninitialized((otherOffsets[3], mesh.dim), dtype=REAL) + comm.Recv(otherVertices, source=subdomainNo, tag=1) + if not np.allclose(offsets[subdomainNo], otherOffsets): + validationPassed = False + print(('Subdomains {} and {} want' + + ' to share {}/{} vertices, ' + + '{}/{} edges, {}/{} faces ' + + 'and {}/{} cells.').format(comm.rank, + subdomainNo, + offsets[subdomainNo][0], + otherOffsets[0], + (offsets[subdomainNo][1]-offsets[subdomainNo][0])//2, + (otherOffsets[1]-otherOffsets[0])//2, + (offsets[subdomainNo][2]-offsets[subdomainNo][1])//3, + (otherOffsets[2]-otherOffsets[1])//3, + (offsets[subdomainNo][3]-offsets[subdomainNo][2])//(mesh.dim+1), + (otherOffsets[3]-otherOffsets[2])//(mesh.dim+1))) + else: + diff = norm(myVertices[subdomainNo]-otherVertices, axis=1) + if diff.max() > 1e-9: + incorrectVertices = np.sum(diff[:offsets[subdomainNo][0]] > 1e-9) + numVertices = offsets[subdomainNo][0] + incorrectEdges = int(np.ceil(np.sum(diff[offsets[subdomainNo][0]:offsets[subdomainNo][1]] > 1e-9)/2)) + numEdges = (offsets[subdomainNo][1]-offsets[subdomainNo][0])//2 + incorrectFaces = int(np.ceil(np.sum(diff[offsets[subdomainNo][1]:offsets[subdomainNo][2]] > 1e-9)/3)) + numFaces = (offsets[subdomainNo][2]-offsets[subdomainNo][1])//3 + incorrectCells = int(np.ceil(np.sum(diff[offsets[subdomainNo][2]:] > 1e-9)/(mesh.dim+1))) + numCells = (offsets[subdomainNo][3]-offsets[subdomainNo][2])//(mesh.dim+1) + diffSorted = (np.array(sorted(myVertices[subdomainNo], key=tuple))-np.array(sorted(otherVertices, key=tuple))) + if np.sum(diff > 1e-9) < 30: + s = "\n" + s += str(myVertices[subdomainNo][diff > 1e-9, :]) + s += "\n" + s += str(otherVertices[diff > 1e-9, :]) + else: + s = "" + print(('Rank {} has incorrect overlap with {} ' + + '(Diff sorted: {}, wrong vertices {}/{}, edges {}/{}, ' + + 'faces {}/{}, cells {}/{}).{}').format(comm.rank, + subdomainNo, + np.sum(diffSorted), + incorrectVertices, + numVertices, + incorrectEdges, + numEdges, + incorrectFaces, + numFaces, + incorrectCells, + numCells, + s)) + validationPassed = False + MPI.Request.Waitall(requests) + comm.Barrier() + assert validationPassed + comm.Barrier() + if comm.rank == 0: + print('{} validation successful.'.format(label)) + + +cdef class splitSharedMeshManager: + cdef: + public list managers + + cdef list splitCommInteriorBoundary(self, MPI.Comm comm, INDEX_t[::1] commRanks, BOOL_t inCG, INDEX_t numSubComms=2, maxCommSize=100): + cdef: + INDEX_t color + INDEX_t[::1] myRank = uninitialized((1), dtype=INDEX) + INDEX_t[::1] otherRanks + INDEX_t[::1] old2new + INDEX_t[::1] newRanks + INDEX_t i + MPI.Comm subcomm + list commsAndRanks + MPI.Comm subcommBoundary + commsAndRanks = [] + myRank[0] = comm.rank + + # group all 'interior' subdomains with their parent + if inCG: + color = 
comm.rank + elif commRanks.shape[0] == 1: + color = commRanks[0] + else: + color = MPI.UNDEFINED + subcomm = comm.Split(color) + if subcomm is not None and subcomm != MPI.COMM_NULL and subcomm.size < comm.size: + otherRanks = uninitialized((subcomm.size), dtype=INDEX) + subcomm.Allgather(myRank, otherRanks) + old2new = -np.ones((comm.size), dtype=INDEX) + for i in range(subcomm.size): + old2new[otherRanks[i]] = i + newRanks = uninitialized((commRanks.shape[0]), dtype=INDEX) + for i in range(commRanks.shape[0]): + newRanks[i] = old2new[commRanks[i]] + commsAndRanks.append((subcomm, newRanks)) + + if inCG: + color = 0 + elif commRanks.shape[0] == 1 and subcomm.size < comm.size: + color = MPI.UNDEFINED + else: + color = 0 + subcommBoundary = comm.Split(color) + if subcommBoundary == MPI.COMM_NULL: + return commsAndRanks + + numSubComms = max(numSubComms, subcommBoundary.size//maxCommSize+1) + + # group all 'boundary' subdomains with all coarse grid ranks + commSplits = np.around(np.linspace(0, subcommBoundary.size, numSubComms+1)).astype(INDEX) + for splitNo in range(numSubComms): + if inCG: + color = 0 + elif commSplits[splitNo] <= subcommBoundary.rank and subcommBoundary.rank < commSplits[splitNo+1]: + color = 0 + else: + color = MPI.UNDEFINED + subcomm = subcommBoundary.Split(color) + if subcomm is not None and subcomm != MPI.COMM_NULL: + otherRanks = uninitialized((subcomm.size), dtype=INDEX) + subcomm.Allgather(myRank, otherRanks) + old2new = -np.ones((comm.size), dtype=INDEX) + for i in range(subcomm.size): + old2new[otherRanks[i]] = i + newRanks = uninitialized((commRanks.shape[0]), dtype=INDEX) + for i in range(commRanks.shape[0]): + newRanks[i] = old2new[commRanks[i]] + commsAndRanks.append((subcomm, newRanks)) + + return commsAndRanks + + cdef list splitComm(self, MPI.Comm comm, INDEX_t[::1] commRanks, BOOL_t inCG, INDEX_t numSubComms=2): + cdef: + INDEX_t color + INDEX_t[::1] myRank = uninitialized((1), dtype=INDEX) + INDEX_t[::1] otherRanks + INDEX_t[::1] old2new + INDEX_t[::1] newRanks + INDEX_t i, splitNo + list commsAndRanks + + myRank[0] = comm.rank + commsAndRanks = [] + commSplits = np.around(np.linspace(0, comm.size, numSubComms+1)).astype(INDEX) + for splitNo in range(numSubComms): + if inCG: + color = 0 + elif commSplits[splitNo] <= comm.rank and comm.rank < commSplits[splitNo+1]: + color = 0 + else: + color = MPI.UNDEFINED + subcomm = comm.Split(color) + + if subcomm is not None and subcomm != MPI.COMM_NULL: + otherRanks = uninitialized((subcomm.size), dtype=INDEX) + subcomm.Allgather(myRank, otherRanks) + old2new = -np.ones((comm.size), dtype=INDEX) + for i in range(subcomm.size): + old2new[otherRanks[i]] = i + newRanks = uninitialized((commRanks.shape[0]), dtype=INDEX) + for i in range(commRanks.shape[0]): + newRanks[i] = old2new[commRanks[i]] + else: + newRanks = None + commsAndRanks.append((subcomm, newRanks)) + return commsAndRanks + + def __init__(self, sharedMeshManager manager, MPI.Comm comm, BOOL_t inCG): + cdef: + MPI.Comm subcomm + dict local2global, global2local + commRanks = np.array([subdomainNo for subdomainNo in sorted(manager.sharedMeshes)], dtype=INDEX) + commAndRanks = self.splitCommInteriorBoundary(comm, commRanks, inCG) + # commAndRanks = self.splitComm(comm, commRanks, inCG) + # print('On rank {}: Split comm of size {} into overlapping subcomms of sizes {}'.format(comm.rank, comm.size, + # [c[0].size if c[0] != MPI.COMM_NULL else 0 for c in commAndRanks])) + self.managers = [] + for subcomm, newRanks in commAndRanks: + if subcomm is not None and 
subcomm != MPI.COMM_NULL: + submanager = sharedMeshManager(subcomm) + local2global = {} + global2local = {} + for i in range(commRanks.shape[0]): + oldRank = commRanks[i] + newRank = newRanks[i] + if newRank >= 0: + local2global[newRank] = oldRank + global2local[oldRank] = newRank + sharedMesh = manager.sharedMeshes[oldRank] + sharedMesh.mySubdomainNo = subcomm.rank + sharedMesh.otherSubdomainNo = newRank + submanager.sharedMeshes[newRank] = sharedMesh + submanager._subdomain2rank = global2local + submanager._rank2subdomain = local2global + self.managers.append(submanager) + + +cdef class interfaceManager(sharedMeshManager): + def __init__(self, MPI.Comm comm): + super(interfaceManager, self).__init__(comm) + + def getInterfaces(self): + return self.sharedMeshes + + def setInterfaces(self, interfaces): + self.sharedMeshes = interfaces + + interfaces = property(fget=getInterfaces, fset=setInterfaces) + + def copy(self): + newManager = interfaceManager(self.comm) + for subdomainNo in self.sharedMeshes: + newManager.sharedMeshes[subdomainNo] = deepcopy(self.sharedMeshes[subdomainNo]) + return newManager + + +cdef class meshOverlap(sharedMesh): + def __init__(self, + INDEX_t[::1] cells, + INDEX_t mySubdomainNo, + INDEX_t otherSubdomainNo, + INDEX_t dim): + super(meshOverlap, self).__init__(uninitialized((0, 2), dtype=INDEX), + uninitialized((0, 3), dtype=INDEX), + uninitialized((0, 3), dtype=INDEX), + cells, + mySubdomainNo, otherSubdomainNo, + dim) + + +cdef class overlapManager(sharedMeshManager): + def __init__(self, MPI.Comm comm): + super(overlapManager, self).__init__(comm) + + def getOverlaps(self): + return self.sharedMeshes + + def setOverlaps(self, overlaps): + self.sharedMeshes = overlaps + + overlaps = property(fget=getOverlaps, fset=setOverlaps) + + def check(self, mesh, comm, label='Mesh overlap'): + self.validate(mesh, comm, label) + + def copy(self): + newManager = overlapManager(self.comm) + for subdomainNo in self.sharedMeshes: + newManager.sharedMeshes[subdomainNo] = deepcopy(self.sharedMeshes[subdomainNo]) + return newManager + + +cdef class vertexMap: + cdef: + public INDEX_t dim, num_vertices, num_interface_vertices + public dict local2overlap + public dict overlap2local + + # translates local to overlap vertex indices and the other way around + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __init__(self, meshBase mesh, sharedMesh interface): + cdef: + simplexMapper sM + INDEX_t k, i, j, cellNo, vertexNo, edgeNo, faceNo, order, vertex + INDEX_t[::1] edgeVertexIndices = uninitialized((2), dtype=INDEX) + INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX) + self.local2overlap = dict() + self.overlap2local = dict() + self.dim = mesh.dim + sM = mesh.simplexMapper + k = 0 + for i in range(interface.num_vertices): + cellNo = interface.vertices[i, 0] + vertexNo = interface.vertices[i, 1] + vertex = mesh.cells[cellNo, vertexNo] + self.local2overlap[vertex] = k + self.overlap2local[k] = vertex + k += 1 + if self.dim >= 2: + for i in range(interface.num_edges): + cellNo = interface.edges[i, 0] + edgeNo = interface.edges[i, 1] + order = interface.edges[i, 2] + sM.getEdgeVerticesGlobal(cellNo, edgeNo, order, edgeVertexIndices) + for j in range(2): + self.local2overlap[edgeVertexIndices[j]] = k + self.overlap2local[k] = edgeVertexIndices[j] + k += 1 + if self.dim >= 3: + for i in range(interface.num_faces): + cellNo = interface.faces[i, 0] + faceNo = interface.faces[i, 1] + order = interface.faces[i, 2] + 
sM.getFaceVerticesGlobal(cellNo, faceNo, order, faceVertexIndices) + for j in range(3): + self.local2overlap[faceVertexIndices[j]] = k + self.overlap2local[k] = faceVertexIndices[j] + k += 1 + + self.num_vertices = k + self.num_interface_vertices = k + + def translateLocal2Overlap(self, data): + # used when we send cells + # Translates subdomain vertex indices into overlap vertex indices. + # If a subdomain vertex is not yet in the map, we add it. + cdef: + INDEX_t i + dataTranslated = uninitialized_like(data, dtype=INDEX) + for x, i in zip(np.nditer(dataTranslated, op_flags=['writeonly']), + data.flat): + try: + x[...] = self.local2overlap[i] + except KeyError: + # the vertex with index i is not yet in the map + # -> add it + self.local2overlap[i] = self.num_vertices + self.overlap2local[self.num_vertices] = i + x[...] = self.num_vertices + self.num_vertices += 1 + return dataTranslated + + def translateOverlap2Local(self, data, INDEX_t offset=0): + # used when we receive cells + # Translates overlap vertex indices into subdomain vertex indices. + # If a vertex is not on the interface between subdomains, translate with an offset. + cdef: + INDEX_t i + dataTranslated = uninitialized_like(data, dtype=INDEX) + for x, i in zip(np.nditer(dataTranslated, op_flags=['writeonly']), + data.flat): + if i < self.num_interface_vertices: + # this vertex was already on the interface + x[...] = self.overlap2local[i] + else: + # this vertex was added in the outer overlap + x[...] = self.overlap2local[i+offset] + return dataTranslated + + +class NotFound(Exception): + pass + + +class vertexMapManager: + def __init__(self, meshBase mesh, interfaceManager, mySubdomainNo): + self.mySubdomainNo = mySubdomainNo + self.mesh = mesh + self.vertexMaps = {} + # add all vertices that are on the interface + for subdomainNo in interfaceManager.interfaces: + self.vertexMaps[subdomainNo] = vertexMap(mesh, interfaceManager.interfaces[subdomainNo]) + # record which subdomains share the interface vertices + self.sharedVertices = {} + # loop over all interfaces + for subdomainNo in self.vertexMaps: + # loop over vertex indices in interface + for vertexNo in self.vertexMaps[subdomainNo].local2overlap: + try: + self.sharedVertices[vertexNo].add(subdomainNo) + except KeyError: + self.sharedVertices[vertexNo] = set([subdomainNo]) + # we will keep track of which cells we send out to more than one subdomain + # in form subdomain cell index -> set of subdomains + self.sharedCells = {} + # vertices we receive from other subdomains + self.newVertices = [] + self.newVerticesShared = {} + # cells we receive from other subdomains + self.newCells = [] + self.newCellsLastLayer = [] + self.num_newCells = 0 + self.overlapCellNos = {} + self.overlapCellsStart = {} + + def translateLocal2Overlap(self, sharedCells, subdomainNo, track=True): + # Translate subdomain cell indices into an array of overlap vertex indices + # if track is true, keep track of which subdomains we share each cell with + cdef: + INDEX_t[:, ::1] cells = self.mesh.cells + np.ndarray[INDEX_t, ndim=2] data = uninitialized((len(sharedCells), cells.shape[1]), dtype=INDEX) + INDEX_t[:, ::1] dataTranslated + INDEX_t i, j, k + k = 0 + for i in sharedCells: + for j in range(cells.shape[1]): + data[k, j] = cells[i, j] + k += 1 + dataTranslated = self.vertexMaps[subdomainNo].translateLocal2Overlap(data) + if track: + for i, cellNo in enumerate(sharedCells): + try: + self.sharedCells[cellNo].append((subdomainNo, i)) + except KeyError: + self.sharedCells[cellNo] = [(subdomainNo, i)] + 
return dataTranslated + + def addVertex(self, vertex, subdomainNo): + # add a vertex that was not part of the subdomain to the vertexMap with another subdomain + vM = self.vertexMaps[subdomainNo] + vM.overlap2local[vM.num_vertices] = self.mesh.num_vertices + len(self.newVertices) + vM.local2overlap[self.mesh.num_vertices + len(self.newVertices)] = vM.num_vertices + vM.num_vertices += 1 + self.newVertices.append(vertex) + + def addVertexShared(self, vertex, subdomainNo, sharedWith): + # add a vertex that was not part of the subdomain and that is + # also shared with other subdomains to the vertexMap with + # another subdomain + vM = self.vertexMaps[subdomainNo] + otherSubdomainNo = list(sharedWith)[0] + try: + candidatesLocalVertexNos = self.newVerticesShared[otherSubdomainNo] + for localVertexNo in candidatesLocalVertexNos: + if np.linalg.norm(self.newVertices[localVertexNo-self.mesh.num_vertices] - vertex) < 1e-8: + vM.overlap2local[vM.num_vertices] = localVertexNo + vM.local2overlap[localVertexNo] = vM.num_vertices + vM.num_vertices += 1 + break + else: + raise NotFound() + except (NotFound, KeyError): + localVertexNo = self.mesh.num_vertices + len(self.newVertices) + self.addVertex(vertex, subdomainNo) + try: + self.newVerticesShared[subdomainNo].append(localVertexNo) + except KeyError: + self.newVerticesShared[subdomainNo] = [localVertexNo] + + def getVertexByOverlapIndex(self, subdomainNo, overlapIndex): + vM = self.vertexMaps[subdomainNo] + localIndex = vM.overlap2local[overlapIndex] + return self.getVertexByLocalIndex(localIndex) + + def getVertexByLocalIndex(self, localIndex): + if localIndex >= self.mesh.num_vertices: + return self.newVertices[localIndex-self.mesh.num_vertices] + else: + return self.mesh.vertices[localIndex, :] + + def removeDuplicateVertices(self): + temp = np.vstack(self.newVertices) + _, idx, inverse = np.unique(temp, axis=0, return_index=True, return_inverse=True) + inverse2 = uninitialized((self.mesh.num_vertices+len(self.newVertices)), dtype=INDEX) + inverse2[:self.mesh.num_vertices] = np.arange(self.mesh.num_vertices, dtype=INDEX) + for i in range(len(self.newVertices)): + inverse2[self.mesh.num_vertices+i] = self.mesh.num_vertices+inverse[i] + self.newVertices = [self.newVertices[i] for i in idx] + for i in range(len(self.newCells)): + for j in range(self.newCells[i].shape[0]): + for k in range(self.newCells[i].shape[1]): + self.newCells[i][j, k] = inverse2[self.newCells[i][j, k]] + return inverse2 + + def addCells(self, cells, subdomainNo, numberCellswithoutLastLevel): + old_num = self.num_newCells + # add new cells in layers 1,..,delta + self.newCells.append(cells[:numberCellswithoutLastLevel, :]) + self.newCells.append(cells[numberCellswithoutLastLevel:, :]) + # self.num_newCells = self.num_newCells+numberCellswithoutLastLevel + self.num_newCells = self.num_newCells+cells.shape[0] + + # add new cells in layer delta+1 + self.newCellsLastLayer.append(cells[numberCellswithoutLastLevel:, :]) + + # subdomain cell indices for cells in layers 1,.., delta + localCellNos = np.arange(self.mesh.num_cells+old_num, + self.mesh.num_cells+self.num_newCells, dtype=INDEX) + + # add subdomain cell indices + # make sure that they match up for both subdomains + if self.mySubdomainNo < subdomainNo: + self.overlapCellsStart[subdomainNo] = 0 + self.overlapCellNos[subdomainNo] = np.concatenate((localCellNos, + self.overlapCellNos[subdomainNo])) + else: + self.overlapCellsStart[subdomainNo] = self.overlapCellNos[subdomainNo].shape[0] + self.overlapCellNos[subdomainNo] = 
np.concatenate((self.overlapCellNos[subdomainNo],
+                                                                localCellNos))
+
+    def addSharedCells(self, sharedCells, subdomainNo):
+        # Process cells that the subdomain subdomainNo shared with us
+        # and other subdomains as well.
+        for overlapCellNo, otherSubdomainNo in sharedCells:
+            localCellNo = self.overlapCellNos[subdomainNo][self.overlapCellsStart[subdomainNo]+overlapCellNo]
+            # FIX: this is not very efficient
+            try:
+                self.overlapCellNos[otherSubdomainNo] = np.concatenate((self.overlapCellNos[otherSubdomainNo],
+                                                                        np.array([localCellNo], dtype=INDEX)))
+            except KeyError:
+                self.overlapCellNos[otherSubdomainNo] = np.array([localCellNo],
+                                                                 dtype=INDEX)
+
+    def writeOverlapToMesh(self):
+        self.mesh.vertices = np.vstack((self.mesh.vertices,
+                                        np.array(self.newVertices,
+                                                 dtype=REAL)))
+        self.mesh.cells = np.vstack((self.mesh.cells,
+                                     *self.newCells,
+                                     # *self.newCellsLastLayer
+                                     ))
+        self.mesh.init()
+        numberCellsLastLayer = sum([len(c) for c in self.newCellsLastLayer])
+        self.newVertices = []
+        self.newCells = []
+        self.newCellsLastLayer = []
+        return numberCellsLastLayer
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def updateBoundary1D(INDEX_t[:, ::1] cells,
+                     INDEX_t[::1] oldVertices):
+    cdef:
+        INDEX_t nc = cells.shape[0]
+        INDEX_t i, c0, c1
+        set bvertices
+    bvertices = set()
+    for i in range(nc):
+        c0, c1 = cells[i, 0], cells[i, 1]
+        try:
+            bvertices.remove(c0)
+        except KeyError:
+            bvertices.add(c0)
+        try:
+            bvertices.remove(c1)
+        except KeyError:
+            bvertices.add(c1)
+    # remove all old (interior) boundary vertices
+    for i in range(oldVertices.shape[0]):
+        bvertices.discard(oldVertices[i])
+    return np.array(list(bvertices), dtype=INDEX)
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def updateBoundary2D(INDEX_t[:, ::1] cells,
+                     INDEX_t[:, ::1] oldEdges,
+                     INDEX_t[::1] oldVertices):
+    cdef:
+        INDEX_t nc = cells.shape[0]
+        INDEX_t c0, c1, i
+        ENCODE_t hv = 0
+        INDEX_t[::1] e = uninitialized((2), dtype=INDEX)
+        np.ndarray[INDEX_t, ndim=2] bedges_mem
+        INDEX_t[:, ::1] bedges_mv
+        simplexMapper sM = simplexMapper2D()
+        set bedges, bvertices
+
+    # get boundary edges for given cells
+    bedges = set()
+    for i in range(nc):
+        sM.startLoopOverCellEdges(cells[i, :])
+        while sM.loopOverCellEdgesEncoded(&hv):
+            try:
+                bedges.remove(hv)
+            except KeyError:
+                bedges.add(hv)
+
+    # remove all old (interior) edges
+    for i in range(oldEdges.shape[0]):
+        c0, c1 = oldEdges[i, 0], oldEdges[i, 1]
+        sortEdge(c0, c1, e)
+        bedges.discard(encode_edge(e))
+    bedges_mem = uninitialized((len(bedges), 2), dtype=INDEX)
+    bedges_mv = bedges_mem
+
+    # get all new boundary vertices
+    bvertices = set()
+    for i, hv in enumerate(bedges):
+        decode_edge(hv, e)
+        bedges_mv[i, 0], bedges_mv[i, 1] = e[0], e[1]
+        bvertices.add(e[0])
+        bvertices.add(e[1])
+    # remove all old (interior) boundary vertices
+    for i in range(oldVertices.shape[0]):
+        bvertices.discard(oldVertices[i])
+    return np.array(list(bvertices), dtype=INDEX), bedges_mem
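+
+
+# The updateBoundary{1,2,3}D helpers all rely on the same parity argument: each
+# interior entity (vertex, edge, face) is visited from exactly two cells, so
+# toggling set membership per visit leaves exactly the boundary entities behind.
+# A minimal pure-Python sketch of the idea (hypothetical helper, not part of
+# this module):
+#
+#     def surviving_entities(visits):
+#         seen = set()
+#         for e in visits:
+#             if e in seen:
+#                 seen.remove(e)   # second visit: interior entity, cancels
+#             else:
+#                 seen.add(e)      # first visit: boundary candidate
+#         return seen
+#
+#     # surviving_entities([(0, 1), (1, 2), (0, 1)]) == {(1, 2)}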
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def updateBoundary3D(INDEX_t[:, ::1] cells,
+                     INDEX_t[:, ::1] oldFaces,
+                     INDEX_t[:, ::1] oldEdges,
+                     INDEX_t[::1] oldVertices):
+    cdef:
+        INDEX_t nc = cells.shape[0]
+        INDEX_t c0, c1, c2, i
+        ENCODE_t hv = 0
+        INDEX_t[::1] f = uninitialized((3), dtype=INDEX)
+        INDEX_t[::1] e = uninitialized((2), dtype=INDEX)
+        np.ndarray[INDEX_t, ndim=2] bfaces_mem, bedges_mem
+        INDEX_t[:, ::1] bfaces_mv, bedges_mv
+        set bfaces, bedges, bvertices
+        simplexMapper sM = simplexMapper3D()
+        tuple t
+        INDEX_t t0 = 0
+        ENCODE_t t1 = 0
+
+    # get boundary faces for given cells
+    bfaces = set()
+    for i in range(nc):
+        sM.startLoopOverCellFaces(cells[i, :])
+        while sM.loopOverCellFacesEncoded(&t0, &t1):
+            try:
+                bfaces.remove((t0, t1))
+            except KeyError:
+                bfaces.add((t0, t1))
+
+    # remove all old (interior) faces
+    for i in range(oldFaces.shape[0]):
+        c0, c1, c2 = oldFaces[i, 0], oldFaces[i, 1], oldFaces[i, 2]
+        sortFace(c0, c1, c2, f)
+        bfaces.discard(encode_face(f))
+    bfaces_mem = uninitialized((len(bfaces), 3), dtype=INDEX)
+    bfaces_mv = bfaces_mem
+
+    # get all new boundary edges
+    bedges = set()
+    for i, t in enumerate(bfaces):
+        decode_face(t, bfaces_mv[i, :])
+        sM.startLoopOverFaceEdges(bfaces_mv[i, :])
+        while sM.loopOverFaceEdgesEncoded(&hv):
+            bedges.add(hv)
+    # remove all old (interior) boundary edges
+    for i in range(oldEdges.shape[0]):
+        bedges.discard(encode_edge(oldEdges[i, :]))
+    bedges_mem = uninitialized((len(bedges), 2), dtype=INDEX)
+    bedges_mv = bedges_mem
+
+    # get all new boundary vertices
+    bvertices = set()
+    for i, hv in enumerate(bedges):
+        decode_edge(hv, e)
+        bedges_mv[i, 0], bedges_mv[i, 1] = e[0], e[1]
+        bvertices.add(e[0])
+        bvertices.add(e[1])
+    # remove all old (interior) boundary vertices
+    for i in range(oldVertices.shape[0]):
+        bvertices.discard(oldVertices[i])
+
+    return np.array(list(bvertices), dtype=INDEX), bedges_mem, bfaces_mem
+
+
+def extendOverlap(meshBase subdomain, interfaces, interiorBL, depth, comm, debug=False):
+    # extend a (depth+1) layer overlap around each subdomain
+    # track depth layers for data exchange
+    # the (depth+1) layer should be dropped again after matrix assembly
+
+    cdef:
+        simplexMapper sM
+        INDEX_t[::1] edgeVertexIndices = uninitialized((2), dtype=INDEX)
+        INDEX_t[::1] faceVertexIndices = uninitialized((3), dtype=INDEX)
+        INDEX_t dim, nc, k, j, n, I, i, l
+        INDEX_t vertexNo, edgeNo, faceNo, cellNo, subdomainNo, localVertexNo, overlapVertexNo
+        INDEX_t boundaryVertex
+        algebraicOverlap ov
+    sM = subdomain.simplexMapper
+
+    def getBoundaryCells(interface):
+        vertices = set()
+        for i in range(interface.num_vertices):
+            cellNo = interface.vertices[i, 0]
+            vertexNo = interface.vertices[i, 1]
+            vertices.add(subdomain.cells[cellNo, vertexNo])
+        if dim >= 2:
+            for i in range(interface.num_edges):
+                cellNo = interface.edges[i, 0]
+                edgeNo = interface.edges[i, 1]
+                order = interface.edges[i, 2]
+                sM.getEdgeVerticesGlobal(cellNo, edgeNo, order, edgeVertexIndices)
+                for j in range(2):
+                    vertices.add(edgeVertexIndices[j])
+        if dim >= 3:
+            for i in range(interface.num_faces):
+                cellNo = interface.faces[i, 0]
+                faceNo = interface.faces[i, 1]
+                order = interface.faces[i, 2]
+                sM.getFaceVerticesGlobal(cellNo, faceNo, order, faceVertexIndices)
+                for j in range(3):
+                    vertices.add(faceVertexIndices[j])
+
+        boundaryCells = set()
+        for v in vertices:
+            boundaryCells |= set(v2c[v])
+        return boundaryCells
+
+    dim = subdomain.dim
+    nc = subdomain.num_cells
+    vMM = vertexMapManager(subdomain, interfaces, comm.rank)
+    v2c = interiorBL.vertex2cells(subdomain.cells)
+    # dict: subdomainNo -> vertices that are shared with more than one subdomain
+    sharedToSend = {}
+    # dict: subdomainNo -> vertices that are sent there
+    verticesToSend = {}
+    # dict: subdomainNo -> dict of overlapVertexNo -> tag
+    boundaryVertexTagsToSend = {}
+    boundaryEdgeTagsToSend = {}
+    boundaryFaceTagsToSend = {}
+    INVALID_VERTEX_TAG = np.iinfo(TAG).max
+    boundaryVertexTagLookup = INVALID_VERTEX_TAG*np.ones((subdomain.num_vertices),
+                                                         dtype=TAG)
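+    # boundaryVertexTagLookup maps vertexNo -> boundary tag; entries left at
+    # INVALID_VERTEX_TAG mark vertices that do not carry a boundary tag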
+    boundaryVertexTagLookup[subdomain.boundaryVertices] = subdomain.boundaryVertexTags
+
+    # mapping vertexNo -> boundaryEdges
+    boundaryEdgeLookup = {}
+    for edgeNo in range(subdomain.boundaryEdges.shape[0]):
+        for k in range(2):
+            boundaryVertex = subdomain.boundaryEdges[edgeNo, k]
+            try:
+                boundaryEdgeLookup[boundaryVertex].append(edgeNo)
+            except KeyError:
+                boundaryEdgeLookup[boundaryVertex] = [edgeNo]
+
+    # mapping vertexNo -> boundaryFaces
+    boundaryFaceLookup = {}
+    for faceNo in range(subdomain.boundaryFaces.shape[0]):
+        for k in range(3):
+            boundaryVertex = subdomain.boundaryFaces[faceNo, k]
+            try:
+                boundaryFaceLookup[boundaryVertex].append(faceNo)
+            except KeyError:
+                boundaryFaceLookup[boundaryVertex] = [faceNo]
+
+    # dict: subdomainNo -> cells that are sent there
+    cellsToSend = {}
+    # list of all send requests
+    sendRequests = []
+    # dict: localVertexNo -> subdomains that that vertex is sent to
+    sharedVertices = {}
+    for subdomainNo in interfaces.interfaces:
+        # indices of cells that touch the interface with the other subdomain
+        interfaceBoundaryCellNos = getBoundaryCells(interfaces.interfaces[subdomainNo])
+        # indices of cells that are in the interior overlap
+        localCellNos = interiorBL.getLayer(depth, interfaceBoundaryCellNos, returnLayerNo=True, cells=subdomain.cells)
+        # get layers 1, .., depth
+        localCellNos = np.concatenate(localCellNos)
+        # add cells in layers 1,..,depth to overlap
+        vMM.overlapCellNos[subdomainNo] = localCellNos
+        # cells in the interior overlap, in overlap indices
+        cellsToSend[subdomainNo] = vMM.translateLocal2Overlap(localCellNos, subdomainNo)
+        # number of cells in layers 1,..,depth
+        noc = localCellNos.shape[0]
+        # number of vertices in layers 1,..,depth
+        nv = vMM.vertexMaps[subdomainNo].num_vertices
+        # get the local overlap indices and local indices
+        # of the vertices that need to be sent
+        overlapVertexNosToSend = range(vMM.vertexMaps[subdomainNo].num_interface_vertices,
+                                       vMM.vertexMaps[subdomainNo].num_vertices)
+        localVertexNosToSend = [vMM.vertexMaps[subdomainNo].overlap2local[k]
+                                for k in overlapVertexNosToSend]
+
+        # update list of shared vertices
+        for overlapVertexNo in range(vMM.vertexMaps[subdomainNo].num_vertices):
+            localVertexNo = vMM.vertexMaps[subdomainNo].overlap2local[overlapVertexNo]
+            try:
+                sharedVertices[localVertexNo].add(subdomainNo)
+            except KeyError:
+                sharedVertices[localVertexNo] = set([subdomainNo])
+
+        # get the vertices that need to be sent
+        verticesToSend[subdomainNo] = uninitialized((len(localVertexNosToSend), dim), dtype=REAL)
+        k = 0
+        for vertexNo in localVertexNosToSend:
+            for j in range(dim):
+                verticesToSend[subdomainNo][k, j] = vMM.mesh.vertices[vertexNo, j]
+            k += 1
+        # get the boundary information for those vertices
+        boundaryVertexTagsToSend[subdomainNo] = {}
+        boundaryEdgeTagsToSend[subdomainNo] = {}
+        boundaryFaceTagsToSend[subdomainNo] = {}
+        for overlapVertexNo in overlapVertexNosToSend:
+            localVertexNo = vMM.vertexMaps[subdomainNo].overlap2local[overlapVertexNo]
+            vertexTag = boundaryVertexTagLookup[localVertexNo]
+            # is this a vertex on the new interface?
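+            # (tags equal to INVALID_VERTEX_TAG were never set, and
+            #  INTERIOR_NONOVERLAPPING tags belong to the artificial subdomain
+            #  boundary, so neither needs to be communicated)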
+ if vertexTag != INVALID_VERTEX_TAG and vertexTag != INTERIOR_NONOVERLAPPING: + boundaryVertexTagsToSend[subdomainNo][overlapVertexNo] = vertexTag + if dim >= 2: + # find all boundary edges that need to be sent + # for edgeNo in range(subdomain.boundaryEdges.shape[0]): + for edgeNo in boundaryEdgeLookup[localVertexNo]: + edgeTag = subdomain.boundaryEdgeTags[edgeNo] + if edgeTag != INTERIOR_NONOVERLAPPING: + edge = subdomain.boundaryEdges[edgeNo, :] + overlapEdge = vMM.vertexMaps[subdomainNo].translateLocal2Overlap(edge) + boundaryEdgeTagsToSend[subdomainNo][tuple(overlapEdge)] = edgeTag + if dim >= 3: + # find all boundary faces that need to be sent + # for faceNo in range(subdomain.boundaryFaces.shape[0]): + for faceNo in boundaryFaceLookup[localVertexNo]: + faceTag = subdomain.boundaryFaceTags[faceNo] + if faceTag != INTERIOR_NONOVERLAPPING: + face = subdomain.boundaryFaces[faceNo, :] + overlapFace = vMM.vertexMaps[subdomainNo].translateLocal2Overlap(face) + boundaryFaceTagsToSend[subdomainNo][tuple(overlapFace)] = faceTag + + # send vertices + num_send = verticesToSend[subdomainNo].shape[0] + sendRequests.append(comm.isend(num_send, + dest=subdomainNo, tag=0)) + sendRequests.append(comm.Isend(verticesToSend[subdomainNo], + dest=subdomainNo, tag=1)) + sendRequests.append(comm.isend(boundaryVertexTagsToSend[subdomainNo], + dest=subdomainNo, tag=7)) + sendRequests.append(comm.isend(boundaryEdgeTagsToSend[subdomainNo], + dest=subdomainNo, tag=8)) + sendRequests.append(comm.isend(boundaryFaceTagsToSend[subdomainNo], + dest=subdomainNo, tag=9)) + + # build dict of vertices that are shared with more than one subdomain + sharedToSend[subdomainNo] = {} + for localVertexNo, overlapVertexNo in zip(localVertexNosToSend, + overlapVertexNosToSend): + if localVertexNo in vMM.sharedVertices: + sharedToSend[subdomainNo][overlapVertexNo] = vMM.sharedVertices[localVertexNo] + # send that information + sendRequests.append(comm.isend(sharedToSend[subdomainNo], + dest=subdomainNo, tag=2)) + + # send cells + num_send = cellsToSend[subdomainNo].shape[0] + sendRequests.append(comm.isend(num_send, + dest=subdomainNo, tag=3)) + sendRequests.append(comm.Isend(cellsToSend[subdomainNo], + dest=subdomainNo, tag=4)) + + sendRequests.append(comm.isend((nv, noc), + dest=subdomainNo, tag=5)) + del boundaryVertexTagLookup, boundaryEdgeLookup, boundaryFaceLookup + + # Collect cell numbers that are shared with more than one subdomain + sharedCellsToSend = {subdomainNo: [] + for subdomainNo in interfaces.interfaces} + for cellNo in vMM.sharedCells: + if len(vMM.sharedCells[cellNo]) > 1: + for subdomainNo, subdomainOverlapCellNo in vMM.sharedCells[cellNo]: + for otherSubdomainNo, otherSubdomainOverlapCellNo in vMM.sharedCells[cellNo]: + if subdomainNo != otherSubdomainNo: + sharedCellsToSend[subdomainNo].append((subdomainOverlapCellNo, otherSubdomainNo)) + # for localVertexNo in subdomain.cells[cellNo, :]: + # sharedVertices[localVertexNo].remove(subdomainNo) + + # Send cell numbers that are shared with another subdomain + for subdomainNo in interfaces.interfaces: + sendRequests.append(comm.isend(sharedCellsToSend[subdomainNo], + dest=subdomainNo, tag=6)) + + from copy import deepcopy + sharedVerticesToSend = {subdomainNo: {} for subdomainNo in interfaces.interfaces} + for vertexNo in sharedVertices: + if len(sharedVertices[vertexNo]) > 1: + for subdomainNo in sharedVertices[vertexNo]: + sharedWith = deepcopy(sharedVertices[vertexNo]) + sharedWith.remove(subdomainNo) + overlapVertexNo = 
vMM.vertexMaps[subdomainNo].translateLocal2Overlap(np.array([vertexNo], dtype=INDEX))[0] + sharedVerticesToSend[subdomainNo][overlapVertexNo] = sharedWith + del sharedVertices + for subdomainNo in sharedVerticesToSend: + sendRequests.append(comm.isend(sharedVerticesToSend[subdomainNo], dest=subdomainNo, tag=10)) + + receivedVertexTags = {} + receivedEdgeTags = {} + receivedFaceTags = {} + + allReceivedSharedVertices = {} + + for subdomainNo in interfaces.interfaces: + # receive vertices + num_receive = comm.recv(source=subdomainNo, tag=0) + receivedVertices = uninitialized((num_receive, dim), dtype=REAL) + comm.Recv(receivedVertices, source=subdomainNo, tag=1) + + # receive information about shared vertices + receivedSharedVertices = comm.recv(source=subdomainNo, tag=2) + offset = vMM.vertexMaps[subdomainNo].num_vertices - vMM.vertexMaps[subdomainNo].num_interface_vertices + + # add vertices to vertexMapManager + k = vMM.vertexMaps[subdomainNo].num_interface_vertices + for vertex in receivedVertices: + if k in receivedSharedVertices: + vMM.addVertexShared(vertex, subdomainNo, + receivedSharedVertices[k]) + else: + vMM.addVertex(vertex, subdomainNo) + k += 1 + + # receive boundaryVertexTags + vertexDict = comm.recv(source=subdomainNo, tag=7) + keys = uninitialized((len(vertexDict)), dtype=INDEX) + values = uninitialized((len(vertexDict)), dtype=TAG) + for k, (key, value) in enumerate(vertexDict.items()): + keys[k] = key + values[k] = value + if len(vertexDict) > 0: + # translate receivedVertexTags to refer to local vertices + keys = vMM.vertexMaps[subdomainNo].translateOverlap2Local(keys, offset) + receivedVertexTags.update({key: value for key, value in zip(keys, values)}) + + # receive boundaryEdgeTags + edgeDict = comm.recv(source=subdomainNo, tag=8) + for k, (key, value) in enumerate(edgeDict.items()): + edge = np.array(key, dtype=INDEX) + try: + key = vMM.vertexMaps[subdomainNo].translateOverlap2Local(edge, offset) + if key[0] > key[1]: + key[0], key[1] = key[1], key[0] + receivedEdgeTags[tuple(key)] = value + except KeyError: + pass + + # receive boundaryFaceTags + faceDict = comm.recv(source=subdomainNo, tag=9) + for k, (key, value) in enumerate(faceDict.items()): + face = np.array(key, dtype=INDEX) + try: + key = vMM.vertexMaps[subdomainNo].translateOverlap2Local(face, offset) + sortFace(key[0], key[1], key[2], face) + key = (face[0], face[1], face[2]) + receivedFaceTags[tuple(key)] = value + except KeyError: + pass + + # receive cells + num_receive = comm.recv(source=subdomainNo, tag=3) + receivedCells = uninitialized((num_receive, dim+1), dtype=INDEX) + comm.Recv(receivedCells, source=subdomainNo, tag=4) + + # translate cells to local vertex numbers + cells = vMM.vertexMaps[subdomainNo].translateOverlap2Local(receivedCells, offset) + nv, noc = comm.recv(source=subdomainNo, tag=5) + + # add cells to vertexMapManager + vMM.addCells(cells, subdomainNo, noc) + + # receive shared vertices + receivedSharedVertices = comm.recv(source=subdomainNo, tag=10) + receivedSharedVerticesTranslated = {} + for overlapVertexNo in receivedSharedVertices: + localVertexNo = vMM.vertexMaps[subdomainNo].translateOverlap2Local(np.array([overlapVertexNo], dtype=INDEX), offset=offset)[0] + receivedSharedVerticesTranslated[localVertexNo] = receivedSharedVertices[overlapVertexNo] + allReceivedSharedVertices[subdomainNo] = receivedSharedVerticesTranslated + + # We now have all the vertices and cells that we need to add. + # Process cells that were also sent out to other subdomains. 
We
+    # process information in order of subdomain number to make sure
+    # that all subdomains enter the information in the correct order.
+    for subdomainNo in sorted(interfaces.interfaces):
+        receivedSharedCells = comm.recv(source=subdomainNo, tag=6)
+        vMM.addSharedCells(receivedSharedCells, subdomainNo)
+
+    # We change the local vertex indices by removing duplicates.
+    # From now on, we need to use vertexMap[localVertexNo]
+    vertexMap = vMM.removeDuplicateVertices()
+
+    # adjust vertex tags for removed duplicate vertices
+    receivedVertexTags = {vertexMap[vertexNo]: tag for vertexNo, tag in receivedVertexTags.items()}
+
+    # adjust edge tags for removed duplicate vertices
+    newReceivedEdgeTags = {}
+    for key in receivedEdgeTags:
+        tag = receivedEdgeTags[key]
+        newKey = (vertexMap[key[0]], vertexMap[key[1]])
+        if newKey[0] > newKey[1]:
+            newKey = (newKey[1], newKey[0])
+        newReceivedEdgeTags[newKey] = tag
+    receivedEdgeTags = newReceivedEdgeTags
+
+    # adjust face tags for removed duplicate vertices
+    newReceivedFaceTags = {}
+    face = uninitialized((3), dtype=INDEX)
+    for key in receivedFaceTags:
+        tag = receivedFaceTags[key]
+        sortFace(vertexMap[key[0]], vertexMap[key[1]], vertexMap[key[2]], face)
+        newKey = (face[0], face[1], face[2])
+        newReceivedFaceTags[newKey] = tag
+    receivedFaceTags = newReceivedFaceTags
+
+    allOK = True
+
+    if debug:
+        # Are the vertices we are about to add unique?
+        temp = np.vstack(vMM.newVertices)
+        uniqVec = np.unique(temp, axis=0)
+        if uniqVec.shape[0] != temp.shape[0]:
+            print('Subdomain {} tries to add {} vertices, but only {} are unique.'.format(comm.rank, temp.shape[0], uniqVec.shape[0]))
+            allOK = False
+
+    # append vertices and cells to mesh
+    numberCellsLastLayer = vMM.writeOverlapToMesh()
+
+    if debug:
+        # Have we added vertices that were already present?
+ uniqVec = np.unique(subdomain.vertices, axis=0) + if uniqVec.shape[0] != subdomain.num_vertices: + print('Subdomain {} has {} vertices, but only {} are unique.'.format(comm.rank, subdomain.num_vertices, uniqVec.shape[0])) + allOK = False + + # create mesh overlap objects for all overlaps + overlap = overlapManager(comm) + for subdomainNo in interfaces.interfaces: + overlap.overlaps[subdomainNo] = meshOverlap(vMM.overlapCellNos[subdomainNo], + comm.rank, subdomainNo, + dim) + + if debug: + if comm.rank in [8]: + if np.unique(subdomain.vertices, axis=0).shape[0] < subdomain.num_vertices: + v, inverse, counts = np.unique(subdomain.vertices, axis=0, return_inverse=True, return_counts=True) + idx = [] + for i in range(inverse.shape[0]): + if counts[inverse[i]] > 1: + idx.append(i) + print(v[counts > 1]) + for i in idx: + sharedWith = [] + for n in overlap.overlaps: + ov = overlap.overlaps[n] + for k in range(ov.cells.shape[0]): + for j in range(subdomain.cells.shape[1]): + I = ov.cells[k] + if i == subdomain.cells[I, j]: + sharedWith.append(n) + print(comm.rank, i, subdomain.vertices[i, :], sharedWith) + comm.Barrier() + + assert allOK + + # set boundary vertices and boundary vertex tags + oldBoundaryFaces = subdomain.boundaryFaces + oldBoundaryEdges = subdomain.boundaryEdges + oldBoundaryVertices = subdomain.getBoundaryVerticesByTag() + if subdomain.dim == 1: + newBverticesOut = updateBoundary1D(subdomain.cells[nc:, :], + oldBoundaryVertices) + newBedges = None + newBfaces = None + elif subdomain.dim == 2: + newBverticesOut, newBedges = updateBoundary2D(subdomain.cells[nc:, :], + oldBoundaryEdges, + oldBoundaryVertices) + newBfaces = None + elif subdomain.dim == 3: + newBverticesOut, newBedges, newBfaces = updateBoundary3D(subdomain.cells[nc:, :], + oldBoundaryFaces, + oldBoundaryEdges, + oldBoundaryVertices) + else: + raise NotImplementedError() + + # set boundary vertices and boundary tags + newBvertexTags = INTERIOR*np.ones((len(newBverticesOut)), dtype=TAG) + # update the received boundary vertices + for j in range(len(newBverticesOut)): + try: + newBvertexTags[j] = receivedVertexTags[newBverticesOut[j]] + except KeyError: + pass + subdomain.boundaryVertices = np.hstack((oldBoundaryVertices, + newBverticesOut)) + subdomain.boundaryVertexTags = np.hstack((subdomain.boundaryVertexTags, + newBvertexTags)) + del newBverticesOut, newBvertexTags + + if subdomain.dim >= 2: + # set boundary edges and boundary tags + newBoundaryEdgeTags = INTERIOR*np.ones((len(newBedges)), dtype=TAG) + # update the received boundary edges + for j in range(len(newBedges)): + try: + newBoundaryEdgeTags[j] = receivedEdgeTags[(newBedges[j, 0], + newBedges[j, 1])] + except KeyError: + pass + subdomain.boundaryEdges = np.vstack((oldBoundaryEdges, newBedges)) + subdomain.boundaryEdgeTags = np.hstack((subdomain.boundaryEdgeTags, + newBoundaryEdgeTags)) + del newBedges, newBoundaryEdgeTags + + if subdomain.dim >= 3: + # set boundary faces and boundary tags + newBoundaryFaceTags = INTERIOR*np.ones((len(newBfaces)), dtype=TAG) + # update the received boundary faces + for j in range(len(newBfaces)): + try: + newBoundaryFaceTags[j] = receivedFaceTags[(newBfaces[j, 0], + newBfaces[j, 1], + newBfaces[j, 2])] + except KeyError: + pass + subdomain.boundaryFaces = np.vstack((oldBoundaryFaces, newBfaces)) + subdomain.boundaryFaceTags = np.hstack((subdomain.boundaryFaceTags, + newBoundaryFaceTags)) + del newBfaces, newBoundaryFaceTags + + MPI.Request.Waitall(sendRequests) + + sharedVertices = {} + for subdomainNo in 
allReceivedSharedVertices: + for localVertexNo in allReceivedSharedVertices[subdomainNo]: + localVertexNoTranslated = vertexMap[localVertexNo] + # find vertices in mesh + found = False + i = -1 + j = -1 + for i in range(nc, subdomain.num_cells): + for j in range(dim+1): + if subdomain.cells[i, j] == localVertexNoTranslated: + found = True + break + if found: + break + assert found + + for otherSubdomainNo in allReceivedSharedVertices[subdomainNo][localVertexNo]: + try: + sharedVertices[otherSubdomainNo].add((i, j)) + except KeyError: + sharedVertices[otherSubdomainNo] = set([(i, j)]) + for subdomainNo in sharedVertices: + sharedVertices[subdomainNo] = list(sharedVertices[subdomainNo]) + # sort the vertices by coordinates + key = np.zeros((len(sharedVertices[subdomainNo])), dtype=REAL) + k = 0 + for i, j in sharedVertices[subdomainNo]: + for l in range(dim): + key[k] += subdomain.vertices[subdomain.cells[i, j], l] * 100**l + k += 1 + idx = key.argsort() + vertices = uninitialized((len(sharedVertices[subdomainNo]), 2), dtype=INDEX) + for k in range(len(sharedVertices[subdomainNo])): + i, j = sharedVertices[subdomainNo][idx[k]] + vertices[k, 0] = i + vertices[k, 1] = j + if subdomainNo not in overlap.overlaps: + overlap.overlaps[subdomainNo] = meshOverlap(uninitialized((0), dtype=INDEX), + comm.rank, subdomainNo, + dim) + overlap.overlaps[subdomainNo].vertices = vertices + + return overlap, numberCellsLastLayer + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def getMeshOverlapsWithOtherPartition(INDEX_t dim, + INDEX_t myRank, + MPI.Comm comm, + INDEX_t[::1] localCellNos, + INDEX_t[::1] partitions): + cdef: + INDEX_t numOtherPartitions, localCellNo, globalCellNo, partition + INDEX_t otherPartition + list sharedCells + overlapManager overlaps + list cells + INDEX_t numMyPartitions = comm.size + numOtherPartitions = len(np.unique(partitions)) + sharedCells = [[] for _ in range(numMyPartitions + numOtherPartitions)] + for localCellNo, globalCellNo in enumerate(localCellNos): + partition = partitions[globalCellNo] + sharedCells[partition].append(localCellNo) + overlaps = overlapManager(comm) + for otherPartition, cells in enumerate(sharedCells): + if len(cells)>0: + overlaps.overlaps[otherPartition] = meshOverlap(np.array(cells, dtype=INDEX), myRank, otherPartition, dim) + return overlaps diff --git a/fem/PyNucleus_fem/meshPartitioning.pyx b/fem/PyNucleus_fem/meshPartitioning.pyx new file mode 100644 index 0000000..9360ea5 --- /dev/null +++ b/fem/PyNucleus_fem/meshPartitioning.pyx @@ -0,0 +1,426 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import numpy as np +cimport numpy as np +from PyNucleus_base.myTypes import INDEX, REAL +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t +from PyNucleus_base import uninitialized +from PyNucleus_base.linear_operators cimport LinearOperator, CSR_LinearOperator, sparseGraph +from itertools import chain +from libc.math cimport floor +cimport cython + +include "config.pxi" + + +class PartitionerException(Exception): + pass + + +@cython.initializedcheck(False) +@cython.wraparound(False) +def partition2sparseGraph(const INDEX_t[::1] partition, + const INDEX_t numPartitions): + cdef: + INDEX_t numVertices = partition.shape[0] + np.ndarray[INDEX_t, ndim=1] indptr_mem = np.zeros((numPartitions+1), + dtype=INDEX) + np.ndarray[INDEX_t, ndim=1] indices_mem = uninitialized((numVertices), + dtype=INDEX) + INDEX_t[::1] indptr = indptr_mem, indices = indices_mem + INDEX_t i, pos + for i in range(numVertices): + indptr[partition[i]+1] += 1 + for i in range(1, numPartitions+1): + indptr[i] += indptr[i-1] + for i in range(numVertices): + pos = partition[i] + indices[indptr[pos]] = i + indptr[pos] += 1 + for i in range(numPartitions, 0, -1): + indptr[i] = indptr[i-1] + indptr[0] = 0 + return sparseGraph(indices, indptr, numPartitions, numVertices) + + +class vertexPartitioner(object): + def __init__(self, REAL_t[:, ::1] vertices): + self.vertices = vertices + + def partitionVertices(self, INDEX_t numPartitions): + """ + Split the vertices into numPartitions partitions. + Return a vector that contains the map vertexNo -> partitionNo. + """ + raise PartitionerException("Don't call abstract class.") + + def inversePartitionVertices(self, numPartitions): + """ + Split the vertices into numPartitions partitions. + Return a sparse graph that contains the map partitionNo -> [vertexNo] + """ + part, numPartitions = self.partitionVertices(numPartitions) + return partition2sparseGraph(part, numPartitions) + + +class dofPartitioner(object): + def __init__(self, LinearOperator A=None, dm=None): + if A is not None: + self.A = A + elif dm is not None: + self.dm = dm + self.A = dm.buildSparsityPattern(dm.mesh.cells) + else: + raise NotImplementedError() + + def partitionDofs(self, numPartitions): + raise PartitionerException("Don't call abstract class.") + + def inversePartitionDofs(self, numPartitions): + """ + Split the DoFs into numPartitions partitions. + Return a sparse graph that contains the map partitionNo -> [dofNo] + """ + part, numPartitions = self.partitionDofs(numPartitions) + return partition2sparseGraph(part, numPartitions) + + +class meshPartitioner(object): + def __init__(self, mesh): + self.mesh = mesh + + def partitionVertices(self, numPartitions): + """ + Split the vertices into numPartitions partitions. + Return a vector that contains the map vertexNo -> partitionNo. + """ + raise PartitionerException("Don't call abstract class.") + + def inversePartitionVertices(self, numPartitions): + """ + Split the vertices into numPartitions partitions. + Return a sparse graph that contains the map partitionNo -> [vertexNo] + """ + part, numPartitions = self.partitionVertices(numPartitions) + return partition2sparseGraph(part, numPartitions) + + def partitionCells(self, numPartitions, partition_weights=None): + """ + Split the cells into numPartitions partitions. + Return a vector that contains the map cellNo -> partitionNo.
+ """ + raise PartitionerException("Don't call abstract class.") + + def inversePartitionCells(self, numPartitions): + """ + Split the cells into numPartitions partitions. + Return a sparse graph that contains the map partitionNo -> [cellNo] + """ + part, numPartitions = self.partitionCells(numPartitions) + return partition2sparseGraph(part, numPartitions) + + +class regularVertexPartitioner(vertexPartitioner): + def __init__(self, REAL_t[:, ::1] vertices, partitionedDimensions=None, numPartitionsPerDim=None): + super(regularVertexPartitioner, self).__init__(vertices) + self.partitionedDimensions = partitionedDimensions + self.numPartitionsPerDim = numPartitionsPerDim + self.mins = None + self.maxs = None + + def getBoundingBox(self): + cdef: + REAL_t[::1] mins, maxs + INDEX_t dim = self.vertices.shape[1] + INDEX_t k, j + mins = np.inf * np.ones((dim), dtype=REAL) + maxs = -np.inf * np.ones((dim), dtype=REAL) + for k in range(self.vertices.shape[0]): + for j in range(dim): + mins[j] = min(mins[j], self.vertices[k, j]) + maxs[j] = max(maxs[j], self.vertices[k, j]) + self.mins = mins + self.maxs = maxs + + def balancePartitions(self, INDEX_t numPartitions): + cdef: + INDEX_t dim = self.vertices.shape[1] + + if self.partitionedDimensions is None: + partitionedDimensions = dim + else: + partitionedDimensions = self.partitionedDimensions + + def primes(n): + primfac = [] + d = 2 + while d*d <= n: + while (n % d) == 0: + primfac.append(d) + n //= d + d += 1 + if n > 1: + primfac.append(n) + return primfac + + numPartitionsPerDim = np.ones((dim), dtype=INDEX) + self.getBoundingBox() + extent = np.empty((dim), dtype=REAL) + for j in range(dim): + extent[j] = self.maxs[j]-self.mins[j] + for p in sorted(primes(numPartitions), reverse=True): + q = np.argmin((np.array(numPartitionsPerDim, copy=False)/extent)[:partitionedDimensions]) + numPartitionsPerDim[q] *= p + return numPartitionsPerDim + + def partitionVertices(self, INDEX_t numPartitions, irregular=False): + if irregular: + return self.partitionVerticesIrregular(numPartitions) + + cdef: + INDEX_t[::1] numPartitionsPerDim + INDEX_t i, j, k + INDEX_t dim = self.vertices.shape[1] + REAL_t delta = 1e-5 + REAL_t w + REAL_t[::1] z = uninitialized((dim), dtype=REAL) + REAL_t[::1] mins, maxs + INDEX_t[::1] part = uninitialized((self.vertices.shape[0]), dtype=INDEX) + + if self.numPartitionsPerDim is None: + numPartitionsPerDim = self.balancePartitions(numPartitions) + else: + numPartitionsPerDim = self.numPartitionsPerDim + + numPartitionsTotal = np.prod(numPartitionsPerDim) + if self.mins is None: + self.getBoundingBox() + mins = self.mins + maxs = np.array(self.maxs, copy=True) + for j in range(dim): + maxs[j] += delta + partitionCounter = np.zeros((numPartitionsTotal), dtype=INDEX) + for i in range(self.vertices.shape[0]): + w = 0 + for j in range(dim): + z[j] = floor((self.vertices[i, j]-mins[j])/(maxs[j]-mins[j])*numPartitionsPerDim[j]) + for k in range(j): + z[j] *= numPartitionsPerDim[k] + w += z[j] + q = INDEX(w) + part[i] = q + partitionCounter[q] += 1 + if np.min(partitionCounter) == 0: + raise PartitionerException('Regular partitioner returned empty partitions.
PartitionCounter: {}'.format(np.array(partitionCounter))) + numPartitions = np.unique(part).shape[0] + return part, numPartitions + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + def partitionDim(self, REAL_t[:, ::1] coord, INDEX_t[::1] part, INDEX_t[::1] numPartitionsPerDim, INDEX_t[::1] idx, INDEX_t d=0, INDEX_t offset=0): + cdef: + INDEX_t k, j, i + REAL_t x + INDEX_t dim = self.vertices.shape[1] + REAL_t[::1] c = uninitialized((idx.shape[0]), dtype=REAL) + REAL_t[::1] boundaries + list idx_lists + INDEX_t[::1] idx2 + INDEX_t numPart2 + for k in range(idx.shape[0]): + c[k] = coord[idx[k], d] + boundaries = np.quantile(c, np.linspace(0, 1, numPartitionsPerDim[d]+1)[1:numPartitionsPerDim[d]]) + + if d == dim-1: + for i in range(idx.shape[0]): + k = idx[i] + x = coord[k, d] + for j in range(numPartitionsPerDim[d]-1): + if x < boundaries[j]: + part[k] = offset+j + break + else: + part[k] = offset+numPartitionsPerDim[d]-1 + return numPartitionsPerDim[d] + else: + idx_lists = [[] for _ in range(numPartitionsPerDim[d])] + for k in range(idx.shape[0]): + x = c[k] + for j in range(numPartitionsPerDim[d]-1): + if x < boundaries[j]: + idx_lists[j].append(idx[k]) + break + else: + idx_lists[numPartitionsPerDim[d]-1].append(idx[k]) + for j in range(numPartitionsPerDim[d]): + idx2 = np.array(idx_lists[j], dtype=INDEX) + numPart2 = self.partitionDim(coord, part, numPartitionsPerDim, idx2, d+1, offset) + offset += numPart2 + return offset + + def partitionVerticesIrregular(self, INDEX_t numPartitions): + cdef: + INDEX_t[::1] numPartitionsPerDim + INDEX_t num_vertices = self.vertices.shape[0] + if self.numPartitionsPerDim is None: + numPartitionsPerDim = self.balancePartitions(numPartitions) + else: + numPartitionsPerDim = self.numPartitionsPerDim + + part = uninitialized((num_vertices), dtype=INDEX) + numPart = self.partitionDim(self.vertices, part, numPartitionsPerDim, np.arange(num_vertices, dtype=INDEX)) + return part, numPart + + +class regularMeshPartitioner(meshPartitioner): + def partitionVertices(self, INDEX_t numPartitions, interiorOnly=True, partitionedDimensions=None, partition_weights=None, irregular=False): + if numPartitions > self.mesh.num_vertices: + raise PartitionerException("Cannot partition {} vertices in {} partitions.".format(self.mesh.num_vertices, numPartitions)) + if interiorOnly: + vertices = self.mesh.vertices_as_array + rVP = regularVertexPartitioner(vertices[self.mesh.interiorVertices], + partitionedDimensions=partitionedDimensions, + numPartitionsPerDim=partition_weights) + part, numPartitions= rVP.partitionVertices(numPartitions, irregular=irregular) + else: + rVP = regularVertexPartitioner(self.mesh.vertices_as_array, + partitionedDimensions=partitionedDimensions, + numPartitionsPerDim=partition_weights) + part, numPartitions= rVP.partitionVertices(numPartitions, irregular=irregular) + return part, numPartitions + + def partitionCells(self, numPartitions, partitionedDimensions=None, partition_weights=None): + if numPartitions > self.mesh.num_cells: + raise PartitionerException("Cannot partition {} cells in {} partitions.".format(self.mesh.num_cells, numPartitions)) + rVP = regularVertexPartitioner(self.mesh.getProjectedCenters(), + partitionedDimensions=partitionedDimensions, + numPartitionsPerDim=partition_weights) + part, numPartitions = rVP.partitionVertices(numPartitions) + + return part, numPartitions + + def __call__(self, numPartitions): + return self.inversePartitionVertices(numPartitions) + + def __repr__(self): + 
return 'Regular-Mesh' + +class regularDofPartitioner(dofPartitioner): + def partitionDofs(self, numPartitions, partitionedDimensions=None, partition_weights=None, irregular=False): + assert self.dm is not None + if numPartitions > self.dm.num_dofs: + raise PartitionerException("Cannot partition {} DoFs in {} partitions.".format(self.dm.num_dofs, numPartitions)) + rVP = regularVertexPartitioner(self.dm.getDoFCoordinates(), + partitionedDimensions=partitionedDimensions, + numPartitionsPerDim=partition_weights) + part, numPartitions= rVP.partitionVertices(numPartitions, + irregular=irregular) + return part, numPartitions + + def __repr__(self): + return 'Regular-DoF' + + def __call__(self, numPartitions): + return self.inversePartitionDofs(numPartitions) + + + +IF USE_METIS: + import PyNucleus_metisCy + + class metisDofPartitioner(dofPartitioner): + def partitionDofs(self, numPartitions, ufactor=30): + if numPartitions == self.A.shape[0]: + return np.arange(numPartitions, dtype=INDEX), numPartitions + elif numPartitions > self.A.shape[0]: + raise PartitionerException("Cannot partition {} DoFs in {} partitions.".format(self.A.shape[0], numPartitions)) + elif numPartitions == 1: + return np.zeros((numPartitions), dtype=INDEX), numPartitions + if isinstance(self.A, CSR_LinearOperator): + A = self.A + else: + A = self.A.to_csr() + options = PyNucleus_metisCy.SetDefaultOptions() + options[PyNucleus_metisCy.OPTION_OBJTYPE] = PyNucleus_metisCy.OBJTYPE_VOL + options[PyNucleus_metisCy.OPTION_CONTIG] = 1 + options[PyNucleus_metisCy.OPTION_UFACTOR] = ufactor + partNos, numCuts = PyNucleus_metisCy.PartGraphKway(A.indptr, + A.indices, + numPartitions, + options=options) + numPartitions = np.unique(partNos).shape[0] + return np.array(partNos, dtype=INDEX), numPartitions + + def __repr__(self): + return 'Metis-DoF' + + def __call__(self, numPartitions): + return self.inversePartitionDofs(numPartitions) + + + class metisMeshPartitioner(meshPartitioner): + def partitionVertices(self, numPartitions, interiorOnly=True, ufactor=30): + if numPartitions > self.mesh.num_vertices: + raise PartitionerException("Cannot partition {} vertices in {} partitions.".format(self.mesh.num_vertices, numPartitions)) + if interiorOnly: + raise NotImplementedError() + options = PyNucleus_metisCy.SetDefaultOptions() + options[PyNucleus_metisCy.OPTION_PTYPE] = PyNucleus_metisCy.PTYPE_KWAY + options[PyNucleus_metisCy.OPTION_OBJTYPE] = PyNucleus_metisCy.OBJTYPE_VOL + options[PyNucleus_metisCy.OPTION_CONTIG] = 1 + options[PyNucleus_metisCy.OPTION_UFACTOR] = ufactor + # METIS requires cells as sparse graph + cell_ptr = np.arange(0, (self.mesh.dim+1)*(self.mesh.num_cells+1), + self.mesh.dim+1, dtype=INDEX) + numCells = self.mesh.num_cells + dim = self.mesh.dim + numVertices = dim+1 + self.mesh.cells.resize((numVertices*numCells, )) + cell_part, vertex_part, objval = PyNucleus_metisCy.PartMeshNodal(cell_ptr, + self.mesh.cells, + numPartitions, + options=options) + self.mesh.cells.resize((numCells, numVertices)) + numPartitions = np.unique(cell_part).shape[0] + return vertex_part, numPartitions + + def partitionCells(self, numPartitions, inverse=False, ufactor=30, partition_weights=None): + if numPartitions > self.mesh.num_cells: + raise PartitionerException("Cannot partition {} cells in {} partitions.".format(self.mesh.num_cells, numPartitions)) + elif numPartitions == self.mesh.num_cells: + cell_part = np.arange(numPartitions, dtype=INDEX) + elif numPartitions == 1: + cell_part = np.zeros(self.mesh.num_cells, dtype=INDEX) + else: + 
options = PyNucleus_metisCy.SetDefaultOptions() + options[PyNucleus_metisCy.OPTION_PTYPE] = PyNucleus_metisCy.PTYPE_KWAY + options[PyNucleus_metisCy.OPTION_OBJTYPE] = PyNucleus_metisCy.OBJTYPE_VOL + options[PyNucleus_metisCy.OPTION_CONTIG] = 1 + options[PyNucleus_metisCy.OPTION_UFACTOR] = ufactor + # METIS requires cells as sparse graph + cell_ptr = np.arange(0, (self.mesh.dim+1)*(self.mesh.num_cells+1), + self.mesh.dim+1, dtype=PyNucleus_metisCy.metisCy.idx) + numCells = self.mesh.num_cells + dim = self.mesh.dim + numVertices = dim+1 + cells = self.mesh.cells_as_array + cells.shape = (numVertices*numCells, ) + cell_part, vertex_part, objval = PyNucleus_metisCy.PartMeshDual(cell_ptr, + cells.astype(PyNucleus_metisCy.metisCy.idx), + 2, + numPartitions, + tpwgts=partition_weights, + options=options) + numPartitions = np.unique(cell_part).shape[0] + return cell_part, numPartitions + + def __repr__(self): + return 'Metis-Mesh' diff --git a/fem/PyNucleus_fem/pdeProblems.py b/fem/PyNucleus_fem/pdeProblems.py new file mode 100644 index 0000000..7a32bc6 --- /dev/null +++ b/fem/PyNucleus_fem/pdeProblems.py @@ -0,0 +1,251 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus_base import REAL +from PyNucleus_base.utilsFem import problem +from . functions import complexLambda, wrapRealToComplexFunction, waveFunction, radialIndicator + + +class diffusionProblem(problem): + def setDriverArgs(self, driver): + p = driver.addGroup('problem') + p.add('domain', 'square', acceptedValues=['interval', 'cube', 'standardSimplex3D', 'fichera', 'gradedSquare', 'gradedCube']) + p.add('problem', 'sin', acceptedValues=['reac-sin', 'diffusivity-sin', 'poly', 'fichera', 'cos']) + p.add('noRef', argInterpreter=int) + p.add('element', 'P1', acceptedValues=['P1', 'P2', 'P3']) + p.add('symmetric', False) + p.add('reorder', False) + + def processImpl(self, params): + from . import (rhsFunSin1D, rhsFunSin2D, rhsFunSin3D, + solSin1D, solSin2D, solSin3D, + cos2D, rhsCos2D, + rhsFichera, solFichera, + constant, Lambda, + meshFactory) + element = params['element'] + self.diffusivity = None + self.reaction = None + self.dim = meshFactory.getDim(params['domain']) + if params['domain'] in ('interval', 'unitInterval'): + + if params['noRef'] is None: + self.noRef = {'P1': 15, 'P2': 14, 'P3': 13}[element] + if params['problem'] == 'sin': + self.rhsFun = rhsFunSin1D + self.exactSolution = solSin1D + self.L2ex = 1/2 + self.H10ex = np.pi**2/2 + self.boundaryCond = None + elif params['problem'] == 'reac-sin': + self.rhsFun = Lambda(lambda x: (np.pi**2.0 + 10.)*np.sin(np.pi*x[0])) + self.exactSolution = solSin1D + self.L2ex = 1/2 + self.H10ex = (np.pi**2 + 10.)/2 + self.reaction = 10. 
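+ # Derivation of the reference values above: for u(x) = sin(pi*x) with
+ # reaction coefficient c = 10,
+ #   -u'' + c*u = (pi^2 + 10)*sin(pi*x),
+ #   L2ex  = int_0^1 u^2 dx = 1/2,
+ #   H10ex = int_0^1 ((u')^2 + c*u^2) dx = (pi^2 + 10)/2,
+ # i.e. H10ex stores the energy norm including the reaction term.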
+ self.boundaryCond = None + else: + raise NotImplementedError() + elif params['domain'] in ('square', 'unitSquare', 'gradedSquare'): + if params['noRef'] is None: + self.noRef = {'P1': 9, 'P2': 8, 'P3': 7}[element] + if params['problem'] == 'sin': + self.rhsFun = rhsFunSin2D + self.exactSolution = solSin2D + self.L2ex = 1/4 + self.H10ex = 2*np.pi**2/4 + self.boundaryCond = None + elif params['problem'] == 'cos': + self.rhsFun = rhsCos2D + self.exactSolution = cos2D + self.L2ex = 1/4 + self.H10ex = 2*np.pi**2/4 + self.boundaryCond = cos2D + elif params['problem'] == 'reac-sin': + self.rhsFun = Lambda(lambda x: (2*np.pi**2.0 + 10.)*np.sin(np.pi*x[0])*np.sin(np.pi*x[1])) + self.exactSolution = solSin2D + self.L2ex = 1/4 + self.H10ex = (2*np.pi**2 + 10.)/4 + self.boundaryCond = None + self.reaction = 10. + elif params['problem'] == 'diffusivity-sin': + self.diffusivity = Lambda(lambda x: np.exp(np.sin(np.pi*x[0]) * + np.sin(np.pi*x[1]))) + self.rhsFun = Lambda(lambda x: -np.pi**2 * + np.exp(np.sin(np.pi*x[0])*np.sin(np.pi*x[1])) * + (np.sin(np.pi*x[0])**2 * np.cos(np.pi*x[1])**2 + + np.cos(np.pi*x[0])**2 * np.sin(np.pi*x[1])**2 - + 2*np.sin(np.pi*x[0]) * np.sin(np.pi*x[1]))) + self.exactSolution = solSin2D + self.L2ex = 1/4 + self.H10ex = np.nan + self.boundaryCond = None + elif params['problem'] == 'poly': + self.rhsFun = Lambda(lambda x: 32*x[0]*(1-x[0])+32*x[1]*(1-x[1])) + self.exactSolution = Lambda(lambda x: 16*x[0]*x[1]*(1-x[0])*(1-x[1])) + self.L2ex = 256/900 + self.H10ex = 256/45 + self.boundaryCond = None + elif params['problem'] == 'variable-reac-sin': + self.rhsFun = Lambda(lambda x: (2*np.pi**2.0 + 10.)*np.sin(np.pi*x[0])*np.sin(np.pi*x[1])) + self.exactSolution = solSin2D + self.L2ex = 1/4 + self.H10ex = (2*np.pi**2 + 10.)/4 + self.boundaryCond = None + self.reaction = Lambda(lambda x: 0. if x[0] < 0.5 else 2000.) + else: + raise NotImplementedError() + elif params['domain'] == 'graded_disc': + if params['noRef'] is None: + self.noRef = {'P1': 5, 'P2': 4, 'P3': 3}[element] + if params['problem'] == 'constant': + self.rhsFun = constant(1.) + self.exactSolution = None + self.L2ex = None + self.H10ex = None + self.boundaryCond = None + else: + raise NotImplementedError() + elif params['domain'] in ('cube', 'gradedCube'): + if params['noRef'] is None: + self.noRef = {'P1': 6, 'P2': 5, 'P3': 4}[element] + if params['problem'] == 'sin': + self.rhsFun = rhsFunSin3D + self.exactSolution = solSin3D + self.L2ex = 1/8 + self.H10ex = 3*np.pi**2/8 + self.boundaryCond = None + elif params['problem'] == 'variable-reac-sin': + self.rhsFun = constant(1.) + self.exactSolution = None + self.L2ex = np.nan + self.H10ex = np.nan + self.boundaryCond = None + self.reaction = Lambda(lambda x: 0. if x[0] < 0.5 else 2000.) 
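+ # No exact solution is provided for the piecewise-constant reaction
+ # coefficient, hence exactSolution is None and L2ex/H10ex are nan.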
+ else: + raise NotImplementedError() + elif params['domain'] == 'standardSimplex3D': + if params['noRef'] is None: + self.noRef = {'P1': 2}[element] + if params['problem'] == 'poly': + self.rhsFun = Lambda(lambda x: 2*(x[1]*x[2]+x[0]*x[2]+x[0]*x[1])) + self.L2ex = 1/8 + self.H10ex = 3*np.pi**2/8 + self.boundaryCond = None + else: + raise NotImplementedError() + elif params['domain'] == 'fichera': + if params['noRef'] is None: + self.noRef = {'P1': 5, 'P2': 4}[element] + if params['problem'] == 'fichera': + self.rhsFun = rhsFichera + self.exactSolution = solFichera + self.L2ex = None + # H10ex = 7/8**9.52031/4 + self.H10ex = None + self.boundaryCond = solFichera + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + +class helmholtzProblem(problem): + def setDriverArgs(self, driver): + p = driver.addGroup('problem') + p.add('domain', acceptedValues=['square', 'interval', 'cube']) + p.add('problem', acceptedValues=['wave', 'greens']) + p.add('element', 'P1', acceptedValues=['P1']) + p.add('frequency', 40.) + p.add('symmetric', False) + p.add('reorder', False) + + def processImpl(self, params): + from . import meshFactory + self.dim = meshFactory.getDim(params['domain']) + if params['domain'] == 'interval': + self.noRef = 7 + + def n(x): + if x[0] == 0: + return np.array([-1.], dtype=REAL) + elif x[0] == 1: + return np.array([1.], dtype=REAL) + else: + raise NotImplementedError() + + if params['problem'] == 'wave': + xi = np.array([0.5], dtype=REAL) + self.solEx = complexLambda(lambda x: np.exp(1j*np.vdot(xi, x))) + self.rhs = complexLambda(lambda x: (np.vdot(xi, xi)-self.frequency**2) * self.solEx(x)) + self.boundaryCond = complexLambda(lambda x: 1j*(np.vdot(xi, n(x))+self.frequency) * self.solEx(x)) + elif params['problem'] == 'greens': + self.rhs = wrapRealToComplexFunction(radialIndicator(1e-2, np.array([0.5]))) + self.solEx = None + self.boundaryCond = None + else: + raise NotImplementedError(params['problem']) + elif params['domain'] == 'square': + self.noRef = 8 + + def n(x): + if x[1] == 0: + return np.array([0., -1.], dtype=REAL) + elif x[1] == 1.: + return np.array([0., 1.], dtype=REAL) + elif x[0] == 0.: + return np.array([-1., 0.], dtype=REAL) + elif x[0] == 1.: + return np.array([1., 0.], dtype=REAL) + else: + raise NotImplementedError() + + if params['problem'] == 'wave': + xi = np.array([0.5, 0.25], dtype=REAL) + self.solEx = waveFunction(xi) + self.rhs = (np.vdot(xi, xi)-self.frequency**2) * self.solEx + self.boundaryCond = complexLambda(lambda x: 1j*(np.vdot(xi, n(x))+self.frequency) * self.solEx(x)) + elif params['problem'] == 'greens': + self.rhs = wrapRealToComplexFunction(radialIndicator(1e-2, np.array([0.5, 0.5]))) + self.solEx = None + self.boundaryCond = None + else: + raise NotImplementedError(params['problem']) + elif params['domain'] == 'cube': + self.noRef = 5 + + def n(x): + if x[2] == 0: + return np.array([0., 0., -1.], dtype=REAL) + elif x[2] == 1.: + return np.array([0., 0., 1.], dtype=REAL) + elif x[1] == 0: + return np.array([0., -1., 0.], dtype=REAL) + elif x[1] == 1.: + return np.array([0., 1., 0.], dtype=REAL) + elif x[0] == 0.: + return np.array([-1., 0., 0.], dtype=REAL) + elif x[0] == 1.: + return np.array([1., 0., 0.], dtype=REAL) + else: + raise NotImplementedError() + + if params['problem'] == 'wave': + xi = np.array([0.75, 0.5, 0.25], dtype=REAL) + self.solEx = waveFunction(xi) + self.rhs = (np.vdot(xi, xi)-self.frequency**2) * self.solEx + self.boundaryCond = complexLambda(lambda x: 1j*(np.vdot(xi, n(x))+self.frequency) * 
self.solEx(x)) + elif params['problem'] == 'greens': + self.rhs = wrapRealToComplexFunction(radialIndicator(1e-1, np.array([0.5, 0.5, 0.5]))) + self.solEx = None + self.boundaryCond = None + else: + raise NotImplementedError(params['problem']) + else: + raise NotImplementedError(params['domain']) diff --git a/fem/PyNucleus_fem/quadrature.pxd b/fem/PyNucleus_fem/quadrature.pxd new file mode 100644 index 0000000..a0a71be --- /dev/null +++ b/fem/PyNucleus_fem/quadrature.pxd @@ -0,0 +1,165 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from . femCy cimport volume_t +from . functions cimport function, vectorFunction, complexFunction +from . meshCy cimport (vectorProduct, + volume0D, + volume1D, volume1Dnew, + volume1D_in_2D, + volume2Dnew, + volume3D, volume3Dnew, + volume2D_in_3Dnew as volume2D_in_3D, + meshBase) +cimport numpy as np +from libc.math cimport sqrt + +include "config.pxi" + + +cdef class quadratureRule: + cdef: + public REAL_t[:, ::1] nodes # num_bary x num_nodes + public REAL_t[::1] weights + readonly INDEX_t num_nodes + readonly INDEX_t dim + readonly INDEX_t manifold_dim + + cdef inline REAL_t eval(self, + const REAL_t[::1] fun_vals, + const REAL_t vol) + + +cdef class simplexQuadratureRule(quadratureRule): + cdef: + volume_t volume + REAL_t[:, ::1] span + REAL_t[::1] tempVec + public list orders + cdef inline void nodesInGlobalCoords(self, + const REAL_t[:, ::1] simplexVertices, + const REAL_t[:, ::1] coords) + cdef inline void nodesInGlobalCoordsReorderer(self, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] coords, + const INDEX_t[::1] idx) + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] simplexVertices, + REAL_t[::1] fun_vals) + cpdef void evalVectorFun(self, + vectorFunction fun, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] fun_vals) + cpdef void evalComplexFun(self, + complexFunction fun, + const REAL_t[:, ::1] simplexVertices, + COMPLEX_t[::1] fun_vals) + cdef REAL_t getSimplexVolume(self, + const REAL_t[:, ::1] simplexVertices) + + +cdef class transformQuadratureRule(simplexQuadratureRule): + cdef: + simplexQuadratureRule qr + REAL_t[:, ::1] A + REAL_t[::1] b + cpdef void setBaryTransform(self, REAL_t[:, ::1] A, REAL_t[::1] b) + cdef void compute(self) + + +cdef class Gauss1D(simplexQuadratureRule): + cdef public INDEX_t order + + +cdef class Gauss2D(simplexQuadratureRule): + cdef public INDEX_t order + + +cdef class Gauss3D(simplexQuadratureRule): + cdef public INDEX_t order + + +cdef class doubleSimplexQuadratureRule(quadratureRule): + cdef: + public simplexQuadratureRule rule1, rule2 + + # cdef inline REAL_t eval(self, const REAL_t[::1] fun_vals, const REAL_t vol) + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] simplexVertices1, + const REAL_t[:, ::1] simplexVertices2, + REAL_t[::1] fun_vals) + + +cdef: + REAL_t[:, ::1] quad_point2D_order2 + REAL_t[::1] weights2D_order2 + + REAL_t a1 = (6.0-sqrt(15.0))/21.0, a2 = (6.0+sqrt(15.0))/21.0 + REAL_t c1 = a1*(2.0*a1-1.0), c2 = a2*(2.0*a2-1.0) + REAL_t d1 = 
(4.0*a1-1.0)*(2.0*a1-1.0), d2 = (4.0*a2-1.0)*(2.0*a2-1.0) + REAL_t e1 = 4.0*a1**2, e2 = 4.0*a2**2 + REAL_t f1 = 4.0*a1*(1.0-2.0*a1), f2 = 4.0*a2*(1.0-2.0*a2) + REAL_t w1 = (155.0-sqrt(15.0))/1200.0, w2 = (155.0+sqrt(15.0))/1200.0 + REAL_t[:, ::1] quad_point2D_order5 + REAL_t[::1] weights2D_order5 + + REAL_t[:, ::1] quad_point3D_order3 + REAL_t[::1] weights3D_order3 + + +cdef class quadQuadratureRule(quadratureRule): + cdef: + volume_t volume + public list orders + # cpdef REAL_t eval(self, + # REAL_t[::1] fun_vals, + # REAL_t vol) + cpdef void nodesInGlobalCoords(self, + const REAL_t[:, ::1] quadVertices, + REAL_t[:, ::1] coords) + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] quadVertices, + REAL_t[::1] fun_vals) + cpdef REAL_t getQuadVolume(self, + const REAL_t[:, ::1] quadVertices) + + +cdef class Gauss(quadQuadratureRule): + cdef public INDEX_t order + + +cdef class GaussJacobi(quadQuadratureRule): + cdef public INDEX_t order + + +cdef class simplexDuffyTransformation(simplexQuadratureRule): + pass + + +cdef class simplexXiaoGimbutas(simplexQuadratureRule): + cdef public INDEX_t order + + +cdef class sphericalQuadRule: + cdef: + public REAL_t[:, ::1] vertexOffsets + public REAL_t[::1] weights + readonly INDEX_t num_nodes + + +cdef class sphericalQuadRule1D(sphericalQuadRule): + pass + + +cdef class sphericalQuadRule2D(sphericalQuadRule): + pass + diff --git a/fem/PyNucleus_fem/quadrature.pyx b/fem/PyNucleus_fem/quadrature.pyx new file mode 100644 index 0000000..4d29bfe --- /dev/null +++ b/fem/PyNucleus_fem/quadrature.pyx @@ -0,0 +1,617 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX, BOOL +from PyNucleus_base import uninitialized +from PyNucleus_base.blas cimport uninitializedREAL +from libc.math cimport sin, cos, M_PI as pi +import numpy as np +cimport cython +from modepy import XiaoGimbutasSimplexQuadrature +from modepy.tools import unit_to_barycentric + +include "config.pxi" + + +cdef class quadratureRule: + def __init__(self, + REAL_t[:, ::1] nodes, # in barycentric coordinates + REAL_t[::1] weights, + INDEX_t dim, + manifold_dim=None): + assert nodes.shape[1] == weights.shape[0] + self.dim = dim + self.nodes = nodes + self.num_nodes = self.nodes.shape[1] + if manifold_dim is None: + self.manifold_dim = self.dim + else: + self.manifold_dim = manifold_dim + self.weights = weights + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cdef inline REAL_t eval(self, + const REAL_t[::1] fun_vals, + const REAL_t vol): + cdef: + INDEX_t i + REAL_t I + I = 0. 
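+ # Weighted sum over the quadrature nodes. The weights are normalized on
+ # the reference element (they sum to 1), so the integral is recovered by
+ # scaling with the volume of the physical element:
+ #   I = vol * sum_i w_i * f(x_i)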
+ for i in range(self.num_nodes): + I += self.weights[i]*fun_vals[i] + return I*vol + + +cdef class simplexQuadratureRule(quadratureRule): + def __init__(self, + REAL_t[:, ::1] nodes, # in barycentric coordinates + REAL_t[::1] weights, + INDEX_t dim, + manifold_dim=None): + quadratureRule.__init__(self, nodes, weights, dim, manifold_dim) + if self.manifold_dim == 0: + self.volume = volume0D + elif self.dim == 1 and self.manifold_dim == 1: + self.volume = volume1Dnew + elif self.dim == 2 and self.manifold_dim == 1: + self.volume = volume1D_in_2D + elif self.dim == 2 and self.manifold_dim == 2: + self.volume = volume2Dnew + elif self.dim == 3 and self.manifold_dim == 3: + self.volume = volume3D + elif self.dim == 3 and self.manifold_dim == 2: + self.volume = volume2D_in_3D + else: + raise NotImplementedError('dim={}'.format(self.dim)) + self.span = uninitialized((self.manifold_dim, self.dim), dtype=REAL) + self.tempVec = uninitializedREAL((self.dim, )) + + def __add__(self, simplexQuadratureRule other): + assert self.dim == other.dim + assert self.manifold_dim == other.manifold_dim + return simplexQuadratureRule(np.hstack((self.nodes, other.nodes)), + np.concatenate((self.weights, other.weights)), + self.dim, self.manifold_dim) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cdef inline void nodesInGlobalCoords(self, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] coords): + cdef: + INDEX_t i, k, m + REAL_t temp + coords[:] = 0. + for k in range(self.manifold_dim+1): + for i in range(self.num_nodes): + temp = self.nodes[k, i] + for m in range(self.dim): + coords[i, m] += temp * simplexVertices[k, m] + + def nodesInGlobalCoords_py(self, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] coords): + self.nodesInGlobalCoords(simplexVertices, coords) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cdef inline void nodesInGlobalCoordsReorderer(self, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] coords, + const INDEX_t[::1] idx): + cdef: + INDEX_t i, k, m, kk + REAL_t temp + coords[:] = 0. + for k in range(self.manifold_dim+1): + for i in range(self.num_nodes): + temp = self.nodes[k, i] + kk = idx[k] + for m in range(self.dim): + coords[i, m] += temp * simplexVertices[kk, m] + + def getAllNodesInGlobalCoords(self, meshBase mesh): + cdef: + REAL_t[:, ::1] simplex = uninitializedREAL((mesh.dim+1, mesh.dim)) + REAL_t[:, ::1] coords = uninitializedREAL((mesh.num_cells*self.num_nodes, mesh.dim)) + for cellNo in range(mesh.num_cells): + mesh.getSimplex(cellNo, simplex) + self.nodesInGlobalCoords(simplex, coords[self.num_nodes*cellNo:self.num_nodes*(cellNo+1), :]) + return np.array(coords, copy=False) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] simplexVertices, + REAL_t[::1] fun_vals): + cdef: + INDEX_t i, k, m + for i in range(self.num_nodes): + self.tempVec[:] = 0. + for k in range(self.manifold_dim+1): + for m in range(self.dim): + self.tempVec[m] += self.nodes[k, i] * simplexVertices[k, m] + fun_vals[i] = fun.eval(self.tempVec) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef void evalVectorFun(self, + vectorFunction fun, + const REAL_t[:, ::1] simplexVertices, + REAL_t[:, ::1] fun_vals): + cdef: + INDEX_t i, k, m + for i in range(self.num_nodes): + self.tempVec[:] = 0. 
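+ # Map quadrature node i to global coordinates via the barycentric
+ # combination x = sum_k nodes[k, i] * simplexVertices[k], as in evalFun,
+ # then evaluate the vector-valued function in place.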
+ for k in range(self.manifold_dim+1): + for m in range(self.dim): + self.tempVec[m] += self.nodes[k, i] * simplexVertices[k, m] + fun.eval(self.tempVec, fun_vals[i, :]) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef void evalComplexFun(self, + complexFunction fun, + const REAL_t[:, ::1] simplexVertices, + COMPLEX_t[::1] fun_vals): + cdef: + INDEX_t i, k, m + for i in range(self.num_nodes): + self.tempVec[:] = 0. + for k in range(self.manifold_dim+1): + for m in range(self.dim): + self.tempVec[m] += self.nodes[k, i] * simplexVertices[k, m] + fun_vals[i] = fun.eval(self.tempVec) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cdef REAL_t getSimplexVolume(self, + const REAL_t[:, ::1] simplexVertices): + cdef INDEX_t k, j + for k in range(self.manifold_dim): + for j in range(self.dim): + self.span[k, j] = simplexVertices[k+1, j]-simplexVertices[0, j] + return self.volume(self.span) + + def getSimplexVolume_py(self, + const REAL_t[:, ::1] simplexVertices): + return self.getSimplexVolume(simplexVertices) + + def integrate(self, + function fun, + const REAL_t[:, ::1] simplexVertices): + cdef: + REAL_t vol + REAL_t[::1] fun_vals = uninitializedREAL((self.num_nodes)) + self.evalFun(fun, simplexVertices, fun_vals) + vol = self.getSimplexVolume(simplexVertices) + return self.eval(fun_vals, vol) + + +cdef class transformQuadratureRule(simplexQuadratureRule): + def __init__(self, simplexQuadratureRule qr): + nodes = uninitializedREAL((qr.nodes.shape[0], + qr.nodes.shape[1])) + weights = qr.weights + super(transformQuadratureRule, self).__init__(nodes, weights, qr.dim, qr.manifold_dim) + self.qr = qr + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef void setBaryTransform(self, REAL_t[:, ::1] A, REAL_t[::1] b): + self.A = A + self.b = b + self.compute() + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cdef void compute(self): + cdef: + INDEX_t k, j, i + for k in range(self.manifold_dim+1): + for i in range(self.num_nodes): + self.nodes[k, i] = self.b[k] + for k in range(self.manifold_dim+1): + for j in range(self.manifold_dim+1): + for i in range(self.num_nodes): + self.nodes[k, i] += self.A[k, j]*self.qr.nodes[j, i] + + +cdef class doubleSimplexQuadratureRule(quadratureRule): + def __init__(self, + simplexQuadratureRule rule1, + simplexQuadratureRule rule2): + cdef: + INDEX_t i, j, k + + self.rule1 = rule1 + self.rule2 = rule2 + nodes = uninitialized((0, rule1.num_nodes*rule2.num_nodes), dtype=REAL) + weights = uninitializedREAL((rule1.num_nodes*rule2.num_nodes, )) + k = 0 + for i in range(rule1.num_nodes): + for j in range(rule2.num_nodes): + weights[k] = rule1.weights[i]*rule2.weights[j] + k += 1 + quadratureRule.__init__(self, + nodes, weights, + rule1.dim+rule2.dim, + rule1.manifold_dim+rule2.manifold_dim) + + # @cython.boundscheck(False) + # @cython.wraparound(False) + # @cython.initializedcheck(False) + # cdef inline REAL_t eval(self, + # const REAL_t[::1] fun_vals, + # const REAL_t vol): + # cdef: + # INDEX_t i, j, k = 0 + # REAL_t I = 0. 
+ # for i in range(self.rule1.num_nodes): + # for j in range(self.rule2.num_nodes): + # I += self.rule1.weights[i]*self.rule2.weights[j]*fun_vals[k] + # k += 1 + # return I*vol + + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] simplexVertices1, + const REAL_t[:, ::1] simplexVertices2, + REAL_t[::1] fun_vals): + cdef: + INDEX_t i, j, k, m, l + INDEX_t dim1 = self.rule1.dim + INDEX_t dim2 = self.rule2.dim + REAL_t[::1] x = uninitializedREAL((dim1+dim2)) + l = 0 + for i in range(self.rule1.num_nodes): + for j in range(self.rule2.num_nodes): + x[:] = 0. + for k in range(dim1+1): + for m in range(dim1): + x[m] += self.rule1.nodes[k, i] * simplexVertices1[k, m] + for k in range(dim2+1): + for m in range(dim2): + x[dim1+m] += self.rule2.nodes[k, j] * simplexVertices2[k, m] + fun_vals[l] = fun(x) + l += 1 + + def integrate(self, + function fun, + const REAL_t[:, ::1] simplexVertices1, + const REAL_t[:, ::1] simplexVertices2): + cdef: + REAL_t vol + REAL_t[::1] fun_vals = uninitializedREAL((self.rule1.num_nodes*self.rule2.num_nodes)) + + self.evalFun(fun, simplexVertices1, simplexVertices2, fun_vals) + vol = self.rule1.getSimplexVolume(simplexVertices1)*self.rule2.getSimplexVolume(simplexVertices2) + return self.eval(fun_vals, vol) + + +quad_point2D_order2 = np.array([[0.5, 0.0, 0.5], + [0.5, 0.5, 0.0], + [0.0, 0.5, 0.5]], dtype=REAL) +weights2D_order2 = np.array([1.0/3.0, 1.0/3.0, 1.0/3.0], dtype=REAL) + +cdef: + REAL_t a1 = (6.0-sqrt(15.0))/21.0, a2 = (6.0+sqrt(15.0))/21.0 + REAL_t c1 = a1*(2.0*a1-1.0), c2 = a2*(2.0*a2-1.0) + REAL_t d1 = (4.0*a1-1.0)*(2.0*a1-1.0), d2 = (4.0*a2-1.0)*(2.0*a2-1.0) + REAL_t e1 = 4.0*a1**2, e2 = 4.0*a2**2 + REAL_t f1 = 4.0*a1*(1.0-2.0*a1), f2 = 4.0*a2*(1.0-2.0*a2) + REAL_t w1 = (155.0-sqrt(15.0))/1200.0, w2 = (155.0+sqrt(15.0))/1200.0 +quad_point2D_order5 = np.array([[1.0/3.0, a1, a1, 1.0-2.0*a1, a2, a2, 1.0-2.0*a2], + [1.0/3.0, a1, 1.0-2.0*a1, a1, a2, 1.0-2.0*a2, a2], + [1.0/3.0, 1.0-2.0*a1, a1, a1, 1.0-2.0*a2, a2, a2]], dtype=REAL) +weights2D_order5 = np.array([9.0/40.0, w1, w1, w1, w2, w2, w2], dtype=REAL) + +quad_point3D_order3 = np.array([[0.25, 0.5, 1.0/6.0, 1.0/6.0, 1.0/6.0], + [0.25, 1.0/6.0, 0.5, 1.0/6.0, 1.0/6.0], + [0.25, 1.0/6.0, 1.0/6.0, 0.5, 1.0/6.0], + [0.25, 1.0/6.0, 1.0/6.0, 1.0/6.0, 0.5 ]], dtype=REAL) +weights3D_order3 = np.array([-0.8, 9.0/20.0, 9.0/20.0, 9.0/20.0, 9.0/20.0], dtype=REAL) + + +cdef class Gauss1D(simplexQuadratureRule): + def __init__(self, order): + k = (order+1)//2 + if 2*k-1 != order: + raise NotImplementedError() + from scipy.special import p_roots + nodesT, weights = p_roots(k) + nodes = uninitializedREAL((2, nodesT.shape[0])) + for i in range(nodesT.shape[0]): + nodes[0, i] = (nodesT[i]+1.)/2. + nodes[1, i] = 1. - nodes[0, i] + weights /= 2. 
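+ # p_roots returns Gauss-Legendre nodes xi in [-1, 1] with weights that
+ # sum to 2; the map lambda_0 = (xi+1)/2, lambda_1 = 1 - lambda_0 converts
+ # the nodes to barycentric coordinates on the unit interval, and halving
+ # the weights normalizes them to sum to 1.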
+ super(Gauss1D, self).__init__(nodes, weights, 1) + self.order = order + + +cdef class Gauss2D(simplexQuadratureRule): + def __init__(self, INDEX_t order): + if order == 2: + super(Gauss2D, self).__init__(quad_point2D_order2, + weights2D_order2, + 2) + elif order == 5: + super(Gauss2D, self).__init__(quad_point2D_order5, + weights2D_order5, + 2) + else: + raise NotImplementedError() + self.order = order + + +cdef class Gauss3D(simplexQuadratureRule): + def __init__(self, INDEX_t order): + if order == 3: + super(Gauss3D, self).__init__(quad_point3D_order3, + weights3D_order3, + 3) + else: + raise NotImplementedError() + self.order = order + + +cdef class quadQuadratureRule(quadratureRule): + def __init__(self, REAL_t[:, ::1] nodes, REAL_t[::1] weights): + quadratureRule.__init__(self, + nodes, weights, + nodes.shape[0]) + if self.dim == 1: + self.volume = volume1Dnew + elif self.dim == 2: + self.volume = volume2Dnew + elif self.dim == 3: + self.volume = volume3D + else: + # raise NotImplementedError() + pass + + def __add__(self, quadQuadratureRule other): + assert self.dim == other.dim + return quadQuadratureRule(np.hstack((self.nodes, other.nodes)), + np.concatenate((self.weights, other.weights))) + + # @cython.boundscheck(False) + # @cython.initializedcheck(False) + # @cython.wraparound(False) + # cpdef REAL_t eval(self, + # REAL_t[::1] fun_vals, + # REAL_t vol): + # cdef: + # INDEX_t i + # REAL_t I + # I = 0. + # for i in range(self.num_nodes): + # I += self.weights[i]*fun_vals[i] + # return I*vol + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef void nodesInGlobalCoords(self, + const REAL_t[:, ::1] quadVertices, + REAL_t[:, ::1] coords): + cdef: + INDEX_t i, k, m + coords[:] = 0. + for i in range(self.num_nodes): + for k in range(self.dim): + for m in range(self.dim): + coords[i, m] += quadVertices[0, m] + self.nodes[i, k] * (quadVertices[k+1, m]-quadVertices[0, m]) + + @cython.boundscheck(False) + @cython.initializedcheck(False) + @cython.wraparound(False) + cpdef void evalFun(self, + function fun, + const REAL_t[:, ::1] quadVertices, + REAL_t[::1] fun_vals): + cdef: + INDEX_t i, k, m + REAL_t[::1] x = uninitializedREAL((self.dim, )) + for i in range(self.num_nodes): + x[:] = 0. + for k in range(self.dim): + for m in range(self.dim): + x[m] += quadVertices[0, m] + self.nodes[i, k] * (quadVertices[k+1, m]-quadVertices[0, m]) + fun_vals[i] = fun(x) + + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.initializedcheck(False) + cpdef REAL_t getQuadVolume(self, + const REAL_t[:, ::1] quadVertices): + cdef: + INDEX_t k, j + REAL_t[:, ::1] span = uninitializedREAL((self.dim, self.dim)) + REAL_t vol + for k in range(self.dim): + for j in range(self.dim): + span[k, j] = quadVertices[k+1, j]-quadVertices[0, j] + vol = self.volume(span) + if self.dim == 2: + vol *= 2. + elif self.dim == 3: + vol *= 6. 
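+ # The volume kernels compute simplex volumes, i.e. |det(span)| / dim!,
+ # so the factor 2 in 2D and 6 in 3D recovers the volume of the
+ # parallelotope spanned by the edge vectors of the quad element.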
+ return vol + + def integrate(self, + function fun, + const REAL_t[:, ::1] quadVertices): + cdef: + INDEX_t i, k, m + REAL_t[::1] fun_vals = uninitializedREAL((self.num_nodes, )) + self.evalFun(fun, quadVertices, fun_vals) + vol = self.getQuadVolume(quadVertices) + return self.eval(fun_vals, vol) + + +from itertools import product + +cdef class Gauss(quadQuadratureRule): + def __init__(self, order, dim): + k = (order+1)//2 + if 2*k-1 != order: + print('Incrementing order in Gauss quadrature rule, only odd orders are available.') + k += 1 + from scipy.special import p_roots + nodes1D, weights1D = p_roots(k) + nodes = uninitializedREAL((dim, nodes1D.shape[0]**dim)) + weights = uninitializedREAL((nodes1D.shape[0]**dim)) + nodes1D = (nodes1D+1.)/2. + weights1D /= 2. + k = 0 + for idx in product(*([range(nodes1D.shape[0])]*dim)): + for m in range(dim): + nodes[m, k] = nodes1D[idx[m]] + weights[k] = np.prod([weights1D[h] for h in idx]) + k += 1 + super(Gauss, self).__init__(nodes, weights) + self.order = order + + +cdef class GaussJacobi(quadQuadratureRule): + def __init__(self, order_weight_exponents): + from scipy.special import js_roots + nodes1D = [] + weights1D = [] + dim = len(order_weight_exponents) + self.orders = [] + for order, alpha, beta in order_weight_exponents: + k = (order+1)//2 + if 2*k-1 != order: + # print('Incrementing order in Gauss-Jacobi quadrature rule, only odd orders are available.') + k += 1 + self.orders.append(2*k-1) + alpha = alpha+1 + beta = beta+alpha + n1D, w1D = js_roots(k, beta, alpha) + nodes1D.append(n1D) + weights1D.append(w1D) + nodes = uninitializedREAL((dim, np.prod([n1D.shape[0] for n1D in nodes1D]))) + weights = np.ones((nodes.shape[1]), dtype=REAL) + k = 0 + for idx in product(*([range(n1D.shape[0]) for n1D in nodes1D])): + for m in range(dim): + nodes[m, k] = nodes1D[m][idx[m]] + weights[k] *= weights1D[m][idx[m]] + k += 1 + super(GaussJacobi, self).__init__(nodes, weights) + self.order = order + + +cdef class simplexDuffyTransformation(simplexQuadratureRule): + def __init__(self, order, dim, manifold_dim=None): + cdef: + list orders + INDEX_t i, j, k + if manifold_dim is None: + manifold_dim = dim + if manifold_dim == 0: + nodes = np.ones((1, 1), dtype=REAL) + weights = np.ones((1), dtype=REAL) + super(simplexDuffyTransformation, self).__init__(nodes, weights, dim, manifold_dim) + self.orders = [100] + return + weight_exponents = [(order+manifold_dim-d-1, 0, manifold_dim-d-1) for d in range(manifold_dim)] + qr = GaussJacobi(weight_exponents) + orders = qr.orders + nodes = uninitializedREAL((manifold_dim+1, qr.num_nodes)) + for i in range(qr.num_nodes): + for j in range(manifold_dim-1, -1, -1): + nodes[j+1, i] = qr.nodes[j, i] + for k in range(j): + nodes[j+1, i] *= (1.-qr.nodes[k, i]) + nodes[0, i] = 1. + for j in range(manifold_dim): + nodes[0, i] -= nodes[j+1, i] + # adjust for volume of reference element + if manifold_dim == 1: + pass + elif manifold_dim == 2: + for i in range(qr.num_nodes): + qr.weights[i] *= 2. + elif manifold_dim == 3: + for i in range(qr.num_nodes): + qr.weights[i] *= 6. 
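+ # The reference simplex has volume 1/dim!, so rescaling the Gauss-Jacobi
+ # weights by dim! normalizes them to sum to 1, consistent with
+ # quadratureRule.eval. The 0.5 and 0.75 factors in simplexXiaoGimbutas
+ # below serve the same purpose: modepy's reference simplex has volume
+ # 2^dim / dim!.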
+ else: + raise NotImplementedError('dim={}'.format(manifold_dim)) + super(simplexDuffyTransformation, self).__init__(nodes, qr.weights, dim, manifold_dim) + self.orders = orders + + +cdef class simplexXiaoGimbutas(simplexQuadratureRule): + def __init__(self, order, dim, manifold_dim=None): + if manifold_dim is None: + manifold_dim = dim + + if manifold_dim in (0, 1, ): + qr = simplexDuffyTransformation(order, dim, manifold_dim) + super(simplexXiaoGimbutas, self).__init__(qr.nodes, qr.weights, + dim, manifold_dim) + else: + qr = XiaoGimbutasSimplexQuadrature(order, manifold_dim) + nodes = unit_to_barycentric(qr.nodes) + num_nodes = nodes.shape[1] + # adjust for volume of reference element + if manifold_dim == 2: + for i in range(num_nodes): + qr.weights[i] *= 0.5 + elif manifold_dim == 3: + for i in range(num_nodes): + qr.weights[i] *= 0.75 + else: + raise NotImplementedError('dim={}'.format(manifold_dim)) + self.order = qr.exact_to + super(simplexXiaoGimbutas, self).__init__(nodes, qr.weights, + dim, manifold_dim) + + +cdef class sphericalQuadRule: + def __init__(self, REAL_t[:, ::1] vertexOffsets, REAL_t[::1] weights): + assert vertexOffsets.shape[0] == weights.shape[0] + self.vertexOffsets = vertexOffsets + self.weights = weights + self.num_nodes = self.weights.shape[0] + + +cdef class sphericalQuadRule1D(sphericalQuadRule): + def __init__(self, REAL_t radius): + vertexOffsets = uninitializedREAL((2, 1)) + vertexOffsets[0, 0] = -radius + vertexOffsets[1, 0] = radius + weights = np.ones((2), dtype=REAL) + sphericalQuadRule.__init__(self, vertexOffsets, weights) + + +cdef class sphericalQuadRule2D(sphericalQuadRule): + def __init__(self, REAL_t radius, INDEX_t numQuadNodes): + vertexOffsets = uninitializedREAL((numQuadNodes, 2)) + for i in range(numQuadNodes): + angle = 2*pi*i/numQuadNodes + vertexOffsets[i, 0] = radius*cos(angle) + vertexOffsets[i, 1] = radius*sin(angle) + weights = (2*pi*radius/numQuadNodes)*np.ones((numQuadNodes), dtype=REAL) + sphericalQuadRule.__init__(self, vertexOffsets, weights) + + diff --git a/fem/PyNucleus_fem/repartitioner.pyx b/fem/PyNucleus_fem/repartitioner.pyx new file mode 100644 index 0000000..8e4d500 --- /dev/null +++ b/fem/PyNucleus_fem/repartitioner.pyx @@ -0,0 +1,1649 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI + +import PyNucleus_metisCy +from PyNucleus_metisCy.metisCy import idx as metis_idx, real as metis_real +from PyNucleus_metisCy.metisCy cimport idx_t +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, TAG +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, TAG_t, BOOL_t +from PyNucleus_base import uninitialized +from . DoFMaps import P1_DoFMap +from . mesh import (mesh1d, mesh2d, mesh3d, + PHYSICAL, INTERIOR_NONOVERLAPPING) +from . meshCy cimport meshBase +from . simplexMapper cimport simplexMapper +from . meshOverlaps import boundary1D, boundary2D, boundary3D +from . meshOverlaps cimport (meshOverlap, overlapManager, + meshInterface, interfaceManager) +from . 
meshCy cimport encode_edge, encode_face, sortEdge, sortFace, decode_edge +from functools import lru_cache +cimport cython + + +cdef class Repartitioner: + cdef: + meshBase subdomain + interfaceManager interfaces + INDEX_t dim + MPI.Comm globalComm + MPI.Comm oldComm + MPI.Comm newComm + MPI.Comm interComm + BOOL_t is_overlapping + INDEX_t[::1] _newSubdomainGlobalRank + INDEX_t[::1] _oldSubdomainGlobalRank + dict _newRankSubdomainNo + dict _oldRankSubdomainNo + INDEX_t[::1] cells, part + INDEX_t cell_offset + + def __init__(self, meshBase subdomain, interfaceManager interfaces, MPI.Comm globalComm, MPI.Comm oldComm, MPI.Comm newComm): + cdef: + list req1, req2 + INDEX_t myLeaderRank = 0, rank1, rank2, otherLeader = 0 + MPI.Status status + self.subdomain = subdomain + self.interfaces = interfaces + self.globalComm = globalComm + self.oldComm = oldComm + self.newComm = newComm + self.is_overlapping = self.globalComm.allreduce(oldComm is not None and newComm is not None, MPI.LOR) + if self.subdomain is not None: + self.dim = self.subdomain.dim + else: + self.dim = 0 + self.dim = self.globalComm.allreduce(self.dim, MPI.MAX) + if not self.is_overlapping: + req1 = [] + req2 = [] + if self.oldComm is not None: + myLeaderRank = 0 + if self.oldComm.rank == myLeaderRank: + req1.append(self.globalComm.isend('me', dest=0, tag=777)) + if self.newComm is not None: + myLeaderRank = 0 + if self.newComm.rank == myLeaderRank: + req1.append(self.globalComm.isend('me', dest=0, tag=778)) + if self.globalComm.rank == 0: + status = MPI.Status() + self.globalComm.recv(source=MPI.ANY_SOURCE, status=status, tag=777) + rank1 = status.source + status = MPI.Status() + self.globalComm.recv(source=MPI.ANY_SOURCE, status=status, tag=778) + rank2 = status.source + req2.append(self.globalComm.isend(rank2, dest=rank1, tag=779)) + req2.append(self.globalComm.isend(rank1, dest=rank2, tag=780)) + MPI.Request.Waitall(req1) + if self.oldComm is not None: + if self.oldComm.rank == myLeaderRank: + otherLeader = self.globalComm.recv(source=0, tag=779) + else: + otherLeader = 0 + otherLeader = self.oldComm.bcast(otherLeader, root=myLeaderRank) + if self.newComm is not None: + if self.newComm.rank == myLeaderRank: + otherLeader = self.globalComm.recv(source=0, tag=780) + else: + otherLeader = 0 + otherLeader = self.newComm.bcast(otherLeader, root=myLeaderRank) + MPI.Request.Waitall(req2) + + if self.oldComm is not None: + self._oldSubdomainGlobalRank = np.array(self.oldComm.allgather(self.globalComm.rank), dtype=INDEX) + self._newSubdomainGlobalRank = None + + self.interComm = self.oldComm.Create_intercomm(myLeaderRank, self.globalComm, otherLeader) + self.interComm.bcast(np.array(self._oldSubdomainGlobalRank), root=MPI.ROOT if self.oldComm.rank == myLeaderRank else MPI.PROC_NULL) + self._newSubdomainGlobalRank = self.interComm.bcast(self._newSubdomainGlobalRank, root=0) + + if self.newComm is not None: + self._oldSubdomainGlobalRank = None + self._newSubdomainGlobalRank = np.array(self.newComm.allgather(self.globalComm.rank), dtype=INDEX) + + self.interComm = self.newComm.Create_intercomm(myLeaderRank, self.globalComm, otherLeader) + self._oldSubdomainGlobalRank = self.interComm.bcast(self._oldSubdomainGlobalRank, root=0) + self.interComm.bcast(np.array(self._newSubdomainGlobalRank), root=MPI.ROOT if self.newComm.rank == myLeaderRank else MPI.PROC_NULL) + else: + self._oldSubdomainGlobalRank = np.arange(self.globalComm.size, dtype=INDEX) + self._newSubdomainGlobalRank = np.arange(self.globalComm.size, dtype=INDEX) + + 
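+ # At this point _oldSubdomainGlobalRank/_newSubdomainGlobalRank map
+ # subdomain numbers on the old/new communicator to ranks on the global
+ # communicator; the dictionaries built next are the inverse maps.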
self._oldRankSubdomainNo = {self._oldSubdomainGlobalRank[subdomainNo]: subdomainNo for subdomainNo in range(self._oldSubdomainGlobalRank.shape[0])} + self._newRankSubdomainNo = {self._newSubdomainGlobalRank[subdomainNo]: subdomainNo for subdomainNo in range(self._newSubdomainGlobalRank.shape[0])} + + @lru_cache(maxsize=1) + def getGlobalVertexIndices(self): + if self.oldComm is not None: + subdomain = self.subdomain + if self.interfaces is None: + return np.arange((subdomain.num_vertices), dtype=INDEX) + else: + dm = P1_DoFMap(subdomain, 10) + ov = self.interfaces.getDoFs(self.subdomain, dm) + return ov.getGlobalIndices() + + globalVertexIndices = property(fget=getGlobalVertexIndices) + + cdef INDEX_t oldSubdomainGlobalRank_c(self, INDEX_t subdomainNo): + return self._oldSubdomainGlobalRank[subdomainNo] + + cdef INDEX_t newSubdomainGlobalRank_c(self, INDEX_t subdomainNo): + return self._newSubdomainGlobalRank[subdomainNo] + + cdef INDEX_t oldRankSubdomainNo_c(self, INDEX_t rank): + return self._oldRankSubdomainNo[rank] + + cdef INDEX_t newRankSubdomainNo_c(self, INDEX_t rank): + return self._newRankSubdomainNo[rank] + + def oldSubdomainGlobalRank(self, INDEX_t subdomainNo): + return self._oldSubdomainGlobalRank[subdomainNo] + + def newSubdomainGlobalRank(self, INDEX_t subdomainNo): + return self._newSubdomainGlobalRank[subdomainNo] + + def oldRankSubdomainNo(self, INDEX_t rank): + return self._oldRankSubdomainNo[rank] + + def newRankSubdomainNo(self, INDEX_t rank): + return self._newRankSubdomainNo[rank] + + def getNumPartitions(self): + if self.newComm is not None: + return self.newComm.size + else: + return self.globalComm.size-self.oldComm.size + + numPartitions = property(fget=getNumPartitions) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getCellPartition(self, partitioner='parmetis', partitionerParams={}): + cdef: + INDEX_t i, c, dim, numVerticesPerCell, numCells + meshBase subdomain + INDEX_t[::1] globalVertexIndices + INDEX_t[::1] cells + idx_t[::1] cell_dist + INDEX_t rank + + if self.oldComm is not None: + dim = self.dim + numVerticesPerCell = dim+1 + subdomain = self.subdomain + numCells = subdomain.num_cells + if 'partition_weights' in partitionerParams: + partition_weights = partitionerParams['partition_weights'] + else: + partition_weights = None + + if partitioner == 'parmetis': + cell_ptr = np.arange(0, numVerticesPerCell*(numCells+1), + numVerticesPerCell, + dtype=metis_idx) + + cells2 = subdomain.cells_as_array.astype(metis_idx) + cells2.resize((numVerticesPerCell*numCells, )) + cells = cells2 + self.cells = cells + + globalVertexIndices = self.globalVertexIndices + for i in range(cells.shape[0]): + c = cells[i] + cells[i] = globalVertexIndices[c] + + cell_dist = np.array(self.oldComm.allgather(numCells), + dtype=metis_idx) + + cell_dist = np.concatenate((np.zeros((1), dtype=metis_idx), + np.cumsum(cell_dist, dtype=metis_idx))) + rank = self.oldComm.rank + self.cell_offset = cell_dist[rank] + + if partition_weights is not None and partition_weights.dtype != metis_real: + partition_weights = partition_weights.astype(metis_real) + if partition_weights is not None and partition_weights.ndim == 1: + partition_weights = partition_weights[:, np.newaxis] + + self.part = PyNucleus_metisCy.parmetisCy.PartMeshKway(cell_dist, + cell_ptr, + np.array(cells, copy=False).astype(metis_idx), + subdomain.dim, + self.numPartitions, + self.oldComm, + tpwgts=partition_weights) + if self.oldComm.size > 1: + self.reorderPartitioning() + elif 
partitioner in ('metis', 'regular'): + cells2 = subdomain.cells_as_array.copy() + cells2.resize((numVerticesPerCell*numCells, )) + cells = cells2 + self.cells = cells + + globalVertexIndices = self.globalVertexIndices + for i in range(cells.shape[0]): + c = cells[i] + cells[i] = globalVertexIndices[c] + + if self.oldComm.size == 1: + numPartitions = self.numPartitions + partitionOffset = 0 + else: + cellsPerSubdomain = self.oldComm.gather(numCells, root=0) + if self.oldComm.rank == 0: + cellsPerSubdomain = np.array(cellsPerSubdomain) + numPartitionsPerSubdomain = self.numPartitions*(cellsPerSubdomain/cellsPerSubdomain.sum()) + numPartitionsPerSubdomainInt = np.around(numPartitionsPerSubdomain).astype(INDEX) + + if numPartitionsPerSubdomainInt.sum() < self.numPartitions: + while numPartitionsPerSubdomainInt.sum() != self.numPartitions: + numPartitionsPerSubdomainInt[(numPartitionsPerSubdomain-numPartitionsPerSubdomainInt).argmax()] += 1 + elif numPartitionsPerSubdomainInt.sum() > self.numPartitions: + while numPartitionsPerSubdomainInt.sum() != self.numPartitions: + numPartitionsPerSubdomainInt[(numPartitionsPerSubdomain-numPartitionsPerSubdomainInt).argmin()] -= 1 + + partitionOffsetPerSubdomain = np.concatenate(([0], np.cumsum(numPartitionsPerSubdomainInt)[:numPartitionsPerSubdomainInt.shape[0]-1])) + else: + numPartitionsPerSubdomainInt = None + partitionOffsetPerSubdomain = None + numPartitions = self.oldComm.scatter(numPartitionsPerSubdomainInt) + partitionOffset = self.oldComm.scatter(partitionOffsetPerSubdomain) + + cell_dist = np.array(self.oldComm.allgather(numCells), + dtype=metis_idx) + + cell_dist = np.concatenate((np.zeros((1), dtype=metis_idx), + np.cumsum(cell_dist, dtype=metis_idx))) + rank = self.oldComm.rank + self.cell_offset = cell_dist[rank] + + if partition_weights is not None: + myPartitionWeights = partition_weights[partitionOffset:partitionOffset+numPartitions] + myPartitionWeights /= myPartitionWeights.sum() + myPartitionWeights = myPartitionWeights.astype(metis_real) + else: + myPartitionWeights = None + + if partitioner == 'metis': + from . meshPartitioning import metisMeshPartitioner + mP = metisMeshPartitioner(subdomain) + elif partitioner == 'regular': + from . meshPartitioning import regularMeshPartitioner + mP = regularMeshPartitioner(subdomain) + else: + raise NotImplementedError() + partitionerParams['partition_weights'] = myPartitionWeights + part, actualNumPartitions = mP.partitionCells(numPartitions, **partitionerParams) + assert actualNumPartitions == numPartitions + + for i in range(part.shape[0]): + part[i] += partitionOffset + self.part = part + else: + raise NotImplementedError(partitioner) + return self.part + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def reorderPartitioning(self): + """We collect the information of how many cells f_{p,q} of which + partition q are on each subdomain p. 
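+ Here n_{p,q} = 1 encodes that new partition q is assigned to old
+ subdomain p; maximizing the overlap f keeps as many cells as possible
+ on their current rank and thereby minimizes data movement.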
Then we solve a linear + program + + max_n \sum_{p=1}^P \sum_{q=1}^Q f_{p,q} * n_{p,q} + subject to + sum_q n_{p,q} = 1 \forall p = 1,..,P (each subdomain gets one partition) + sum_p n_{p,q} <= 1 \forall q = 1,..,Q (each partition gets at most one subdomain) + + """ + cdef: + INDEX_t i, p + INDEX_t[::1] count, counts, mapping + if self.oldComm is not None: + count = np.zeros((self.numPartitions), dtype=INDEX) + for i in range(self.part.shape[0]): + count[self.part[i]] += 1 + counts = self.oldComm.gather(count, root=0) + if self.oldComm.rank == 0: + P = self.oldComm.size + Q = self.numPartitions + F = np.concatenate(counts) + A_eq = np.zeros((P, P*Q)) + for p in range(P): + A_eq[p, Q*p:Q*(p+1)] = 1 + b_eq = np.ones((P)) + + A_ub = np.zeros((Q, P*Q)) + for q in range(Q): + for p in range(P): + A_ub[q, Q*p+q] = 1 + b_ub = np.ones((Q)) + + from scipy.optimize import linprog + res = linprog(-F.ravel(), + A_eq=A_eq, b_eq=b_eq, + A_ub=A_ub, b_ub=b_ub) + mapping = uninitialized((Q), dtype=INDEX) + leftOverParts = set(range(Q)) + for p in range(P): + mapping[p] = res.x[p*Q:(p+1)*Q].argmax() + leftOverParts.remove(mapping[p]) + for p in range(P, Q): + mapping[p] = leftOverParts.pop() + assert np.unique(mapping).shape[0] == Q + else: + mapping = uninitialized((0), dtype=INDEX) + mapping = self.oldComm.bcast(mapping) + inv_mapping = uninitialized((self.numPartitions), dtype=INDEX) + inv_mapping[mapping] = np.arange(self.numPartitions, dtype=INDEX) + assert np.unique(inv_mapping).shape[0] == self.numPartitions + for i in range(self.part.shape[0]): + self.part[i] = inv_mapping[self.part[i]] + # print(self.oldComm.rank, count[self.oldComm.rank], count[mapping[self.oldComm.rank]]) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getRepartitionedSubdomains(self): + cdef: + INDEX_t dim, numVerticesPerCell = -1, i, j, k, m, l, p + list req = [], sendRequests = [] + meshBase subdomain + INDEX_t numCells = -1, subdomainNo + INDEX_t[::1] part, cells, numCellsNew, numCellsNewLocal + INDEX_t[::1] cellsToSend, cellsToRecv, globalCellIdxToSend, globalCellIdxToRecv, cellsToSendPtr, sendCount, sendDispl, recvCount, recvDispl + dict globalToLocalCells + INDEX_t rank, count + INDEX_t[::1] gVITS + REAL_t[:, ::1] vTS + INDEX_t cellNo, vertexNo, vertex + list partitionVertices + INDEX_t[::1] globalVertexIndices + MPI.Comm globalComm = self.globalComm + INDEX_t globalCommSize = globalComm.size + INDEX_t[::1] newSubdomainNos, counts + INDEX_t[::1] c2 + REAL_t[:, ::1] newVertices + INDEX_t[::1] overlapCount + localInterfaceManager lIM + interfaceProcessor iP + dim = self.dim + numVerticesPerCell = dim+1 + req = [] + if self.oldComm is not None: + subdomain = self.subdomain + numCells = subdomain.num_cells + part = self.part + numCellsNew = np.zeros((self.numPartitions), dtype=INDEX) + for i in range(part.shape[0]): + numCellsNew[part[i]] += 1 + numCellsNewLocal = np.zeros((self.numPartitions), dtype=INDEX) + self.oldComm.Reduce(numCellsNew, numCellsNewLocal, root=0) + if self.oldComm.rank == 0: + req.append(globalComm.Isend(numCellsNewLocal, dest=self.newSubdomainGlobalRank_c(0), tag=15)) + + if self.newComm is not None: + numCellsNew = uninitialized((self.numPartitions), dtype=INDEX) + numCellsNewLocal = uninitialized((1), dtype=INDEX) + if self.newComm.rank == 0: + globalComm.Recv(numCellsNew, source=self.oldSubdomainGlobalRank_c(0), tag=15) + self.newComm.Scatter(numCellsNew, numCellsNewLocal, root=0) + + MPI.Request.Waitall(req) + + if self.oldComm is not None: + # 
prepare cells for sending + newSubdomainNos, countsLong = np.unique(part, return_counts=True) + counts = countsLong.astype(INDEX) + cells = self.cells + cellsToSend = np.zeros((cells.shape[0]), dtype=INDEX) + globalCellIdxToSend = np.zeros((cells.shape[0]//numVerticesPerCell), dtype=INDEX) + cellsToSendPtr = np.zeros((globalCommSize+1), dtype=INDEX) + sendCount = np.zeros((globalCommSize), dtype=INDEX) + sendDispl = np.zeros((globalCommSize), dtype=INDEX) + + for k in range(newSubdomainNos.shape[0]): + subdomainNo = newSubdomainNos[k] + count = counts[k] + rank = self.newSubdomainGlobalRank_c(subdomainNo) + cellsToSendPtr[rank+1] = count + sendCount[rank] = count + for i in range(globalCommSize): + cellsToSendPtr[i+1] += cellsToSendPtr[i] + sendDispl[i] = cellsToSendPtr[i] + + for j in range(part.shape[0]): + subdomainNo = part[j] + rank = self.newSubdomainGlobalRank_c(subdomainNo) + globalCellIdxToSend[cellsToSendPtr[rank]] = j+self.cell_offset + cellsToSendPtr[rank] += 1 + for rank in range(globalCommSize, 0, -1): + cellsToSendPtr[rank] = cellsToSendPtr[rank-1] + cellsToSendPtr[0] = 0 + + for i in range(numCells): + rank = self.newSubdomainGlobalRank_c(part[i]) + j = cellsToSendPtr[rank] + cellsToSendPtr[rank] += 1 + for k in range(numVerticesPerCell): + cellsToSend[numVerticesPerCell*j+k] = cells[numVerticesPerCell*i+k] + else: + globalCellIdxToSend = uninitialized((0), dtype=INDEX) + sendCount = np.zeros((globalCommSize), dtype=INDEX) + sendDispl = np.zeros((globalCommSize), dtype=INDEX) + cellsToSend = np.zeros((0), dtype=INDEX) + + # prepare cells for receiving + if self.oldComm is not None: + sendRequests = [] + for i in range(newSubdomainNos.shape[0]): + sendRequests.append(globalComm.Isend(counts[i:i+1], + dest=self.newSubdomainGlobalRank_c(newSubdomainNos[i]), + tag=121)) + if self.newComm is not None: + c = 0 + c2 = uninitialized((1), dtype=INDEX) + recvCount = np.zeros((globalCommSize), dtype=INDEX) + recv_ranks = set() + while c < numCellsNewLocal[0]: + status = MPI.Status() + globalComm.Recv(c2, source=MPI.ANY_SOURCE, status=status, tag=121) + rank = status.source + subdomainNo = self.oldRankSubdomainNo_c(rank) + recvCount[rank] = c2[0] + c += c2[0] + recv_ranks.add(subdomainNo) + if self.oldComm is not None: + MPI.Request.Waitall(sendRequests) + + if self.newComm is not None: + recvDispl = np.zeros((globalCommSize), dtype=INDEX) + for rank in range(1, globalCommSize): + recvDispl[rank] = recvDispl[rank-1]+recvCount[rank-1] + + globalCellIdxToRecv = uninitialized((numCellsNewLocal[0]), dtype=INDEX) + else: + globalCellIdxToRecv = uninitialized((0), dtype=INDEX) + recvDispl = np.zeros((globalCommSize), dtype=INDEX) + recvCount = np.zeros((globalCommSize), dtype=INDEX) + + globalComm.Alltoallv([globalCellIdxToSend, (sendCount, sendDispl)], + [globalCellIdxToRecv, (recvCount, recvDispl)]) + + if self.newComm is not None: + globalToLocalCells = {} + for localCellNo in range(globalCellIdxToRecv.shape[0]): + globalCellNo = globalCellIdxToRecv[localCellNo] + globalToLocalCells[globalCellNo] = localCellNo + + for i in range(globalCommSize): + sendCount[i] *= numVerticesPerCell + sendDispl[i] *= numVerticesPerCell + recvCount[i] *= numVerticesPerCell + recvDispl[i] *= numVerticesPerCell + + if self.newComm is not None: + cellsToRecv = uninitialized((numVerticesPerCell*numCellsNewLocal[0]), dtype=INDEX) + else: + cellsToRecv = uninitialized((0), dtype=INDEX) + + globalComm.Alltoallv([cellsToSend, (sendCount, sendDispl)], + [cellsToRecv, (recvCount, recvDispl)]) + + 
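# NOTE: the two Alltoallv exchanges above follow the usual counts/displacements pattern: + # the first ships one global cell index per cell, the second ships the + # numVerticesPerCell vertex indices of each cell, which is why all four + # count/displacement arrays are scaled by numVerticesPerCell in between. + # A minimal sketch of the same mpi4py pattern (hypothetical buffers, for illustration only): + # sendbuf = np.arange(6, dtype=INDEX) + # counts = np.array([2, 4], dtype=INDEX) # 2 items to rank 0, 4 to rank 1 + # displs = np.array([0, 2], dtype=INDEX) # prefix sums of counts + # comm.Alltoallv([sendbuf, (counts, displs)], [recvbuf, (recvcounts, recvdispls)]) + 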
###################################################################### + # exchange vertices + + if self.oldComm is not None: + sendRequests = [] + globalVertexIndicesToSend = {} + verticesToSend = {} + partitionVertices = [set() for p in range(globalCommSize)] + for cellNo in range(subdomain.num_cells): + p = part[cellNo] + for vertexNo in range(subdomain.dim+1): + vertex = subdomain.cells[cellNo, vertexNo] + partitionVertices[p].add(vertex) + globalVertexIndices = self.globalVertexIndices + for i in newSubdomainNos: + n = len(partitionVertices[i]) + gVITS = uninitialized(n, dtype=INDEX) + vTS = uninitialized((n, dim), dtype=REAL) + k = 0 + for vertex in partitionVertices[i]: + gVITS[k] = globalVertexIndices[vertex] + for j in range(dim): + vTS[k, j] = subdomain.vertices[vertex, j] + k += 1 + rank = self.newSubdomainGlobalRank_c(i) + globalVertexIndicesToSend[i] = gVITS + verticesToSend[i] = vTS + sendRequests.append(globalComm.isend(n, dest=rank, tag=1)) + sendRequests.append(globalComm.Isend(globalVertexIndicesToSend[i], + dest=rank, tag=2)) + sendRequests.append(globalComm.Isend(verticesToSend[i], + dest=rank, tag=3)) + + if self.newComm is not None: + globalVertexIndicesToRecv = {} + verticesToRecv = {} + globalToLocal = {} + for i in recv_ranks: + rank = self.oldSubdomainGlobalRank_c(i) + n = globalComm.recv(source=rank, tag=1) + globalVertexIndicesToRecv[i] = np.zeros((n), dtype=INDEX) + verticesToRecv[i] = np.zeros((n, dim), dtype=REAL) + globalComm.Recv(globalVertexIndicesToRecv[i], + source=rank, tag=2) + globalComm.Recv(verticesToRecv[i], source=rank, tag=3) + for j in range(globalVertexIndicesToRecv[i].shape[0]): + if globalVertexIndicesToRecv[i][j] not in globalToLocal: + globalToLocal[globalVertexIndicesToRecv[i][j]] = (i, j) + + newVertices = uninitialized((len(globalToLocal), dim), dtype=REAL) + k = 0 + for i in globalToLocal: + j, m = globalToLocal[i] + for l in range(dim): + newVertices[k, l] = verticesToRecv[j][m, l] + globalToLocal[i] = k + k += 1 + localToGlobal = uninitialized((len(globalToLocal)), dtype=INDEX) + for globalVertexNo, localVertexNo in globalToLocal.items(): + localToGlobal[localVertexNo] = globalVertexNo + + cellsToRecv2 = uninitialized((numCellsNewLocal[0], numVerticesPerCell), dtype=INDEX) + k = 0 + for i in range(numCellsNewLocal[0]): + for j in range(numVerticesPerCell): + cellsToRecv2[i, j] = globalToLocal[cellsToRecv[k]] + k += 1 + + if self.oldComm is not None: + MPI.Request.Waitall(sendRequests) + + ###################################################################### + # build new subdomain + + if self.newComm is not None: + if dim == 1: + subdomainNew = mesh1d(newVertices, cellsToRecv2) + elif dim == 2: + subdomainNew = mesh2d(newVertices, cellsToRecv2) + elif dim == 3: + subdomainNew = mesh3d(newVertices, cellsToRecv2) + else: + raise NotImplementedError() + else: + subdomainNew = None + + ###################################################################### + # build mesh overlap between old and new partitioning + + if self.oldComm is not None: + overlapCells = {} + for k in range(newSubdomainNos.shape[0]): + subdomainNo = newSubdomainNos[k] + count = counts[k] + overlapCells[subdomainNo] = uninitialized((count), dtype=INDEX) + overlapCount = np.zeros((self.numPartitions), dtype=INDEX) + for i in range(numCells): + p = part[i] + k = overlapCount[p] + overlapCount[p] += 1 + overlapCells[p][k] = i + + OM = overlapManager(globalComm) + for subdomainNo in newSubdomainNos: + rank = self.newSubdomainGlobalRank_c(subdomainNo) + OM.overlaps[rank] 
= meshOverlap(overlapCells[subdomainNo], + globalComm.rank, + rank, dim) + else: + OM = None + + if self.newComm is not None: + OMnew = overlapManager(globalComm) + for subdomainNo in recv_ranks: + rank = self.oldSubdomainGlobalRank_c(subdomainNo) + OMnew.overlaps[rank] = meshOverlap(np.arange(recvDispl[rank]//numVerticesPerCell, + (recvDispl[rank]+recvCount[rank])//numVerticesPerCell, dtype=INDEX), + globalComm.rank, rank, dim) + else: + OMnew = None + + ###################################################################### + # build mesh interfaces between new partitions + + ###################################################################### + # send out all interface information from old partition + if self.oldComm is not None: + + lIM = localInterfaceManager(subdomain, self.interfaces, self.oldComm, + self.part, self.cell_offset) + for subdomainNo in newSubdomainNos: + lIM.addSubdomain(subdomainNo) + lIM.removeBoundary() + packed_send_vertices, packed_send_edges, packed_send_faces = lIM.getPackedDataForSend() + + if subdomain.dim == 3: + for subdomainNo in newSubdomainNos: + rank = self.newSubdomainGlobalRank_c(subdomainNo) + try: + sendRequests.append(globalComm.isend(packed_send_faces[subdomainNo].shape[0], dest=rank, tag=53)) + sendRequests.append(globalComm.Isend(packed_send_faces[subdomainNo], dest=rank, tag=54)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=53)) + + try: + n = packed_send_edges[subdomainNo].shape[0] + sendRequests.append(globalComm.isend(n, dest=rank, tag=55)) + sendRequests.append(globalComm.Isend(packed_send_edges[subdomainNo], dest=rank, tag=56)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=55)) + + try: + n = packed_send_vertices[subdomainNo].shape[0] + sendRequests.append(globalComm.isend(n, dest=rank, tag=57)) + sendRequests.append(globalComm.Isend(packed_send_vertices[subdomainNo], dest=rank, tag=58)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=57)) + elif subdomain.dim == 2: + for subdomainNo in newSubdomainNos: + rank = self.newSubdomainGlobalRank_c(subdomainNo) + try: + sendRequests.append(globalComm.isend(packed_send_edges[subdomainNo].shape[0], dest=rank, tag=53)) + sendRequests.append(globalComm.Isend(packed_send_edges[subdomainNo], dest=rank, tag=54)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=53)) + try: + n = packed_send_vertices[subdomainNo].shape[0] + sendRequests.append(globalComm.isend(n, dest=rank, tag=57)) + sendRequests.append(globalComm.Isend(packed_send_vertices[subdomainNo], dest=rank, tag=58)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=57)) + elif subdomain.dim == 1: + for subdomainNo in newSubdomainNos: + rank = self.newSubdomainGlobalRank_c(subdomainNo) + try: + n = packed_send_vertices[subdomainNo].shape[0] + sendRequests.append(globalComm.isend(n, dest=rank, tag=57)) + sendRequests.append(globalComm.Isend(packed_send_vertices[subdomainNo], dest=rank, tag=58)) + except KeyError: + sendRequests.append(globalComm.isend(0, dest=rank, tag=57)) + else: + raise NotImplementedError() + + + ###################################################################### + # recv all interface information from old partition + if self.newComm is not None: + iP = interfaceProcessor(subdomainNew, self.newComm, localToGlobal, globalToLocalCells) + + if dim == 1: + for subdomainNo in recv_ranks: + rank = self.oldSubdomainGlobalRank_c(subdomainNo) + n = globalComm.recv(source=rank, tag=57) + 
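# each row of the packed array is (globalCellNo, vertexNo, sharing subdomainNo), + # matching the layout written by getPackedDataForSend on the old subdomains + 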
packed_recv_vertices = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_vertices, source=rank, tag=58) + packed_recv_faces = uninitialized((0, 3), dtype=INDEX) + packed_recv_edges = uninitialized((0, 3), dtype=INDEX) + + iP.processInterfaceInformation(packed_recv_faces, packed_recv_edges, packed_recv_vertices) + elif dim == 2: + for subdomainNo in recv_ranks: + rank = self.oldSubdomainGlobalRank_c(subdomainNo) + n = globalComm.recv(source=rank, tag=53) + packed_recv_edges = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_edges, source=rank, tag=54) + n = globalComm.recv(source=rank, tag=57) + packed_recv_vertices = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_vertices, source=rank, tag=58) + packed_recv_faces = uninitialized((0, 3), dtype=INDEX) + + iP.processInterfaceInformation(packed_recv_faces, packed_recv_edges, packed_recv_vertices) + elif dim == 3: + for subdomainNo in recv_ranks: + rank = self.oldSubdomainGlobalRank_c(subdomainNo) + n = globalComm.recv(source=rank, tag=53) + packed_recv_faces = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_faces, source=rank, tag=54) + n = globalComm.recv(source=rank, tag=55) + packed_recv_edges = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_edges, source=rank, tag=56) + n = globalComm.recv(source=rank, tag=57) + packed_recv_vertices = uninitialized((n, 3), dtype=INDEX) + if n > 0: + globalComm.Recv(packed_recv_vertices, source=rank, tag=58) + + iP.processInterfaceInformation(packed_recv_faces, packed_recv_edges, packed_recv_vertices) + + iP.setBoundaryInformation() + iM = iP.getInterfaceManager() + else: + iM = None + + if self.oldComm is not None: + MPI.Request.Waitall(sendRequests) + + return subdomainNew, OM, OMnew, iM + + +cdef class localInterfaceManager: + cdef: + meshBase mesh + interfaceManager interfaces + MPI.Comm oldComm + INDEX_t[::1] part + INDEX_t cell_offset + simplexMapper sM + dict faceLookup + dict edgeLookup + dict vertexLookup + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __init__(self, meshBase mesh, interfaceManager interfaces=None, MPI.Comm comm=None, INDEX_t[::1] part=None, INDEX_t cell_offset=0): + cdef: + dict faceLookup, edgeLookup, vertexLookup + INDEX_t interfaceVertexNo, interfaceEdgeNo, interfaceFaceNo, cellNo, vertexNo, edgeNo, faceNo, subdomainNo, otherSubdomainNo, vertex + ENCODE_t hv = 0 + INDEX_t edge[2] + INDEX_t face[3] + tuple hvF + self.mesh = mesh + self.interfaces = interfaces + self.oldComm = comm + self.part = part + self.cell_offset = cell_offset + + self.sM = self.mesh.simplexMapper + + self.faceLookup = {} + self.edgeLookup = {} + self.vertexLookup = {} + if self.interfaces is not None: + faceLookup = self.faceLookup + edgeLookup = self.edgeLookup + vertexLookup = self.vertexLookup + + # get new partitions for interface edges + interfacePart = self.interfaces.exchangePartitioning(self.oldComm, self.part) + # enter all interface faces in faceLookup + for subdomainNo in self.interfaces.interfaces: + # enter information for all previous interface vertices in + # vertexLookup + for interfaceVertexNo in range(self.interfaces.interfaces[subdomainNo].vertices.shape[0]): + cellNo = self.interfaces.interfaces[subdomainNo].vertices[interfaceVertexNo, 0] + vertexNo = self.interfaces.interfaces[subdomainNo].vertices[interfaceVertexNo, 1] + otherSubdomainNo = 
interfacePart[subdomainNo]['vertex'][interfaceVertexNo] + vertex = self.sM.getVertexInCell(cellNo, vertexNo) + try: + vertexLookup[vertex][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[vertex] = {otherSubdomainNo: -1} + + # enter information for all previous interface edges in + # edgeLookup and vertexLookup + for interfaceEdgeNo in range(self.interfaces.interfaces[subdomainNo].edges.shape[0]): + cellNo = self.interfaces.interfaces[subdomainNo].edges[interfaceEdgeNo, 0] + edgeNo = self.interfaces.interfaces[subdomainNo].edges[interfaceEdgeNo, 1] + otherSubdomainNo = interfacePart[subdomainNo]['edge'][interfaceEdgeNo] + self.sM.getEdgeInCell(cellNo, edgeNo, edge) + hv = self.sM.getEdgeInCellEncoded(cellNo, edgeNo) + try: + edgeLookup[hv][otherSubdomainNo] = -1 + except KeyError: + edgeLookup[hv] = {otherSubdomainNo: -1} + + try: + vertexLookup[edge[0]][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[edge[0]] = {otherSubdomainNo: -1} + try: + vertexLookup[edge[1]][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[edge[1]] = {otherSubdomainNo: -1} + + # enter information for all previous interface faces in + # faceLookup, edgeLookup and vertexLookup + for interfaceFaceNo in range(self.interfaces.interfaces[subdomainNo].faces.shape[0]): + cellNo = self.interfaces.interfaces[subdomainNo].faces[interfaceFaceNo, 0] + faceNo = self.interfaces.interfaces[subdomainNo].faces[interfaceFaceNo, 1] + otherSubdomainNo = interfacePart[subdomainNo]['face'][interfaceFaceNo] + self.sM.getFaceInCell(cellNo, faceNo, face) + hvF = self.sM.sortAndEncodeFace(face) + + faceLookup[hvF] = {otherSubdomainNo: -1} + + self.sM.startLoopOverFaceEdges(face) + while self.sM.loopOverFaceEdgesEncoded(&hv): + try: + edgeLookup[hv][otherSubdomainNo] = -1 + except KeyError: + edgeLookup[hv] = {otherSubdomainNo: -1} + + try: + vertexLookup[face[0]][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[face[0]] = {otherSubdomainNo: -1} + try: + vertexLookup[face[1]][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[face[1]] = {otherSubdomainNo: -1} + try: + vertexLookup[face[2]][otherSubdomainNo] = -1 + except KeyError: + vertexLookup[face[2]] = {otherSubdomainNo: -1} + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addSubdomain(self, INDEX_t subdomainNo): + cdef: + dict faceLookup, edgeLookup, vertexLookup + INDEX_t i, cellNo, faceNo, edgeNo, vertexNo + INDEX_t face[3] + INDEX_t edge[2] + INDEX_t vertex + ENCODE_t hv + tuple hvF + meshBase fakeSubdomain + INDEX_t[:, ::1] bvertices, bedges, bfaces + INDEX_t[::1] fakeToLocal + faceLookup = self.faceLookup + edgeLookup = self.edgeLookup + vertexLookup = self.vertexLookup + + fakeCells = self.mesh.cells_as_array[np.array(self.part) == subdomainNo, :].copy() + if self.mesh.dim == 1: + fakeSubdomain = mesh1d(self.mesh.vertices, fakeCells) + bvertices = boundary1D(fakeSubdomain) + elif self.mesh.dim == 2: + fakeSubdomain = mesh2d(self.mesh.vertices, fakeCells) + bvertices, bedges = boundary2D(fakeSubdomain, assumeConnected=False) + elif self.mesh.dim == 3: + fakeSubdomain = mesh3d(self.mesh.vertices, fakeCells) + bvertices, bedges, bfaces = boundary3D(fakeSubdomain, assumeConnected=False) + else: + raise NotImplementedError() + + fakeToLocal = np.arange((self.mesh.num_cells), dtype=INDEX)[np.array(self.part) == subdomainNo] + if self.mesh.dim == 3: + for i in range(bfaces.shape[0]): + cellNo, faceNo = bfaces[i, 0], bfaces[i, 1] + cellNo = fakeToLocal[cellNo] + self.sM.getFaceInCell(cellNo, 
faceNo, face) + hvF = self.sM.sortAndEncodeFace(face) + + try: + faceLookup[hvF][subdomainNo] = cellNo + except KeyError: + faceLookup[hvF] = {subdomainNo: cellNo} + + if self.mesh.dim >= 2: + for i in range(bedges.shape[0]): + cellNo, edgeNo = bedges[i, 0], bedges[i, 1] + cellNo = fakeToLocal[cellNo] + self.sM.getEdgeInCell(cellNo, edgeNo, edge) + hv = self.sM.sortAndEncodeEdge(edge) + + try: + edgeLookup[hv][subdomainNo] = cellNo + except KeyError: + edgeLookup[hv] = {subdomainNo: cellNo} + + for i in range(bvertices.shape[0]): + cellNo, vertexNo = bvertices[i, 0], bvertices[i, 1] + cellNo = fakeToLocal[cellNo] + vertex = self.sM.getVertexInCell(cellNo, vertexNo) + + try: + vertexLookup[vertex][subdomainNo] = cellNo + except KeyError: + vertexLookup[vertex] = {subdomainNo: cellNo} + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void removeBoundary(self, TAG_t tag=PHYSICAL): + cdef: + INDEX_t[:, ::1] bfaces, bedges + INDEX_t[::1] bvertices + INDEX_t i, v, localVertexNo + ENCODE_t hv + tuple hvF + dict temp + if self.mesh.dim == 3: + bfaces = self.mesh.getBoundaryFacesByTag(tag) + for i in range(bfaces.shape[0]): + hvF = self.sM.sortAndEncodeFace(bfaces[i, :]) + self.faceLookup.pop(hvF) + if self.mesh.dim >= 2: + bedges = self.mesh.getBoundaryEdgesByTag(tag) + for i in range(bedges.shape[0]): + hv = self.sM.sortAndEncodeEdge(bedges[i, :]) + self.edgeLookup.pop(hv) + bvertices = self.mesh.getBoundaryVerticesByTag(tag) + for v in bvertices: + self.vertexLookup.pop(v) + + if self.mesh.dim == 3: + # kick out all faces that are shared by only one subdomain + temp = {} + for hvF in self.faceLookup: + if len(self.faceLookup[hvF]) > 1: + temp[hvF] = self.faceLookup[hvF] + self.faceLookup = temp + # kick out all edges that are shared by 3 subdomains or fewer + temp = {} + for hv in self.edgeLookup: + if len(self.edgeLookup[hv]) > 3: + temp[hv] = self.edgeLookup[hv] + self.edgeLookup = temp + # kick out all vertices that are shared by 4 subdomains or fewer + temp = {} + for localVertexNo in self.vertexLookup: + if len(self.vertexLookup[localVertexNo]) > 4: + temp[localVertexNo] = self.vertexLookup[localVertexNo] + self.vertexLookup = temp + elif self.mesh.dim == 2: + # kick out all edges that are shared by only one subdomain + temp = {} + for hv in self.edgeLookup: + if len(self.edgeLookup[hv]) > 1: + temp[hv] = self.edgeLookup[hv] + self.edgeLookup = temp + # kick out all vertices that are shared by 3 subdomains or fewer + temp = {} + for localVertexNo in self.vertexLookup: + if len(self.vertexLookup[localVertexNo]) > 3: + temp[localVertexNo] = self.vertexLookup[localVertexNo] + self.vertexLookup = temp + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef tuple getDataForSend(self): + cdef: + dict faceLookup, interface_faces + dict edgeLookup, interface_edges + dict vertexLookup, interface_vertices + tuple hvF + INDEX_t subdomainNo, globalCellNo, cellNo, faceNo, edgeNo, vertexNo + INDEX_t face[3] + INDEX_t edge[2] + INDEX_t vertex + tuple key, val + + # write to interface_faces: + # (receiving subdomainNo) -> (face in local indices) -> sharing subdomains -> (globalCellNo, faceNo) + faceLookup = self.faceLookup + interface_faces = {} + for hvF in faceLookup: + for subdomainNo in faceLookup[hvF]: + cellNo = faceLookup[hvF][subdomainNo] + if cellNo == -1: + continue + faceNo = self.sM.findFaceInCellEncoded(cellNo, hvF) + + for otherSubdomainNo in faceLookup[hvF]: + globalCellNo = self.cell_offset+cellNo + 
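# cell_offset converts the subdomain-local cell number into a global cell + # number, so the receiver can resolve it through its globalToLocalCells map + 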
self.sM.getFaceInCell(cellNo, faceNo, face, sorted=True) + key = (face[0], face[1]) + val = (globalCellNo, faceNo) + try: + interface_faces[subdomainNo] + try: + interface_faces[subdomainNo][key] + try: + interface_faces[subdomainNo][key][otherSubdomainNo].add(val) + except KeyError: + interface_faces[subdomainNo][key][otherSubdomainNo] = set([val]) + except KeyError: + interface_faces[subdomainNo][key] = {otherSubdomainNo: set([val])} + except KeyError: + interface_faces[subdomainNo] = {key: {otherSubdomainNo: set([val])}} + + # write to interface_edges: + # (receiving subdomainNo) -> (edge in local indices) -> sharing subdomains -> (globalCellNo, edgeNo) + edgeLookup = self.edgeLookup + interface_edges = {} + for hv in edgeLookup: + for subdomainNo in edgeLookup[hv]: + cellNo = edgeLookup[hv][subdomainNo] + if cellNo == -1: + continue + edgeNo = self.sM.findEdgeInCellEncoded(cellNo, hv) + for otherSubdomainNo in edgeLookup[hv]: + globalCellNo = self.cell_offset+cellNo + self.sM.getEdgeInCell(cellNo, edgeNo, edge, sorted=True) + key = (edge[0], edge[1]) + val = (globalCellNo, edgeNo) + try: + interface_edges[subdomainNo] + try: + interface_edges[subdomainNo][key] + try: + interface_edges[subdomainNo][key][otherSubdomainNo].add(val) + except KeyError: + interface_edges[subdomainNo][key][otherSubdomainNo] = set([val]) + except KeyError: + interface_edges[subdomainNo][key] = {otherSubdomainNo: set([val])} + except KeyError: + interface_edges[subdomainNo] = {key: {otherSubdomainNo: set([val])}} + + # write to interface_vertices: + # (receiving subdomainNo) -> (vertex in local indices) -> sharing subdomains -> (globalCellNo, vertexNo) + vertexLookup = self.vertexLookup + interface_vertices = {} + for localVertexNo in vertexLookup: + for subdomainNo in vertexLookup[localVertexNo]: + cellNo = vertexLookup[localVertexNo][subdomainNo] + if cellNo == -1: + continue + vertexNo = self.sM.findVertexInCell(cellNo, localVertexNo) + for otherSubdomainNo in vertexLookup[localVertexNo]: + globalCellNo = self.cell_offset+cellNo + vertex = self.sM.getVertexInCell(cellNo, vertexNo) + val = (globalCellNo, vertexNo) + try: + interface_vertices[subdomainNo] + try: + interface_vertices[subdomainNo][vertex] + try: + interface_vertices[subdomainNo][vertex][otherSubdomainNo].add(val) + except KeyError: + interface_vertices[subdomainNo][vertex][otherSubdomainNo] = set([val]) + except KeyError: + interface_vertices[subdomainNo][vertex] = {otherSubdomainNo: set([val])} + except KeyError: + interface_vertices[subdomainNo] = {vertex: {otherSubdomainNo: set([val])}} + + return interface_vertices, interface_edges, interface_faces + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef tuple getPackedDataForSend(self): + cdef: + dict interface_vertices, interface_edges, interface_faces + dict packed_send_vertices, packed_send_edges, packed_send_faces + INDEX_t subdomainNo, otherSubdomainNo, numFaces, numEdges, numVertices + tuple face, edge + INDEX_t globalCellNo, faceNo, edgeNo, vertexNo, k + INDEX_t[:, ::1] psf, pse, psv + interface_vertices, interface_edges, interface_faces = self.getDataForSend() + + ################################################## + # send interface faces + packed_send_faces = {} + for subdomainNo in interface_faces: + numFaces = 0 + for face in interface_faces[subdomainNo]: + for otherSubdomainNo in interface_faces[subdomainNo][face]: + for globalCellNo, faceNo in interface_faces[subdomainNo][face][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + 
continue + numFaces += 1 + psf = uninitialized((numFaces, 3), dtype=INDEX) + k = 0 + for face in interface_faces[subdomainNo]: + for otherSubdomainNo in interface_faces[subdomainNo][face]: + for globalCellNo, faceNo in interface_faces[subdomainNo][face][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + continue + psf[k, 0] = globalCellNo + psf[k, 1] = faceNo + psf[k, 2] = otherSubdomainNo + k += 1 + assert k == numFaces + packed_send_faces[subdomainNo] = psf + + ################################################## + # send interface edges + packed_send_edges = {} + for subdomainNo in interface_edges: + numEdges = 0 + for edge in interface_edges[subdomainNo]: + for otherSubdomainNo in interface_edges[subdomainNo][edge]: + for globalCellNo, edgeNo in interface_edges[subdomainNo][edge][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + continue + numEdges += 1 + pse = uninitialized((numEdges, 3), dtype=INDEX) + k = 0 + for edge in interface_edges[subdomainNo]: + for otherSubdomainNo in interface_edges[subdomainNo][edge]: + for globalCellNo, edgeNo in interface_edges[subdomainNo][edge][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + continue + pse[k, 0] = globalCellNo + pse[k, 1] = edgeNo + pse[k, 2] = otherSubdomainNo + k += 1 + assert k == numEdges, (k, numEdges) + packed_send_edges[subdomainNo] = pse + + ################################################## + # send interface vertices + packed_send_vertices = {} + for subdomainNo in interface_vertices: + numVertices = 0 + for vertex in interface_vertices[subdomainNo]: + for otherSubdomainNo in interface_vertices[subdomainNo][vertex]: + for globalCellNo, vertexNo in interface_vertices[subdomainNo][vertex][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + continue + numVertices += 1 + psv = uninitialized((numVertices, 3), dtype=INDEX) + k = 0 + for vertex in interface_vertices[subdomainNo]: + for otherSubdomainNo in interface_vertices[subdomainNo][vertex]: + for globalCellNo, vertexNo in interface_vertices[subdomainNo][vertex][otherSubdomainNo]: + if subdomainNo == otherSubdomainNo: + continue + psv[k, 0] = globalCellNo + psv[k, 1] = vertexNo + psv[k, 2] = otherSubdomainNo + k += 1 + assert k == numVertices + packed_send_vertices[subdomainNo] = psv + + return packed_send_vertices, packed_send_edges, packed_send_faces + + +cdef class interfaceProcessor: + cdef: + meshBase mesh + MPI.Comm newComm + INDEX_t[::1] localToGlobal + dict globalToLocalCells + dict interface_vertices + dict interface_edges + dict interface_faces + simplexMapper sMnew + set face_candidates, edge_candidates, vertex_candidates + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def __init__(self, meshBase mesh, MPI.Comm comm, INDEX_t[::1] localToGlobal, dict globalToLocalCells): + cdef: + INDEX_t[:, ::1] bvertices, bedges, bfaces + set face_candidates, edge_candidates, vertex_candidates + INDEX_t i + INDEX_t localCellNo, faceNo, edgeNo, vertexNo + self.mesh = mesh + self.newComm = comm + self.localToGlobal = localToGlobal + self.globalToLocalCells = globalToLocalCells + + self.interface_vertices = {} + self.interface_edges = {} + self.interface_faces = {} + + self.sMnew = self.mesh.simplexMapper + if self.mesh.dim == 3: + bvertices, bedges, bfaces = boundary3D(self.mesh) + + # face_candidates contains all boundary faces of the new subdomain + face_candidates = set() + for i in range(bfaces.shape[0]): + localCellNo = bfaces[i, 0] + faceNo = bfaces[i, 1] + face_candidates.add((localCellNo, faceNo)) + 
self.face_candidates = face_candidates + + # edge_candidates contains all boundary edges of the new subdomain + edge_candidates = set() + for i in range(bedges.shape[0]): + localCellNo = bedges[i, 0] + edgeNo = bedges[i, 1] + edge_candidates.add((localCellNo, edgeNo)) + self.edge_candidates = edge_candidates + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex_candidates.add((localCellNo, vertexNo)) + self.vertex_candidates = vertex_candidates + elif self.mesh.dim == 2: + bvertices, bedges = boundary2D(self.mesh) + + # edge_candidates contains all boundary edges of the new subdomain + edge_candidates = set() + for i in range(bedges.shape[0]): + localCellNo = bedges[i, 0] + edgeNo = bedges[i, 1] + edge_candidates.add((localCellNo, edgeNo)) + self.edge_candidates = edge_candidates + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex_candidates.add((localCellNo, vertexNo)) + self.vertex_candidates = vertex_candidates + elif self.mesh.dim == 1: + bvertices = boundary1D(self.mesh) + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex_candidates.add((localCellNo, vertexNo)) + self.vertex_candidates = vertex_candidates + else: + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void processInterfaceInformation(self, INDEX_t[:, ::1] packed_recv_faces, INDEX_t[:, ::1] packed_recv_edges, INDEX_t[:, ::1] packed_recv_vertices): + cdef: + INDEX_t i + INDEX_t globalCellNo, localCellNo, faceNo, edgeNo, vertexNo, otherSubdomainNo + for i in range(packed_recv_faces.shape[0]): + globalCellNo = packed_recv_faces[i, 0] + localCellNo = self.globalToLocalCells[globalCellNo] + faceNo = packed_recv_faces[i, 1] + otherSubdomainNo = packed_recv_faces[i, 2] + try: + self.interface_faces[otherSubdomainNo].append((localCellNo, faceNo)) + except KeyError: + self.interface_faces[otherSubdomainNo] = [(localCellNo, faceNo)] + + for i in range(packed_recv_edges.shape[0]): + globalCellNo = packed_recv_edges[i, 0] + localCellNo = self.globalToLocalCells[globalCellNo] + edgeNo = packed_recv_edges[i, 1] + otherSubdomainNo = packed_recv_edges[i, 2] + try: + self.interface_edges[otherSubdomainNo].append((localCellNo, edgeNo)) + except KeyError: + self.interface_edges[otherSubdomainNo] = [(localCellNo, edgeNo)] + + for i in range(packed_recv_vertices.shape[0]): + globalCellNo = packed_recv_vertices[i, 0] + localCellNo = self.globalToLocalCells[globalCellNo] + vertexNo = packed_recv_vertices[i, 1] + otherSubdomainNo = packed_recv_vertices[i, 2] + try: + self.interface_vertices[otherSubdomainNo].append((localCellNo, vertexNo)) + except KeyError: + self.interface_vertices[otherSubdomainNo] = [(localCellNo, vertexNo)] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setBoundaryInformation(self): + cdef: + INDEX_t[:, ::1] bvertices, bedges, bfaces + set face_candidates, edge_candidates, vertex_candidates + INDEX_t i, vertex + INDEX_t[::1] edge = np.empty((2), dtype=INDEX) + INDEX_t[::1] face = np.empty((3), dtype=INDEX) + TAG_t tag + INDEX_t localCellNo, faceNo, edgeNo, vertexNo + INDEX_t kFace, kEdge, kVertex + ENCODE_t hv + INDEX_t[:, ::1] subdomainBoundaryFaces, subdomainBoundaryEdges + INDEX_t[::1] subdomainBoundaryVertices + TAG_t[::1] 
subdomainBoundaryFaceTags, subdomainBoundaryEdgeTags, subdomainBoundaryVertexTags + dict boundaryEdgeTagsDict, boundaryVertexTagsDict + if self.mesh.dim == 3: + bvertices, bedges, bfaces = boundary3D(self.mesh) + + # face_candidates contains all boundary faces of the new subdomain + face_candidates = set() + for i in range(bfaces.shape[0]): + localCellNo = bfaces[i, 0] + faceNo = bfaces[i, 1] + face_candidates.add((localCellNo, faceNo)) + + # edge_candidates contains all boundary edges of the new subdomain + edge_candidates = set() + for i in range(bedges.shape[0]): + localCellNo = bedges[i, 0] + edgeNo = bedges[i, 1] + hv = self.sMnew.getEdgeInCellEncoded(localCellNo, edgeNo) + edge_candidates.add(hv) + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex = self.sMnew.getVertexInCell(localCellNo, vertexNo) + vertex_candidates.add(vertex) + elif self.mesh.dim == 2: + bvertices, bedges = boundary2D(self.mesh) + bfaces = uninitialized((0, 3), dtype=INDEX) + + face_candidates = set() + + # edge_candidates contains all boundary edges of the new subdomain + edge_candidates = set() + for i in range(bedges.shape[0]): + localCellNo = bedges[i, 0] + edgeNo = bedges[i, 1] + hv = self.sMnew.getEdgeInCellEncoded(localCellNo, edgeNo) + edge_candidates.add(hv) + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex = self.sMnew.getVertexInCell(localCellNo, vertexNo) + vertex_candidates.add(vertex) + elif self.mesh.dim == 1: + bvertices = boundary1D(self.mesh) + bedges = uninitialized((0, 2), dtype=INDEX) + bfaces = uninitialized((0, 3), dtype=INDEX) + + face_candidates = set() + edge_candidates = set() + + vertex_candidates = set() + for i in range(bvertices.shape[0]): + localCellNo = bvertices[i, 0] + vertexNo = bvertices[i, 1] + vertex = self.sMnew.getVertexInCell(localCellNo, vertexNo) + vertex_candidates.add(vertex) + else: + raise NotImplementedError() + + subdomainBoundaryFaces = uninitialized((bfaces.shape[0], 3), dtype=INDEX) + subdomainBoundaryFaceTags = np.zeros((bfaces.shape[0]), dtype=TAG) + + subdomainBoundaryEdges = uninitialized((bedges.shape[0], 2), dtype=INDEX) + subdomainBoundaryEdgeTags = np.zeros((bedges.shape[0]), dtype=TAG) + + subdomainBoundaryVertices = uninitialized((bvertices.shape[0]), dtype=INDEX) + subdomainBoundaryVertexTags = np.zeros((bvertices.shape[0]), dtype=TAG) + + kFace = 0 + kEdge = 0 + kVertex = 0 + + ################################################## + # boundary face tags + + for subdomainNo in self.interface_faces: + for localCellNo, faceNo in self.interface_faces[subdomainNo]: + try: + face_candidates.remove((localCellNo, faceNo)) + # set boundary face tag + self.sMnew.getFaceInCell(localCellNo, faceNo, face, sorted=True) + subdomainBoundaryFaces[kFace, 0] = face[0] + subdomainBoundaryFaces[kFace, 1] = face[1] + subdomainBoundaryFaces[kFace, 2] = face[2] + subdomainBoundaryFaceTags[kFace] = INTERIOR_NONOVERLAPPING + kFace += 1 + except KeyError: + pass + for localCellNo, faceNo in face_candidates: + self.sMnew.getFaceInCell(localCellNo, faceNo, face, sorted=True) + subdomainBoundaryFaces[kFace, 0] = face[0] + subdomainBoundaryFaces[kFace, 1] = face[1] + subdomainBoundaryFaces[kFace, 2] = face[2] + subdomainBoundaryFaceTags[kFace] = PHYSICAL + kFace += 1 + + ################################################## + # boundary edge tags + + for subdomainNo in self.interface_edges: + for localCellNo, 
edgeNo in self.interface_edges[subdomainNo]: + try: + hv = self.sMnew.getEdgeInCellEncoded(localCellNo, edgeNo) + edge_candidates.remove(hv) + # set boundary edge tag + self.sMnew.getEdgeInCell(localCellNo, edgeNo, edge, sorted=True) + subdomainBoundaryEdges[kEdge, 0] = edge[0] + subdomainBoundaryEdges[kEdge, 1] = edge[1] + subdomainBoundaryEdgeTags[kEdge] = INTERIOR_NONOVERLAPPING + kEdge += 1 + except KeyError: + pass + + # propagate from boundary faces to boundary edges + # we exploit that PHYSICAL faces are ordered last in boundaryFaceTags + boundaryEdgeTagsDict = {} + for faceNo in range(subdomainBoundaryFaces.shape[0]): + tag = subdomainBoundaryFaceTags[faceNo] + face = subdomainBoundaryFaces[faceNo, :] + self.sMnew.startLoopOverFaceEdges(face) + while self.sMnew.loopOverFaceEdgesEncoded(&hv): + boundaryEdgeTagsDict[hv] = tag + for hv in boundaryEdgeTagsDict: + try: + # set boundary edge tag + edge_candidates.remove(hv) + tag = boundaryEdgeTagsDict[hv] + decode_edge(hv, edge) + subdomainBoundaryEdges[kEdge, 0] = edge[0] + subdomainBoundaryEdges[kEdge, 1] = edge[1] + subdomainBoundaryEdgeTags[kEdge] = tag + kEdge += 1 + except KeyError: + pass + + for hv in edge_candidates: + decode_edge(hv, edge) + subdomainBoundaryEdges[kEdge, 0] = edge[0] + subdomainBoundaryEdges[kEdge, 1] = edge[1] + subdomainBoundaryEdgeTags[kEdge] = PHYSICAL + kEdge += 1 + + ################################################## + # boundary vertex tags + for subdomainNo in self.interface_vertices: + for localCellNo, vertexNo in self.interface_vertices[subdomainNo]: + try: + vertex = self.sMnew.getVertexInCell(localCellNo, vertexNo) + vertex_candidates.remove(vertex) + # set boundary vertex tag + subdomainBoundaryVertices[kVertex] = vertex + subdomainBoundaryVertexTags[kVertex] = INTERIOR_NONOVERLAPPING + kVertex += 1 + except KeyError: + pass + + # propagate from boundary edges to boundary vertices + boundaryVertexTagsDict = {} + if self.mesh.dim == 1: + pass + elif self.mesh.dim == 2: + # we exploit that PHYSICAL edges are ordered last in boundaryEdgeTags + for edgeNo in range(subdomainBoundaryEdges.shape[0]): + tag = subdomainBoundaryEdgeTags[edgeNo] + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 0]] = tag + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 1]] = tag + elif self.mesh.dim == 3: + for edgeNo in range(subdomainBoundaryEdges.shape[0]): + tag = subdomainBoundaryEdgeTags[edgeNo] + try: + tagOld = boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 0]] + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 0]] = max(tag, tagOld) + except KeyError: + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 0]] = tag + + try: + tagOld = boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 1]] + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 1]] = max(tag, tagOld) + except KeyError: + boundaryVertexTagsDict[subdomainBoundaryEdges[edgeNo, 1]] = tag + else: + raise NotImplementedError() + for vertex in boundaryVertexTagsDict: + try: + # set boundary vertex tag + vertex_candidates.remove(vertex) + tag = boundaryVertexTagsDict[vertex] + subdomainBoundaryVertices[kVertex] = vertex + subdomainBoundaryVertexTags[kVertex] = tag + kVertex += 1 + except KeyError: + pass + + for vertex in vertex_candidates: + subdomainBoundaryVertices[kVertex] = vertex + subdomainBoundaryVertexTags[kVertex] = PHYSICAL + kVertex += 1 + + assert kVertex == bvertices.shape[0] + + self.mesh._boundaryFaces = np.array(subdomainBoundaryFaces, copy=False) + self.mesh._boundaryFaceTags = 
np.array(subdomainBoundaryFaceTags, copy=False) + self.mesh._boundaryEdges = np.array(subdomainBoundaryEdges, copy=False) + self.mesh._boundaryEdgeTags = np.array(subdomainBoundaryEdgeTags, copy=False) + self.mesh._boundaryVertices = np.array(subdomainBoundaryVertices, copy=False) + self.mesh._boundaryVertexTags = np.array(subdomainBoundaryVertexTags, copy=False) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef interfaceManager getInterfaceManager(self): + cdef: + INDEX_t dim, subdomainNo, i, k + list interface_faces, interface_edges, interface_vertices + INDEX_t numFaces, numEdges, numVertices + INDEX_t face[3] + INDEX_t faceGlobal[3] + INDEX_t edge[2] + INDEX_t edgeGlobal[2] + INDEX_t vertex, vertexGlobal + tuple hvF + ENCODE_t hv + INDEX_t[:, ::1] faces, edges, vertices + INDEX_t cellNo, faceNo, order, edgeNo, vertexNo + INDEX_t c0, c1, c2 + list sortKeyF + ENCODE_t[::1] sortKeyE + INDEX_t[::1] sortKeyV + interfaceManager iM + dim = self.mesh.dim + iM = interfaceManager(self.newComm) + for subdomainNo in range(self.newComm.size): + if subdomainNo in self.interface_faces: + interface_faces = self.interface_faces[subdomainNo] + numFaces = len(interface_faces) + sortKeyF = [] + for i in range(numFaces): + cellNo, faceNo = interface_faces[i] + self.sMnew.getFaceInCell(cellNo, faceNo, face) + faceGlobal[0] = self.localToGlobal[face[0]] + faceGlobal[1] = self.localToGlobal[face[1]] + faceGlobal[2] = self.localToGlobal[face[2]] + sortFace(faceGlobal[0], faceGlobal[1], faceGlobal[2], faceGlobal) + hvF = encode_face(faceGlobal) + sortKeyF.append(hvF) + sortIdx = [f[0] for f in sorted(enumerate(sortKeyF), + key=lambda x: x[1])] + faces = uninitialized((numFaces, 3), dtype=INDEX) + k = 0 + for i in sortIdx: + cellNo, faceNo = interface_faces[i] + faces[k, 0] = cellNo + faces[k, 1] = faceNo + self.sMnew.getFaceInCell(cellNo, faceNo, face) + c0, c1, c2 = self.localToGlobal[face[0]], self.localToGlobal[face[1]], self.localToGlobal[face[2]] + if c0 < c1: + if c0 < c2: + if c1 < c2: + order = 0 + else: + order = -2 + else: + order = 2 + else: + if c1 < c2: + if c0 < c2: + order = -1 + else: + order = 1 + else: + order = -3 + faces[k, 2] = order + k += 1 + else: + faces = uninitialized((0, 3), dtype=INDEX) + + if subdomainNo in self.interface_edges: + interface_edges = self.interface_edges[subdomainNo] + numEdges = len(interface_edges) + sortKeyE = uninitialized((numEdges), dtype=ENCODE) + for i in range(numEdges): + cellNo, edgeNo = interface_edges[i] + self.sMnew.getEdgeInCell(cellNo, edgeNo, edge) + edgeGlobal[0] = self.localToGlobal[edge[0]] + edgeGlobal[1] = self.localToGlobal[edge[1]] + sortEdge(edgeGlobal[0], edgeGlobal[1], edgeGlobal) + hv = encode_edge(edgeGlobal) + sortKeyE[i] = hv + sortIdx = np.argsort(sortKeyE) + edges = uninitialized((numEdges, 3), dtype=INDEX) + k = 0 + for i in sortIdx: + cellNo, edgeNo = interface_edges[i] + edges[k, 0] = cellNo + edges[k, 1] = edgeNo + self.sMnew.getEdgeInCell(cellNo, edgeNo, edge) + if self.localToGlobal[edge[0]] < self.localToGlobal[edge[1]]: + edges[k, 2] = 0 + else: + edges[k, 2] = 1 + k += 1 + else: + edges = np.zeros((0, 3), dtype=INDEX) + + # process interface vertices + # Sort each vertex by global vertex id. 
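+ # Unlike faces (third column: a permutation code) and edges (third + # column: an orientation flag), vertices carry no orientation information, + # so each row stores just (cellNo, vertexNo).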
+ if subdomainNo in self.interface_vertices: + interface_vertices = self.interface_vertices[subdomainNo] + numVertices = len(interface_vertices) + sortKeyV = uninitialized((numVertices), dtype=INDEX) + for i in range(numVertices): + cellNo, vertexNo = interface_vertices[i] + vertex = self.sMnew.getVertexInCell(cellNo, vertexNo) + vertexGlobal = self.localToGlobal[vertex] + sortKeyV[i] = vertexGlobal + sortIdx = np.argsort(sortKeyV) + vertices = uninitialized((numVertices, 2), dtype=INDEX) + k = 0 + for i in sortIdx: + cellNo, vertexNo = interface_vertices[i] + vertices[k, 0] = cellNo + vertices[k, 1] = vertexNo + k += 1 + else: + vertices = uninitialized((0, 2), dtype=INDEX) + + if vertices.shape[0]+edges.shape[0]+faces.shape[0] > 0: + iM.interfaces[subdomainNo] = meshInterface(vertices, + edges, + faces, + self.newComm.rank, + subdomainNo, + dim) + return iM diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P1.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P1.pxi new file mode 100644 index 0000000..a562a74 --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P1.pxi @@ -0,0 +1,35 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class scalar_coefficient_stiffness_1d_sym_P1(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 1 + if qr is None: + qr = simplexXiaoGimbutas(1, 1) + super(scalar_coefficient_stiffness_1d_sym_P1, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_1d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + REAL_t I = 0. + INDEX_t k + + vol /= simplexVolume1D(simplex, self.temp) + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + for k in range(self.qr.num_nodes): + I += self.qr.weights[k] * self.funVals[k] + contrib[0] = (vol) * I + contrib[1] = (-vol) * I + contrib[2] = (vol) * I diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P2.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P2.pxi new file mode 100644 index 0000000..c3f493a --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_1D_P2.pxi @@ -0,0 +1,48 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class scalar_coefficient_stiffness_1d_sym_P2(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 1 + if qr is None: + qr = simplexXiaoGimbutas(3, 1) + super(scalar_coefficient_stiffness_1d_sym_P2, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_1d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + REAL_t I = 0. + INDEX_t k + + vol /= simplexVolume1D(simplex, self.temp) + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + contrib[0] = 0. + for k in range(self.qr.num_nodes): + contrib[0] += (vol*(4*self.PHI[0, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[1] = 0. + for k in range(self.qr.num_nodes): + contrib[1] += (-vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[1, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[2] = 0. + for k in range(self.qr.num_nodes): + contrib[2] += (-4*vol*(self.PHI[0, k] - self.PHI[1, k])*(4*self.PHI[0, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[3] = 0. + for k in range(self.qr.num_nodes): + contrib[3] += (vol*(4*self.PHI[1, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[4] = 0. + for k in range(self.qr.num_nodes): + contrib[4] += (4*vol*(self.PHI[0, k] - self.PHI[1, k])*(4*self.PHI[1, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[5] = 0. + for k in range(self.qr.num_nodes): + contrib[5] += (16*vol*(self.PHI[0, k] - self.PHI[1, k])**2) * self.qr.weights[k] * self.funVals[k] diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P1.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P1.pxi new file mode 100644 index 0000000..3a8e3f1 --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P1.pxi @@ -0,0 +1,45 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class scalar_coefficient_stiffness_2d_sym_P1(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 2 + if qr is None: + qr = simplexXiaoGimbutas(1, 2) + super(scalar_coefficient_stiffness_2d_sym_P1, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_2d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.250000000000000 + REAL_t l00, l01, l02, l11, l12, l22 + REAL_t I = 0. 
+ INDEX_t k + + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l11 = self.innerProducts[3] + l12 = self.innerProducts[4] + l22 = self.innerProducts[5] + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + for k in range(self.qr.num_nodes): + I += self.qr.weights[k] * self.funVals[k] + contrib[0] = (l00*vol) * I + contrib[1] = (l01*vol) * I + contrib[2] = (l02*vol) * I + contrib[3] = (l11*vol) * I + contrib[4] = (l12*vol) * I + contrib[5] = (l22*vol) * I diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P2.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P2.pxi new file mode 100644 index 0000000..1fc55ed --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_2D_P2.pxi @@ -0,0 +1,100 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class scalar_coefficient_stiffness_2d_sym_P2(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 2 + if qr is None: + qr = simplexXiaoGimbutas(3, 2) + super(scalar_coefficient_stiffness_2d_sym_P2, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_2d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.250000000000000 + REAL_t l00, l01, l02, l11, l12, l22 + REAL_t I = 0. + INDEX_t k + + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l11 = self.innerProducts[3] + l12 = self.innerProducts[4] + l22 = self.innerProducts[5] + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + contrib[0] = 0. + for k in range(self.qr.num_nodes): + contrib[0] += (l00*vol*(4*self.PHI[0, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[1] = 0. + for k in range(self.qr.num_nodes): + contrib[1] += (l01*vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[1, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[2] = 0. + for k in range(self.qr.num_nodes): + contrib[2] += (l02*vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[2, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[3] = 0. + for k in range(self.qr.num_nodes): + contrib[3] += (4*vol*(4*self.PHI[0, k] - 1)*(l00*self.PHI[1, k] + l01*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[4] = 0. + for k in range(self.qr.num_nodes): + contrib[4] += (4*vol*(4*self.PHI[0, k] - 1)*(l01*self.PHI[2, k] + l02*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[5] = 0. + for k in range(self.qr.num_nodes): + contrib[5] += (4*vol*(4*self.PHI[0, k] - 1)*(l00*self.PHI[2, k] + l02*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[6] = 0. + for k in range(self.qr.num_nodes): + contrib[6] += (l11*vol*(4*self.PHI[1, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[7] = 0. 
+ for k in range(self.qr.num_nodes): + contrib[7] += (l12*vol*(4*self.PHI[1, k] - 1)*(4*self.PHI[2, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[8] = 0. + for k in range(self.qr.num_nodes): + contrib[8] += (4*vol*(4*self.PHI[1, k] - 1)*(l01*self.PHI[1, k] + l11*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[9] = 0. + for k in range(self.qr.num_nodes): + contrib[9] += (4*vol*(4*self.PHI[1, k] - 1)*(l11*self.PHI[2, k] + l12*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[10] = 0. + for k in range(self.qr.num_nodes): + contrib[10] += (4*vol*(4*self.PHI[1, k] - 1)*(l01*self.PHI[2, k] + l12*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[11] = 0. + for k in range(self.qr.num_nodes): + contrib[11] += (l22*vol*(4*self.PHI[2, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[12] = 0. + for k in range(self.qr.num_nodes): + contrib[12] += (4*vol*(4*self.PHI[2, k] - 1)*(l02*self.PHI[1, k] + l12*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[13] = 0. + for k in range(self.qr.num_nodes): + contrib[13] += (4*vol*(4*self.PHI[2, k] - 1)*(l12*self.PHI[2, k] + l22*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[14] = 0. + for k in range(self.qr.num_nodes): + contrib[14] += (4*vol*(4*self.PHI[2, k] - 1)*(l02*self.PHI[2, k] + l22*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[15] = 0. + for k in range(self.qr.num_nodes): + contrib[15] += (16*vol*(l00*self.PHI[1, k]**2 + 2*l01*self.PHI[0, k]*self.PHI[1, k] + l11*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[16] = 0. + for k in range(self.qr.num_nodes): + contrib[16] += (16*vol*(l01*self.PHI[1, k]*self.PHI[2, k] + l02*self.PHI[1, k]**2 + l11*self.PHI[0, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[17] = 0. + for k in range(self.qr.num_nodes): + contrib[17] += (16*vol*(l00*self.PHI[1, k]*self.PHI[2, k] + l01*self.PHI[0, k]*self.PHI[2, k] + l02*self.PHI[0, k]*self.PHI[1, k] + l12*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[18] = 0. + for k in range(self.qr.num_nodes): + contrib[18] += (16*vol*(l11*self.PHI[2, k]**2 + 2*l12*self.PHI[1, k]*self.PHI[2, k] + l22*self.PHI[1, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[19] = 0. + for k in range(self.qr.num_nodes): + contrib[19] += (16*vol*(l01*self.PHI[2, k]**2 + l02*self.PHI[1, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[2, k] + l22*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[20] = 0. + for k in range(self.qr.num_nodes): + contrib[20] += (16*vol*(l00*self.PHI[2, k]**2 + 2*l02*self.PHI[0, k]*self.PHI[2, k] + l22*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P1.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P1.pxi new file mode 100644 index 0000000..7640e6f --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P1.pxi @@ -0,0 +1,53 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class scalar_coefficient_stiffness_3d_sym_P1(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 3 + if qr is None: + qr = simplexXiaoGimbutas(1, 3) + super(scalar_coefficient_stiffness_3d_sym_P1, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_3d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0277777777777778 + REAL_t l00, l01, l02, l03, l11, l12, l13, l22, l23, l33 + REAL_t I = 0. + INDEX_t k + + vol /= simplexVolumeAndProducts3D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l03 = self.innerProducts[3] + l11 = self.innerProducts[4] + l12 = self.innerProducts[5] + l13 = self.innerProducts[6] + l22 = self.innerProducts[7] + l23 = self.innerProducts[8] + l33 = self.innerProducts[9] + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + for k in range(self.qr.num_nodes): + I += self.qr.weights[k] * self.funVals[k] + contrib[0] = (l00*vol) * I + contrib[1] = (l01*vol) * I + contrib[2] = (l02*vol) * I + contrib[3] = (l03*vol) * I + contrib[4] = (l11*vol) * I + contrib[5] = (l12*vol) * I + contrib[6] = (l13*vol) * I + contrib[7] = (l22*vol) * I + contrib[8] = (l23*vol) * I + contrib[9] = (l33*vol) * I diff --git a/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P2.pxi b/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P2.pxi new file mode 100644 index 0000000..36ab4d1 --- /dev/null +++ b/fem/PyNucleus_fem/scalar_coefficient_stiffness_3D_P2.pxi @@ -0,0 +1,206 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class scalar_coefficient_stiffness_3d_sym_P2(stiffness_quadrature_matrix): + def __init__(self, function diffusivity, simplexQuadratureRule qr=None): + self.dim = 3 + if qr is None: + qr = simplexXiaoGimbutas(3, 3) + super(scalar_coefficient_stiffness_3d_sym_P2, self).__init__(diffusivity, qr) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(scalar_coefficient_stiffness_3d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0277777777777778 + REAL_t l00, l01, l02, l03, l11, l12, l13, l22, l23, l33 + REAL_t I = 0. + INDEX_t k + + vol /= simplexVolumeAndProducts3D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l03 = self.innerProducts[3] + l11 = self.innerProducts[4] + l12 = self.innerProducts[5] + l13 = self.innerProducts[6] + l22 = self.innerProducts[7] + l23 = self.innerProducts[8] + l33 = self.innerProducts[9] + + self.qr.evalFun(self.diffusivity, simplex, self.funVals) + contrib[0] = 0. 
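+ # 55 entries: the upper triangle of the symmetric 10x10 3D P2 element matrix
+ # (4 vertex + 6 edge functions); each entry is a quadrature sum of the
+ # diffusivity values funVals against gradient products built from the
+ # barycentric values PHI and the inner products l00, ..., l33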
+ for k in range(self.qr.num_nodes): + contrib[0] += (l00*vol*(4*self.PHI[0, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[1] = 0. + for k in range(self.qr.num_nodes): + contrib[1] += (l01*vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[1, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[2] = 0. + for k in range(self.qr.num_nodes): + contrib[2] += (l02*vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[2, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[3] = 0. + for k in range(self.qr.num_nodes): + contrib[3] += (l03*vol*(4*self.PHI[0, k] - 1)*(4*self.PHI[3, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[4] = 0. + for k in range(self.qr.num_nodes): + contrib[4] += (4*vol*(4*self.PHI[0, k] - 1)*(l00*self.PHI[1, k] + l01*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[5] = 0. + for k in range(self.qr.num_nodes): + contrib[5] += (4*vol*(4*self.PHI[0, k] - 1)*(l01*self.PHI[2, k] + l02*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[6] = 0. + for k in range(self.qr.num_nodes): + contrib[6] += (4*vol*(4*self.PHI[0, k] - 1)*(l00*self.PHI[2, k] + l02*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[7] = 0. + for k in range(self.qr.num_nodes): + contrib[7] += (4*vol*(4*self.PHI[0, k] - 1)*(l00*self.PHI[3, k] + l03*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[8] = 0. + for k in range(self.qr.num_nodes): + contrib[8] += (4*vol*(4*self.PHI[0, k] - 1)*(l01*self.PHI[3, k] + l03*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[9] = 0. + for k in range(self.qr.num_nodes): + contrib[9] += (4*vol*(4*self.PHI[0, k] - 1)*(l02*self.PHI[3, k] + l03*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[10] = 0. + for k in range(self.qr.num_nodes): + contrib[10] += (l11*vol*(4*self.PHI[1, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[11] = 0. + for k in range(self.qr.num_nodes): + contrib[11] += (l12*vol*(4*self.PHI[1, k] - 1)*(4*self.PHI[2, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[12] = 0. + for k in range(self.qr.num_nodes): + contrib[12] += (l13*vol*(4*self.PHI[1, k] - 1)*(4*self.PHI[3, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[13] = 0. + for k in range(self.qr.num_nodes): + contrib[13] += (4*vol*(4*self.PHI[1, k] - 1)*(l01*self.PHI[1, k] + l11*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[14] = 0. + for k in range(self.qr.num_nodes): + contrib[14] += (4*vol*(4*self.PHI[1, k] - 1)*(l11*self.PHI[2, k] + l12*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[15] = 0. + for k in range(self.qr.num_nodes): + contrib[15] += (4*vol*(4*self.PHI[1, k] - 1)*(l01*self.PHI[2, k] + l12*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[16] = 0. + for k in range(self.qr.num_nodes): + contrib[16] += (4*vol*(4*self.PHI[1, k] - 1)*(l01*self.PHI[3, k] + l13*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[17] = 0. + for k in range(self.qr.num_nodes): + contrib[17] += (4*vol*(4*self.PHI[1, k] - 1)*(l11*self.PHI[3, k] + l13*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[18] = 0. + for k in range(self.qr.num_nodes): + contrib[18] += (4*vol*(4*self.PHI[1, k] - 1)*(l12*self.PHI[3, k] + l13*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[19] = 0. + for k in range(self.qr.num_nodes): + contrib[19] += (l22*vol*(4*self.PHI[2, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[20] = 0. 
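+ # entries 20-33: vertex-vertex and vertex-edge couplings involving vertices 2 and 3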
+ for k in range(self.qr.num_nodes): + contrib[20] += (l23*vol*(4*self.PHI[2, k] - 1)*(4*self.PHI[3, k] - 1)) * self.qr.weights[k] * self.funVals[k] + contrib[21] = 0. + for k in range(self.qr.num_nodes): + contrib[21] += (4*vol*(4*self.PHI[2, k] - 1)*(l02*self.PHI[1, k] + l12*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[22] = 0. + for k in range(self.qr.num_nodes): + contrib[22] += (4*vol*(4*self.PHI[2, k] - 1)*(l12*self.PHI[2, k] + l22*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[23] = 0. + for k in range(self.qr.num_nodes): + contrib[23] += (4*vol*(4*self.PHI[2, k] - 1)*(l02*self.PHI[2, k] + l22*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[24] = 0. + for k in range(self.qr.num_nodes): + contrib[24] += (4*vol*(4*self.PHI[2, k] - 1)*(l02*self.PHI[3, k] + l23*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[25] = 0. + for k in range(self.qr.num_nodes): + contrib[25] += (4*vol*(4*self.PHI[2, k] - 1)*(l12*self.PHI[3, k] + l23*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[26] = 0. + for k in range(self.qr.num_nodes): + contrib[26] += (4*vol*(4*self.PHI[2, k] - 1)*(l22*self.PHI[3, k] + l23*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[27] = 0. + for k in range(self.qr.num_nodes): + contrib[27] += (l33*vol*(4*self.PHI[3, k] - 1)**2) * self.qr.weights[k] * self.funVals[k] + contrib[28] = 0. + for k in range(self.qr.num_nodes): + contrib[28] += (4*vol*(4*self.PHI[3, k] - 1)*(l03*self.PHI[1, k] + l13*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[29] = 0. + for k in range(self.qr.num_nodes): + contrib[29] += (4*vol*(4*self.PHI[3, k] - 1)*(l13*self.PHI[2, k] + l23*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[30] = 0. + for k in range(self.qr.num_nodes): + contrib[30] += (4*vol*(4*self.PHI[3, k] - 1)*(l03*self.PHI[2, k] + l23*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[31] = 0. + for k in range(self.qr.num_nodes): + contrib[31] += (4*vol*(4*self.PHI[3, k] - 1)*(l03*self.PHI[3, k] + l33*self.PHI[0, k])) * self.qr.weights[k] * self.funVals[k] + contrib[32] = 0. + for k in range(self.qr.num_nodes): + contrib[32] += (4*vol*(4*self.PHI[3, k] - 1)*(l13*self.PHI[3, k] + l33*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[33] = 0. + for k in range(self.qr.num_nodes): + contrib[33] += (4*vol*(4*self.PHI[3, k] - 1)*(l23*self.PHI[3, k] + l33*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[34] = 0. + for k in range(self.qr.num_nodes): + contrib[34] += (16*vol*(l00*self.PHI[1, k]**2 + 2*l01*self.PHI[0, k]*self.PHI[1, k] + l11*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[35] = 0. + for k in range(self.qr.num_nodes): + contrib[35] += (16*vol*(l01*self.PHI[1, k]*self.PHI[2, k] + l02*self.PHI[1, k]**2 + l11*self.PHI[0, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[36] = 0. + for k in range(self.qr.num_nodes): + contrib[36] += (16*vol*(l00*self.PHI[1, k]*self.PHI[2, k] + l01*self.PHI[0, k]*self.PHI[2, k] + l02*self.PHI[0, k]*self.PHI[1, k] + l12*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[37] = 0. + for k in range(self.qr.num_nodes): + contrib[37] += (16*vol*(l00*self.PHI[1, k]*self.PHI[3, k] + l01*self.PHI[0, k]*self.PHI[3, k] + l03*self.PHI[0, k]*self.PHI[1, k] + l13*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[38] = 0. 
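+ # entries 38-54: remaining edge-edge couplings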
+ for k in range(self.qr.num_nodes): + contrib[38] += (16*vol*(l01*self.PHI[1, k]*self.PHI[3, k] + l03*self.PHI[1, k]**2 + l11*self.PHI[0, k]*self.PHI[3, k] + l13*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[39] = 0. + for k in range(self.qr.num_nodes): + contrib[39] += (16*vol*(l02*self.PHI[1, k]*self.PHI[3, k] + l03*self.PHI[1, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[3, k] + l13*self.PHI[0, k]*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[40] = 0. + for k in range(self.qr.num_nodes): + contrib[40] += (16*vol*(l11*self.PHI[2, k]**2 + 2*l12*self.PHI[1, k]*self.PHI[2, k] + l22*self.PHI[1, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[41] = 0. + for k in range(self.qr.num_nodes): + contrib[41] += (16*vol*(l01*self.PHI[2, k]**2 + l02*self.PHI[1, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[2, k] + l22*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[42] = 0. + for k in range(self.qr.num_nodes): + contrib[42] += (16*vol*(l01*self.PHI[2, k]*self.PHI[3, k] + l02*self.PHI[1, k]*self.PHI[3, k] + l13*self.PHI[0, k]*self.PHI[2, k] + l23*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[43] = 0. + for k in range(self.qr.num_nodes): + contrib[43] += (16*vol*(l11*self.PHI[2, k]*self.PHI[3, k] + l12*self.PHI[1, k]*self.PHI[3, k] + l13*self.PHI[1, k]*self.PHI[2, k] + l23*self.PHI[1, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[44] = 0. + for k in range(self.qr.num_nodes): + contrib[44] += (16*vol*(l12*self.PHI[2, k]*self.PHI[3, k] + l13*self.PHI[2, k]**2 + l22*self.PHI[1, k]*self.PHI[3, k] + l23*self.PHI[1, k]*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[45] = 0. + for k in range(self.qr.num_nodes): + contrib[45] += (16*vol*(l00*self.PHI[2, k]**2 + 2*l02*self.PHI[0, k]*self.PHI[2, k] + l22*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[46] = 0. + for k in range(self.qr.num_nodes): + contrib[46] += (16*vol*(l00*self.PHI[2, k]*self.PHI[3, k] + l02*self.PHI[0, k]*self.PHI[3, k] + l03*self.PHI[0, k]*self.PHI[2, k] + l23*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[47] = 0. + for k in range(self.qr.num_nodes): + contrib[47] += (16*vol*(l01*self.PHI[2, k]*self.PHI[3, k] + l03*self.PHI[1, k]*self.PHI[2, k] + l12*self.PHI[0, k]*self.PHI[3, k] + l23*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[48] = 0. + for k in range(self.qr.num_nodes): + contrib[48] += (16*vol*(l02*self.PHI[2, k]*self.PHI[3, k] + l03*self.PHI[2, k]**2 + l22*self.PHI[0, k]*self.PHI[3, k] + l23*self.PHI[0, k]*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[49] = 0. + for k in range(self.qr.num_nodes): + contrib[49] += (16*vol*(l00*self.PHI[3, k]**2 + 2*l03*self.PHI[0, k]*self.PHI[3, k] + l33*self.PHI[0, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[50] = 0. + for k in range(self.qr.num_nodes): + contrib[50] += (16*vol*(l01*self.PHI[3, k]**2 + l03*self.PHI[1, k]*self.PHI[3, k] + l13*self.PHI[0, k]*self.PHI[3, k] + l33*self.PHI[0, k]*self.PHI[1, k])) * self.qr.weights[k] * self.funVals[k] + contrib[51] = 0. + for k in range(self.qr.num_nodes): + contrib[51] += (16*vol*(l02*self.PHI[3, k]**2 + l03*self.PHI[2, k]*self.PHI[3, k] + l23*self.PHI[0, k]*self.PHI[3, k] + l33*self.PHI[0, k]*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[52] = 0. 
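+ # entries 52-54: mutual couplings of the edge functions on edges (1,3) and (2,3)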
+ for k in range(self.qr.num_nodes): + contrib[52] += (16*vol*(l11*self.PHI[3, k]**2 + 2*l13*self.PHI[1, k]*self.PHI[3, k] + l33*self.PHI[1, k]**2)) * self.qr.weights[k] * self.funVals[k] + contrib[53] = 0. + for k in range(self.qr.num_nodes): + contrib[53] += (16*vol*(l12*self.PHI[3, k]**2 + l13*self.PHI[2, k]*self.PHI[3, k] + l23*self.PHI[1, k]*self.PHI[3, k] + l33*self.PHI[1, k]*self.PHI[2, k])) * self.qr.weights[k] * self.funVals[k] + contrib[54] = 0. + for k in range(self.qr.num_nodes): + contrib[54] += (16*vol*(l22*self.PHI[3, k]**2 + 2*l23*self.PHI[2, k]*self.PHI[3, k] + l33*self.PHI[2, k]**2)) * self.qr.weights[k] * self.funVals[k] diff --git a/fem/PyNucleus_fem/simplexMapper.pxd b/fem/PyNucleus_fem/simplexMapper.pxd new file mode 100644 index 0000000..ff266a6 --- /dev/null +++ b/fem/PyNucleus_fem/simplexMapper.pxd @@ -0,0 +1,117 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t + +cdef class simplexMapper: + cdef: + REAL_t[:, ::1] vertices + INDEX_t[:, ::1] cells + INDEX_t[::1] temp_cell + INDEX_t[::1] temp_edge + INDEX_t[::1] temp_edge2 + INDEX_t[::1] temp_face + INDEX_t[::1] temp_face2 + INDEX_t[:, ::1] temp_edges + INDEX_t[:, ::1] temp_faces + INDEX_t[:, ::1] temp_edges2 + INDEX_t iteration_counter + INDEX_t dim + + cdef void startLoopOverCellNodes(self, INDEX_t[::1] cell) + cdef BOOL_t loopOverCellNodes(self, INDEX_t *node) + cdef void startLoopOverCellEdges(self, INDEX_t[::1] cell) + cdef BOOL_t loopOverCellEdges(self, INDEX_t[::1] edge) + cdef BOOL_t loopOverCellEdgesEncoded(self, ENCODE_t * hv) + cdef void startLoopOverCellFaces(self, INDEX_t[::1] cell) + cdef BOOL_t loopOverCellFaces(self, INDEX_t[::1] face) + cdef BOOL_t loopOverCellFacesEncoded(self, INDEX_t * t0, ENCODE_t * t1) + cdef void startLoopOverFaceEdges(self, INDEX_t[::1] face) + cdef BOOL_t loopOverFaceEdges(self, INDEX_t[::1] edge) + cdef BOOL_t loopOverFaceEdgesEncoded(self, ENCODE_t * hv) + cdef INDEX_t getVertexInCell(self, INDEX_t cellNo, INDEX_t vertexNo) + cdef INDEX_t findVertexInCell(self, INDEX_t cellNo, INDEX_t vertexNo) + cdef void getEdgeVerticesLocal(self, + INDEX_t edgeNo, + INDEX_t order, + INDEX_t[::1] indices) + cdef void getFaceVerticesLocal(self, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices) + cdef void getFaceEdgesLocal(self, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices, + INDEX_t[::1] orders) + cdef void getEdgeVerticesGlobal(self, + INDEX_t cellNo, + INDEX_t edgeNo, + INDEX_t order, + INDEX_t[::1] indices) + cdef void getFaceVerticesGlobal(self, + INDEX_t cellNo, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices) + cdef ENCODE_t sortAndEncodeEdge(self, INDEX_t[::1] edge) + cdef INDEX_t findEdgeInCell(self, + INDEX_t cellNo, + INDEX_t[::1] edge) + cdef INDEX_t findEdgeInCellEncoded(self, + INDEX_t cellNo, + ENCODE_t hv) + cdef void getEdgeInCell(self, + INDEX_t cellNo, + INDEX_t edgeNo, + INDEX_t[::1] edge, + BOOL_t sorted=*) + cdef ENCODE_t getEdgeInCellEncoded(self, + INDEX_t cellNo, + INDEX_t edgeNo) + cdef tuple sortAndEncodeFace(self, INDEX_t[::1] face) + cdef 
INDEX_t findFaceInCell(self, + INDEX_t cellNo, + const INDEX_t[::1] face) + cdef void getFaceInCell(self, + INDEX_t cellNo, + INDEX_t faceNo, + INDEX_t[::1] face, + BOOL_t sorted=*) + cdef tuple getFaceInCellEncoded(self, + INDEX_t cellNo, + INDEX_t faceNo) + cdef INDEX_t findFaceInCellEncoded(self, + INDEX_t cellNo, + tuple hv) + cdef void getEdgeSimplex(self, + INDEX_t cellNo, + INDEX_t edgeNo, + REAL_t[:, ::1] edgeSimplex) + cdef void getEncodedEdgeSimplex(self, + ENCODE_t hv, + REAL_t[:, ::1] edgeSimplex) + cdef void getFaceSimplex(self, + INDEX_t cellNo, + INDEX_t faceNo, + REAL_t[:, ::1] faceSimplex) + cdef void getEncodedFaceSimplex(self, + tuple hv, + REAL_t[:, ::1] faceSimplex) + + +cdef class simplexMapper1D(simplexMapper): + pass + + +cdef class simplexMapper2D(simplexMapper): + pass + + +cdef class simplexMapper3D(simplexMapper): + pass diff --git a/fem/PyNucleus_fem/simplexMapper.pyx b/fem/PyNucleus_fem/simplexMapper.pyx new file mode 100644 index 0000000..0a06992 --- /dev/null +++ b/fem/PyNucleus_fem/simplexMapper.pyx @@ -0,0 +1,721 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, TAG +from PyNucleus_base import uninitialized + +from . meshCy cimport (sortEdge, sortFace, + encode_face, + decode_face, + decode_edge, + encode_edge) + + +cdef class simplexMapper: + def __init__(self, mesh=None): + if mesh is not None: + self.vertices = mesh.vertices + self.cells = mesh.cells + assert self.dim == mesh.dim + self.temp_edge = uninitialized((2), dtype=INDEX) + self.temp_edge2 = uninitialized((2), dtype=INDEX) + self.temp_face = uninitialized((3), dtype=INDEX) + self.temp_face2 = uninitialized((3), dtype=INDEX) + self.temp_cell = uninitialized((4), dtype=INDEX) + self.iteration_counter = 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverCellNodes(self, INDEX_t[::1] cell): + cdef: + INDEX_t i + self.iteration_counter = 0 + for i in range(self.dim+1): + self.temp_cell[i] = cell[i] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverCellNodes(self, INDEX_t *node): + if self.iteration_counter < self.dim+1: + node[0] = self.temp_cell[self.iteration_counter] + self.iteration_counter += 1 + return True + else: + return False + + cdef void startLoopOverCellEdges(self, INDEX_t[::1] cell): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverCellEdges(self, INDEX_t[::1] edge): + if self.iteration_counter < self.temp_edges.shape[0]: + edge[0], edge[1] = self.temp_edges[self.iteration_counter, 0], self.temp_edges[self.iteration_counter, 1] + self.iteration_counter += 1 + return True + else: + return False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverCellEdgesEncoded(self, ENCODE_t * hv): + cdef: + BOOL_t rval + rval = self.loopOverCellEdges(self.temp_edge2) + hv[0] = 
self.sortAndEncodeEdge(self.temp_edge2) + return rval + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def startLoopOverCellEdges_py(self, INDEX_t[::1] cell): + self.startLoopOverCellEdges(cell) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def loopOverCellEdges_py(self, INDEX_t[::1] edge): + return self.loopOverCellEdges(edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def loopOverCellEdgesEncoded_py(self, ENCODE_t[::1] hv): + cdef: + BOOL_t rval + rval = self.loopOverCellEdgesEncoded(&hv[0]) + return rval + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverCellFaces(self, INDEX_t[::1] cell): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverCellFaces(self, INDEX_t[::1] face): + if self.iteration_counter < self.temp_faces.shape[0]: + face[0], face[1], face[2] = self.temp_faces[self.iteration_counter, 0], self.temp_faces[self.iteration_counter, 1], self.temp_faces[self.iteration_counter, 2] + self.iteration_counter += 1 + return True + else: + return False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverCellFacesEncoded(self, INDEX_t * t0, ENCODE_t * t1): + cdef: + BOOL_t rval + rval = self.loopOverCellFaces(self.temp_face2) + t0[0], t1[0] = self.sortAndEncodeFace(self.temp_face2) + return rval + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverFaceEdges(self, INDEX_t[::1] face): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverFaceEdges(self, INDEX_t[::1] edge): + if self.iteration_counter < self.temp_edges2.shape[0]: + edge[0], edge[1] = self.temp_edges2[self.iteration_counter, 0], self.temp_edges2[self.iteration_counter, 1] + self.iteration_counter += 1 + return True + else: + return False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef BOOL_t loopOverFaceEdgesEncoded(self, ENCODE_t * hv): + cdef: + BOOL_t rval + rval = self.loopOverFaceEdges(self.temp_edge2) + hv[0] = self.sortAndEncodeEdge(self.temp_edge2) + return rval + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def startLoopOverFaceEdges_py(self, INDEX_t[::1] face): + self.startLoopOverFaceEdges(face) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def loopOverFaceEdges_py(self, INDEX_t[::1] edge): + return self.loopOverFaceEdges(edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def loopOverFaceEdgesEncoded_py(self, list hv): + cdef: + ENCODE_t hv2 + BOOL_t rval + rval = self.loopOverFaceEdgesEncoded(&hv2) + hv[0] = hv2 + return rval + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t getVertexInCell(self, INDEX_t cellNo, INDEX_t vertexNo): + return self.cells[cellNo, vertexNo] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getVertexInCell_py(self, INDEX_t cellNo, INDEX_t vertexNo): + return self.getVertexInCell(cellNo, vertexNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + 
@cython.wraparound(False) + cdef INDEX_t findVertexInCell(self, INDEX_t cellNo, INDEX_t vertexNo): + cdef: + INDEX_t i + for i in range(self.cells.shape[1]): + if self.cells[cellNo, i] == vertexNo: + return i + return -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def findVertexInCell_py(self, INDEX_t cellNo, INDEX_t vertexNo): + return self.findVertexInCell(cellNo, vertexNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEdgeVerticesLocal(self, + INDEX_t edgeNo, + INDEX_t order, + INDEX_t[::1] indices): + if edgeNo == 0: + indices[0] = 0 + indices[1] = 1 + elif edgeNo == 1: + indices[0] = 1 + indices[1] = 2 + elif edgeNo == 2: + indices[0] = 2 + indices[1] = 0 + elif edgeNo == 3: + indices[0] = 0 + indices[1] = 3 + elif edgeNo == 4: + indices[0] = 1 + indices[1] = 3 + else: + indices[0] = 2 + indices[1] = 3 + if order == 1: + indices[0], indices[1] = indices[1], indices[0] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getFaceVerticesLocal(self, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices): + if faceNo == 0: + indices[0], indices[1], indices[2] = 0, 2, 1 + elif faceNo == 1: + indices[0], indices[1], indices[2] = 0, 1, 3 + elif faceNo == 2: + indices[0], indices[1], indices[2] = 1, 2, 3 + else: + indices[0], indices[1], indices[2] = 2, 0, 3 + + if order == 1: + indices[0], indices[1], indices[2] = indices[1], indices[2], indices[0] + elif order == 2: + indices[0], indices[1], indices[2] = indices[2], indices[0], indices[1] + elif order == -1: + indices[0], indices[1], indices[2] = indices[1], indices[0], indices[2] + elif order == -2: + indices[0], indices[1], indices[2] = indices[0], indices[2], indices[1] + elif order == -3: + indices[0], indices[1], indices[2] = indices[2], indices[1], indices[0] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getFaceEdgesLocal(self, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices, + INDEX_t[::1] orders): + if faceNo == 0: + indices[0], indices[1], indices[2] = 2, 1, 0 + orders[0], orders[1], orders[2] = 1, 1, 1 + elif faceNo == 1: + indices[0], indices[1], indices[2] = 0, 4, 3 + orders[0], orders[1], orders[2] = 0, 0, 1 + elif faceNo == 2: + indices[0], indices[1], indices[2] = 1, 5, 4 + orders[0], orders[1], orders[2] = 0, 0, 1 + else: + indices[0], indices[1], indices[2] = 2, 3, 5 + orders[0], orders[1], orders[2] = 0, 0, 1 + + if order == 1: + indices[0], indices[1], indices[2] = indices[1], indices[2], indices[0] + orders[0], orders[1], orders[2] = orders[1], orders[2], orders[0] + elif order == 2: + indices[0], indices[1], indices[2] = indices[2], indices[0], indices[1] + orders[0], orders[1], orders[2] = orders[2], orders[0], orders[1] + elif order == -1: + indices[0], indices[1], indices[2] = indices[0], indices[2], indices[1] + orders[0], orders[1], orders[2] = 1-orders[0], 1-orders[2], 1-orders[1] + elif order == -2: + indices[0], indices[1], indices[2] = indices[2], indices[1], indices[0] + orders[0], orders[1], orders[2] = 1-orders[2], 1-orders[1], 1-orders[0] + elif order == -3: + indices[0], indices[1], indices[2] = indices[1], indices[0], indices[2] + orders[0], orders[1], orders[2] = 1-orders[1], 1-orders[0], 1-orders[2] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEdgeVerticesGlobal(self, + INDEX_t cellNo, + INDEX_t edgeNo, + 
INDEX_t order, + INDEX_t[::1] indices): + cdef: + INDEX_t j + self.getEdgeVerticesLocal(edgeNo, order, indices) + for j in range(2): + indices[j] = self.cells[cellNo, indices[j]] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getFaceVerticesGlobal(self, + INDEX_t cellNo, + INDEX_t faceNo, + INDEX_t order, + INDEX_t[::1] indices): + cdef: + INDEX_t j + self.getFaceVerticesLocal(faceNo, order, indices) + for j in range(3): + indices[j] = self.cells[cellNo, indices[j]] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef ENCODE_t sortAndEncodeEdge(self, INDEX_t[::1] edge): + sortEdge(edge[0], edge[1], self.temp_edge) + return encode_edge(self.temp_edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def sortAndEncodeEdge_py(self, INDEX_t[::1] edge): + return self.sortAndEncodeEdge(edge) + + cdef INDEX_t findEdgeInCell(self, + INDEX_t cellNo, + INDEX_t[::1] edge): + raise NotImplementedError() + + def findEdgeInCell_py(self, INDEX_t cellNo, + INDEX_t[::1] edge): + return self.findEdgeInCell(cellNo, edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findEdgeInCellEncoded(self, + INDEX_t cellNo, + ENCODE_t hv): + decode_edge(hv, self.temp_edge2) + return self.findEdgeInCell(cellNo, self.temp_edge2) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def findEdgeInCellEncoded_py(self, + INDEX_t cellNo, + ENCODE_t hv): + return self.findEdgeInCellEncoded(cellNo, hv) + + cdef void getEdgeInCell(self, + INDEX_t cellNo, + INDEX_t edgeNo, + INDEX_t[::1] edge, + BOOL_t sorted=False): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getEdgeInCell_py(self, + INDEX_t cellNo, + INDEX_t edgeNo, + BOOL_t sorted=False): + edge = uninitialized((2), dtype=INDEX) + self.getEdgeInCell(cellNo, edgeNo, edge, sorted) + return edge + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef ENCODE_t getEdgeInCellEncoded(self, + INDEX_t cellNo, + INDEX_t edgeNo): + self.getEdgeInCell(cellNo, edgeNo, self.temp_edge) + return self.sortAndEncodeEdge(self.temp_edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getEdgeInCellEncoded_py(self, + INDEX_t cellNo, + INDEX_t edgeNo): + return self.getEdgeInCellEncoded(cellNo, edgeNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef tuple sortAndEncodeFace(self, INDEX_t[::1] face): + sortFace(face[0], face[1], face[2], self.temp_face) + return encode_face(self.temp_face) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def sortAndEncodeFace_py(self, INDEX_t[::1] face): + return self.sortAndEncodeFace(face) + + cdef INDEX_t findFaceInCell(self, + INDEX_t cellNo, + INDEX_t[::1] face): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def findFaceInCell_py(self, + INDEX_t cellNo, + INDEX_t[::1] face): + return self.findFaceInCell(cellNo, face) + + cdef void getFaceInCell(self, + INDEX_t cellNo, + INDEX_t faceNo, + INDEX_t[::1] face, + BOOL_t sorted=False): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def 
getFaceInCell_py(self, + INDEX_t cellNo, + INDEX_t faceNo, + BOOL_t sorted=False): + face = uninitialized((3), dtype=INDEX) + self.getFaceInCell(cellNo, faceNo, face, sorted) + return face + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef tuple getFaceInCellEncoded(self, + INDEX_t cellNo, + INDEX_t faceNo): + self.getFaceInCell(cellNo, faceNo, self.temp_face) + return self.sortAndEncodeFace(self.temp_face) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getFaceInCellEncoded_py(self, + INDEX_t cellNo, + INDEX_t faceNo): + return self.getFaceInCellEncoded(cellNo, faceNo) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findFaceInCellEncoded(self, + INDEX_t cellNo, + tuple hv): + decode_face(hv, self.temp_face2) + return self.findFaceInCell(cellNo, self.temp_face2) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def findFaceInCellEncoded_py(self, + INDEX_t cellNo, + tuple hv): + return self.findFaceInCellEncoded(cellNo, hv) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEdgeSimplex(self, + INDEX_t cellNo, + INDEX_t edgeNo, + REAL_t[:, ::1] edgeSimplex): + cdef: + INDEX_t vertexNo, j + self.getEdgeInCell(cellNo, edgeNo, self.temp_edge) + for vertexNo in range(2): + for j in range(self.dim): + edgeSimplex[vertexNo, j] = self.vertices[self.temp_edge[vertexNo], j] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEncodedEdgeSimplex(self, + ENCODE_t hv, + REAL_t[:, ::1] edgeSimplex): + cdef: + INDEX_t vertexNo, j + decode_edge(hv, self.temp_edge) + for vertexNo in range(2): + for j in range(self.dim): + edgeSimplex[vertexNo, j] = self.vertices[self.temp_edge[vertexNo], j] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getFaceSimplex(self, + INDEX_t cellNo, + INDEX_t faceNo, + REAL_t[:, ::1] faceSimplex): + cdef: + INDEX_t vertexNo, j + self.getFaceInCell(cellNo, faceNo, self.temp_face) + for vertexNo in range(3): + for j in range(3): + faceSimplex[vertexNo, j] = self.vertices[self.temp_face[vertexNo], j] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEncodedFaceSimplex(self, + tuple hv, + REAL_t[:, ::1] faceSimplex): + cdef: + INDEX_t vertexNo, j + decode_face(hv, self.temp_face) + for vertexNo in range(3): + for j in range(3): + faceSimplex[vertexNo, j] = self.vertices[self.temp_face[vertexNo], j] + + +cdef class simplexMapper1D(simplexMapper): + def __init__(self, mesh=None): + self.dim = 1 + super(simplexMapper1D, self).__init__(mesh) + + +cdef class simplexMapper2D(simplexMapper): + def __init__(self, mesh=None): + self.dim = 2 + super(simplexMapper2D, self).__init__(mesh) + self.temp_edges = uninitialized((3, 2), dtype=INDEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverCellEdges(self, INDEX_t[::1] cell): + cdef: + INDEX_t c0, c1, c2 + c0, c1, c2 = cell[0], cell[1], cell[2] + sortEdge(c0, c1, self.temp_edges[0, :]) + sortEdge(c0, c2, self.temp_edges[1, :]) + sortEdge(c1, c2, self.temp_edges[2, :]) + self.iteration_counter = 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findEdgeInCell(self, + INDEX_t cellNo, + INDEX_t[::1] 
edge): + sortEdge(self.cells[cellNo, 0], self.cells[cellNo, 1], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 0 + sortEdge(self.cells[cellNo, 1], self.cells[cellNo, 2], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 1 + return 2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEdgeInCell(self, + INDEX_t cellNo, + INDEX_t edgeNo, + INDEX_t[::1] edge, + BOOL_t sorted=False): + if edgeNo == 0: + edge[0], edge[1] = self.cells[cellNo, 0], self.cells[cellNo, 1] + elif edgeNo == 1: + edge[0], edge[1] = self.cells[cellNo, 1], self.cells[cellNo, 2] + else: + edge[0], edge[1] = self.cells[cellNo, 2], self.cells[cellNo, 0] + if sorted: + sortEdge(edge[0], edge[1], edge) + + +cdef class simplexMapper3D(simplexMapper): + def __init__(self, mesh=None): + self.dim = 3 + super(simplexMapper3D, self).__init__(mesh) + self.temp_edges = uninitialized((6, 2), dtype=INDEX) + self.temp_faces = uninitialized((4, 3), dtype=INDEX) + self.temp_edges2 = uninitialized((3, 2), dtype=INDEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverCellEdges(self, INDEX_t[::1] cell): + cdef: + INDEX_t c0, c1, c2, c3 + c0, c1, c2, c3 = cell[0], cell[1], cell[2], cell[3] + sortEdge(c0, c1, self.temp_edges[0, :]) + sortEdge(c1, c2, self.temp_edges[1, :]) + sortEdge(c2, c0, self.temp_edges[2, :]) + sortEdge(c0, c3, self.temp_edges[3, :]) + sortEdge(c1, c3, self.temp_edges[4, :]) + sortEdge(c2, c3, self.temp_edges[5, :]) + self.iteration_counter = 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverCellFaces(self, INDEX_t[::1] cell): + cdef: + INDEX_t c0, c1, c2, c3 + c0, c1, c2, c3 = cell[0], cell[1], cell[2], cell[3] + sortFace(c0, c1, c2, self.temp_faces[0, :]) + sortFace(c0, c1, c3, self.temp_faces[1, :]) + sortFace(c1, c2, c3, self.temp_faces[2, :]) + sortFace(c0, c2, c3, self.temp_faces[3, :]) + self.iteration_counter = 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void startLoopOverFaceEdges(self, INDEX_t[::1] face): + cdef: + INDEX_t c0, c1, c2 + c0, c1, c2 = face[0], face[1], face[2] + sortEdge(c0, c1, self.temp_edges2[0, :]) + sortEdge(c1, c2, self.temp_edges2[1, :]) + sortEdge(c2, c0, self.temp_edges2[2, :]) + self.iteration_counter = 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findEdgeInCell(self, + INDEX_t cellNo, + INDEX_t[::1] edge): + sortEdge(self.cells[cellNo, 0], self.cells[cellNo, 1], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 0 + sortEdge(self.cells[cellNo, 1], self.cells[cellNo, 2], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 1 + sortEdge(self.cells[cellNo, 0], self.cells[cellNo, 2], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 2 + sortEdge(self.cells[cellNo, 0], self.cells[cellNo, 3], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 3 + sortEdge(self.cells[cellNo, 1], self.cells[cellNo, 3], self.temp_edge) + if self.temp_edge[0] == edge[0] and self.temp_edge[1] == edge[1]: + return 4 + return 5 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getEdgeInCell(self, + 
INDEX_t cellNo, + INDEX_t edgeNo, + INDEX_t[::1] edge, + BOOL_t sorted=False): + if edgeNo == 0: + edge[0], edge[1] = self.cells[cellNo, 0], self.cells[cellNo, 1] + elif edgeNo == 1: + edge[0], edge[1] = self.cells[cellNo, 1], self.cells[cellNo, 2] + elif edgeNo == 2: + edge[0], edge[1] = self.cells[cellNo, 2], self.cells[cellNo, 0] + elif edgeNo == 3: + edge[0], edge[1] = self.cells[cellNo, 0], self.cells[cellNo, 3] + elif edgeNo == 4: + edge[0], edge[1] = self.cells[cellNo, 1], self.cells[cellNo, 3] + else: + edge[0], edge[1] = self.cells[cellNo, 2], self.cells[cellNo, 3] + if sorted: + sortEdge(edge[0], edge[1], edge) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t findFaceInCell(self, + INDEX_t cellNo, + INDEX_t[::1] face): + sortFace(self.cells[cellNo, 0], self.cells[cellNo, 1], self.cells[cellNo, 2], self.temp_face) + if self.temp_face[0] == face[0] and self.temp_face[1] == face[1] and self.temp_face[2] == face[2]: + return 0 + sortFace(self.cells[cellNo, 0], self.cells[cellNo, 1], self.cells[cellNo, 3], self.temp_face) + if self.temp_face[0] == face[0] and self.temp_face[1] == face[1] and self.temp_face[2] == face[2]: + return 1 + sortFace(self.cells[cellNo, 1], self.cells[cellNo, 2], self.cells[cellNo, 3], self.temp_face) + if self.temp_face[0] == face[0] and self.temp_face[1] == face[1] and self.temp_face[2] == face[2]: + return 2 + sortFace(self.cells[cellNo, 0], self.cells[cellNo, 2], self.cells[cellNo, 3], self.temp_face) + if self.temp_face[0] == face[0] and self.temp_face[1] == face[1] and self.temp_face[2] == face[2]: + return 3 + return -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getFaceInCell(self, + INDEX_t cellNo, + INDEX_t faceNo, + INDEX_t[::1] face, + BOOL_t sorted=False): + if faceNo == 0: + face[0], face[1], face[2] = self.cells[cellNo, 0], self.cells[cellNo, 2], self.cells[cellNo, 1] + elif faceNo == 1: + face[0], face[1], face[2] = self.cells[cellNo, 0], self.cells[cellNo, 1], self.cells[cellNo, 3] + elif faceNo == 2: + face[0], face[1], face[2] = self.cells[cellNo, 1], self.cells[cellNo, 2], self.cells[cellNo, 3] + else: + face[0], face[1], face[2] = self.cells[cellNo, 2], self.cells[cellNo, 0], self.cells[cellNo, 3] + if sorted: + sortFace(face[0], face[1], face[2], face) diff --git a/fem/PyNucleus_fem/splitting.py b/fem/PyNucleus_fem/splitting.py new file mode 100644 index 0000000..72c5951 --- /dev/null +++ b/fem/PyNucleus_fem/splitting.py @@ -0,0 +1,153 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus_base import INDEX +from . import P0_DoFMap +from . import getSubmesh +from . DoFMaps import (getSubMapRestrictionProlongation, + getSubMapRestrictionProlongation2) + + +class meshSplitter: + def __init__(self, mesh, indicators): + self.mesh = mesh + self.indicators = indicators + self.submeshes = {} + self.selectedCells = {} + + def getSubMesh(self, label): + from . 
import function + if label not in self.submeshes: + ind = self.indicators[label] + if isinstance(ind, function): + dm = P0_DoFMap(self.mesh, ind) + selectedCells = [] + for cellNo in range(self.mesh.num_cells): + for dofNo in range(dm.dofs_per_element): + if dm.dofs[cellNo, dofNo] >= 0: + selectedCells.append(cellNo) + break + selectedCells = np.array(selectedCells, dtype=INDEX) + else: + selectedCells = ind + self.selectedCells[label] = selectedCells + new_mesh = getSubmesh(self.mesh, selectedCells) + self.submeshes[label] = new_mesh + return self.submeshes[label] + + def getSubMap(self, label, dm): + subMesh = self.getSubMesh(label) + sub_dm = type(dm)(subMesh, -1) + sub_dof = 0 + sub_boundary_dof = -1 + assigned_dofs = {} + for sub_cellNo, cellNo in enumerate(self.selectedCells[label]): + for dofNo in range(dm.dofs_per_element): + dof = dm.cell2dof_py(cellNo, dofNo) + try: + sub_dm.dofs[sub_cellNo, dofNo] = assigned_dofs[dof] + except KeyError: + if dof >= 0: + sub_dm.dofs[sub_cellNo, dofNo] = sub_dof + assigned_dofs[dof] = sub_dof + sub_dof += 1 + else: + sub_dm.dofs[sub_cellNo, dofNo] = sub_boundary_dof + assigned_dofs[dof] = sub_boundary_dof + sub_boundary_dof -= 1 + sub_dm.num_dofs = sub_dof + sub_dm.num_boundary_dofs = -sub_boundary_dof-1 + return sub_dm + + def getRestrictionProlongation(self, label, dm, sub_dm): + cellIndices = -np.ones((self.mesh.num_cells), dtype=INDEX) + cells = self.selectedCells[label] + cellIndices[cells] = np.arange(cells.shape[0], dtype=INDEX) + subR = getSubMapRestrictionProlongation2(self.mesh, dm, sub_dm, cellIndices) + subP = subR.transpose() + return subR, subP + + def plotSubMeshes(self): + import matplotlib.pyplot as plt + numSubMeshes = len(self.submeshes) + for k, label in enumerate(self.submeshes): + plt.subplot(numSubMeshes, 1, k+1) + self.mesh.plot() + submesh = self.getSubMesh(label) + dm0 = P0_DoFMap(submesh, -1) + dm0.ones().plot(flat=True) + plt.title(label) + + +class dofmapSplitter: + def __init__(self, dm, indicators): + self.dm = dm + self.indicators = indicators + self.submeshes = {} + self.submaps = {} + self.selectedCells = {} + + def getSubMap(self, label): + from copy import deepcopy + from . 
import function + if label not in self.submaps: + self.submaps[label] = deepcopy(self.dm) + if isinstance(self.indicators[label], function): + self.submaps[label].resetUsingIndicator(self.indicators[label]) + else: + self.submaps[label].resetUsingFEVector(self.indicators[label]) + return self.submaps[label] + + def getSubMesh(self, label): + if label not in self.submeshes: + subMap = self.getSubMap(label) + selectedCells = [] + for cellNo in range(subMap.mesh.num_cells): + for dofNo in range(subMap.dofs_per_element): + if subMap.cell2dof_py(cellNo, dofNo) >= 0: + selectedCells.append(cellNo) + break + selectedCells = np.array(selectedCells, dtype=INDEX) + self.selectedCells[label] = selectedCells + new_mesh = getSubmesh(self.dm.mesh, selectedCells) + self.submeshes[label] = new_mesh + return self.submeshes[label] + + def getSubMapOnSubMesh(self, label): + dm = self.getSubMap(label) + subMesh = self.getSubMesh(label) + sub_dm = type(dm)(subMesh, -1) + num_boundary_dofs = -1 + boundary_dofs = {} + for sub_cellNo, cellNo in enumerate(self.selectedCells[label]): + for dofNo in range(dm.dofs_per_element): + dof = dm.cell2dof_py(cellNo, dofNo) + if dof < 0: + try: + dof = boundary_dofs[dof] + except KeyError: + boundary_dofs[dof] = num_boundary_dofs + dof = num_boundary_dofs + num_boundary_dofs -= 1 + sub_dm.dofs[sub_cellNo, dofNo] = dof + sub_dm.num_dofs = dm.num_dofs + sub_dm.num_boundary_dofs = -num_boundary_dofs-1 + return sub_dm + + def getRestrictionProlongation(self, label): + return getSubMapRestrictionProlongation(self.dm, self.getSubMap(label)) + + def plotSubMaps(self): + import matplotlib.pyplot as plt + numSubMaps = len(self.submaps) + for k, label in enumerate(self.submaps): + plt.subplot(numSubMaps, 1, k+1) + submap = self.getSubMap(label) + submap.plot() + plt.title(label) diff --git a/fem/PyNucleus_fem/stiffness_1D_P1.pxi b/fem/PyNucleus_fem/stiffness_1D_P1.pxi new file mode 100644 index 0000000..4dc346c --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_1D_P1.pxi @@ -0,0 +1,24 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_1d_sym_P1(stiffness_1d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_1d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 1.00000000000000 + + vol /= simplexVolume1D(simplex, self.temp) + + contrib[0] = vol + contrib[1] = -vol + contrib[2] = vol diff --git a/fem/PyNucleus_fem/stiffness_1D_P2.pxi b/fem/PyNucleus_fem/stiffness_1D_P2.pxi new file mode 100644 index 0000000..b44bed2 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_1D_P2.pxi @@ -0,0 +1,27 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class stiffness_1d_sym_P2(stiffness_1d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_1d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.333333333333333 + + vol /= simplexVolume1D(simplex, self.temp) + + contrib[0] = 7*vol + contrib[1] = vol + contrib[2] = -8*vol + contrib[3] = 7*vol + contrib[4] = -8*vol + contrib[5] = 16*vol diff --git a/fem/PyNucleus_fem/stiffness_1D_P3.pxi b/fem/PyNucleus_fem/stiffness_1D_P3.pxi new file mode 100644 index 0000000..800669e --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_1D_P3.pxi @@ -0,0 +1,31 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_1d_sym_P3(stiffness_1d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_1d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0250000000000000 + + vol /= simplexVolume1D(simplex, self.temp) + + contrib[0] = 148*vol + contrib[1] = -13*vol + contrib[2] = -189*vol + contrib[3] = 54*vol + contrib[4] = 148*vol + contrib[5] = 54*vol + contrib[6] = -189*vol + contrib[7] = 432*vol + contrib[8] = -297*vol + contrib[9] = 432*vol diff --git a/fem/PyNucleus_fem/stiffness_2D_P1.pxi b/fem/PyNucleus_fem/stiffness_2D_P1.pxi new file mode 100644 index 0000000..03f6c23 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_2D_P1.pxi @@ -0,0 +1,34 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class stiffness_2d_sym_P1(stiffness_2d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_2d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.250000000000000 + REAL_t l00, l01, l02, l11, l12, l22 + + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l11 = self.innerProducts[3] + l12 = self.innerProducts[4] + l22 = self.innerProducts[5] + + contrib[0] = l00*vol + contrib[1] = l01*vol + contrib[2] = l02*vol + contrib[3] = l11*vol + contrib[4] = l12*vol + contrib[5] = l22*vol diff --git a/fem/PyNucleus_fem/stiffness_2D_P2.pxi b/fem/PyNucleus_fem/stiffness_2D_P2.pxi new file mode 100644 index 0000000..77a5efe --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_2D_P2.pxi @@ -0,0 +1,49 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_2d_sym_P2(stiffness_2d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_2d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0833333333333333 + REAL_t l00, l01, l02, l11, l12, l22 + + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l11 = self.innerProducts[3] + l12 = self.innerProducts[4] + l22 = self.innerProducts[5] + + contrib[0] = 3*l00*vol + contrib[1] = -l01*vol + contrib[2] = -l02*vol + contrib[3] = 4*l01*vol + contrib[4] = 0 + contrib[5] = 4*l02*vol + contrib[6] = 3*l11*vol + contrib[7] = -l12*vol + contrib[8] = 4*l01*vol + contrib[9] = 4*l12*vol + contrib[10] = 0 + contrib[11] = 3*l22*vol + contrib[12] = 0 + contrib[13] = 4*l12*vol + contrib[14] = 4*l02*vol + contrib[15] = 8*vol*(l00 + l01 + l11) + contrib[16] = 4*vol*(l01 + 2*l02 + l11 + l12) + contrib[17] = 4*vol*(l00 + l01 + l02 + 2*l12) + contrib[18] = 8*vol*(l11 + l12 + l22) + contrib[19] = 4*vol*(2*l01 + l02 + l12 + l22) + contrib[20] = 8*vol*(l00 + l02 + l22) diff --git a/fem/PyNucleus_fem/stiffness_2D_P3.pxi b/fem/PyNucleus_fem/stiffness_2D_P3.pxi new file mode 100644 index 0000000..3ead305 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_2D_P3.pxi @@ -0,0 +1,83 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cdef class stiffness_2d_sym_P3(stiffness_2d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_2d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.00312500000000000 + REAL_t l00, l01, l02, l11, l12, l22 + + vol /= simplexVolumeAndProducts2D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l11 = self.innerProducts[3] + l12 = self.innerProducts[4] + l22 = self.innerProducts[5] + + contrib[0] = 68*l00*vol + contrib[1] = 14*l01*vol + contrib[2] = 14*l02*vol + contrib[3] = 6*vol*(l00 + 19*l01) + contrib[4] = 6*vol*(l00 - 8*l01) + contrib[5] = 6*vol*(l01 + l02) + contrib[6] = 6*vol*(l01 + l02) + contrib[7] = 6*vol*(l00 - 8*l02) + contrib[8] = 6*vol*(l00 + 19*l02) + contrib[9] = 18*vol*(l00 + l01 + l02) + contrib[10] = 68*l11*vol + contrib[11] = 14*l12*vol + contrib[12] = 6*vol*(-8*l01 + l11) + contrib[13] = 6*vol*(19*l01 + l11) + contrib[14] = 6*vol*(l11 + 19*l12) + contrib[15] = 6*vol*(l11 - 8*l12) + contrib[16] = 6*vol*(l01 + l12) + contrib[17] = 6*vol*(l01 + l12) + contrib[18] = 18*vol*(l01 + l11 + l12) + contrib[19] = 68*l22*vol + contrib[20] = 6*vol*(l02 + l12) + contrib[21] = 6*vol*(l02 + l12) + contrib[22] = 6*vol*(-8*l12 + l22) + contrib[23] = 6*vol*(19*l12 + l22) + contrib[24] = 6*vol*(19*l02 + l22) + contrib[25] = 6*vol*(-8*l02 + l22) + contrib[26] = 18*vol*(l02 + l12 + l22) + contrib[27] = 270*vol*(l00 + l01 + l11) + contrib[28] = 54*vol*(-l00 + 2*l01 - l11) + contrib[29] = -27*vol*(l01 + 2*l02 + l11 + l12) + contrib[30] = -27*vol*(l01 + 2*l02 + l11 + l12) + contrib[31] = -27*vol*(l00 + l01 + l02 + 2*l12) + contrib[32] = 135*vol*(l00 + l01 + l02 + 2*l12) + contrib[33] = 162*vol*(l01 + 2*l02 + l11 + l12) + contrib[34] = 270*vol*(l00 + l01 + l11) + contrib[35] = 135*vol*(l01 + 2*l02 + l11 + l12) + contrib[36] = -27*vol*(l01 + 2*l02 + l11 + l12) + contrib[37] = -27*vol*(l00 + l01 + l02 + 2*l12) + contrib[38] = -27*vol*(l00 + l01 + l02 + 2*l12) + contrib[39] = 162*vol*(l00 + l01 + l02 + 2*l12) + contrib[40] = 270*vol*(l11 + l12 + l22) + contrib[41] = 54*vol*(-l11 + 2*l12 - l22) + contrib[42] = -27*vol*(2*l01 + l02 + l12 + l22) + contrib[43] = -27*vol*(2*l01 + l02 + l12 + l22) + contrib[44] = 162*vol*(2*l01 + l02 + l12 + l22) + contrib[45] = 270*vol*(l11 + l12 + l22) + contrib[46] = 135*vol*(2*l01 + l02 + l12 + l22) + contrib[47] = -27*vol*(2*l01 + l02 + l12 + l22) + contrib[48] = 162*vol*(l01 + 2*l02 + l11 + l12) + contrib[49] = 270*vol*(l00 + l02 + l22) + contrib[50] = 54*vol*(-l00 + 2*l02 - l22) + contrib[51] = 162*vol*(l00 + l01 + l02 + 2*l12) + contrib[52] = 270*vol*(l00 + l02 + l22) + contrib[53] = 162*vol*(2*l01 + l02 + l12 + l22) + contrib[54] = 648*vol*(l00 + l01 + l02 + l11 + l12 + l22) diff --git a/fem/PyNucleus_fem/stiffness_3D_P1.pxi b/fem/PyNucleus_fem/stiffness_3D_P1.pxi new file mode 100644 index 0000000..e557a69 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_3D_P1.pxi @@ -0,0 +1,42 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. 
# +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_3d_sym_P1(stiffness_3d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_3d_sym_P1 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0277777777777778 + REAL_t l00, l01, l02, l03, l11, l12, l13, l22, l23, l33 + + vol /= simplexVolumeAndProducts3D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l03 = self.innerProducts[3] + l11 = self.innerProducts[4] + l12 = self.innerProducts[5] + l13 = self.innerProducts[6] + l22 = self.innerProducts[7] + l23 = self.innerProducts[8] + l33 = self.innerProducts[9] + + contrib[0] = l00*vol + contrib[1] = l01*vol + contrib[2] = l02*vol + contrib[3] = l03*vol + contrib[4] = l11*vol + contrib[5] = l12*vol + contrib[6] = l13*vol + contrib[7] = l22*vol + contrib[8] = l23*vol + contrib[9] = l33*vol diff --git a/fem/PyNucleus_fem/stiffness_3D_P2.pxi b/fem/PyNucleus_fem/stiffness_3D_P2.pxi new file mode 100644 index 0000000..d440ad6 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_3D_P2.pxi @@ -0,0 +1,87 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_3d_sym_P2(stiffness_3d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_3d_sym_P2 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.00555555555555556 + REAL_t l00, l01, l02, l03, l11, l12, l13, l22, l23, l33 + + vol /= simplexVolumeAndProducts3D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l03 = self.innerProducts[3] + l11 = self.innerProducts[4] + l12 = self.innerProducts[5] + l13 = self.innerProducts[6] + l22 = self.innerProducts[7] + l23 = self.innerProducts[8] + l33 = self.innerProducts[9] + + contrib[0] = 3*l00*vol + contrib[1] = -l01*vol + contrib[2] = -l02*vol + contrib[3] = -l03*vol + contrib[4] = vol*(-l00 + 3*l01) + contrib[5] = -vol*(l01 + l02) + contrib[6] = vol*(-l00 + 3*l02) + contrib[7] = vol*(-l00 + 3*l03) + contrib[8] = -vol*(l01 + l03) + contrib[9] = -vol*(l02 + l03) + contrib[10] = 3*l11*vol + contrib[11] = -l12*vol + contrib[12] = -l13*vol + contrib[13] = vol*(3*l01 - l11) + contrib[14] = vol*(-l11 + 3*l12) + contrib[15] = -vol*(l01 + l12) + contrib[16] = -vol*(l01 + l13) + contrib[17] = vol*(-l11 + 3*l13) + contrib[18] = -vol*(l12 + l13) + contrib[19] = 3*l22*vol + contrib[20] = -l23*vol + contrib[21] = -vol*(l02 + l12) + contrib[22] = vol*(3*l12 - l22) + contrib[23] = vol*(3*l02 - l22) + contrib[24] = -vol*(l02 + l23) + contrib[25] = -vol*(l12 + l23) + contrib[26] = vol*(-l22 + 3*l23) + contrib[27] = 3*l33*vol + contrib[28] = -vol*(l03 + l13) + contrib[29] = -vol*(l13 + l23) + contrib[30] = -vol*(l03 + l23) + 
contrib[31] = vol*(3*l03 - l33) + contrib[32] = vol*(3*l13 - l33) + contrib[33] = vol*(3*l23 - l33) + contrib[34] = 8*vol*(l00 + l01 + l11) + contrib[35] = 4*vol*(l01 + 2*l02 + l11 + l12) + contrib[36] = 4*vol*(l00 + l01 + l02 + 2*l12) + contrib[37] = 4*vol*(l00 + l01 + l03 + 2*l13) + contrib[38] = 4*vol*(l01 + 2*l03 + l11 + l13) + contrib[39] = 4*vol*(l02 + l03 + l12 + l13) + contrib[40] = 8*vol*(l11 + l12 + l22) + contrib[41] = 4*vol*(2*l01 + l02 + l12 + l22) + contrib[42] = 4*vol*(l01 + l02 + l13 + l23) + contrib[43] = 4*vol*(l11 + l12 + l13 + 2*l23) + contrib[44] = 4*vol*(l12 + 2*l13 + l22 + l23) + contrib[45] = 8*vol*(l00 + l02 + l22) + contrib[46] = 4*vol*(l00 + l02 + l03 + 2*l23) + contrib[47] = 4*vol*(l01 + l03 + l12 + l23) + contrib[48] = 4*vol*(l02 + 2*l03 + l22 + l23) + contrib[49] = 8*vol*(l00 + l03 + l33) + contrib[50] = 4*vol*(2*l01 + l03 + l13 + l33) + contrib[51] = 4*vol*(2*l02 + l03 + l23 + l33) + contrib[52] = 8*vol*(l11 + l13 + l33) + contrib[53] = 4*vol*(2*l12 + l13 + l23 + l33) + contrib[54] = 8*vol*(l22 + l23 + l33) diff --git a/fem/PyNucleus_fem/stiffness_3D_P3.pxi b/fem/PyNucleus_fem/stiffness_3D_P3.pxi new file mode 100644 index 0000000..60eea95 --- /dev/null +++ b/fem/PyNucleus_fem/stiffness_3D_P3.pxi @@ -0,0 +1,242 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cdef class stiffness_3d_sym_P3(stiffness_3d_sym): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef inline void eval(stiffness_3d_sym_P3 self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] contrib): + cdef: + REAL_t vol = 0.0000496031746031746 + REAL_t l00, l01, l02, l03, l11, l12, l13, l22, l23, l33 + + vol /= simplexVolumeAndProducts3D(simplex, self.innerProducts, self.temp) + l00 = self.innerProducts[0] + l01 = self.innerProducts[1] + l02 = self.innerProducts[2] + l03 = self.innerProducts[3] + l11 = self.innerProducts[4] + l12 = self.innerProducts[5] + l13 = self.innerProducts[6] + l22 = self.innerProducts[7] + l23 = self.innerProducts[8] + l33 = self.innerProducts[9] + + contrib[0] = 200*l00*vol + contrib[1] = 38*l01*vol + contrib[2] = 38*l02*vol + contrib[3] = 38*l03*vol + contrib[4] = 9*vol*(-13*l00 + 23*l01) + contrib[5] = 9*vol*(5*l00 - 13*l01) + contrib[6] = 45*vol*(l01 + l02) + contrib[7] = 45*vol*(l01 + l02) + contrib[8] = 9*vol*(5*l00 - 13*l02) + contrib[9] = 9*vol*(-13*l00 + 23*l02) + contrib[10] = 9*vol*(-13*l00 + 23*l03) + contrib[11] = 9*vol*(5*l00 - 13*l03) + contrib[12] = 45*vol*(l01 + l03) + contrib[13] = 45*vol*(l01 + l03) + contrib[14] = 45*vol*(l02 + l03) + contrib[15] = 45*vol*(l02 + l03) + contrib[16] = 54*vol*(2*l00 - l01 - l02) + contrib[17] = 54*vol*(2*l00 - l01 - l03) + contrib[18] = 108*vol*(l01 + l02 + l03) + contrib[19] = 54*vol*(2*l00 - l02 - l03) + contrib[20] = 200*l11*vol + contrib[21] = 38*l12*vol + contrib[22] = 38*l13*vol + contrib[23] = 9*vol*(-13*l01 + 5*l11) + contrib[24] = 9*vol*(23*l01 - 13*l11) + contrib[25] = 9*vol*(-13*l11 + 23*l12) + contrib[26] = 9*vol*(5*l11 - 13*l12) + contrib[27] = 45*vol*(l01 + l12) + contrib[28] = 45*vol*(l01 + l12) + contrib[29] = 45*vol*(l01 + 
l13) + contrib[30] = 45*vol*(l01 + l13) + contrib[31] = 9*vol*(-13*l11 + 23*l13) + contrib[32] = 9*vol*(5*l11 - 13*l13) + contrib[33] = 45*vol*(l12 + l13) + contrib[34] = 45*vol*(l12 + l13) + contrib[35] = 54*vol*(-l01 + 2*l11 - l12) + contrib[36] = 54*vol*(-l01 + 2*l11 - l13) + contrib[37] = 54*vol*(2*l11 - l12 - l13) + contrib[38] = 108*vol*(l01 + l12 + l13) + contrib[39] = 200*l22*vol + contrib[40] = 38*l23*vol + contrib[41] = 45*vol*(l02 + l12) + contrib[42] = 45*vol*(l02 + l12) + contrib[43] = 9*vol*(-13*l12 + 5*l22) + contrib[44] = 9*vol*(23*l12 - 13*l22) + contrib[45] = 9*vol*(23*l02 - 13*l22) + contrib[46] = 9*vol*(-13*l02 + 5*l22) + contrib[47] = 45*vol*(l02 + l23) + contrib[48] = 45*vol*(l02 + l23) + contrib[49] = 45*vol*(l12 + l23) + contrib[50] = 45*vol*(l12 + l23) + contrib[51] = 9*vol*(-13*l22 + 23*l23) + contrib[52] = 9*vol*(5*l22 - 13*l23) + contrib[53] = 54*vol*(-l02 - l12 + 2*l22) + contrib[54] = 108*vol*(l02 + l12 + l23) + contrib[55] = 54*vol*(-l12 + 2*l22 - l23) + contrib[56] = 54*vol*(-l02 + 2*l22 - l23) + contrib[57] = 200*l33*vol + contrib[58] = 45*vol*(l03 + l13) + contrib[59] = 45*vol*(l03 + l13) + contrib[60] = 45*vol*(l13 + l23) + contrib[61] = 45*vol*(l13 + l23) + contrib[62] = 45*vol*(l03 + l23) + contrib[63] = 45*vol*(l03 + l23) + contrib[64] = 9*vol*(-13*l03 + 5*l33) + contrib[65] = 9*vol*(23*l03 - 13*l33) + contrib[66] = 9*vol*(-13*l13 + 5*l33) + contrib[67] = 9*vol*(23*l13 - 13*l33) + contrib[68] = 9*vol*(-13*l23 + 5*l33) + contrib[69] = 9*vol*(23*l23 - 13*l33) + contrib[70] = 108*vol*(l03 + l13 + l23) + contrib[71] = 54*vol*(-l03 - l13 + 2*l33) + contrib[72] = 54*vol*(-l13 - l23 + 2*l33) + contrib[73] = 54*vol*(-l03 - l23 + 2*l33) + contrib[74] = 162*vol*(5*l00 + 4*l01 + 4*l11) + contrib[75] = 81*vol*(-3*l00 + 2*l01 - 3*l11) + contrib[76] = -81*vol*(2*l01 + 3*l02 + l11 + l12) + contrib[77] = -81*vol*(l01 + 2*l02 + l11 + l12) + contrib[78] = -81*vol*(l00 + l01 + 2*l02 + 3*l12) + contrib[79] = 81*vol*(5*l00 + 4*l01 + 4*l02 + 8*l12) + contrib[80] = 81*vol*(5*l00 + 4*l01 + 4*l03 + 8*l13) + contrib[81] = -81*vol*(l00 + l01 + 2*l03 + 3*l13) + contrib[82] = -81*vol*(2*l01 + 3*l03 + l11 + l13) + contrib[83] = -81*vol*(l01 + 2*l03 + l11 + l13) + contrib[84] = -81*vol*(l02 + l03 + l12 + l13) + contrib[85] = -81*vol*(l02 + l03 + l12 + l13) + contrib[86] = 162*vol*(-l00 + 2*l01 + 5*l02 + 2*l11 + 2*l12) + contrib[87] = 162*vol*(-l00 + 2*l01 + 5*l03 + 2*l11 + 2*l13) + contrib[88] = -81*vol*(l01 + 2*l02 + 2*l03 + l11 + l12 + l13) + contrib[89] = 81*vol*(-l00 - l01 + 5*l02 + 5*l03 + 4*l12 + 4*l13) + contrib[90] = 162*vol*(4*l00 + 4*l01 + 5*l11) + contrib[91] = 81*vol*(4*l01 + 8*l02 + 5*l11 + 4*l12) + contrib[92] = -81*vol*(l01 + 3*l02 + l11 + 2*l12) + contrib[93] = -81*vol*(l00 + l01 + l02 + 2*l12) + contrib[94] = -81*vol*(l00 + 2*l01 + l02 + 3*l12) + contrib[95] = -81*vol*(l00 + 2*l01 + l03 + 3*l13) + contrib[96] = -81*vol*(l00 + l01 + l03 + 2*l13) + contrib[97] = 81*vol*(4*l01 + 8*l03 + 5*l11 + 4*l13) + contrib[98] = -81*vol*(l01 + 3*l03 + l11 + 2*l13) + contrib[99] = -81*vol*(l02 + l03 + l12 + l13) + contrib[100] = -81*vol*(l02 + l03 + l12 + l13) + contrib[101] = 162*vol*(2*l00 + 2*l01 + 2*l02 - l11 + 5*l12) + contrib[102] = 162*vol*(2*l00 + 2*l01 + 2*l03 - l11 + 5*l13) + contrib[103] = 81*vol*(-l01 + 4*l02 + 4*l03 - l11 + 5*l12 + 5*l13) + contrib[104] = -81*vol*(l00 + l01 + l02 + l03 + 2*l12 + 2*l13) + contrib[105] = 162*vol*(5*l11 + 4*l12 + 4*l22) + contrib[106] = 81*vol*(-3*l11 + 2*l12 - 3*l22) + contrib[107] = -81*vol*(3*l01 + l02 + 2*l12 + l22) + contrib[108] = 
-81*vol*(2*l01 + l02 + l12 + l22) + contrib[109] = -81*vol*(l01 + l02 + l13 + l23) + contrib[110] = -81*vol*(l01 + l02 + l13 + l23) + contrib[111] = 81*vol*(5*l11 + 4*l12 + 4*l13 + 8*l23) + contrib[112] = -81*vol*(l11 + l12 + 2*l13 + 3*l23) + contrib[113] = -81*vol*(2*l12 + 3*l13 + l22 + l23) + contrib[114] = -81*vol*(l12 + 2*l13 + l22 + l23) + contrib[115] = 162*vol*(5*l01 + 2*l02 - l11 + 2*l12 + 2*l22) + contrib[116] = 81*vol*(5*l01 + 4*l02 - l11 - l12 + 5*l13 + 4*l23) + contrib[117] = 162*vol*(-l11 + 2*l12 + 5*l13 + 2*l22 + 2*l23) + contrib[118] = -81*vol*(2*l01 + l02 + l12 + 2*l13 + l22 + l23) + contrib[119] = 162*vol*(4*l11 + 4*l12 + 5*l22) + contrib[120] = 81*vol*(8*l01 + 4*l02 + 4*l12 + 5*l22) + contrib[121] = -81*vol*(3*l01 + 2*l02 + l12 + l22) + contrib[122] = -81*vol*(l01 + l02 + l13 + l23) + contrib[123] = -81*vol*(l01 + l02 + l13 + l23) + contrib[124] = -81*vol*(l11 + 2*l12 + l13 + 3*l23) + contrib[125] = -81*vol*(l11 + l12 + l13 + 2*l23) + contrib[126] = 81*vol*(4*l12 + 8*l13 + 5*l22 + 4*l23) + contrib[127] = -81*vol*(l12 + 3*l13 + l22 + 2*l23) + contrib[128] = 162*vol*(2*l01 + 5*l02 + 2*l11 + 2*l12 - l22) + contrib[129] = -81*vol*(l01 + 2*l02 + l11 + l12 + l13 + 2*l23) + contrib[130] = 162*vol*(2*l11 + 2*l12 + 2*l13 - l22 + 5*l23) + contrib[131] = 81*vol*(4*l01 + 5*l02 - l12 + 4*l13 - l22 + 5*l23) + contrib[132] = 162*vol*(4*l00 + 4*l02 + 5*l22) + contrib[133] = 81*vol*(-3*l00 + 2*l02 - 3*l22) + contrib[134] = -81*vol*(l00 + 2*l02 + l03 + 3*l23) + contrib[135] = -81*vol*(l00 + l02 + l03 + 2*l23) + contrib[136] = -81*vol*(l01 + l03 + l12 + l23) + contrib[137] = -81*vol*(l01 + l03 + l12 + l23) + contrib[138] = 81*vol*(4*l02 + 8*l03 + 5*l22 + 4*l23) + contrib[139] = -81*vol*(l02 + 3*l03 + l22 + 2*l23) + contrib[140] = 162*vol*(2*l00 + 2*l01 + 2*l02 + 5*l12 - l22) + contrib[141] = -81*vol*(l00 + l01 + l02 + l03 + 2*l12 + 2*l23) + contrib[142] = 81*vol*(4*l01 - l02 + 4*l03 + 5*l12 - l22 + 5*l23) + contrib[143] = 162*vol*(2*l00 + 2*l02 + 2*l03 - l22 + 5*l23) + contrib[144] = 162*vol*(5*l00 + 4*l02 + 4*l22) + contrib[145] = 81*vol*(5*l00 + 4*l02 + 4*l03 + 8*l23) + contrib[146] = -81*vol*(l00 + l02 + 2*l03 + 3*l23) + contrib[147] = -81*vol*(l01 + l03 + l12 + l23) + contrib[148] = -81*vol*(l01 + l03 + l12 + l23) + contrib[149] = -81*vol*(2*l02 + 3*l03 + l22 + l23) + contrib[150] = -81*vol*(l02 + 2*l03 + l22 + l23) + contrib[151] = 162*vol*(-l00 + 5*l01 + 2*l02 + 2*l12 + 2*l22) + contrib[152] = 81*vol*(-l00 + 5*l01 - l02 + 5*l03 + 4*l12 + 4*l23) + contrib[153] = -81*vol*(2*l01 + l02 + 2*l03 + l12 + l22 + l23) + contrib[154] = 162*vol*(-l00 + 2*l02 + 5*l03 + 2*l22 + 2*l23) + contrib[155] = 162*vol*(5*l00 + 4*l03 + 4*l33) + contrib[156] = 81*vol*(-3*l00 + 2*l03 - 3*l33) + contrib[157] = -81*vol*(2*l01 + l03 + l13 + l33) + contrib[158] = -81*vol*(3*l01 + 2*l03 + l13 + l33) + contrib[159] = -81*vol*(2*l02 + l03 + l23 + l33) + contrib[160] = -81*vol*(3*l02 + 2*l03 + l23 + l33) + contrib[161] = 81*vol*(-l00 + 5*l01 + 5*l02 - l03 + 4*l13 + 4*l23) + contrib[162] = 162*vol*(-l00 + 5*l01 + 2*l03 + 2*l13 + 2*l33) + contrib[163] = -81*vol*(2*l01 + 2*l02 + l03 + l13 + l23 + l33) + contrib[164] = 162*vol*(-l00 + 5*l02 + 2*l03 + 2*l23 + 2*l33) + contrib[165] = 162*vol*(4*l00 + 4*l03 + 5*l33) + contrib[166] = -81*vol*(3*l01 + l03 + 2*l13 + l33) + contrib[167] = 81*vol*(8*l01 + 4*l03 + 4*l13 + 5*l33) + contrib[168] = -81*vol*(3*l02 + l03 + 2*l23 + l33) + contrib[169] = 81*vol*(8*l02 + 4*l03 + 4*l23 + 5*l33) + contrib[170] = -81*vol*(l00 + l01 + l02 + l03 + 2*l13 + 2*l23) + contrib[171] = 162*vol*(2*l00 
+ 2*l01 + 2*l03 + 5*l13 - l33) + contrib[172] = 81*vol*(4*l01 + 4*l02 - l03 + 5*l13 + 5*l23 - l33) + contrib[173] = 162*vol*(2*l00 + 2*l02 + 2*l03 + 5*l23 - l33) + contrib[174] = 162*vol*(5*l11 + 4*l13 + 4*l33) + contrib[175] = 81*vol*(-3*l11 + 2*l13 - 3*l33) + contrib[176] = -81*vol*(2*l12 + l13 + l23 + l33) + contrib[177] = -81*vol*(3*l12 + 2*l13 + l23 + l33) + contrib[178] = 81*vol*(5*l01 + 4*l03 - l11 + 5*l12 - l13 + 4*l23) + contrib[179] = 162*vol*(5*l01 + 2*l03 - l11 + 2*l13 + 2*l33) + contrib[180] = 162*vol*(-l11 + 5*l12 + 2*l13 + 2*l23 + 2*l33) + contrib[181] = -81*vol*(2*l01 + l03 + 2*l12 + l13 + l23 + l33) + contrib[182] = 162*vol*(4*l11 + 4*l13 + 5*l33) + contrib[183] = -81*vol*(3*l12 + l13 + 2*l23 + l33) + contrib[184] = 81*vol*(8*l12 + 4*l13 + 4*l23 + 5*l33) + contrib[185] = -81*vol*(l01 + 2*l03 + l11 + l12 + l13 + 2*l23) + contrib[186] = 162*vol*(2*l01 + 5*l03 + 2*l11 + 2*l13 - l33) + contrib[187] = 162*vol*(2*l11 + 2*l12 + 2*l13 + 5*l23 - l33) + contrib[188] = 81*vol*(4*l01 + 5*l03 + 4*l12 - l13 + 5*l23 - l33) + contrib[189] = 162*vol*(5*l22 + 4*l23 + 4*l33) + contrib[190] = 81*vol*(-3*l22 + 2*l23 - 3*l33) + contrib[191] = 81*vol*(5*l02 + 4*l03 + 5*l12 + 4*l13 - l22 - l23) + contrib[192] = -81*vol*(2*l02 + l03 + 2*l12 + l13 + l23 + l33) + contrib[193] = 162*vol*(5*l12 + 2*l13 - l22 + 2*l23 + 2*l33) + contrib[194] = 162*vol*(5*l02 + 2*l03 - l22 + 2*l23 + 2*l33) + contrib[195] = 162*vol*(4*l22 + 4*l23 + 5*l33) + contrib[196] = -81*vol*(l02 + 2*l03 + l12 + 2*l13 + l22 + l23) + contrib[197] = 81*vol*(4*l02 + 5*l03 + 4*l12 + 5*l13 - l23 - l33) + contrib[198] = 162*vol*(2*l12 + 5*l13 + 2*l22 + 2*l23 - l33) + contrib[199] = 162*vol*(2*l02 + 5*l03 + 2*l22 + 2*l23 - l33) + contrib[200] = 1944*vol*(l00 + l01 + l02 + l11 + l12 + l22) + contrib[201] = 972*vol*(l00 + l01 + l02 + l03 + l11 + l12 + l13 + 2*l23) + contrib[202] = 972*vol*(l01 + l02 + 2*l03 + l11 + l12 + l13 + l22 + l23) + contrib[203] = 972*vol*(l00 + l01 + l02 + l03 + l12 + 2*l13 + l22 + l23) + contrib[204] = 1944*vol*(l00 + l01 + l03 + l11 + l13 + l33) + contrib[205] = 972*vol*(l01 + 2*l02 + l03 + l11 + l12 + l13 + l23 + l33) + contrib[206] = 972*vol*(l00 + l01 + l02 + l03 + 2*l12 + l13 + l23 + l33) + contrib[207] = 1944*vol*(l11 + l12 + l13 + l22 + l23 + l33) + contrib[208] = 972*vol*(2*l01 + l02 + l03 + l12 + l13 + l22 + l23 + l33) + contrib[209] = 1944*vol*(l00 + l02 + l03 + l22 + l23 + l33) diff --git a/fem/PyNucleus_fem/vector_decl_{SCALAR}.pxi b/fem/PyNucleus_fem/vector_decl_{SCALAR}.pxi new file mode 100644 index 0000000..f34d28d --- /dev/null +++ b/fem/PyNucleus_fem/vector_decl_{SCALAR}.pxi @@ -0,0 +1,17 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + + +cdef class {SCALAR_label_lc_}fe_vector: + cdef: + {SCALAR}_t[::1] data + bytes format + public DoFMap dm + + cpdef REAL_t norm(self, BOOL_t acc=*, BOOL_t asynchronous=*) + cpdef {SCALAR}_t inner(self, other, BOOL_t accSelf=*, BOOL_t accOther=*, BOOL_t asynchronous=*) diff --git a/fem/PyNucleus_fem/vector_{SCALAR}.pxi b/fem/PyNucleus_fem/vector_{SCALAR}.pxi new file mode 100644 index 0000000..ebd4427 --- /dev/null +++ b/fem/PyNucleus_fem/vector_{SCALAR}.pxi @@ -0,0 +1,270 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +cdef class {SCALAR_label_lc_}fe_vector: + def __init__(self, {SCALAR}_t[::1] data, DoFMap dm): + self.data = data + self.dm = dm + + def __getbuffer__(self, Py_buffer* info, int flags): + info.buf = &self.data[0] + info.len = self.data.shape[0] + info.ndim = 1 + info.shape = self.data.shape + info.strides = NULL + info.suboffsets = NULL + info.readonly = 0 + IF {IS_REAL}: + info.itemsize = 8 + self.format = b"d" + ELSE: + info.itemsize = 16 + self.format = b"Zd" + info.format = self.format + + @property + def shape(self): + return (self.data.shape[0], ) + + @property + def ndim(self): + return 1 + + @property + def dtype(self): + return {SCALAR} + + def __add__({SCALAR_label_lc_}fe_vector self, other): + cdef: + {SCALAR_label_lc_}fe_vector v, v3 + complex_fe_vector vc, v2c, v3c + {SCALAR}_t[::1] v3d + if isinstance(other, {SCALAR_label_lc_}fe_vector): + v = {SCALAR_label_lc_}fe_vector(np.empty((self.data.shape[0]), dtype={SCALAR}), self.dm) + v3 = other + assign3(v.data, self.data, 1.0, v3.data, 1.0) + return v + elif isinstance(other, np.ndarray) and other.dtype == {SCALAR}: + v = {SCALAR_label_lc_}fe_vector(np.empty((self.data.shape[0]), dtype={SCALAR}), self.dm) + v3d = other + assign3(v.data, self.data, 1.0, v3d, 1.0) + return v + elif isinstance(other, complex_fe_vector): + v2c = self.astype(COMPLEX) + v3c = other + vc = complex_fe_vector(np.empty((v2c.data.shape[0]), dtype=COMPLEX), v2c.dm) + assign3(vc.data, v2c.data, 1.0, v3c.data, 1.0) + return vc + else: + raise NotImplementedError() + + def __sub__({SCALAR_label_lc_}fe_vector self, other): + cdef: + {SCALAR_label_lc_}fe_vector v, v3 + complex_fe_vector vc, v2c, v3c + {SCALAR}_t[::1] v3d + if isinstance(other, {SCALAR_label_lc_}fe_vector): + v = {SCALAR_label_lc_}fe_vector(np.empty((self.data.shape[0]), dtype={SCALAR}), self.dm) + v3 = other + assign3(v.data, self.data, 1.0, v3.data, -1.0) + return v + elif isinstance(other, np.ndarray) and other.dtype == {SCALAR}: + v = {SCALAR_label_lc_}fe_vector(np.empty((self.data.shape[0]), dtype={SCALAR}), self.dm) + v3d = other + assign3(v.data, self.data, 1.0, v3d, -1.0) + return v + elif isinstance(other, complex_fe_vector): + v2c = self.astype(COMPLEX) + v3c = other + vc = complex_fe_vector(np.empty((v2c.data.shape[0]), dtype=COMPLEX), v2c.dm) + assign3(vc.data, v2c.data, 1.0, v3c.data, -1.0) + return vc + else: + raise NotImplementedError() + + def __iadd__({SCALAR_label_lc_}fe_vector self, {SCALAR}_t[::1] other): + assign3(self.data, self.data, 1.0, other, 
1.0) + return self + + def __isub__({SCALAR_label_lc_}fe_vector self, {SCALAR}_t[::1] other): + assign3(self.data, self.data, 1.0, other, -1.0) + return self + + def __imul__({SCALAR_label_lc_}fe_vector self, {SCALAR}_t alpha): + assignScaled(self.data, self.data, alpha) + return self + + def __mul__(self, other): + cdef: + {SCALAR_label_lc_}fe_vector v1, v2 + complex_fe_vector v1c, v2c + {SCALAR}_t alpha + COMPLEX_t alphac + INDEX_t i + if isinstance(self, {SCALAR_label_lc_}fe_vector): + if isinstance(other, (COMPLEX, complex)): + v1c = self.astype(COMPLEX) + alphac = other + v2c = complex_fe_vector(np.empty((v1c.data.shape[0]), dtype=COMPLEX), v1c.dm) + assignScaled(v2c.data, v1c.data, alphac) + return v2c + elif isinstance(other, {SCALAR_label_lc_}fe_vector): + v1 = other + v2 = {SCALAR_label_lc_}fe_vector(np.empty((v1.data.shape[0]), dtype={SCALAR}), v1.dm) + for i in range(self.data.shape[0]): + v2.data[i] = self.data[i]*other.data[i] + return v2 + else: + v1 = self + alpha = other + v2 = {SCALAR_label_lc_}fe_vector(np.empty((v1.data.shape[0]), dtype={SCALAR}), v1.dm) + assignScaled(v2.data, v1.data, alpha) + return v2 + else: + if isinstance(self, (COMPLEX, complex)): + v1c = other.astype(COMPLEX) + alphac = self + v2c = complex_fe_vector(np.empty((v1c.data.shape[0]), dtype=COMPLEX), v1c.dm) + assignScaled(v2c.data, v1c.data, alphac) + return v2c + else: + v1 = other + alpha = self + v2 = {SCALAR_label_lc_}fe_vector(np.empty((v1.data.shape[0]), dtype={SCALAR}), v1.dm) + assignScaled(v2.data, v1.data, alpha) + return v2 + + def toarray(self, copy=False): + return np.array(self.data, copy=copy) + + def assign(self, other): + cdef: + {SCALAR_label_lc_}fe_vector v + {SCALAR}_t[::1] v2 + if isinstance(other, {SCALAR_label_lc_}fe_vector): + v = other + assign(self.data, v.data) + elif isinstance(other, {SCALAR}): + for i in range(self.data.shape[0]): + self.data[i] = other + elif {IS_REAL} and isinstance(other, float): + for i in range(self.data.shape[0]): + self.data[i] = other + else: + v2 = other + assign(self.data, v2) + + def astype(self, dtype): + cdef: + complex_fe_vector v + INDEX_t i + IF {IS_REAL}: + if dtype == COMPLEX: + v = complex_fe_vector(np.empty((self.data.shape[0]), dtype=COMPLEX), self.dm) + for i in range(self.data.shape[0]): + v.data[i] = self.data[i] + return v + else: + return self + ELSE: + if dtype == REAL: + raise NotImplementedError() + else: + return self + + @property + def real(self): + cdef: + fe_vector v + INDEX_t i + IF {IS_REAL}: + return self + ELSE: + v = fe_vector(np.empty((self.data.shape[0]), dtype=REAL), self.dm) + for i in range(self.data.shape[0]): + v.data[i] = self.data[i].real + return v + + @property + def imag(self): + cdef: + fe_vector v + INDEX_t i + IF {IS_REAL}: + v = fe_vector(np.zeros((self.data.shape[0]), dtype=REAL), self.dm) + return v + ELSE: + v = fe_vector(np.empty((self.data.shape[0]), dtype=REAL), self.dm) + for i in range(self.data.shape[0]): + v.data[i] = self.data[i].imag + return v + + def __repr__(self): + if self.dm is not None: + return '{SCALAR}fe_vector<{}>'.format(self.dm) + else: + return '{SCALAR}fe_vector' + + def __getitem__(self, INDEX_t i): + return self.data[i] + + def __setitem__(self, INDEX_t i, {SCALAR}_t value): + self.data[i] = value + + def plot(self, **kwargs): + mesh = self.dm.mesh + if isinstance(self.dm, P0_DoFMap): + return mesh.plotFunction(self.toarray(), DoFMap=self.dm, **kwargs) + else: + y = self.linearPart() + return mesh.plotFunction(y.toarray(), DoFMap=y.dm, **kwargs) + + def copy(self): + 
        cdef:
+            {SCALAR_label_lc_}fe_vector v
+        v = self.dm.empty()
+        assign(v.data, self.data)
+        return v
+
+    def __getstate__(self):
+        return (np.array(self.data, copy=False), self.dm)
+
+    def __setstate__(self, state):
+        self.data = state[0]
+        self.dm = state[1]
+
+    def __getattr__(self, name):
+        return getattr(np.array(self.data, copy=False), name)
+
+    cpdef REAL_t norm(self, BOOL_t acc=False, BOOL_t asynchronous=False):
+        if self.dm.norm is not None:
+            IF {IS_REAL}:
+                return self.dm.norm.eval(self.data, acc)
+            ELSE:
+                return self.dm.complex_norm.eval(self.data, acc)
+        else:
+            raise AttributeError('\'Norm\' has not been set on the vector\'s DoFMap.')
+
+    cpdef {SCALAR}_t inner(self, other, BOOL_t accSelf=False, BOOL_t accOther=False, BOOL_t asynchronous=False):
+        if self.dm.inner is not None:
+            IF {IS_REAL}:
+                if isinstance(other, {SCALAR_label_lc_}fe_vector):
+                    return self.dm.inner.eval(self.data, other.data, accSelf, accOther, asynchronous)
+                else:
+                    return self.dm.inner.eval(self.data, other, accSelf, accOther, asynchronous)
+            ELSE:
+                if isinstance(other, {SCALAR_label_lc_}fe_vector):
+                    return self.dm.complex_inner.eval(self.data, other.data, accSelf, accOther, asynchronous)
+                else:
+                    return self.dm.complex_inner.eval(self.data, other, accSelf, accOther, asynchronous)
+        else:
+            raise AttributeError('\'Inner\' has not been set on the vector\'s DoFMap.')
+
+    def linearPart(self):
+        return self.dm.linearPart(self)[0]
diff --git a/fem/setup.cfg b/fem/setup.cfg
new file mode 100644
index 0000000..cfb64b1
--- /dev/null
+++ b/fem/setup.cfg
@@ -0,0 +1,7 @@
+
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = PyNucleus_fem/_version.py
+tag_prefix =
+parentdir_prefix =
\ No newline at end of file
diff --git a/fem/setup.py b/fem/setup.py
new file mode 100644
index 0000000..2f99e3e
--- /dev/null
+++ b/fem/setup.py
@@ -0,0 +1,72 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+try:
+    from PyNucleus_base.setupUtils import package
+except ImportError as e:
+    raise ImportError('\'PyNucleus_base\' needs to be installed first.') from e
+from PyNucleus_packageTools import fillTemplate
+from pathlib import Path
+
+p = package('PyNucleus_fem')
+p.addOption('USE_METIS', 'use_metis', True, ['PyNucleus_metisCy'])
+
+p.loadConfig()
+
+print('Generating templates')
+templates = [
+    'distributed_operators_{SCALAR}.pxi', 'distributed_operators_decl_{SCALAR}.pxi',
+    'vector_{SCALAR}.pxi', 'vector_decl_{SCALAR}.pxi'
+]
+replacementGroups = [[('{SCALAR}', 'REAL'),
+                      ('{SCALAR_label}', ''),
+                      ('{SCALAR_label_lc}', ''),
+                      ('{SCALAR_label_lc_}', ''),
+                      ('{IS_REAL}', 'True'),
+                      ('{IS_COMPLEX}', 'False')],
+                     [('{SCALAR}', 'COMPLEX'),
+                      ('{SCALAR_label}', 'Complex'),
+                      ('{SCALAR_label_lc}', 'complex'),
+                      ('{SCALAR_label_lc_}', 'complex_'),
+                      ('{IS_REAL}', 'False'),
+                      ('{IS_COMPLEX}', 'True'),
+                      # for some reason, complex cannot handle in-place operators like +=; rewrite 'a += b' as 'a = a + b'
+                      (r'\s([^\s]+\[[^\]]*\])\s([\*\+-])=', r' \1 = \1 \2'),
+                      (r'\s([^\s]+)\s([\*\+-])=', r' \1 = \1 \2')]]
+fillTemplate(Path(p.folder), templates, replacementGroups)
+
+p.addExtension("meshCy",
+               sources=[p.folder+"meshCy.pyx"])
+
+p.addExtension("meshPartitioning",
+               sources=[p.folder+"meshPartitioning.pyx"])
+p.addExtension("functions",
+               sources=[p.folder+"functions.pyx"])
+p.addExtension("femCy",
+               sources=[p.folder+"femCy.pyx"])
+p.addExtension("repartitioner",
+               sources=[p.folder+"repartitioner.pyx"])
+p.addExtension("DoFMaps",
+               sources=[p.folder+"DoFMaps.pyx"])
+p.addExtension("quadrature",
+               sources=[p.folder+"quadrature.pyx"])
+p.addExtension("meshOverlaps",
+               sources=[p.folder+"meshOverlaps.pyx"])
+p.addExtension("algebraicOverlaps",
+               sources=[p.folder+"algebraicOverlaps.pyx"])
+p.addExtension("distributed_operators",
+               sources=[p.folder+"distributed_operators.pyx"])
+p.addExtension("boundaryLayerCy",
+               sources=[p.folder+"boundaryLayerCy.pyx"])
+p.addExtension("simplexMapper",
+               sources=[p.folder+"simplexMapper.pyx"])
+
+p.setup(description="A finite element code.",
+        install_requires=['cython', 'numpy', 'scipy', 'matplotlib', 'meshpy', 'modepy',
+                          'mpi4py>=2.0.0',
+                          'PyNucleus_base'])
diff --git a/fem/versioneer.py b/fem/versioneer.py
new file mode 100644
index 0000000..d9c300b
--- /dev/null
+++ b/fem/versioneer.py
@@ -0,0 +1,2116 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+# Version: 0.21
+
+"""The Versioneer - like a rocketeer, but for versions.
+
+The Versioneer
+==============
+
+* like a rocketeer, but for versions!
+* https://github.com/python-versioneer/python-versioneer
+* Brian Warner
+* License: Public Domain
+* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3
+* [![Latest Version][pypi-image]][pypi-url]
+* [![Build Status][travis-image]][travis-url]
+
+This is a tool for managing a recorded version number in distutils-based
+python projects. The goal is to remove the tedious and error-prone "update
+the embedded version string" step from your release process.
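+For example (illustrative values), a tree that sits three commits past a
+hypothetical `1.2` tag and carries local modifications would self-report
+its version as `1.2+3.g1076c97.dirty` under the default style.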
Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. 
in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. 
`setup.py` and `.git/` are *not* siblings). There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install is done with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to be easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ . The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
+  dependency
+* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
+  versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
+  plugin
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to find the project root directory. "
+               "Versioneer requires setup.py to be executed from "
+               "its immediate directory (like 'python setup.py COMMAND'), "
+               "or in a way that lets it use sys.argv[0] to find the root "
+               "(like 'python path/to/setup.py COMMAND').")
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one.
So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
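+    For example (illustrative), a tarball that unpacks into
+    `myproject-1.2.3/` is matched by a parentdir_prefix of "myproject-"
+    and yields the version "1.2.3".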
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. 
+ + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. 
+ The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
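+    # As a hedged illustration (hypothetical values): in a file whose
+    # keywords git has already expanded, the scanned line looks like
+    #   git_refnames = " (HEAD -> master, tag: v1.0)"
+    # while an unexpanded file still carries the literal "$Format:%d$".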
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
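+            # (For contrast, a parseable example would be "v1.0-3-gabc1234":
+            # tag "v1.0", distance 3, short hash "abc1234"; anything the
+            # regex above rejects ends up here. Values are hypothetical.)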
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
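+# (The placeholder below is filled with a JSON dump of the version dict,
+# i.e. the keys "version", "full-revisionid", "dirty", "error" and "date".)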
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
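+    # Whichever method succeeds returns a dict shaped like (hypothetical
+    # values):
+    #   {"version": "1.2+3.gabc1234", "full-revisionid": "abc1234...",
+    #    "dirty": False, "error": None, "date": "2021-10-12T11:08:00-0600"}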
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
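+        # (This mirrors the cx_Freeze branch above: write a static
+        # _version.py before freezing, then restore the template afterwards.)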
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
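+    # In this patch that attribute line is, for example,
+    #   PyNucleus_metisCy/_version.py export-subst
+    # as added to metisCy/.gitattributes further below.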
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/metisCy/.gitattributes b/metisCy/.gitattributes new file mode 100644 index 0000000..57fc476 --- /dev/null +++ b/metisCy/.gitattributes @@ -0,0 +1,2 @@ + +PyNucleus_metisCy/_version.py export-subst diff --git a/metisCy/MANIFEST.in b/metisCy/MANIFEST.in new file mode 100644 index 0000000..6ffe9c2 --- /dev/null +++ b/metisCy/MANIFEST.in @@ -0,0 +1,3 @@ + +include versioneer.py +include PyNucleus_metisCy/_version.py diff --git a/metisCy/PyNucleus_metisCy/__init__.py b/metisCy/PyNucleus_metisCy/__init__.py new file mode 100644 index 0000000..154d412 --- /dev/null +++ b/metisCy/PyNucleus_metisCy/__init__.py @@ -0,0 +1,89 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +""" +A Cython interface to METIS and ParMETIS. + +http://glaros.dtc.umn.edu/gkhome/metis/metis/overview +http://glaros.dtc.umn.edu/gkhome/metis/parmetis/overview + +""" +from . metisCy import (PartGraphRecursive, + PartGraphKway, + PartMeshNodal, + PartMeshDual, + NodeND, + SetDefaultOptions) + +from . metisCy import NOPTIONS + +# Options codes +from . metisCy import (OPTION_PTYPE, + OPTION_OBJTYPE, + OPTION_CTYPE, + OPTION_IPTYPE, + OPTION_RTYPE, + OPTION_DBGLVL, + OPTION_NITER, + OPTION_NCUTS, + OPTION_SEED, + OPTION_NO2HOP, + OPTION_MINCONN, + OPTION_CONTIG, + OPTION_COMPRESS, + OPTION_CCORDER, + OPTION_PFACTOR, + OPTION_NSEPS, + OPTION_UFACTOR, + OPTION_NUMBERING) + +# Partitioning Schemes +from . metisCy import (PTYPE_RB, + PTYPE_KWAY) + +# Graph types for meshes +from . metisCy import (GTYPE_DUAL, + GTYPE_NODAL) + +# Coarsening Schemes +from . metisCy import (CTYPE_RM, + CTYPE_SHEM) + +# Initial partitioning schemes +from . 
metisCy import (IPTYPE_GROW, + IPTYPE_RANDOM, + IPTYPE_EDGE, + IPTYPE_NODE, + IPTYPE_METISRB) + +# Refinement schemes +from . metisCy import (RTYPE_FM, + RTYPE_GREEDY, + RTYPE_SEP2SIDED, + RTYPE_SEP1SIDED) + +# Debug Levels +from . metisCy import (DBG_INFO, + DBG_TIME, + DBG_COARSEN, + DBG_REFINE, + DBG_IPART, + DBG_MOVEINFO, + DBG_SEPINFO, + DBG_CONNINFO, + DBG_CONTIGINFO, + DBG_MEMORY) + +# Types of objectives +from . metisCy import (OPTION_OBJTYPE, + OBJTYPE_CUT, + OBJTYPE_NODE, + OBJTYPE_VOL) + +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/metisCy/PyNucleus_metisCy/_version.py b/metisCy/PyNucleus_metisCy/_version.py new file mode 100644 index 0000000..183e2d8 --- /dev/null +++ b/metisCy/PyNucleus_metisCy/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
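+    # In a 'git archive' tarball the export-subst attribute expands the
+    # "$Format:...$" placeholders below; in a plain checkout they stay
+    # literal, and git_versions_from_keywords() then rejects them.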
+ git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_metisCy/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
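+    # Each assignment is matched with a quoted-string regexp, so e.g. a
+    # (hypothetical) line 'git_full = "abc1234..."' fills keywords["full"].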
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/metisCy/PyNucleus_metisCy/metisCy.pxd b/metisCy/PyNucleus_metisCy/metisCy.pxd new file mode 100644 index 0000000..ab2bcfc --- /dev/null +++ b/metisCy/PyNucleus_metisCy/metisCy.pxd @@ -0,0 +1,21 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cimport numpy as np + +include "config.pxi" + +IF IDXTYPEWIDTH == 32: + ctypedef np.int32_t idx_t +ELIF IDXTYPEWIDTH == 64: + ctypedef np.int64_t idx_t + +IF REALTYPEWIDTH == 32: + ctypedef float real_t +ELIF REALTYPEWIDTH == 64: + ctypedef np.float64_t real_t diff --git a/metisCy/PyNucleus_metisCy/metisCy.pyx b/metisCy/PyNucleus_metisCy/metisCy.pyx new file mode 100644 index 0000000..d7ab902 --- /dev/null +++ b/metisCy/PyNucleus_metisCy/metisCy.pyx @@ -0,0 +1,382 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np + +include "config.pxi" + +from PyNucleus_base import uninitialized + +IF IDXTYPEWIDTH == 32: + idx = np.int32 +ELIF IDXTYPEWIDTH == 64: + idx = np.int64 + +IF REALTYPEWIDTH == 32: + real = np.float32 +ELIF REALTYPEWIDTH == 64: + real = np.float64 + + +cdef extern from "metis.h": + int METIS_PartGraphRecursive(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *objval, idx_t *part) + + int METIS_PartGraphKway(idx_t *nvtxs, idx_t *ncon, idx_t *xadj, idx_t *adjncy, + idx_t *vwgt, idx_t *vsize, idx_t *adjwgt, idx_t *nparts, real_t *tpwgts, + real_t *ubvec, idx_t *options, idx_t *objval, idx_t *part) + + int METIS_MeshToDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *ncommon, idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + + int METIS_MeshToNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *numflag, idx_t **r_xadj, idx_t **r_adjncy); + + int METIS_PartMeshNodal(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *nparts, real_t *tpwgts, + idx_t *options, idx_t *objval, idx_t *epart, idx_t *npart); + + int METIS_PartMeshDual(idx_t *ne, idx_t *nn, idx_t *eptr, idx_t *eind, + idx_t *vwgt, idx_t *vsize, idx_t *ncommon, idx_t *nparts, + real_t *tpwgts, idx_t *options, idx_t *objval, idx_t *epart, + idx_t *npart); + + int METIS_NodeND(idx_t *nvtxs, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *options, idx_t *perm, idx_t *iperm); + + int METIS_Free(void *ptr); + + idx_t METIS_NOPTIONS + + idx_t METIS_OK, METIS_ERROR_INPUT, METIS_ERROR_MEMORY, METIS_ERROR + + # Options codes + idx_t METIS_OPTION_PTYPE + idx_t METIS_OPTION_OBJTYPE + idx_t METIS_OPTION_CTYPE + idx_t METIS_OPTION_IPTYPE + idx_t METIS_OPTION_RTYPE + idx_t METIS_OPTION_DBGLVL + idx_t METIS_OPTION_NITER + idx_t METIS_OPTION_NCUTS + idx_t METIS_OPTION_SEED + idx_t METIS_OPTION_NO2HOP + idx_t METIS_OPTION_MINCONN + idx_t METIS_OPTION_CONTIG + idx_t METIS_OPTION_COMPRESS + idx_t METIS_OPTION_CCORDER + idx_t METIS_OPTION_PFACTOR + idx_t METIS_OPTION_NSEPS + idx_t METIS_OPTION_UFACTOR + idx_t METIS_OPTION_NUMBERING + + # Partitioning Schemes + idx_t METIS_PTYPE_RB + idx_t METIS_PTYPE_KWAY + + # Graph types for meshes + idx_t METIS_GTYPE_DUAL + idx_t METIS_GTYPE_NODAL + + # Coarsening Schemes + idx_t METIS_CTYPE_RM + idx_t METIS_CTYPE_SHEM + + # Initial partitioning schemes + idx_t METIS_IPTYPE_GROW + idx_t 
METIS_IPTYPE_RANDOM
+    idx_t METIS_IPTYPE_EDGE
+    idx_t METIS_IPTYPE_NODE
+    idx_t METIS_IPTYPE_METISRB
+
+    # Refinement schemes
+    idx_t METIS_RTYPE_FM
+    idx_t METIS_RTYPE_GREEDY
+    idx_t METIS_RTYPE_SEP2SIDED
+    idx_t METIS_RTYPE_SEP1SIDED
+
+    # Debug Levels
+    idx_t METIS_DBG_INFO        # Shows various diagnostic messages
+    idx_t METIS_DBG_TIME        # Perform timing analysis
+    idx_t METIS_DBG_COARSEN     # Show the coarsening progress
+    idx_t METIS_DBG_REFINE      # Show the refinement progress
+    idx_t METIS_DBG_IPART       # Show info on initial partitioning
+    idx_t METIS_DBG_MOVEINFO    # Show info on vertex moves during refinement
+    idx_t METIS_DBG_SEPINFO     # Show info on vertex moves during sep refinement
+    idx_t METIS_DBG_CONNINFO    # Show info on minimization of subdomain connectivity
+    idx_t METIS_DBG_CONTIGINFO  # Show info on elimination of connected components
+    idx_t METIS_DBG_MEMORY      # Show info related to wspace allocation
+
+    # Types of objectives
+    idx_t METIS_OBJTYPE_CUT
+    idx_t METIS_OBJTYPE_VOL
+    idx_t METIS_OBJTYPE_NODE
+
+    int METIS_SetDefaultOptions(idx_t *options)
+
+
+NOPTIONS = METIS_NOPTIONS
+
+# Options codes
+OPTION_PTYPE = METIS_OPTION_PTYPE
+OPTION_OBJTYPE = METIS_OPTION_OBJTYPE
+OPTION_CTYPE = METIS_OPTION_CTYPE
+OPTION_IPTYPE = METIS_OPTION_IPTYPE
+OPTION_RTYPE = METIS_OPTION_RTYPE
+OPTION_DBGLVL = METIS_OPTION_DBGLVL
+OPTION_NITER = METIS_OPTION_NITER
+OPTION_NCUTS = METIS_OPTION_NCUTS
+OPTION_SEED = METIS_OPTION_SEED
+OPTION_NO2HOP = METIS_OPTION_NO2HOP
+OPTION_MINCONN = METIS_OPTION_MINCONN
+OPTION_CONTIG = METIS_OPTION_CONTIG
+OPTION_COMPRESS = METIS_OPTION_COMPRESS
+OPTION_CCORDER = METIS_OPTION_CCORDER
+OPTION_PFACTOR = METIS_OPTION_PFACTOR
+OPTION_NSEPS = METIS_OPTION_NSEPS
+OPTION_UFACTOR = METIS_OPTION_UFACTOR
+OPTION_NUMBERING = METIS_OPTION_NUMBERING
+
+# Partitioning Schemes
+PTYPE_RB = METIS_PTYPE_RB
+PTYPE_KWAY = METIS_PTYPE_KWAY
+
+# Graph types for meshes
+GTYPE_DUAL = METIS_GTYPE_DUAL
+GTYPE_NODAL = METIS_GTYPE_NODAL
+
+# Coarsening Schemes
+CTYPE_RM = METIS_CTYPE_RM
+CTYPE_SHEM = METIS_CTYPE_SHEM
+
+# Initial partitioning schemes
+IPTYPE_GROW = METIS_IPTYPE_GROW
+IPTYPE_RANDOM = METIS_IPTYPE_RANDOM
+IPTYPE_EDGE = METIS_IPTYPE_EDGE
+IPTYPE_NODE = METIS_IPTYPE_NODE
+IPTYPE_METISRB = METIS_IPTYPE_METISRB
+
+# Refinement schemes
+RTYPE_FM = METIS_RTYPE_FM
+RTYPE_GREEDY = METIS_RTYPE_GREEDY
+RTYPE_SEP2SIDED = METIS_RTYPE_SEP2SIDED
+RTYPE_SEP1SIDED = METIS_RTYPE_SEP1SIDED
+
+# Debug Levels
+DBG_INFO = METIS_DBG_INFO
+DBG_TIME = METIS_DBG_TIME
+DBG_COARSEN = METIS_DBG_COARSEN
+DBG_REFINE = METIS_DBG_REFINE
+DBG_IPART = METIS_DBG_IPART
+DBG_MOVEINFO = METIS_DBG_MOVEINFO
+DBG_SEPINFO = METIS_DBG_SEPINFO
+DBG_CONNINFO = METIS_DBG_CONNINFO
+DBG_CONTIGINFO = METIS_DBG_CONTIGINFO
+DBG_MEMORY = METIS_DBG_MEMORY
+
+# Types of objectives
+OBJTYPE_CUT = METIS_OBJTYPE_CUT
+OBJTYPE_VOL = METIS_OBJTYPE_VOL
+OBJTYPE_NODE = METIS_OBJTYPE_NODE
+
+
+cpdef SetDefaultOptions():
+    cdef:
+        np.ndarray[idx_t, ndim=1] options = uninitialized((NOPTIONS), dtype=idx)
+        idx_t[::1] options_mv = options
+
+    METIS_SetDefaultOptions(&options_mv[0])
+    return options
+
+
+cpdef process_return(returnVal):
+    # Translate the METIS return code into an informative exception,
+    # mirroring the handling in parmetisCy below.
+    if returnVal == METIS_OK:
+        return
+    elif returnVal == METIS_ERROR_INPUT:
+        raise Exception("METIS_ERROR_INPUT")
+    elif returnVal == METIS_ERROR_MEMORY:
+        raise Exception("METIS_ERROR_MEMORY")
+    elif returnVal == METIS_ERROR:
+        raise Exception("METIS_ERROR")
+    else:
+        raise Exception("Unknown METIS error")
+
+
+# The graph partitioners below take the adjacency structure of the graph in
+# CSR form: the neighbors of vertex i are adjncy[xadj[i]:xadj[i+1]]. They
+# return the partition assignment vector and the achieved objective value.
+cpdef PartGraphRecursive(idx_t[::1] xadj,
+                         idx_t[::1] adjncy,
+                         idx_t nparts,
+                         idx_t[::1] vwgt=None,
+                         idx_t[::1] vsize=None,
+                         idx_t[::1] adjwgt=None,
+                         real_t[::1] tpwgts=None,
+                         real_t[::1] ubvec=None,
+                         idx_t[::1] options=None):
+    cdef:
+        idx_t nvtxs = xadj.shape[0]-1, ncon, objval
+        np.ndarray[idx_t, ndim=1] part = uninitialized(nvtxs,
dtype=idx) + idx_t[::1] part_mv = part + idx_t *vwgtPtr + idx_t *vsizePtr + idx_t *adjwgtPtr + idx_t *optionsPtr + real_t *tpwgtsPtr + real_t *ubvecPtr + int returnVal + + optionsPtr = NULL if options is None else &options[0] + vwgtPtr = NULL if vwgt is None else &vwgt[0] + vsizePtr = NULL if vsize is None else &vsize[0] + adjwgtPtr = NULL if adjwgt is None else &adjwgt[0] + tpwgtsPtr = NULL if tpwgts is None else &tpwgts[0] + + if ubvec is None: + ncon = 1 + ubvecPtr = NULL + else: + ncon = ubvec.shape[0] + ubvecPtr = &ubvec[0] + + returnVal = METIS_PartGraphRecursive(&nvtxs, &ncon, &xadj[0], &adjncy[0], + vwgtPtr, vsizePtr, adjwgtPtr, &nparts, + tpwgtsPtr, ubvecPtr, optionsPtr, + &objval, &part_mv[0]) + process_return(returnVal) + return part, objval + + +cpdef PartGraphKway(idx_t[::1] xadj, + idx_t[::1] adjncy, + idx_t nparts, + idx_t[::1] vwgt=None, + idx_t[::1] vsize=None, + idx_t[::1] adjwgt=None, + real_t[::1] tpwgts=None, + real_t[::1] ubvec=None, + idx_t[::1] options=None): + cdef: + idx_t nvtxs = xadj.shape[0]-1, ncon, objval + np.ndarray[idx_t, ndim=1] part = uninitialized(nvtxs, dtype=idx) + idx_t[::1] part_mv = part + idx_t *vwgtPtr + idx_t *vsizePtr + idx_t *adjwgtPtr + idx_t *optionsPtr + real_t *tpwgtsPtr + real_t *ubvecPtr + int returnVal + + optionsPtr = NULL if options is None else &options[0] + vwgtPtr = NULL if vwgt is None else &vwgt[0] + vsizePtr = NULL if vsize is None else &vsize[0] + adjwgtPtr = NULL if adjwgt is None else &adjwgt[0] + tpwgtsPtr = NULL if tpwgts is None else &tpwgts[0] + + if ubvec is None: + ncon = 1 + ubvecPtr = NULL + else: + ncon = ubvec.shape[0] + ubvecPtr = &ubvec[0] + + returnVal = METIS_PartGraphKway(&nvtxs, &ncon, &xadj[0], &adjncy[0], + vwgtPtr, vsizePtr, adjwgtPtr, &nparts, + tpwgtsPtr, ubvecPtr, optionsPtr, + &objval, &part_mv[0]) + process_return(returnVal) + return part, objval + + +cpdef PartMeshDual(idx_t[::1] eptr, + idx_t[::1] eind, + idx_t ncommon, + idx_t nparts, + idx_t[::1] vwgt=None, + idx_t[::1] vsize=None, + idx_t[::1] adjwgt=None, + real_t[::1] tpwgts=None, + real_t[::1] ubvec=None, + idx_t[::1] options=None): + cdef: + idx_t ne = eptr.shape[0]-1, objval + idx_t nn = max(eind)+1 + np.ndarray[idx_t, ndim=1] npart = uninitialized(nn, dtype=idx) + np.ndarray[idx_t, ndim=1] epart = uninitialized(ne, dtype=idx) + idx_t[::1] npart_mv = npart + idx_t[::1] epart_mv = epart + idx_t *vwgtPtr + idx_t *vsizePtr + idx_t *optionsPtr + real_t *tpwgtsPtr + int returnVal + + optionsPtr = NULL if options is None else &options[0] + vwgtPtr = NULL if vwgt is None else &vwgt[0] + vsizePtr = NULL if vsize is None else &vsize[0] + tpwgtsPtr = NULL if tpwgts is None else &tpwgts[0] + + returnVal = METIS_PartMeshDual(&ne, &nn, &eptr[0], &eind[0], + vwgtPtr, vsizePtr, &ncommon, &nparts, + tpwgtsPtr, optionsPtr, + &objval, &epart_mv[0], &npart_mv[0]) + process_return(returnVal) + return epart, npart, objval + + +cpdef PartMeshNodal(idx_t[::1] eptr, + idx_t[::1] eind, + idx_t nparts, + idx_t[::1] vwgt=None, + idx_t[::1] vsize=None, + idx_t[::1] adjwgt=None, + real_t[::1] tpwgts=None, + real_t[::1] ubvec=None, + idx_t[::1] options=None): + cdef: + idx_t ne = eptr.shape[0]-1, objval + idx_t nn = max(eind)+1 + np.ndarray[idx_t, ndim=1] npart = uninitialized(nn, dtype=idx) + np.ndarray[idx_t, ndim=1] epart = uninitialized(ne, dtype=idx) + idx_t[::1] npart_mv = npart + idx_t[::1] epart_mv = epart + idx_t *vwgtPtr + idx_t *vsizePtr + idx_t *optionsPtr + real_t *tpwgtsPtr + int returnVal + + optionsPtr = NULL if options is None else &options[0] + vwgtPtr = 
NULL if vwgt is None else &vwgt[0] + vsizePtr = NULL if vsize is None else &vsize[0] + tpwgtsPtr = NULL if tpwgts is None else &tpwgts[0] + + returnVal = METIS_PartMeshNodal(&ne, &nn, &eptr[0], &eind[0], + vwgtPtr, vsizePtr, &nparts, + tpwgtsPtr, optionsPtr, + &objval, &epart_mv[0], &npart_mv[0]) + process_return(returnVal) + return epart, npart, objval + + + +cpdef NodeND(idx_t[::1] xadj, + idx_t[::1] adjncy, + idx_t[::1] vwgt=None, + idx_t[::1] options=None): + cdef: + idx_t nvtxs = xadj.shape[0]-1 + np.ndarray[idx_t, ndim=1] perm = uninitialized(nvtxs, dtype=idx) + np.ndarray[idx_t, ndim=1] iperm = uninitialized(nvtxs, dtype=idx) + idx_t[::1] perm_mv = perm, iperm_mv = iperm + idx_t *vwgtPtr + idx_t *optionsPtr + int returnVal + + optionsPtr = NULL if options is None else &options[0] + vwgtPtr = NULL if vwgt is None else &vwgt[0] + + returnVal = METIS_NodeND(&nvtxs, &xadj[0], &adjncy[0], + vwgtPtr, optionsPtr, + &perm_mv[0], &iperm_mv[0]) + process_return(returnVal) + return perm, iperm diff --git a/metisCy/PyNucleus_metisCy/parmetisCy.pyx b/metisCy/PyNucleus_metisCy/parmetisCy.pyx new file mode 100644 index 0000000..2ea58a2 --- /dev/null +++ b/metisCy/PyNucleus_metisCy/parmetisCy.pyx @@ -0,0 +1,230 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI +from mpi4py cimport libmpi as mpi + +from PyNucleus_base import uninitialized + +ctypedef mpi.MPI_Comm MPI_Comm + +include "config.pxi" + +IF IDXTYPEWIDTH == 32: + idx = np.int32 + cdef int IDX = np.NPY_INT32 + ctypedef np.int32_t idx_t +ELIF IDXTYPEWIDTH == 64: + idx = np.int64 + cdef int IDX = np.NPY_INT64 + ctypedef np.int64_t idx_t + +IF REALTYPEWIDTH == 32: + real = np.float32 + ctypedef float real_t +ELIF REALTYPEWIDTH == 64: + real = np.float64 + ctypedef np.float64_t real_t + + +cdef extern from "parmetis.h": + int ParMETIS_V3_PartMeshKway( + idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *elmwgt, + idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *ncommonnodes, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, idx_t *part, + MPI_Comm *comm); + + int ParMETIS_V3_Mesh2Dual( + idx_t *elmdist, idx_t *eptr, idx_t *eind, idx_t *numflag, + idx_t *ncommonnodes, idx_t **xadj, idx_t **adjncy, MPI_Comm *comm); + + int ParMETIS_V3_RefineKway( + idx_t *vtxdist, idx_t *xadj, idx_t *adjncy, idx_t *vwgt, + idx_t *adjwgt, idx_t *wgtflag, idx_t *numflag, idx_t *ncon, idx_t *nparts, + real_t *tpwgts, real_t *ubvec, idx_t *options, idx_t *edgecut, + idx_t *part, MPI_Comm *comm); + + int METIS_Free(void *ptr); + + idx_t METIS_OK, METIS_ERROR_INPUT, METIS_ERROR_MEMORY, METIS_ERROR + idx_t PARMETIS_PSR_COUPLED, PARMETIS_PSR_UNCOUPLED + idx_t PARMETIS_DBGLVL_TIME, PARMETIS_DBGLVL_INFO, PARMETIS_DBGLVL_PROGRESS, PARMETIS_DBGLVL_REFINEINFO, PARMETIS_DBGLVL_MATCHINFO, PARMETIS_DBGLVL_RMOVEINFO, PARMETIS_DBGLVL_REMAP + + +cpdef process_return(returnVal): + if returnVal == METIS_OK: + return + elif returnVal == METIS_ERROR_INPUT: + raise Exception("METIS_ERROR_INPUT") + elif 
returnVal == METIS_ERROR_MEMORY: + raise Exception("METIS_ERROR_MEMORY") + elif returnVal == METIS_ERROR: + raise Exception("METIS_ERROR") + else: + raise Exception("Unknown METIS error") + + +cpdef PartMeshKway(idx_t[::1] elemdist, + idx_t[::1] eptr, + idx_t[::1] eind, + idx_t ncommonnodes, + idx_t nparts, + MPI.Comm comm, + idx_t[::1] elemwgt=None, + idx_t[::1] adjwgt=None, + real_t[:, ::1] tpwgts=None, + real_t[::1] ubvec=None, + idx_t[::1] options=None): + cdef: + np.ndarray[idx_t, ndim=1] part = uninitialized((eptr.shape[0]-1), dtype=idx) + idx_t[::1] part_mv = part + idx_t *elemwgtPtr + int returnVal + idx_t wgtflag, numflag = 0, ncon, edgecut = 0 + MPI_Comm *commPtr = &comm.ob_mpi + + if options is None: + options = np.zeros((3), dtype=idx) + elemwgtPtr = NULL if elemwgt is None else &elemwgt[0] + + if ubvec is None: + ncon = 1 + ubvec = real(1.05)*np.ones((ncon), dtype=real) + else: + ncon = ubvec.shape[0] + + if tpwgts is None: + tpwgts = 1/real(nparts)*np.ones((ncon, nparts), dtype=real) + + if elemwgt is None: + wgtflag = 0 + else: + wgtflag = 2 + + returnVal = ParMETIS_V3_PartMeshKway(&elemdist[0], + &eptr[0], + &eind[0], + elemwgtPtr, + &wgtflag, + &numflag, + &ncon, + &ncommonnodes, + &nparts, + &tpwgts[0, 0], + &ubvec[0], + &options[0], + &edgecut, + &part_mv[0], + commPtr) + process_return(returnVal) + return part + + +cpdef Mesh2Dual(idx_t[::1] elemdist, + idx_t[::1] eptr, + idx_t[::1] eind, + idx_t ncommonnodes, + MPI.Comm comm): + cdef: + int returnVal + idx_t numflag = 0 + MPI_Comm *commPtr = &comm.ob_mpi + idx_t *xadjPtr + idx_t *adjncyPtr + idx_t[::1] xadj = np.zeros((eptr.shape[0]), dtype=idx) + idx_t[::1] adjncy + + returnVal = ParMETIS_V3_Mesh2Dual(&elemdist[0], + &eptr[0], + &eind[0], + &numflag, + &ncommonnodes, + &xadjPtr, + &adjncyPtr, + commPtr) + process_return(returnVal) + + for i in range(xadj.shape[0]): + xadj[i] = xadjPtr[i] + returnVal = METIS_Free(xadjPtr) + process_return(returnVal) + adjncy = np.zeros((xadj[xadj.shape[0]-1]), dtype=idx) + for i in range(adjncy.shape[0]): + adjncy[i] = adjncyPtr[i] + returnVal = METIS_Free(adjncyPtr) + process_return(returnVal) + + return np.array(xadj, copy=False, dtype=idx), np.array(adjncy, copy=False, dtype=idx) + + +cpdef RefineKway(idx_t[::1] vtxdist, + idx_t[::1] xadj, + idx_t[::1] adjncy, + idx_t[::1] part, + idx_t nparts, + MPI.Comm comm, + idx_t[::1] vwgt=None, + idx_t[::1] adjwgt=None, + real_t[:, ::1] tpwgts=None, + real_t[::1] ubvec=None, + idx_t[::1] options=None): + cdef: + idx_t *vwgtPtr + idx_t *adjwgtPtr + int returnVal + idx_t wgtflag, numflag = 0, ncon, edgecut = 0 + MPI_Comm *commPtr = &comm.ob_mpi + + if options is None: + options = np.zeros((4), dtype=idx) + vwgtPtr = NULL if vwgt is None else &vwgt[0] + adjwgtPtr = NULL if adjwgt is None else &adjwgt[0] + + if ubvec is None: + ncon = 1 + ubvec = real(1.05)*np.ones((ncon), dtype=real) + else: + ncon = ubvec.shape[0] + + if tpwgts is None: + tpwgts = 1/real(nparts)*np.ones((ncon, nparts), dtype=real) + + if vwgt is None: + if adjwgt is None: + wgtflag = 0 + else: + wgtflag = 1 + else: + if adjwgt is None: + wgtflag = 2 + else: + wgtflag = 3 + + returnVal = ParMETIS_V3_RefineKway(&vtxdist[0], + &xadj[0], + &adjncy[0], + vwgtPtr, + adjwgtPtr, + &wgtflag, + &numflag, + &ncon, + &nparts, + &tpwgts[0, 0], + &ubvec[0], + &options[0], + &edgecut, + &part[0], + commPtr) + process_return(returnVal) + # print(edgecut) + return part diff --git a/metisCy/setup.cfg b/metisCy/setup.cfg new file mode 100644 index 0000000..befafb2 --- /dev/null +++ 
b/metisCy/setup.cfg
@@ -0,0 +1,7 @@
+
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = PyNucleus_metisCy/_version.py
+tag_prefix =
+parentdir_prefix =
\ No newline at end of file
diff --git a/metisCy/setup.py b/metisCy/setup.py
new file mode 100644
index 0000000..260a8bc
--- /dev/null
+++ b/metisCy/setup.py
@@ -0,0 +1,55 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.#
+###################################################################################
+
+
+from subprocess import Popen, PIPE, STDOUT
+import re
+try:
+    from PyNucleus_base.setupUtils import package
+except ImportError as e:
+    raise ImportError('\'PyNucleus_base\' needs to be installed first.') from e
+
+
+p = package('PyNucleus_metisCy')
+
+######################################################################
+# Attempt to detect the types used for indices and reals in Metis
+cmd = "echo '#include <metis.h>' | cpp -H -o /dev/null 2>&1 | head -n1"
+proc = Popen(cmd,
+             stdout=PIPE, stderr=STDOUT,
+             shell=True,
+             universal_newlines=True)
+out, _ = proc.communicate()
+metisHeader = out[2:-1]
+
+idx = re.compile(r'\s*#define\s*IDXTYPEWIDTH\s*([0-9]+)')
+real = re.compile(r'\s*#define\s*REALTYPEWIDTH\s*([0-9]+)')
+
+idxDefault = 32
+realDefault = 32
+with open(metisHeader, 'r') as f:
+    for line in f:
+        match = idx.match(line)
+        if match:
+            idxDefault = int(match.group(1))
+        match = real.match(line)
+        if match:
+            realDefault = int(match.group(1))
+
+p.addOption('IDXTYPEWIDTH', 'METIS_idx_width', idxDefault)
+p.addOption('REALTYPEWIDTH', 'METIS_real_width', realDefault)
+p.loadConfig()
+
+p.addExtension("metisCy",
+               sources=[p.folder+"metisCy.pyx"],
+               libraries=["metis"])
+p.addExtension("parmetisCy",
+               sources=[p.folder+"parmetisCy.pyx"],
+               libraries=["parmetis", "metis"])
+
+p.setup(description="Cython wrapper for METIS.",
+        install_requires=['cython', 'numpy'])
diff --git a/metisCy/versioneer.py b/metisCy/versioneer.py
new file mode 100644
index 0000000..d9c300b
--- /dev/null
+++ b/metisCy/versioneer.py
@@ -0,0 +1,2116 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.#
+###################################################################################
+
+
+# Version: 0.21
+
+"""The Versioneer - like a rocketeer, but for versions.
+
+The Versioneer
+==============
+
+* like a rocketeer, but for versions!
+* https://github.com/python-versioneer/python-versioneer
+* Brian Warner
+* License: Public Domain
+* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3
+* [![Latest Version][pypi-image]][pypi-url]
+* [![Build Status][travis-image]][travis-url]
+
+This is a tool for managing a recorded version number in distutils-based
+python projects. The goal is to remove the tedious and error-prone "update
+the embedded version string" step from your release process.
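+
+As a quick sketch (editor's illustration; "myproject" is a placeholder), a
+project that has adopted Versioneer typically wires the computed version into
+its setup.py via the two hooks this file provides:
+
+    import versioneer
+    from setuptools import setup
+
+    setup(name="myproject",
+          version=versioneer.get_version(),
+          cmdclass=versioneer.get_cmdclass())
+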
Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. 
in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. 
`setup.py` and `.git/` are *not* siblings). There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to make it easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ .
The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
+  dependency
+* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
+  versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
+  plugin
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to find the project root directory. "
+               "Versioneer requires setup.py to be executed from "
+               "its immediate directory (like 'python setup.py COMMAND'), "
+               "or in a way that lets it use sys.argv[0] to find the root "
+               "(like 'python path/to/setup.py COMMAND').")
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one.
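+        # (Editor's note: e.g. if project A's versioneer.py is imported
+        # first, a later "import versioneer" from project B silently gets
+        # project A's module.)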
So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
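+    For example (editor's illustration), with parentdir_prefix "myproject-",
+    a release tarball unpacked into "myproject-1.2.3/" would yield the
+    version "1.2.3".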
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. 
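+
+    For example (editor's note), "1.2.post3" splits into ("1.2", 3).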
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment
+    is present). For example, "1.0.post3" yields ("1.0", 3), while
+    "1.0" yields ("1.0", None).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%%d" %% (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%%d" %% pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+ The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
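+    # For example, after 'git archive' keyword expansion the file might
+    # contain lines like (values purely illustrative):
+    #     git_refnames = " (HEAD -> master, tag: 1.0)"
+    #     git_full = "0123456789abcdef0123456789abcdef01234567"
+    # The regexps below pull the quoted values out of such lines without
+    # importing the module.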
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
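+            # (this could only happen if git describe misbehaves, e.g. by
+            #  emitting a hypothetical "TAG-extra" with a hyphen but no
+            #  "-NUM-gHEX" suffix, which the regex above cannot split)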
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
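+    # The resulting lookup order is:
+    #   1. expanded git-archive keywords inside _version.py,
+    #   2. a static version_json previously written into _version.py,
+    #   3. 'git describe' on the surrounding checkout,
+    #   4. the parent directory name (e.g. an unpacked "project-1.0/"),
+    # falling through to "0+unknown" if every method raises NotThisMethod.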
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
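+        # mirrors the cx_Freeze branch above: a static _version.py is
+        # written before the build and the template is restored afterwards,
+        # so the frozen executable ships a fixed version string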
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
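+    # the .gitattributes entry created by do_vcs_install looks like, e.g.,
+    #     PyNucleus_multilevelSolver/_version.py export-subst
+    # (cf. the .gitattributes files added elsewhere in this patch)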
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/multilevelSolver/.gitattributes b/multilevelSolver/.gitattributes new file mode 100644 index 0000000..45f49e4 --- /dev/null +++ b/multilevelSolver/.gitattributes @@ -0,0 +1,2 @@ + +PyNucleus_multilevelSolver/_version.py export-subst diff --git a/multilevelSolver/MANIFEST.in b/multilevelSolver/MANIFEST.in new file mode 100644 index 0000000..5aad390 --- /dev/null +++ b/multilevelSolver/MANIFEST.in @@ -0,0 +1,3 @@ + +include versioneer.py +include PyNucleus_multilevelSolver/_version.py diff --git a/multilevelSolver/PyNucleus_multilevelSolver/__init__.py b/multilevelSolver/PyNucleus_multilevelSolver/__init__.py new file mode 100644 index 0000000..386b7e6 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/__init__.py @@ -0,0 +1,25 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from . levels import meshLevel, algebraicLevel +from . hierarchies import EmptyHierarchy, hierarchy, hierarchyManager +from . connectors import (inputConnector, repartitionConnector, + + ) +from . multigrid import multigrid, Complexmultigrid, V, W, FMG_V, FMG_W + +from . geometricMG import (writeToHDF, readFromHDF, + paramsForMG, paramsForSerialMG) + + +from PyNucleus_base import solverFactory +solverFactory.register('mg', multigrid, isMultilevelSolver=True) +solverFactory.register('complex_mg', Complexmultigrid, isMultilevelSolver=True) + +from . 
import _version +__version__ = _version.get_versions()['version'] diff --git a/multilevelSolver/PyNucleus_multilevelSolver/_version.py b/multilevelSolver/PyNucleus_multilevelSolver/_version.py new file mode 100644 index 0000000..bd25756 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_multilevelSolver/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + 
else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. 
The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. 
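+        # Illustration (hypothetical output): on a detached HEAD,
+        # 'git branch --contains' prints something like
+        #     * (HEAD detached at 1a2b3c4)
+        #       master
+        # the parenthesized line is dropped and "master" gets picked below.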
+ branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
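+        # Illustrative example (not part of the original source; paths are
+        # hypothetical): with __file__ ending in
+        # ".../multilevelSolver/PyNucleus_multilevelSolver/_version.py" and
+        # versionfile_source == "PyNucleus_multilevelSolver/_version.py",
+        # stripping one os.path.dirname() per path component leaves
+        # root == ".../multilevelSolver", the directory that holds setup.py
+        # and, in a checkout, the .git directory.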
+ for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pxd b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pxd new file mode 100644 index 0000000..ac3c4bf --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pxd @@ -0,0 +1,16 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI +from PyNucleus_fem.algebraicOverlaps cimport algebraicOverlapManager + +include "coarseSolvers_decl_REAL.pxi" +include "coarseSolvers_decl_COMPLEX.pxi" diff --git a/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pyx b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pyx new file mode 100644 index 0000000..3841390 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers.pyx @@ -0,0 +1,33 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +import numpy as np +cimport numpy as np +cimport cython +import logging +from PyNucleus_base.myTypes import INDEX, REAL, BOOL +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base import uninitialized +from PyNucleus_base.performanceLogger cimport PLogger, FakePLogger +from PyNucleus_base.linear_operators cimport LinearOperator +from PyNucleus_base import solverFactory +from PyNucleus_fem.meshOverlaps import overlapManager +from time import sleep +from sys import stdout +include "config.pxi" +LOGGER = logging.getLogger(__name__) +MPI_BOOL = MPI.BOOL + +from PyNucleus_fem.algebraicOverlaps cimport flush_type, no_flush, flush_local, flush_local_all, flush, flush_all + +include "coarseSolvers_REAL.pxi" +include "coarseSolvers_COMPLEX.pxi" + diff --git a/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_decl_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_decl_{SCALAR}.pxi new file mode 100644 index 0000000..ab77bdf --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_decl_{SCALAR}.pxi @@ -0,0 +1,35 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, {SCALAR}_t, BOOL_t +from PyNucleus_base.solvers cimport {SCALAR_label_lc_}solver, {SCALAR_label_lc_}iterative_solver + + +cdef class {SCALAR_label}coarseSolver({SCALAR_label_lc_}iterative_solver): + cdef: + MPI.Comm comm, subset_comm, subset_commFine + MPI.Intercomm inter_comm + INDEX_t myLeaderRank, otherLeaderRank + public algebraicOverlapManager overlapsCoarse, overlapsFine, intraLevelCoarse, intraLevelFine + public {SCALAR}_t[::1] rhs, x + public {SCALAR_label_lc_}solver Ainv + public BOOL_t inCG + public BOOL_t inSubdomain + list levels + object hierarchy + object hierarchyManager + INDEX_t depth + str solver_description + str solverName + dict kwargs + public BOOL_t asynchronous + str name + cpdef BOOL_t canWriteRHS(self) + cpdef void sendRHS(self, {SCALAR}_t[::1] b) + cdef BOOL_t solve_cg(self) + cpdef BOOL_t getSolution(self, {SCALAR}_t[::1] x) diff --git a/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_{SCALAR}.pxi new file mode 100644 index 0000000..cb4381e --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/coarseSolvers_{SCALAR}.pxi @@ -0,0 +1,181 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
#
+###################################################################################
+
+
+import mpi4py.rc
+mpi4py.rc.initialize = False
+from mpi4py import MPI
+import numpy as np
+cimport numpy as np
+cimport cython
+import logging
+from PyNucleus_base.myTypes import INDEX, {SCALAR}, BOOL
+from PyNucleus_base.myTypes cimport INDEX_t, {SCALAR}_t, BOOL_t
+from PyNucleus_base import uninitialized
+from PyNucleus_base.performanceLogger cimport PLogger, FakePLogger
+from PyNucleus_base.linear_operators cimport {SCALAR_label}LinearOperator
+from PyNucleus_base import solverFactory
+from PyNucleus_fem.meshOverlaps import overlapManager
+
+from time import sleep
+from sys import stdout
+include "config.pxi"
+LOGGER = logging.getLogger(__name__)
+MPI_BOOL = MPI.BOOL
+
+from PyNucleus_fem.algebraicOverlaps cimport flush_type, no_flush, flush_local, flush_local_all, flush, flush_all
+
+
+######################################################################
+
+cdef class {SCALAR_label}coarseSolver({SCALAR_label_lc_}iterative_solver):
+    """
+    This coarse solver gathers the right-hand side from all nodes, then
+    solves the problem on a subcommunicator using a distributed solver
+    and scatters the solution.
+    """
+
+    def __init__(self,
+                 hierarchyManager,
+                 FakePLogger PLogger,
+                 solverName,
+                 **kwargs):
+        self.solverName = solverName
+        self.kwargs = kwargs
+        self.asynchronous = False
+
+        self.hierarchyManager = hierarchyManager
+        hierarchy = hierarchyManager.builtHierarchies[-1]
+        self.hierarchy = hierarchy
+        self.comm = hierarchy.connectorEnd.global_comm
+        if not hierarchy.connectorEnd.is_overlapping:
+            self.inter_comm = hierarchy.connectorEnd.interComm
+            self.myLeaderRank = hierarchy.connectorEnd.myLeaderRank
+            self.otherLeaderRank = hierarchy.connectorEnd.otherLeaderRank
+
+        if hasattr(hierarchy, 'connectorEnd') and hasattr(hierarchy.connectorEnd, 'depth'):
+            self.depth = hierarchy.connectorEnd.depth
+        else:
+            self.depth = 0
+
+        if hasattr(hierarchy.connectorEnd, 'algOM'):
+            # rank is part of coarse grid
+            self.overlapsCoarse = hierarchy.connectorEnd.algOM
+            self.levels = hierarchy.getLevelList(recurse=False)
+            self.subset_comm = hierarchy.comm
+            localSize = self.levels[len(self.levels)-1]['A'].shape[0]
+            self.inCG = True
+            self.x = uninitialized((localSize), dtype={SCALAR})
+            self.rhs = uninitialized((localSize), dtype={SCALAR})
+            self.intraLevelCoarse = hierarchy.algebraicLevels[-1].algebraicOverlaps
+
+        else:
+            self.inCG = False
+
+        if hasattr(hierarchy.connectorEnd, 'algOMnew'):
+            # rank is part of fine grid
+            self.overlapsFine = hierarchy.connectorEnd.algOMnew
+            self.inSubdomain = True
+            self.intraLevelFine = hierarchy.connectorEnd.hierarchy2.algebraicLevels[0].algebraicOverlaps
+            self.subset_commFine = hierarchy.connectorEnd.comm2
+            {SCALAR_label_lc_}iterative_solver.__init__(self, num_rows=hierarchy.connectorEnd.hierarchy2.algebraicLevels[0].DoFMap.num_dofs)
+
+        else:
+            self.inSubdomain = False
+            {SCALAR_label_lc_}iterative_solver.__init__(self, num_rows=0)
+        self.PLogger = PLogger
+        self.setAinv()
+        if self.inCG:
+            self.Ainv.PLogger = self.PLogger
+
+    @cython.wraparound(False)
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    cpdef BOOL_t canWriteRHS(self):
+        return True
+
+    @cython.wraparound(False)
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    cpdef void sendRHS(self, {SCALAR}_t[::1] b):
+        self.overlapsFine.send{SCALAR_label}(b)
+
+    @cython.wraparound(False)
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    cdef BOOL_t solve_cg(self):
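+        # Sketch of the gather-solve-scatter protocol implemented below
+        # (descriptive comment only, no behavioral change): coarse-grid
+        # ranks zero their local right-hand side, receive contributions
+        # from the fine-grid ranks through the overlap manager, distribute
+        # shared entries, run the local solver, and send the solution back.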
self.rhs[:] = 0. + self.overlapsCoarse.receive{SCALAR_label}(self.rhs) + if self.intraLevelCoarse is not None: + self.intraLevelCoarse.distribute{SCALAR_label}(self.rhs) + with self.PLogger.Timer('solveTimeLocal'): + if isinstance(self.Ainv, {SCALAR_label_lc_}iterative_solver): + self.Ainv.tolerance = self.tolerance + self.Ainv.maxIter = self.maxIter + numIter = self.Ainv.solve(self.rhs, self.x) + self.PLogger.addValue('coarsegrid.Iterations', numIter) + self.overlapsCoarse.send{SCALAR_label}(self.x) + return True + + @cython.wraparound(False) + @cython.initializedcheck(False) + @cython.boundscheck(False) + cpdef BOOL_t getSolution(self, {SCALAR}_t[::1] x): + x[:] = 0. + self.overlapsFine.receive{SCALAR_label}(x) + self.overlapsFine.distribute{SCALAR_label}(x) + return True + + @cython.wraparound(False) + @cython.initializedcheck(False) + @cython.boundscheck(False) + cdef int solve(self, + {SCALAR}_t[::1] b, + {SCALAR}_t[::1] x) except -1: + cdef: + BOOL_t ret = True + {SCALAR_label_lc_}iterative_solver.solve(self, b, x) + if (self.overlapsFine is not None) or (b.shape[0] != self.Ainv.num_rows): + with self.PLogger.Timer('solveTime'): + if self.inSubdomain and self.canWriteRHS(): + self.sendRHS(b) + if self.inCG: + ret = self.solve_cg() + if self.inSubdomain: + self.getSolution(x) + else: + with self.PLogger.Timer('solveTime'): + if isinstance(self.Ainv, {SCALAR_label_lc_}iterative_solver): + self.Ainv.tolerance = self.tolerance + self.Ainv.maxIter = self.maxIter + numIter = self.Ainv.solve(b, x) + self.PLogger.addValue('coarsegrid.Iterations', numIter) + return ret + + def __str__(self): + return self.solver_description + + def setAinv(self): + if self.inCG: + if self.solverName in ('LU', 'Chol', 'IChol', 'ILU'): + assert self.subset_comm.size == 1, 'Cannot run {} in distributed mode'.format(self.solverName) + self.Ainv = solverFactory.build(self.solverName, A=self.levels[-1]['A'], hierarchy=self.hierarchyManager, **self.kwargs) + + cpdef void setup(self, {SCALAR_label}LinearOperator A=None): + if self.Ainv is not None: + self.Ainv.setup() + self.solver_description = str(self.Ainv) + root = 0 + if not self.hierarchy.connectorEnd.is_overlapping: + if self.inCG: + root = self.hierarchy.connectorEnd.myGlobalLeaderRank + if self.inSubdomain: + root = self.hierarchy.connectorEnd.otherLeaderRank + else: + root = self.hierarchy.connectorEnd.myGlobalLeaderRank + self.solver_description = self.comm.bcast(self.solver_description, root=root) + self.initialized = True diff --git a/multilevelSolver/PyNucleus_multilevelSolver/connectors.py b/multilevelSolver/PyNucleus_multilevelSolver/connectors.py new file mode 100644 index 0000000..d96fb72 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/connectors.py @@ -0,0 +1,366 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import print_function +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +import logging +import numpy as np +from copy import deepcopy +from PyNucleus_base.myTypes import REAL, INDEX, TAG +from PyNucleus_base import uninitialized +from . 
levels import meshLevel, algebraicLevel +from . hierarchies import EmptyHierarchy, hierarchy + +from PyNucleus_base import TimerManager +from PyNucleus_fem import (simpleInterval, intervalWithInteraction, + simpleSquare, simpleLshape, + uniformSquare, crossSquare, + squareWithInteractions, + simpleBox, standardSimplex3D, + simpleFicheraCube, + circle, + discWithInteraction, + meshNd, + boundaryLayer, + P0_DoFMap, + meshFactory) + +from PyNucleus_fem.meshCy import radialMeshTransformation +from PyNucleus_fem.repartitioner import Repartitioner +from PyNucleus_fem.meshOverlaps import meshOverlap, overlapManager, interfaceManager +from PyNucleus_fem.mesh import INTERIOR_NONOVERLAPPING, INTERIOR, NO_BOUNDARY + +LOGGER = logging.getLogger(__name__) + + +class hierarchyConnector(object): + def __init__(self, global_comm, comm1, comm2, hierarchy1): + self.global_comm = global_comm + self.comm1 = comm1 + self.comm2 = comm2 + self.hierarchy1 = hierarchy1 + if self.hierarchy1 is not None: + self.hierarchy1.connectorEnd = self + self.Timer = TimerManager(LOGGER, + comm=self.global_comm, + # prefix=label + ) + if self.comm1 is None and self.comm2 is not None: + if self.global_comm.size == 1: + self.is_overlapping = True + else: + self.is_overlapping = self.global_comm.allreduce(comm1 is not None and comm2 is not None, MPI.LOR) + else: + self.is_overlapping = self.global_comm.allreduce(comm1 is not None and comm2 is not None, MPI.LOR) + if not self.is_overlapping: + req1 = [] + req2 = [] + if self.comm1 is not None: + self.myLeaderRank = 0 + self.myGlobalLeaderRank = self.comm1.bcast(self.global_comm.rank, root=self.myLeaderRank) + if self.comm1.rank == self.myLeaderRank: + req1.append(self.global_comm.isend('me', dest=0, tag=770)) + if self.comm2 is not None: + self.myLeaderRank = 0 + self.myGlobalLeaderRank = self.comm2.bcast(self.global_comm.rank, root=self.myLeaderRank) + if self.comm2.rank == self.myLeaderRank: + req1.append(self.global_comm.isend('me', dest=0, tag=771)) + if self.global_comm.rank == 0: + status = MPI.Status() + self.global_comm.recv(source=MPI.ANY_SOURCE, status=status, tag=770) + rank1 = status.source + status = MPI.Status() + self.global_comm.recv(source=MPI.ANY_SOURCE, status=status, tag=771) + rank2 = status.source + req2.append(self.global_comm.isend(rank2, dest=rank1, tag=772)) + req2.append(self.global_comm.isend(rank1, dest=rank2, tag=773)) + MPI.Request.Waitall(req1) + if self.comm1 is not None: + if self.comm1.rank == self.myLeaderRank: + self.otherLeaderRank = self.global_comm.recv(source=0, tag=772) + self.comm1.bcast(self.otherLeaderRank, root=self.myLeaderRank) + else: + self.otherLeaderRank = self.comm1.bcast(-1, root=self.myLeaderRank) + if self.comm2 is not None: + if self.comm2.rank == self.myLeaderRank: + self.otherLeaderRank = self.global_comm.recv(source=0, tag=773) + self.comm2.bcast(self.otherLeaderRank, root=self.myLeaderRank) + else: + self.otherLeaderRank = self.comm2.bcast(-1, root=self.myLeaderRank) + MPI.Request.Waitall(req2) + + if self.comm1 is not None: + self._oldSubdomainGlobalRank = np.array(self.comm1.allgather(self.global_comm.rank), dtype=INDEX) + self._newSubdomainGlobalRank = None + + self.interComm = self.comm1.Create_intercomm(self.myLeaderRank, self.global_comm, self.otherLeaderRank) + self.interComm.bcast(self._oldSubdomainGlobalRank, root=MPI.ROOT if self.comm1.rank == self.myLeaderRank else MPI.PROC_NULL) + self._newSubdomainGlobalRank = self.interComm.bcast(self._newSubdomainGlobalRank, root=0) + + if self.comm2 is not None: + 
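+                # Mirror image of the comm1 branch above (descriptive
+                # comment only): the comm2 side publishes its own
+                # subdomain-to-global-rank map over the intercommunicator
+                # and receives the comm1 side's map from the remote leader.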
self._oldSubdomainGlobalRank = None + self._newSubdomainGlobalRank = np.array(self.comm2.allgather(self.global_comm.rank), dtype=INDEX) + + self.interComm = self.comm2.Create_intercomm(self.myLeaderRank, self.global_comm, self.otherLeaderRank) + self._oldSubdomainGlobalRank = self.interComm.bcast(self._oldSubdomainGlobalRank, root=0) + self.interComm.bcast(self._newSubdomainGlobalRank, root=MPI.ROOT if self.comm2.rank == self.myLeaderRank else MPI.PROC_NULL) + else: + inBothComms = self.comm1 is not None and self.comm2 is not None + self.myGlobalLeaderRank = self.global_comm.allreduce(self.global_comm.rank if inBothComms else self.global_comm.size, op=MPI.MIN) + self._oldSubdomainGlobalRank = np.arange(self.global_comm.size, dtype=INDEX) + self._newSubdomainGlobalRank = np.arange(self.global_comm.size, dtype=INDEX) + + self._oldRankSubdomainNo = {self._oldSubdomainGlobalRank[subdomainNo]: subdomainNo for subdomainNo in range(self._oldSubdomainGlobalRank.shape[0])} + self._newRankSubdomainNo = {self._newSubdomainGlobalRank[subdomainNo]: subdomainNo for subdomainNo in range(self._newSubdomainGlobalRank.shape[0])} + + def getNewHierarchy(self): + raise NotImplementedError() + + def getLevelList(self): + if self.hierarchy1 is not None: + return self.hierarchy1.getLevelList() + else: + return [] + + def build(self): + pass + + def comm1SubdomainGlobalRank(self, subdomainNo): + return self._oldSubdomainGlobalRank[subdomainNo] + + def comm2SubdomainGlobalRank(self, subdomainNo): + return self._newSubdomainGlobalRank[subdomainNo] + + def comm1RankSubdomainNo(self, rank): + return self._oldRankSubdomainNo[rank] + + def comm2RankSubdomainNo(self, rank): + return self._newRankSubdomainNo[rank] + + +class inputConnector(hierarchyConnector): + def __init__(self, global_comm, comm1, comm2, hierarchy1, domain, + useMultiMesh=False, + algebraicLevelType=algebraicLevel, meshParams={}): + super(inputConnector, self).__init__(global_comm, comm1, comm2, hierarchy1) + self.domain = domain + self.meshParams = meshParams + self.useMultiMesh = useMultiMesh + self.algebraicLevelType = algebraicLevelType + + def getNewHierarchy(self, params): + with self.Timer('Initializing mesh on \'{}\''.format(params['label'])): + if self.useMultiMesh and self.domain.find('Multi') < 0: + self.domain += 'Multi' + mesh = meshFactory.build(self.domain, **self.meshParams) + if self.hierarchy1 is not None: + startLevelNo = self.hierarchy1.meshLevels[-1].levelNo + else: + startLevelNo = 0 + if self.useMultiMesh: + raise NotImplementedError() + else: + level = meshLevel(mesh, params['params'], label=params['label'], comm=params['comm'], startLevelNo=startLevelNo) + level.setAlgebraicLevelType(self.algebraicLevelType) + h = hierarchy(level, params['params'], comm=params['comm'], label=params['label']) + h.connectorStart = self + return h + + +class repartitionConnector(hierarchyConnector): + def __init__(self, global_comm, comm1, comm2, hierarchy1, partitionerType, partitionerParams, debugOverlaps=False, commType='standard', algebraicLevelType=algebraicLevel): + super(repartitionConnector, self).__init__(global_comm, comm1, comm2, hierarchy1) + self.partitionerType = partitionerType + self.partitionerParams = partitionerParams + self.debugOverlaps = debugOverlaps + self.commType = commType + self.splitOM = None + + self.algebraicLevelType = algebraicLevelType + + def getNewHierarchy(self, params): + if self.hierarchy1 is not None: + label1 = self.hierarchy1.label + else: + label1 = '' + self.label2 = params['label'] + with 
self.Timer('Repartitioning from \'{}\' to \'{}\' using \'{}\''.format(label1, params['label'], self.partitionerType)): + if self.comm1 is not None: + subdomain = self.hierarchy1.meshLevels[-1].mesh + interfaces = self.hierarchy1.meshLevels[-1].interfaces + rep = Repartitioner(subdomain, interfaces, self.global_comm, self.comm1, self.comm2) + + self.repartitioner = rep + part = rep.getCellPartition(self.partitionerType, self.partitionerParams) + subdomainNew, self.OM, self.OMnew, iM = rep.getRepartitionedSubdomains() + + if self.debugOverlaps and not self.is_overlapping: + self.OM.check(subdomain, self.global_comm, 'meshOverlaps from \'{}\' to \'{}\''.format(self.hierarchy1.label, params['label'])) + if self.hierarchy1 is not None: + startLevelNo = self.hierarchy1.meshLevels[-1].levelNo + else: + startLevelNo = 0 + else: + rep = Repartitioner(None, None, self.global_comm, self.comm1, self.comm2) + self.repartitioner = rep + subdomainNew, self.OM, self.OMnew, iM = rep.getRepartitionedSubdomains() + if self.debugOverlaps and not self.is_overlapping: + self.OMnew.check(subdomainNew, self.global_comm, 'meshOverlaps from \'{}\' to \'{}\''.format(self.hierarchy1.label, params['label'])) + startLevelNo = 0 + startLevelNo = self.global_comm.bcast(startLevelNo) + if self.comm2 is not None: + hierarchy.updateParamsFromDefaults(params['params']) + level = meshLevel(subdomainNew, + params['params'], + interfaces=iM, + label=params['label'], + comm=params['comm'], + startLevelNo=startLevelNo) + level.setAlgebraicLevelType(self.algebraicLevelType) + h = hierarchy(level, params['params'], comm=params['comm'], label=params['label']) + h.connectorStart = self + self.hierarchy2 = h + else: + h = EmptyHierarchy(params['params'], label=params['label']) + h.connectorStart = self + self.hierarchy2 = h + self.getLocalOverlap() + return h + + def getLocalOverlap(self): + if self.is_overlapping and self.comm1 is not None: + subdomain = self.hierarchy1.meshLevels[-1].mesh + if self.global_comm.rank in self.OM.overlaps: + print('cells kept local on rank {} in repartitioning: {:,} / target: {:,}'.format(self.global_comm.rank, + self.OM.overlaps[self.global_comm.rank].num_cells/subdomain.num_cells, + self.comm1.size/self.global_comm.size)) + else: + print('cells kept local on rank {} in repartitioning: {:,} / target: {:,}'.format(self.global_comm.rank, + 0., + self.comm1.size/self.global_comm.size)) + + def build(self): + if self.hierarchy1 is not None: + label1 = self.hierarchy1.label + else: + label1 = '' + self.global_comm.Barrier() + if self.OM is not None and self.OMnew is None: + self.global_comm.Barrier() + with self.Timer('Building algebraic overlaps of type \'{}\' from \'{}\' to \'{}\' using Alltoallv'.format(self.commType, label1, self.label2)): + subdomain = self.hierarchy1.meshLevels[-1].mesh + dm = self.hierarchy1.algebraicLevels[-1].DoFMap + self.algOM = self.OM.getDoFs(subdomain, dm, overlapType=self.commType, allowInteriorBoundary=True, useRequests=self.commType == 'standard', splitManager=self.splitOM) + if self.debugOverlaps and not self.is_overlapping: + self.algOM.check(subdomain, dm, 'algebraicOverlaps from \'{}\' to \'{}\''.format(label1, self.label2)) + self.global_comm.Barrier() + with self.Timer('Building distribute from \'{}\' to \'{}\''.format(label1, self.label2)): + self.algOM.prepareDistributeRepartition(dm) + if self.debugOverlaps: + from PyNucleus_fem import solSin1D, solSin2D, solSin3D + if subdomain.dim == 1: + xOld = dm.interpolate(solSin1D) + elif subdomain.dim == 2: + xOld = 
dm.interpolate(solSin2D) + elif subdomain.dim == 3: + xOld = dm.interpolate(solSin3D) + else: + raise NotImplementedError() + self.algOM.send_py(xOld) + + yOld = np.zeros((dm.num_dofs), dtype=REAL) + self.algOM.receive_py(yOld) + self.algOM.distribute_py(yOld) + assert np.linalg.norm(xOld-yOld) < 1e-9, (xOld, yOld) + + if self.OM is None and self.OMnew is not None: + self.global_comm.Barrier() + with self.Timer('Building algebraic overlaps of type \'{}\' from \'{}\' to \'{}\' using Alltoallv'.format(self.commType, label1, self.label2)): + subdomainNew = self.hierarchy2.meshLevels[0].mesh + dmNew = self.hierarchy2.algebraicLevels[0].DoFMap + self.algOMnew = self.OMnew.getDoFs(subdomainNew, dmNew, overlapType=self.commType, allowInteriorBoundary=True, useRequests=self.commType == 'standard', splitManager=self.splitOM) + if self.debugOverlaps and not self.is_overlapping: + self.algOMnew.check(subdomainNew, dmNew, 'algebraicOverlaps from \'{}\' to \'{}\''.format(label1, self.label2)) + self.global_comm.Barrier() + with self.Timer('Building distribute from \'{}\' to \'{}\''.format(label1, self.label2)): + self.algOMnew.prepareDistributeRepartition(dmNew) + if self.debugOverlaps: + from PyNucleus_fem import solSin1D, solSin2D, solSin3D + if subdomainNew.dim == 1: + xNew = dmNew.interpolate(solSin1D) + elif subdomainNew.dim == 2: + xNew = dmNew.interpolate(solSin2D) + elif subdomainNew.dim == 3: + xNew = dmNew.interpolate(solSin3D) + else: + raise NotImplementedError() + + yNew = np.zeros((dmNew.num_dofs), dtype=REAL) + self.algOMnew.receive_py(yNew) + self.algOMnew.distribute_py(yNew) + assert np.linalg.norm(xNew-yNew) < 1e-9, (xNew, yNew) + + self.algOMnew.send_py(xNew) + + if self.OM is not None and self.OMnew is not None: + self.global_comm.Barrier() + with self.Timer('Building algebraic overlaps of type \'{}\' from \'{}\' to \'{}\' using Alltoallv'.format(self.commType, label1, self.label2)): + subdomain = self.hierarchy1.meshLevels[-1].mesh + dm = self.hierarchy1.algebraicLevels[-1].DoFMap + assert dm.num_dofs > 0 + self.algOM = self.OM.getDoFs(subdomain, dm, overlapType=self.commType, allowInteriorBoundary=True, useRequests=self.commType == 'standard', waitRequests=False) + + subdomainNew = self.hierarchy2.meshLevels[0].mesh + dmNew = self.hierarchy2.algebraicLevels[0].DoFMap + self.algOMnew = self.OMnew.getDoFs(subdomainNew, dmNew, overlapType=self.commType, allowInteriorBoundary=True, useRequests=self.commType == 'standard') + MPI.Request.Waitall(self.OM.requests) + self.OM.requests = [] + + if self.debugOverlaps and not self.is_overlapping: + self.algOM.check(subdomain, dm, 'algebraicOverlaps from \'{}\' to \'{}\''.format(label1, self.label2)) + self.global_comm.Barrier() + with self.Timer('Building distribute from \'{}\' to \'{}\''.format(label1, self.label2)): + self.algOMnew.prepareDistributeRepartitionSend(dmNew) + self.algOM.prepareDistributeRepartition(dm, doSend=False) + self.algOM.prepareDistributeRepartitionSend(dm) + self.algOMnew.prepareDistributeRepartition(dmNew, doSend=False) + if self.debugOverlaps: + from PyNucleus_fem import solSin1D, solSin2D, solSin3D + if subdomain.dim == 1: + xOld = dm.interpolate(solSin1D) + elif subdomain.dim == 2: + xOld = dm.interpolate(solSin2D) + elif subdomain.dim == 3: + xOld = dm.interpolate(solSin3D) + else: + raise NotImplementedError() + + if subdomain.dim == 1: + xNew = dmNew.interpolate(solSin1D) + elif subdomain.dim == 2: + xNew = dmNew.interpolate(solSin2D) + elif subdomain.dim == 3: + xNew = dmNew.interpolate(solSin3D) + else: + 
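+                    # Dimensions other than 1, 2, 3 have no reference
+                    # interpolant; the debug round-trip below (send, receive,
+                    # distribute) must reproduce the interpolated values up
+                    # to a 1e-9 tolerance.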
raise NotImplementedError() + + self.algOM.send_py(xOld) + yNew = np.zeros((dmNew.num_dofs), dtype=REAL) + self.algOMnew.receive_py(yNew) + self.algOMnew.distribute_py(yNew) + assert np.linalg.norm(xNew-yNew) < 1e-9, (xNew, yNew) + + self.algOMnew.send_py(xNew) + yOld = np.zeros((dm.num_dofs), dtype=REAL) + self.algOM.receive_py(yOld) + self.algOM.distribute_py(yOld) + assert np.linalg.norm(xOld-yOld) < 1e-9, (xOld, yOld) + + diff --git a/multilevelSolver/PyNucleus_multilevelSolver/geometricMG.py b/multilevelSolver/PyNucleus_multilevelSolver/geometricMG.py new file mode 100644 index 0000000..f0317fe --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/geometricMG.py @@ -0,0 +1,207 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import logging +import numpy as np +from PyNucleus_base.ip_norm import norm_serial as norm +from PyNucleus_base import getLoggingTimer +from PyNucleus_base import REAL, INDEX, uninitialized +from PyNucleus_fem import P1_DoFMap +from PyNucleus_base.linear_operators import LinearOperator + +LOGGER = logging.getLogger(__name__) + + +def paramsForSerialMG(noRef, global_params): + symmetric = global_params.get('symmetric', False) + hierarchies = [ + {'label': 'fine', + 'ranks': set([0]), + 'connectorStart': 'input', + 'connectorEnd': None, + 'params': {'noRef': noRef, + 'keepMeshes': 'all' if global_params.get('keepMeshes', False) else 'none', + 'keepAllDoFMaps': global_params.get('keepAllDoFMaps', False), + 'assemble': 'all', + 'symmetric': symmetric, + 'solver': 'Chol' if symmetric else 'LU' + } + }] + connectors = {} + + return hierarchies, connectors + + +def paramsForMG(noRef, onRanks, global_params, dim, element, repartitionFactor=0.05, + max_coarse_grid_size=5000): + from . connectors import repartitionConnector + + numProcsAvail = len(onRanks) + onRanks = np.array(list(onRanks), dtype=INDEX) + if dim == 1: + numInitialCells = 2 + if element in ('P1', 1): + cells2dofsFactor = 1 + elif element in ('P2', 2): + cells2dofsFactor = 2 + elif element in ('P3', 3): + cells2dofsFactor = 3 + else: + raise NotImplementedError() + elif dim == 2: + numInitialCells = 8 + if element in ('P1', 1): + cells2dofsFactor = 0.5 + elif element in ('P2', 2): + cells2dofsFactor = 2 + elif element in ('P3', 3): + cells2dofsFactor = 4.5 + else: + raise NotImplementedError() + elif dim == 3: + numInitialCells = 6 + if element in ('P1', 1): + cells2dofsFactor = 1./6. 
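+            # The factors above are rough DoF-per-cell estimates: e.g. in a
+            # uniform 3D tetrahedral mesh roughly six cells share each
+            # vertex, so a P1 space has about num_cells/6 unknowns
+            # (illustrative heuristic, used only to size the coarse grid).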
+        elif element in ('P2', 2):
+            cells2dofsFactor = 1.35
+        elif element in ('P3', 3):
+            cells2dofsFactor = 4.5
+        else:
+            raise NotImplementedError()
+    else:
+        raise NotImplementedError()
+    uniformRefinementMultiplier = 2**dim
+    numCells = numInitialCells * uniformRefinementMultiplier**np.arange(noRef+1)
+    cg = 0
+    while numCells[cg+1]*cells2dofsFactor < max_coarse_grid_size and cg < noRef-1:
+        cg += 1
+    cellsPerProc = numCells[-1]/numProcsAvail
+    numProcs = uninitialized((noRef+1), dtype=int)
+    numProcs[-1] = numProcsAvail
+    numProcs[:cg+1] = 1
+    for i in range(noRef-1, cg, -1):
+        if numCells[i]/numProcs[i+1] < repartitionFactor * cellsPerProc:
+            numProcs[i] = int(np.ceil(numCells[i]/cellsPerProc))
+        else:
+            numProcs[i] = numProcs[i+1]
+
+    buildMass = global_params.get('buildMass', False)
+    symmetric = global_params.get('symmetric', False)
+    reaction = global_params.get('reaction', None)
+
+    hierarchies = [
+        {'label': 'seed',
+         'ranks': set([onRanks[0]]),
+         'connectorStart': 'input',
+         'connectorEnd': None,
+         'params': {'noRef': cg,
+                    'keepMeshes': 'all' if global_params.get('keepMeshes', False) else 'none',
+                    'assemble': 'all',
+                    'symmetric': symmetric,
+                    'reaction': reaction,
+                    'buildMass': buildMass,
+                    'element': element,
+                    'solver': 'Chol' if symmetric else 'LU',
+                    'solver_params': {}
+                    }
+         }]
+
+    lvl = cg+1
+    hierarchies.append({'label': str(len(hierarchies)),
+                        'ranks': set(onRanks[range(0, numProcs[lvl])]),
+                        'connectorStart': None,
+                        'connectorEnd': None,
+                        'params': {'noRef': 1,
+                                   'keepMeshes': 'all' if global_params.get('keepMeshes', False) else 'last',
+                                   'assemble': 'all',
+                                   'symmetric': symmetric,
+                                   'reaction': reaction,
+                                   'buildMass': buildMass,
+                                   'element': element,
+                                   'solver': 'MG',
+                                   'solver_params': {
+                                       'maxIter': 1,
+                                       'tolerance': 0.,
+                                   },
+                                   }
+                        })
+
+    lvl += 1
+    while lvl < noRef:
+        if numProcs[lvl] == numProcs[lvl-1]:
+            hierarchies[-1]['params']['noRef'] += 1
+        else:
+            hierarchies.append({'label': str(len(hierarchies)),
+                                'ranks': set(onRanks[range(0, numProcs[lvl])]),
+                                'connectorStart': None,
+                                'connectorEnd': None,
+                                'params': {'noRef': 1,
+                                           'keepMeshes': 'all' if global_params.get('keepMeshes', False) else 'last',
+                                           'assemble': 'all',
+                                           'symmetric': symmetric,
+                                           'reaction': reaction,
+                                           'buildMass': buildMass,
+                                           'element': element,
+                                           'solver': 'MG',
+                                           'solver_params': {
+                                               'maxIter': 1,
+                                               'tolerance': 0.,
+                                           },
+                                           }
+                                })
+        lvl += 1
+
+    if 'tag' in global_params:
+        for i in range(len(hierarchies)):
+            h = hierarchies[i]
+            h['params']['tag'] = global_params['tag']
+
+    connectors = {}
+    for i in range(1, len(hierarchies)):
+        label = 'breakUp_' + hierarchies[i-1]['label'] + ':' + hierarchies[i]['label']
+        connectors[label] = {'type': repartitionConnector,
+                             'params': {'partitionerType': global_params.get('coarsePartitioner', global_params.get('partitioner', 'regular')),
+                                        'partitionerParams': global_params.get('coarsePartitionerParams', global_params.get('partitionerParams', {})),
+                                        'debugOverlaps': global_params.get('debugOverlaps', False)}}
+        hierarchies[i-1]['connectorEnd'] = label
+        hierarchies[i]['connectorStart'] = label
+
+    return hierarchies, connectors
+
+
+
+def writeToHDF(filename, levels, mesh):
+    import h5py
+    f = h5py.File(filename, 'w')
+    for i, lvl in enumerate(levels):
+        for key in lvl:
+            if key in ('P', 'R', 'A', 'mesh'):
+                val = lvl[key]
+                grp = f.create_group(str(i) + '/' + key)
+                val.HDF5write(grp)
+        if 'mesh' not in f[str(i)]:
+            grp = f.create_group(str(i) + '/' + 'mesh')
+            mesh.HDF5write(grp)
+    f.flush()
+    f.close()
+
+
+def readFromHDF(filename):
+    import
h5py + f = h5py.File(filename, 'r') + LOGGER.info('Reading hierarchy from {}'.format(filename)) + maxLvl = 0 + for lvl in f: + maxLvl = max(maxLvl, int(lvl)) + levels = [{} for i in range(maxLvl+1)] + for lvl in f: + for key in f[lvl]: + if key in ('P', 'R', 'A'): + levels[int(lvl)][key] = LinearOperator.HDF5read(f[lvl + '/' + key]) + return levels diff --git a/multilevelSolver/PyNucleus_multilevelSolver/hierarchies.py b/multilevelSolver/PyNucleus_multilevelSolver/hierarchies.py new file mode 100644 index 0000000..28bfdc1 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/hierarchies.py @@ -0,0 +1,480 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +import logging +import numpy as np +from copy import deepcopy +from PyNucleus_base.myTypes import REAL +from PyNucleus_base import TimerManager, updateFromDefaults +from PyNucleus_fem import boundaryLayer +from PyNucleus_fem.algebraicOverlaps import multilevelAlgebraicOverlapManager +from . levels import meshLevel, algebraicLevel +from . levels import (DELETE_MESH, KEEP_MESH, + SPARSITY_PATTERN, DOFMAPS, + NO_BUILD, RESTRICTION_PROLONGATION_ONLY, + SPARSITY_ONLY, SINGLE_LEVEL, FULL_BUILD) + +LOGGER = logging.getLogger(__name__) + + +class EmptyHierarchy(object): + def __init__(self, params, label=''): + self.params = params + self.updateParamsFromDefaults() + self.label = label + self.connectorEnd = None + + def isSetUp(self): + return False + + def updateParamsFromDefaults(self): + defaults = {} + updateFromDefaults(self.params, defaults) + + +class hierarchy: + def __init__(self, meshLevel, params, comm=None, + label=''): + self._isSetUp = False + self.connectorStart = None + self.connectorEnd = None + self.params = params + self.updateParamsFromDefaults(self.params) + self.comm = comm + self.label = label + + self.Timer = TimerManager(LOGGER, comm=self.comm, prefix=self.label+': ') + + if self.params['keepMeshes'] == 'all': + self.meshInformationPolicy = [KEEP_MESH]*self.params['noRef'] + [KEEP_MESH] + elif self.params['keepMeshes'] == 'last': + self.meshInformationPolicy = [DELETE_MESH]*self.params['noRef'] + [KEEP_MESH] + elif self.params['keepMeshes'] == 'none': + self.meshInformationPolicy = [DELETE_MESH]*self.params['noRef'] + [DELETE_MESH] + else: + raise NotImplementedError() + + if self.params['assemble'] == 'all': + if self.params['meshTransformation'] is None: + self.buildType = [SPARSITY_ONLY]*self.params['noRef'] + [FULL_BUILD] + else: + self.buildType = [FULL_BUILD]*self.params['noRef'] + [FULL_BUILD] + elif self.params['assemble'] == 'ALL': + self.buildType = [FULL_BUILD]*self.params['noRef'] + [FULL_BUILD] + elif self.params['assemble'] == 'last': + self.buildType = [RESTRICTION_PROLONGATION_ONLY]*self.params['noRef'] + [FULL_BUILD] + elif self.params['assemble'] == 'first+last': + self.buildType = [FULL_BUILD]+[RESTRICTION_PROLONGATION_ONLY]*(self.params['noRef']-1) + [FULL_BUILD] + elif self.params['assemble'] == 'dofmaps only': + self.buildType = [DOFMAPS]*(self.params['noRef']+1) + elif self.params['assemble'] == 'none': 
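+            # 'none' keeps the level structure but assembles no operators;
+            # this is used when levels are supplied externally, e.g. by
+            # hierarchy.fromLevelList further below.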
+            self.buildType = [NO_BUILD]*(self.params['noRef']+1)
+        elif self.params['assemble'] == 'restrictionProlongation':
+            self.buildType = [RESTRICTION_PROLONGATION_ONLY]*(self.params['noRef']+1)
+        else:
+            raise NotImplementedError()
+
+        if 'buildInteriorBL' in params and params['buildInteriorBL']:
+            meshLevel.interiorBL = boundaryLayer(meshLevel.mesh, params['depth']+1,
+                                                 afterRefinements=params['noRef'])
+
+        if meshLevel is not None:
+            self.meshLevels = [meshLevel]
+            self.algebraicLevels = [self.meshLevels[-1].getAlgebraicLevel(self.buildType[0])]
+        else:
+            self.meshLevels = []
+            self.algebraicLevels = []
+        self.multilevelAlgebraicOverlapManager = None
+
+    @staticmethod
+    def updateParamsFromDefaults(params):
+        defaults = {
+            'keepMeshes': 'last',
+            'assemble': 'all',
+            'depth': 0,
+            'noRef': 0,
+            'buildInteriorBL': False,
+            'debugOverlaps': False,
+            'meshTransformation': None,
+            'diffusivity': None,
+            'reaction': None,
+            'symmetric': False,
+            'reorder': False,
+            'buildMass': False,
+            'element': 'P1',
+            'commType': 'standard',
+            'keepAllDoFMaps': False,
+            'interiorBC': 'homogeneousNeumann',
+            'buildSurfaceMass': False,
+            'buildSurfaceStiffness': False,
+            'overlapMatvec': False
+        }
+        updateFromDefaults(params, defaults)
+
+    def refine(self, isLastLevel=False):
+        # refine mesh level
+        meshInformationPolicy = self.meshInformationPolicy[self.meshLevels[-1].levelNo+1-self.meshLevels[0].levelNo]
+        self.meshLevels.append(self.meshLevels[-1].refine(meshInformationPolicy))
+        self.meshLevels[-1].isLastLevel = isLastLevel
+
+        # build algebraic level
+        buildType = self.buildType[self.meshLevels[-1].levelNo-self.meshLevels[0].levelNo]
+        self.algebraicLevels.append(self.meshLevels[-1].getAlgebraicLevel(buildType))
+
+        # clean up unneeded data
+        if len(self.meshLevels) > 1:
+            self.meshLevels[-2].clean()
+            self.algebraicLevels[-2].clean()
+
+    def build(self):
+        for k in range(self.params['noRef']):
+            self.refine(k == self.params['noRef']-1)
+        self.algebraicLevels[-1].buildCoarserMatrices()
+
+        if self.algebraicLevels[-1].algebraicOverlaps is not None:
+            with self.Timer("Build multilevel overlaps"):
+                if False:
+                    raise NotImplementedError()
+
+                else:
+                    multLvlAlgOvManager = multilevelAlgebraicOverlapManager(self.comm)
+                    for lvl in range(len(self.algebraicLevels)):
+                        multLvlAlgOvManager.levels.append(self.algebraicLevels[lvl].algebraicOverlaps)
+                        if self.buildType[lvl] & DOFMAPS:
+                            multLvlAlgOvManager.levels[lvl].prepareDistribute()
+
+            if self.params['debugOverlaps']:
+                from PyNucleus_fem import solSin1D, solSin2D, solSin3D
+                for lvl in range(len(self.algebraicLevels)):
+                    if self.algebraicLevels[lvl].DoFMap is not None:
+                        dm = self.algebraicLevels[lvl].DoFMap
+                        if self.meshLevels[-1].mesh.dim == 1:
+                            x = dm.interpolate(solSin1D)
+                        elif self.meshLevels[-1].mesh.dim == 2:
+                            x = dm.interpolate(solSin2D)
+                        elif self.meshLevels[-1].mesh.dim == 3:
+                            x = dm.interpolate(solSin3D)
+                        else:
+                            raise NotImplementedError()
+                        y = np.zeros((dm.num_dofs), dtype=REAL)
+                        y[:] = x
+                        multLvlAlgOvManager.levels[lvl].distribute_py(y)
+                        multLvlAlgOvManager.levels[lvl].accumulate_py(y)
+                        assert np.linalg.norm(x-y) < 1e-9, (x, y)
+            self.multilevelAlgebraicOverlapManager = multLvlAlgOvManager
+        self._isSetUp = True
+
+    def isSetUp(self):
+        return self._isSetUp
+
+    def getLevelList(self, recurse=True):
+        if self.connectorStart is not None and recurse:
+            levels = self.connectorStart.getLevelList()
+        else:
+            levels = []
+        levelsMesh = [mL.getLevelDict() for mL in self.meshLevels]
+
levelsAlg = [aL.getLevelDict() for aL in self.algebraicLevels] + for i in range(len(levelsAlg)): + levelsAlg[i].update(levelsMesh[i]) + levelsAlg = levels[:-1]+levelsAlg + + if self.multilevelAlgebraicOverlapManager is not None: + levelsAlg[-1]['multilevelAlgebraicOverlapManager'] = self.multilevelAlgebraicOverlapManager + return levelsAlg + + @staticmethod + def fromLevelList(levels, params={}, comm=None, label=''): + hierarchy.updateParamsFromDefaults(params) + params['assemble'] = 'none' + meshLevels = [] + prevMeshLevel = None + algebraicLevels = [] + for lvl in levels: + meshLevels.append(meshLevel.fromLevelDict(lvl, params=params, previousLevel=prevMeshLevel, comm=comm, label=label)) + prevMeshLevel = meshLevels[-1] + algebraicLevels.append(algebraicLevel.fromLevelDict(prevMeshLevel, lvl)) + h = hierarchy(meshLevels[0], params, comm=comm) + h.meshLevels = meshLevels + h.algebraicLevels = algebraicLevels + h._isSetUp = True + return h + + def buildCollapsedRestrictionProlongation(self): + self.P = self.algebraicLevels[1].P + for lvlNo in range(2, len(self.algebraicLevels)): + self.P = self.algebraicLevels[lvlNo].P*self.P + self.P = self.P.to_csr_linear_operator() + self.R = self.P.transpose() + + def gatherInformation(self, root=0): + import platform + + subdomain = self.meshLevels[-1].mesh + A = self.algebraicLevels[-1].A + overlaps = self.multilevelAlgebraicOverlapManager + info = {} + info['numberVertices'] = self.comm.gather(subdomain.num_vertices, root=root) + info['numberCells'] = self.comm.gather(subdomain.num_cells, root=root) + info['numberDoFs'] = self.comm.gather(A.shape[0], root=root) + if self.comm.size > 1: + info['globalNumDoFs'] = overlaps.countDoFs() + info['numberSharedDoFs'] = self.comm.gather(overlaps.get_num_shared_dofs(unique=False), root=root) + info['maxCross'] = overlaps.levels[-1].max_cross + neighbors = [subdomainNo for subdomainNo in overlaps.levels[-1].overlaps] + else: + info['globalNumDoFs'] = A.shape[0] + info['numberSharedDoFs'] = [0] + info['maxCross'] = [0] + neighbors = [] + info['nnz'] = self.comm.gather(A.nnz, root=root) + hostname = platform.node() + info['hostnames'] = self.comm.gather(hostname, root=root) + info['neighbors'] = self.comm.gather(neighbors, root=root) + info['rank'] = self.comm.gather(MPI.COMM_WORLD.rank, root=root) + return info + + def __len__(self): + return len(self.meshLevels) + + def getSubHierarchy(self, numLevels): + assert 0 <= numLevels < len(self) + + h = hierarchy(None, self.params, self.comm, self.label) + h.connectorStart = self.connectorStart + h.connectorEnd = self.connectorEnd + h.meshLevels = self.meshLevels[:numLevels+1] + h.algebraicLevels = self.algebraicLevels[:numLevels+1] + h.multilevelAlgebraicOverlapManager = self.multilevelAlgebraicOverlapManager + return h + + + +class hierarchyManager(object): + def __init__(self, hierarchyDefs, connectorDefs, params, comm=None, doDeepCopy=True): + if doDeepCopy: + self.hierarchies = deepcopy(hierarchyDefs) + self.connectors = deepcopy(connectorDefs) + else: + self.hierarchies = hierarchyDefs + self.connectors = connectorDefs + self.params = params + if comm is None: + from PyNucleus_base.utilsCy import FakeComm + comm = FakeComm(0, 1) + self.comm = comm + for h in self.hierarchies: + updateFromDefaults(h['params'], self.params) + self._printRank = -1 + + def getPrintRank(self): + if self._printRank == -1: + self._printRank = self.comm.allreduce(self.comm.rank if not isinstance(self.builtHierarchies[-1], EmptyHierarchy) else self.comm.size, op=MPI.MIN) + return 
self._printRank
+
+    def setCommunicators(self):
+        for k in range(len(self.hierarchies)):
+            h = self.hierarchies[k]
+            if k == 0 or h['ranks'] != self.hierarchies[k-1]['ranks']:
+                if (self.comm is not None) and (len(h['ranks']) < self.comm.size):
+                    if self.comm.rank in h['ranks']:
+                        h['comm'] = self.comm.Split(0)
+                    else:
+                        self.comm.Split(MPI.UNDEFINED)
+                        h['comm'] = None
+                else:
+                    h['comm'] = self.comm
+            else:
+                h['comm'] = self.hierarchies[k-1]['comm']
+            if h['connectorEnd'] is not None:
+                self.connectors[h['connectorEnd']]['comm1'] = h['comm']
+            if h['connectorStart'] is not None:
+                self.connectors[h['connectorStart']]['comm2'] = h['comm']
+
+        for conn in sorted(self.connectors):
+            c = self.connectors[conn]
+            if 'comm1' in c:
+                if c['comm1'] is not None or c['comm2'] is not None:
+                    c['global_comm'] = self.comm.Split(0)
+                else:
+                    self.comm.Split(MPI.UNDEFINED)
+                    c['global_comm'] = None
+            else:
+                c['comm1'] = None
+                c['global_comm'] = c['comm2']
+
+    def buildHierarchies(self):
+        builtHierarchies = []
+        builtConnectors = {}
+        currentHierarchy = None
+        for k in range(len(self.hierarchies)):
+            h = self.hierarchies[k]
+            c_params = self.connectors[h['connectorStart']]
+            self.comm.Barrier()
+            if c_params['global_comm'] is not None:
+                connector = c_params['type'](c_params['global_comm'], c_params['comm1'], c_params['comm2'], currentHierarchy, **c_params['params'])
+                currentHierarchy = connector.getNewHierarchy(h)
+                builtConnectors[h['connectorStart']] = connector
+                builtHierarchies.append(currentHierarchy)
+            else:
+                currentHierarchy = EmptyHierarchy(h['params'], label=h['label'])
+                builtHierarchies.append(currentHierarchy)
+            if c_params['global_comm'] is not None:
+                connector.build()
+            if h['comm'] is not None:
+                currentHierarchy.build()
+        self.builtHierarchies = builtHierarchies
+        self.builtConnectors = builtConnectors
+
+    def setup(self):
+        self.setCommunicators()
+        self.buildHierarchies()
+
+    def display(self, info=False):
+        msg = []
+        if self.comm.rank == 0:
+            msg.append('{:30} {}'.format('', ' '.join([str(i) for i in range(self.comm.size)])))
+        h = self.hierarchies[0]
+        if h['connectorStart'] is not None:
+            conn = h['connectorStart']
+            t = self.comm.gather(self.connectors[conn]['global_comm'] is not None)
+            if self.comm.rank == 0:
+                msg.append('{:30} {}'.format(conn, ' '.join(["-" if tt else " " for tt in t])))
+        for k, h in enumerate(self.hierarchies):
+            t = self.comm.gather(h['comm'] is not None, root=min(h['ranks']))
+            if self.comm.rank == min(h['ranks']):
+                msg2 = []
+                for j in range(len(self.builtHierarchies[k].meshLevels)):
+                    l = self.builtHierarchies[k].meshLevels[j]
+                    msg2.append('{:30} {}'.format(l.levelID, ' '.join(["o" if tt else " " for tt in t])))
+                    if info:
+                        algLevel = self.builtHierarchies[k].algebraicLevels[j]
+                        msg2[-1] += ' '
+                        keys = algLevel.getKeys()
+                        msg2[-1] += ' '.join(key for key in keys if getattr(algLevel, key) is not None)
+                msg2 = '\n'.join(msg2)
+                self.comm.send(msg2, dest=0, tag=7767)
+            if self.comm.rank == 0:
+                s2 = self.comm.recv(source=min(h['ranks']), tag=7767)
+                msg.append(s2)
+            if h['connectorEnd'] is not None:
+                conn = h['connectorEnd']
+                if self.connectors[conn]['comm1'] is not None and self.connectors[conn]['comm2'] is None:
+                    # symbol = '┴'
+                    symbol = '-'
+                elif self.connectors[conn]['comm1'] is not None and self.connectors[conn]['comm2'] is not None:
+                    # symbol = '┼'
+                    symbol = '-'
+                elif self.connectors[conn]['comm1'] is None and self.connectors[conn]['comm2'] is not None:
+                    # symbol = '┬'
+                    symbol = '-'
+                else:
+                    symbol = ' '
+                t = self.comm.gather(symbol)
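+                # One symbol was gathered from every rank; the loop below
+                # joins adjacent active ranks with '-' so that each connector
+                # renders as a single horizontal bar in the hierarchy diagram.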
+ if self.comm.rank == 0: + s = t[0] + for i in range(1, len(t)): + if t[i-1] != ' ' and t[i] != ' ': + # s += '─' + t[i] + s += '-' + t[i] + else: + s += ' ' + t[i] + msg.append('{:30} {}'.format(conn, s)) + if self.comm.rank == 0: + LOGGER.info('\n' + '\n'.join(msg)) + + def getLevelList(self): + k = len(self.builtHierarchies)-1 + while self.builtHierarchies[k] is None: + k -= 1 + return self.builtHierarchies[k].getLevelList() + + @staticmethod + def fromLevelList(levels, params={}, comm=None): + # TODO: Assumes single rank so far + if comm is None: + comm = MPI.COMM_SELF + comm = None + hierarchyDefs = [{'label': 'fine', + 'ranks': set([0]), + 'connectorStart': None, + 'connectorEnd': None, + 'params': {'solver': 'LU'}}] + connectorDefs = {} + hM = hierarchyManager(hierarchyDefs, connectorDefs, params, comm) + hM.setCommunicators() + hM.builtHierarchies = [hierarchy.fromLevelList(levels, params=hierarchyDefs[0]['params'], comm=comm, label=hierarchyDefs[0]['label'])] + hM.builtConnectors = {} + return hM + + def getComm(self): + k = len(self.builtHierarchies)-1 + while self.builtHierarchies[k] is None: + k -= 1 + return self.builtHierarchies[k].comm + + def hierarchyIsSetUp(self, label): + for h in self.builtHierarchies: + if h is not None and h.label == label: + return h.isSetUp() + return False + + def getHierarchy(self, label): + for h in self.builtHierarchies: + if h is not None and h.label == label: + return h + return None + + def __getitem__(self, label): + return self.getHierarchy(label) + + def getSubManager(self, label=None): + if label is not None: + for k, h in enumerate(self.builtHierarchies): + if h is not None and h.label == label: + subManager = hierarchyManager(self.hierarchies[:k+1], self.connectors, self.params, self.comm, doDeepCopy=False) + subManager.builtHierarchies = self.builtHierarchies[:k+1] + subManager.builtConnectors = self.builtConnectors + return subManager + raise Exception() + else: + k = len(self.hierarchies)-2 + subManager = hierarchyManager(self.hierarchies[:k+1], self.connectors, self.params, self.comm, doDeepCopy=False) + subManager.builtHierarchies = self.builtHierarchies[:k+1] + subManager.builtConnectors = self.builtConnectors + return subManager + + def collectInformation(self, hierarchies, root=-1): + if root == -1: + root = self.getPrintRank() + info = {} + tag = 263 + req = [] + for label in hierarchies: + if not isinstance(self[label], EmptyHierarchy): + i = self[label].gatherInformation(root=0) + if self[label].comm.rank == 0: + req.append(self.comm.isend(i, dest=root, tag=tag)) + if self.comm.rank == root: + info[label] = self.comm.recv(source=MPI.ANY_SOURCE, tag=tag) + tag += 1 + MPI.Request.Waitall(req) + return info + + def getSubHierarchy(self, numFineLevels): + hM = hierarchyManager(self.hierarchies, self.connectors, self.params, self.comm, doDeepCopy=False) + hM.builtHierarchies = self.builtHierarchies[:-1] + hM.builtConnectors = self.builtConnectors + h = self.builtHierarchies[-1].getSubHierarchy(numFineLevels) + hM.builtHierarchies.append(h) + + return hM diff --git a/multilevelSolver/PyNucleus_multilevelSolver/levels.py b/multilevelSolver/PyNucleus_multilevelSolver/levels.py new file mode 100644 index 0000000..34f70d3 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/levels.py @@ -0,0 +1,577 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import logging +import numpy as np +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from PyNucleus_fem.DoFMaps import P1_DoFMap, P2_DoFMap +from PyNucleus_fem import str2DoFMap +from PyNucleus_base.myTypes import REAL +from PyNucleus_base.linear_operators import CSR_LinearOperator +from PyNucleus_base.linear_operators import SSS_LinearOperator +from . restrictionProlongation import buildRestrictionProlongation +from PyNucleus_base import TimerManager +from PyNucleus_base.ip_norm import ip_serial, norm_serial, ip_distributed, norm_distributed +from PyNucleus_fem import (assembleDrift, + assembleMatrix, + mass_0d_in_1d_sym_P1, + mass_1d_in_2d_sym_P1, + mass_1d_in_2d_sym_P2, + DistributedLinearOperator, + CSR_DistributedLinearOperator, + function, + DIRICHLET, NEUMANN, HOMOGENEOUS_DIRICHLET, HOMOGENEOUS_NEUMANN, boundaryConditions) +from PyNucleus_fem.mesh import (PHYSICAL, NO_BOUNDARY, INTERIOR_NONOVERLAPPING, INTERIOR) +LOGGER = logging.getLogger(__name__) + +# what should be built +DOFMAPS = 1 +RESTRICTION_PROLONGATION = 2 +SPARSITY_PATTERN = 4 +OVERLAPS = 8 +ASSEMBLY = 16 + +NO_BUILD = 0 +DOFMAPS_ONLY = DOFMAPS +RESTRICTION_PROLONGATION_ONLY = DOFMAPS + RESTRICTION_PROLONGATION +SPARSITY_ONLY = DOFMAPS + OVERLAPS + RESTRICTION_PROLONGATION + SPARSITY_PATTERN +SINGLE_LEVEL = DOFMAPS + OVERLAPS + ASSEMBLY +FULL_BUILD = DOFMAPS + OVERLAPS + RESTRICTION_PROLONGATION + ASSEMBLY + +# What information is retained in meshLevels +DELETE_MESH = 0 +KEEP_MESH = 1 + + +class level: + def __init__(self, params, previousLevel=None, + comm=None, label='', startLevelNo=0, + isLastLevel=False): + self.params = params + self.previousLevel = previousLevel + if previousLevel is not None: + assert not previousLevel.isLastLevel + self.startLevelNo = startLevelNo + self.nextLevel = None + self.comm = comm + self.label = label + self.isLastLevel = isLastLevel + + label = '{}: '.format(self.levelID) + self.Timer = TimerManager(LOGGER, + comm=self.comm, prefix=label) + + def getLevelNo(self): + if self.previousLevel is None: + return self.startLevelNo + else: + return self.previousLevel.getLevelNo()+1 + + levelNo = property(fget=getLevelNo) + + def getLevelID(self): + if len(self.label) > 0: + label = '{} {}'.format(self.label, self.levelNo) + else: + label = 'Level {}'.format(self.levelNo) + return label + + levelID = property(fget=getLevelID) + + def __repr__(self): + if len(self.label) > 0: + label = '{} {}'.format(self.label, self.levelNo) + else: + label = '{}'.format(self.levelNo) + s = '{} {}\n'.format(self.__class__.__name__, label) + return s + + +###################################################################### + + +class meshLevel(level): + def __init__(self, mesh, params, previousLevel=None, + interfaces=None, meshOverlaps=None, + interiorBL=None, + comm=None, + label='', meshInformationPolicy=KEEP_MESH, startLevelNo=0, + isLastLevel=False): + super(meshLevel, self).__init__(params, previousLevel, comm, + label, startLevelNo, isLastLevel) + self.mesh = mesh + self.global_mesh = None + self.interfaces = interfaces + if self.interfaces is not None and self.params['debugOverlaps']: + self.interfaces.validate(self.mesh, self.comm, label='Mesh interface \'{} {}\''.format(self.label, 
self.levelNo)) + self.meshOverlaps = meshOverlaps + if self.meshOverlaps is not None and self.params['debugOverlaps']: + self.meshOverlaps.check(self.mesh, self.comm, label='Mesh overlap \'{} {}\''.format(self.label, self.levelNo)) + self.interiorBL = interiorBL + self.algebraicLevel = None + self.meshInformationPolicy = meshInformationPolicy + self._h = None + self.algebraicLevelType = algebraicLevel + + def setAlgebraicLevelType(self, algLevelType): + self.algebraicLevelType = algLevelType + + def refine(self, meshInformationPolicy): + with self.Timer('Refined mesh'): + newMesh, self.lookup = self.mesh.refine(returnLookup=True) + if self.params['meshTransformation'] is not None: + self.params['meshTransformation'](newMesh, self.lookup) + if self.interfaces is not None: + with self.Timer('Refined interfaces'): + self.interfaces.refine(newMesh) + if self.meshOverlaps is not None: + with self.Timer('Refined mesh overlaps'): + meshOverlaps = self.meshOverlaps.copy() + meshOverlaps.refine(newMesh) + if self.meshOverlaps is None: + meshOverlaps = None + if self.interiorBL is not None: + with self.Timer('Refined boundary layers'): + self.interiorBL.refine(newMesh) + newMeshLevel = meshLevel(newMesh, self.params, self, self.interfaces, meshOverlaps, self.interiorBL, self.comm, self.label, meshInformationPolicy) + if hasattr(self, 'numberCellsBeforeExtension'): + newMeshLevel.numberCellsBeforeExtension = 2**self.mesh.dim * self.numberCellsBeforeExtension + if hasattr(self, 'numberCellsLastLayer'): + newMeshLevel.numberCellsLastLayer = 2**self.mesh.dim * self.numberCellsLastLayer + newMeshLevel.setAlgebraicLevelType(self.algebraicLevelType) + self.nextLevel = newMeshLevel + return newMeshLevel + + def copy(self): + newMeshLevel = meshLevel(self.mesh, self.params, self, self.interfaces, self.meshOverlaps, self.interiorBL, self.comm, self.label, self.meshInformationPolicy) + return newMeshLevel + + def getIsDistributed(self): + return self.interfaces is not None + + isDistributed = property(fget=getIsDistributed) + + def getAlgebraicLevel(self, buildType): + self.algebraicLevel = self.algebraicLevelType(self, buildType) + return self.algebraicLevel + + def clean(self): + if self.meshInformationPolicy == DELETE_MESH: + self.mesh = None + self.meshOverlaps = None + self.interfaces = None + + def getLevelDict(self): + lvl = {} + if self.mesh is not None: + lvl['mesh'] = self.mesh + if self.interfaces is not None: + lvl['interfaces'] = self.interfaces + if self.meshOverlaps is not None: + lvl['meshOverlaps'] = self.meshOverlaps + return lvl + + @staticmethod + def fromLevelDict(lvl, params={}, previousLevel=None, comm=None, startLevelNo=0, label=''): + alvl = meshLevel(None, params, previousLevel, comm=comm, startLevelNo=startLevelNo, label=label) + if 'mesh' in lvl: + alvl.mesh = lvl['mesh'] + if 'interfaces' in lvl: + alvl.interfaces = lvl['interfaces'] + if 'meshOverlaps' in lvl: + alvl.meshOverlaps = lvl['meshOverlaps'] + return alvl + + def __repr__(self): + s = super(meshLevel, self).__repr__() + if self.mesh is not None: + s += ' mesh: '+self.mesh.__repr__() + if self.interfaces is not None: + s += self.interfaces.__repr__() + return s + + def getH(self): + if self._h is None: + h = self.mesh.h + if self.comm is not None: + self._h = self.comm.allreduce(h, op=MPI.MAX) + else: + self._h = h + return self._h + + h = property(fget=getH) + + +###################################################################### +
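+# Typical construction of a level chain (sketch; `mesh`, `params`, `comm` and
+# `numRefinements` are assumed to come from the surrounding driver code):
+#
+#     lvl = meshLevel(mesh, params, comm=comm, label='fine')
+#     for _ in range(numRefinements):
+#         lvl = lvl.refine(KEEP_MESH)
+#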
+class algebraicLevelBase(level): + def __init__(self, meshLevel, buildType): + if meshLevel.previousLevel is not None: + previousLevel = meshLevel.previousLevel.algebraicLevel + else: + previousLevel = None + super(algebraicLevelBase, self).__init__(meshLevel.params, previousLevel, meshLevel.comm, meshLevel.label, meshLevel.levelNo, meshLevel.isLastLevel) + self.meshLevel = meshLevel + self.P = None + self.R = None + self.DoFMap = None + self.algebraicOverlaps = None + self.build(buildType) + + def build(self, buildType): + + buildNeumann = self.params.get('buildNeumann', False) + element = self.params['element'] + reorder = self.params['reorder'] + commType = self.params['commType'] + DoFMap_type = str2DoFMap(element) + + # Set DoFMap + if buildType & DOFMAPS: + if 'tag' in self.params: + self.DoFMap = DoFMap_type(self.meshLevel.mesh, self.params['tag']) + elif 'boundaryCondition' in self.params: + if self.params['boundaryCondition'] in (HOMOGENEOUS_NEUMANN, DIRICHLET, NEUMANN): + self.DoFMap = DoFMap_type(self.meshLevel.mesh, NO_BOUNDARY) + elif self.params['boundaryCondition'] == HOMOGENEOUS_DIRICHLET: + self.DoFMap = DoFMap_type(self.meshLevel.mesh, PHYSICAL) + else: + raise NotImplementedError(boundaryConditions[self.params['boundaryCondition']]) + else: + if self.isLastLevel and self.params['interiorBC'] == 'homogeneousDirichlet' and hasattr(self.meshLevel, 'numberCellsLastLayer'): + self.DoFMap = DoFMap_type(self.meshLevel.mesh, [PHYSICAL, + INTERIOR], + skipCellsAfter=self.meshLevel.mesh.num_cells-self.meshLevel.numberCellsLastLayer) + elif not hasattr(self.meshLevel, 'numberCellsLastLayer') or not self.isLastLevel or self.params['interiorBC'] == 'homogeneousNeumann': + self.DoFMap = DoFMap_type(self.meshLevel.mesh, [PHYSICAL]) + else: + raise NotImplementedError() + if buildNeumann: + self.DoFMapNeumann = DoFMap_type(self.meshLevel.mesh, [PHYSICAL]) + + if not reorder: + if buildType & OVERLAPS: + # build algebraic overlaps + if self.meshLevel.meshOverlaps is not None: + with self.Timer('Build algebraic overlaps of type \'{}\''.format(commType)): + self.algebraicOverlaps = self.meshLevel.meshOverlaps.getDoFs(self.meshLevel.mesh, self.DoFMap, commType, + allowInteriorBoundary=self.params['interiorBC'] == 'homogeneousNeumann' or not self.isLastLevel) + if self.params['debugOverlaps']: + self.algebraicOverlaps.check(mesh=self.meshLevel.mesh, + dm=self.DoFMap, + label='algebraicOverlaps in \'{} {}\''.format(self.label, self.levelNo), + interfaces=self.meshLevel.meshOverlaps) + elif self.meshLevel.interfaces is not None: + with self.Timer('Build algebraic overlaps of type \'{}\''.format(commType)): + self.algebraicOverlaps = self.meshLevel.interfaces.getDoFs(self.meshLevel.mesh, self.DoFMap, commType) + if self.params['debugOverlaps']: + self.algebraicOverlaps.check(mesh=self.meshLevel.mesh, + dm=self.DoFMap, + label='algebraicOverlaps in \'{} {}\''.format(self.label, self.levelNo), + interfaces=self.meshLevel.interfaces) + + if self.algebraicOverlaps is not None: + self.inner = ip_distributed(self.algebraicOverlaps, 0) + self.norm = norm_distributed(self.algebraicOverlaps, 0) + else: + self.inner = ip_serial() + self.norm = norm_serial() + if self.DoFMap is not None: + self.DoFMap.set_ip_norm(self.inner, self.norm) + + if (buildType & RESTRICTION_PROLONGATION) and (self.previousLevel is not None): + assert (self.previousLevel.DoFMap is not None) and (self.DoFMap is not None) + # use reorder here, since reorder=False bugs out + (self.R, + self.P) = buildRestrictionProlongation(self.previousLevel.DoFMap, + self.DoFMap) + + def buildCoarserMatrices(self): + """ + Recursively build matrices on
coarser levels + """ + if self.previousLevel is not None: + self.previousLevel.buildCoarserMatrices() + + def clean(self): + if not self.params['keepAllDoFMaps'] and not self.previousLevel is None: + self.DoFMap = None + + @classmethod + def getKeys(cls): + return ['P', 'R', 'DoFMap', 'algebraicOverlaps'] + + def getLevelDict(self): + lvl = {} + for key in self.getKeys(): + if getattr(self, key) is not None: + lvl[key] = getattr(self, key) + return lvl + + @classmethod + def fromLevelDict(cls, meshLevel, lvl): + alvl = algebraicLevel(meshLevel, NO_BUILD) + for key in cls.getKeys(): + if key in lvl: + setattr(alvl, key, lvl[key]) + return alvl + + @property + def accumulateOperator(self): + if self.algebraicOverlaps is not None: + return self.algebraicOverlaps.getAccumulateOperator() + else: + return None + + +class algebraicLevel(algebraicLevelBase): + def __init__(self, meshLevel, buildType): + self.A = None + self.S = None + self.D = None + self.M = None + self.surface_mass = None + self.surface_stiffness = None + super(algebraicLevel, self).__init__(meshLevel, buildType) + + def build(self, buildType): + super(algebraicLevel, self).build(buildType) + + diffusivity = self.params['diffusivity'] + reaction = self.params['reaction'] + symmetric = self.params['symmetric'] + element = self.params['element'] + reorder = self.params['reorder'] + commType = self.params['commType'] + buildMass = self.params['buildMass'] or reaction is not None + driftCoeff = self.params.get('driftCoeff', None) + buildNeumann = self.params.get('buildNeumann', False) + + if buildType & SPARSITY_PATTERN: + # set up sparsity patterns only + DoFMap = self.DoFMap + mesh = self.meshLevel.mesh + self.fullyAssembled = False + with self.Timer('Prepared sparsity patterns'): + self.S = DoFMap.buildSparsityPattern(mesh.cells, + symmetric=symmetric, + reorder=reorder) + if driftCoeff is not None: + self.D = self.S.copy() + if buildMass: + self.M = self.S.copy() + + if buildType & ASSEMBLY: + # fully build matrices + DoFMap = self.DoFMap + mesh = self.meshLevel.mesh + self.fullyAssembled = True + with self.Timer('Assembled matrices'): + self.S = DoFMap.assembleStiffness(sss_format=symmetric, + reorder=reorder, + diffusivity=diffusivity) + if buildMass: + self.M = DoFMap.assembleMass(sss_format=symmetric, + reorder=reorder) + if driftCoeff is not None: + self.D = assembleDrift(mesh, + DoFMap, + driftCoeff) + if buildNeumann: + self.neumannA = self.DoFMapNeumann.assembleStiffness(sss_format=symmetric, + reorder=reorder, + diffusivity=diffusivity) + if isinstance(reaction, (float, REAL)): + self.A = self.S.copy() + for j in range(self.A.data.shape[0]): + self.A.data[j] += reaction*self.M.data[j] + if isinstance(self.A, SSS_LinearOperator): + for j in range(self.A.num_rows): + self.A.diagonal[j] += reaction*self.M.diagonal[j] + elif isinstance(reaction, function): + self.A = self.S.copy() + dm = self.DoFMap + c = dm.interpolate(reaction) + for k in range(dm.num_dofs): + for j in range(self.A.indptr[k], self.A.indptr[k+1]): + self.A.data[j] += c[k]*self.M.data[j] + if isinstance(self.A, SSS_LinearOperator): + for k in range(self.A.num_rows): + self.A.diagonal[k] += c[k]*self.M.diagonal[k] + elif reaction is None: + self.A = self.S + else: + raise NotImplementedError() + + # surface mass matrix + if self.isLastLevel and self.params['buildSurfaceMass']: + with self.Timer('Build surface mass matrix'): + if self.params['depth'] > 0: + surface = mesh.get_surface_mesh(INTERIOR) + else: + surface = 
mesh.get_surface_mesh(INTERIOR_NONOVERLAPPING) + from PyNucleus_fem import assembleSurfaceMass + self.surface_mass = assembleSurfaceMass(mesh, surface, + self.DoFMap, + sss_format=symmetric, + reorder=reorder) + # ToDo: Don't just copy the sparsity pattern, this is a big waste of memory + # data = np.zeros((self.A.nnz), dtype=REAL) + # if symmetric: + # diagonal = np.zeros(self.A.shape[0], dtype=REAL) + # M = SSS_LinearOperator(self.A.indices, self.A.indptr, data, diagonal) + # else: + # M = CSR_LinearOperator(self.A.indices, self.A.indptr, data) + # if element == 'P1': + # dmS = P1_DoFMap(mesh, [PHYSICAL]) + # dmS.cells = surface.cells + # elif element == 'P2': + # assert False, "Surface mass matrix not implemented for P2." + # dmS = P2_DoFMap(mesh, [PHYSICAL]) + # cellOrig = dmS.mesh.cells + # dmS.mesh.cells = surface.cells + # if mesh.dim == 1 and element == 'P1': + # dmS.dofs_per_element = 1 + # self.surface_mass = assembleMatrix(surface, dmS, mass_0d_in_1d_sym_P1(), A=M, + # sss_format=symmetric, reorder=reorder) + # elif mesh.dim == 2 and element == 'P1': + # dmS.dofs_per_element = 2 + # self.surface_mass = assembleMatrix(surface, dmS, mass_1d_in_2d_sym_P1(), A=M, + # sss_format=symmetric, reorder=reorder) + # elif mesh.dim == 2 and element == 'P2': + # dmS.dofs_per_element = 3 + # self.surface_mass = assembleMatrix(surface, dmS, mass_1d_in_2d_sym_P2(), A=M, + # sss_format=symmetric, reorder=reorder) + # dmS.mesh.cells = cellOrig + # else: + # raise NotImplementedError() + + # surface stiffness matrix + if self.isLastLevel and self.params['buildSurfaceStiffness']: + with self.Timer('Build surface stiffness matrix'): + if self.params['depth'] > 0: + surface = mesh.get_surface_mesh(INTERIOR) + else: + surface = mesh.get_surface_mesh(INTERIOR_NONOVERLAPPING) + # ToDo: Don't just copy the sparsity pattern, this is a big waste of memory + data = np.zeros((self.A.nnz), dtype=REAL) + if symmetric: + diagonal = np.zeros(self.A.shape[0], dtype=REAL) + AS = SSS_LinearOperator(self.A.indices, self.A.indptr, data, diagonal) + else: + AS = CSR_LinearOperator(self.A.indices, self.A.indptr, data) + assert element == 'P1', "Surface stiffness matrix only implemented for P1" + dmS = P1_DoFMap(mesh, [PHYSICAL]) + dmS.cells = surface.cells + if mesh.dim == 2: + dmS.dofs_per_element = 2 + self.surface_stiffness = assembleMatrix(surface, dmS, stiffness_1d_in_2d_sym(), A=AS, + sss_format=symmetric, reorder=reorder) + else: + raise NotImplementedError() + + if reorder and buildType & OVERLAPS: + # build algebraic overlaps + if self.meshLevel.meshOverlaps is not None: + with self.Timer('Build algebraic overlaps of type \'{}\''.format(commType)): + self.algebraicOverlaps = self.meshLevel.meshOverlaps.getDoFs(self.meshLevel.mesh, self.DoFMap, commType, + allowInteriorBoundary=self.params['interiorBC'] == 'homogeneousNeumann' or not self.isLastLevel) + if self.params['debugOverlaps']: + self.algebraicOverlaps.check(mesh=self.meshLevel.mesh, + dm=self.DoFMap, + label='algebraicOverlaps in \'{} {}\''.format(self.label, self.levelNo), + interfaces=self.meshLevel.meshOverlaps) + elif self.meshLevel.interfaces is not None: + with self.Timer('Build algebraic overlaps of type \'{}\''.format(commType)): + self.algebraicOverlaps = self.meshLevel.interfaces.getDoFs(self.meshLevel.mesh, self.DoFMap, commType) + if self.params['debugOverlaps']: + self.algebraicOverlaps.check(mesh=self.meshLevel.mesh, + dm=self.DoFMap, + label='algebraicOverlaps in \'{} {}\''.format(self.label, self.levelNo), + interfaces=self.meshLevel.interfaces)
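+ # Note: the overlap construction above and the transfer-operator
+ # construction below mirror the non-reorder path in algebraicLevelBase.build;
+ # with reorder=True they run only after assembly, presumably because
+ # assembly fixes the reordered DoF numbering first.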
+ + if reorder and (buildType & RESTRICTION_PROLONGATION) and (self.previousLevel is not None): + assert (self.previousLevel.DoFMap is not None) and (self.DoFMap is not None) + # use reorder here, since reorder=False bugs out + (self.R, + self.P) = buildRestrictionProlongation(self.previousLevel.DoFMap, + self.DoFMap) + + def buildCoarserMatrices(self): + """ + Recursively build matrices on coarser levels + """ + if self.previousLevel is None: + return + if self.S is not None and self.P is not None and self.previousLevel.S is not None and not self.previousLevel.fullyAssembled: + assert self.P.shape[0] == self.S.shape[0], (self.R.shape[1], self.S.shape[0]) + assert self.P.shape[1] == self.previousLevel.S.shape[0] + with self.Timer('Restrict stiffness matrix'): + self.P.restrictMatrix(self.S, self.previousLevel.S) + if self.previousLevel.A is None: + self.previousLevel.A = self.previousLevel.S + if self.D is not None and self.P is not None and self.previousLevel.D is not None and not self.previousLevel.fullyAssembled: + assert self.P.shape[0] == self.D.shape[0] + assert self.P.shape[1] == self.previousLevel.D.shape[0] + with self.Timer('Restrict drift matrix'): + self.P.restrictMatrix(self.D, self.previousLevel.D) + if self.M is not None and self.P is not None and self.previousLevel.M is not None and not self.previousLevel.fullyAssembled: + assert self.P.shape[0] == self.M.shape[0] + assert self.P.shape[1] == self.previousLevel.M.shape[0] + with self.Timer('Restrict mass matrix'): + self.P.restrictMatrix(self.M, self.previousLevel.M) + if self.M is not None and self.A is not None and self.R is not None and self.previousLevel.A is not None and self.previousLevel.M is not None: + reaction = self.params['reaction'] + if isinstance(reaction, (float, REAL)): + for j in range(self.previousLevel.A.data.shape[0]): + self.previousLevel.A.data[j] += reaction*self.previousLevel.M.data[j] + if isinstance(self.previousLevel.A, SSS_LinearOperator): + for j in range(self.previousLevel.A.num_rows): + self.previousLevel.A.diagonal[j] += reaction*self.previousLevel.M.diagonal[j] + elif isinstance(reaction, function): + dm = self.previousLevel.DoFMap + c = dm.interpolate(reaction) + for k in range(dm.num_dofs): + for j in range(self.previousLevel.A.indptr[k], self.previousLevel.A.indptr[k+1]): + self.previousLevel.A.data[j] += c[k]*self.previousLevel.M.data[j] + if isinstance(self.previousLevel.A, SSS_LinearOperator): + for k in range(self.previousLevel.A.num_rows): + self.previousLevel.A.diagonal[k] += c[k]*self.previousLevel.M.diagonal[k] + elif reaction is None: + pass + else: + raise NotImplementedError() + if self.previousLevel is not None: + self.previousLevel.fullyAssembled = True + self.previousLevel.buildCoarserMatrices() + + @classmethod + def getKeys(cls): + return algebraicLevelBase.getKeys() + ['A', 'S', 'D', 'M', 'surface_mass', 'surface_stiffness'] + + def getLevelDict(self): + lvl = super(algebraicLevel, self).getLevelDict() + if hasattr(self, 'neumannA'): + lvl['neumannA'] = self.neumannA + return lvl + + def getGlobalA(self, doDistribute=False, keepDistributedResult=False): + if self.A is not None: + if self.algebraicOverlaps is not None: + if isinstance(self.A, CSR_LinearOperator): + return CSR_DistributedLinearOperator(self.A, self.algebraicOverlaps, + doDistribute=doDistribute, + keepDistributedResult=keepDistributedResult) + else: + return DistributedLinearOperator(self.A, self.algebraicOverlaps, + doDistribute=doDistribute, +
keepDistributedResult=keepDistributedResult) + else: + return self.A + else: + return None diff --git a/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pxd b/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pxd new file mode 100644 index 0000000..934eac8 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pxd @@ -0,0 +1,17 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cpdef enum CycleType: + V = 1, + W = 2, + FMG_V = 666 + FMG_W = 667 + + +include "multigrid_decl_REAL.pxi" +include "multigrid_decl_COMPLEX.pxi" diff --git a/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pyx b/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pyx new file mode 100644 index 0000000..b45333e --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/multigrid.pyx @@ -0,0 +1,10 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +include "multigrid_REAL.pxi" +include "multigrid_COMPLEX.pxi" diff --git a/multilevelSolver/PyNucleus_multilevelSolver/multigrid_decl_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/multigrid_decl_{SCALAR}.pxi new file mode 100644 index 0000000..d342583 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/multigrid_decl_{SCALAR}.pxi @@ -0,0 +1,31 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from mpi4py cimport MPI +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t +from PyNucleus_base.performanceLogger cimport PLogger, FakePLogger +from PyNucleus_base.solvers cimport {SCALAR_label_lc_}solver, {SCALAR_label_lc_}iterative_solver +from PyNucleus_fem.algebraicOverlaps cimport multilevelAlgebraicOverlapManager + + +cdef class {SCALAR_label}multigrid({SCALAR_label_lc_}iterative_solver): + cdef: + object hierarchyManager + public multilevelAlgebraicOverlapManager overlap + public CycleType cycle + public {SCALAR_label_lc_}solver coarse_solver + public MPI.Comm comm + public list levels + REAL_t _tol + cdef void solveOnLevel(self, int lvlNo, {SCALAR}_t[::1] b, {SCALAR}_t[::1] x, BOOL_t simpleResidual=*) + cdef int solve(self, + {SCALAR}_t[::1] b, + {SCALAR}_t[::1] x) except -1 + cpdef int solveFMG(self, + {SCALAR}_t[::1] b, + {SCALAR}_t[::1] x) diff --git a/multilevelSolver/PyNucleus_multilevelSolver/multigrid_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/multigrid_{SCALAR}.pxi new file mode 100644 index 0000000..18a06e1 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/multigrid_{SCALAR}.pxi @@ -0,0 +1,516 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from tabulate import tabulate +from PyNucleus_base import INDEX, REAL, COMPLEX, uninitialized +from PyNucleus_base.performanceLogger cimport FakeTimer +from PyNucleus_base.ip_norm cimport (ip_serial, ip_distributed, + norm_serial, norm_distributed) +from PyNucleus_base.blas cimport assign, update +from PyNucleus_base.solvers cimport {SCALAR_label_lc_}preconditioner +from PyNucleus_base.linear_operators cimport {SCALAR_label}LinearOperator, CSR_LinearOperator, wrapRealToComplexCSR + +from . coarseSolvers cimport {SCALAR_label}coarseSolver +from . smoothers cimport ({SCALAR_label}smoother, {SCALAR_label}jacobiSmoother, {SCALAR_label}blockJacobiSmoother, + {SCALAR_label}gmresSmoother, + + iluSmoother, flexibleSmoother) +from . smoothers import (gaussSeidelSmoother, + sorSmoother, ssorSmoother) +from . 
hierarchies import hierarchyManager + + +cdef class {SCALAR_label}levelMemory: + cdef: + INDEX_t size + BOOL_t coarsest, finest, allocated + public {SCALAR}_t[::1] rhs + public {SCALAR}_t[::1] sol + public {SCALAR}_t[::1] temp + public {SCALAR}_t[::1] D + public {SCALAR_label}LinearOperator A + public {SCALAR_label}LinearOperator R + public {SCALAR_label}LinearOperator P + public {SCALAR_label}smoother smoother + public tuple smootherType + + def __init__(self, INDEX_t size, BOOL_t coarsest, BOOL_t finest): + self.size = size + self.coarsest = coarsest + self.finest = finest + self.rhs = None + self.sol = None + self.temp = None + self.D = None + self.A = None + self.R = None + self.P = None + self.smoother = None + self.allocated = False + + cdef void allocate(self): + if not self.allocated: + if self.coarsest: + self.rhs = uninitialized(self.size, dtype={SCALAR}) + self.sol = uninitialized(self.size, dtype={SCALAR}) + elif self.finest: + self.temp = uninitialized(self.size, dtype={SCALAR}) + else: + self.rhs = uninitialized(self.size, dtype={SCALAR}) + self.sol = uninitialized(self.size, dtype={SCALAR}) + self.temp = uninitialized(self.size, dtype={SCALAR}) + self.allocated = True + + def __getitem__(self, str key): + if key in ('A', 'R', 'P', 'D'): + return getattr(self, key) + else: + raise NotImplementedError(key) + + def __setitem__(self, str key, {SCALAR_label}LinearOperator value): + if key in ('A', 'R', 'P', 'D'): + setattr(self, key, value) + else: + raise NotImplementedError(key) + + +###################################################################### +# Multi-level solvers + +cdef class {SCALAR_label}multigrid({SCALAR_label_lc_}iterative_solver): + def __init__(self, + myHierarchyManager, + smoother=('jacobi', {'omega': 2.0/3.0}), + BOOL_t logging=False, + **kwargs): + cdef: + INDEX_t numLevels, length + list levels + dict lvlDict + {SCALAR_label}levelMemory lvl + if not isinstance(myHierarchyManager, hierarchyManager): + myHierarchyManager = hierarchyManager.fromLevelList(myHierarchyManager, comm=None) + self.hierarchyManager = myHierarchyManager + self.PLogger = PLogger() if logging else FakePLogger() + + fineHierarchy = myHierarchyManager.builtHierarchies[-1] + levels = fineHierarchy.getLevelList(recurse=False) + + numLevels = len(levels) + self.levels = [] + lvlDict = levels[0] + try: + # A on level 0 might be a global matrix; that won't work in a parallel setting + length = lvlDict['R'].shape[0] + except KeyError: + length = lvlDict['A'].shape[0] + lvl = {SCALAR_label}levelMemory(length, True, False) + lvl.A = lvlDict['A'] + self.levels.append(lvl) + for lvlNo in range(1, numLevels): + lvlDict = levels[lvlNo] + length = lvlDict['A'].shape[0] + lvl = {SCALAR_label}levelMemory(length, False, lvlNo == numLevels-1) + lvl.A = lvlDict['A'] + if '{SCALAR}' == 'COMPLEX': + if isinstance(lvlDict['R'], CSR_LinearOperator): + lvl.R = wrapRealToComplexCSR(lvlDict['R']) + else: + lvl.R = lvlDict['R'] + if isinstance(lvlDict['P'], CSR_LinearOperator): + lvl.P = wrapRealToComplexCSR(lvlDict['P']) + else: + lvl.P = lvlDict['P'] + else: + lvl.R = lvlDict['R'] + lvl.P = lvlDict['P'] + self.levels.append(lvl) + + {SCALAR_label_lc_}iterative_solver.__init__(self, lvl.A) + self.maxIter = 50 + + if 'multilevelAlgebraicOverlapManager' in levels[-1]: + overlap = levels[numLevels-1]['multilevelAlgebraicOverlapManager'] + if overlap.comm.size == 1: + overlap = None + else: + overlap = None + self.overlap = overlap + + # set norm and inner product + if overlap:
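+ # In the distributed case the residual norm must account for DoFs
+ # shared between ranks, hence the overlap-aware inner product / norm
+ # (cf. ip_distributed / norm_distributed imported above).
+ 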
self.setOverlapNormInner(self.overlap, numLevels-1) + self.comm = self.overlap.comm + + self.cycle = V + + if not isinstance(smoother, list): + smoother = [smoother]*numLevels + else: + assert len(smoother) == numLevels + for lvlNo in range(1, numLevels): + if not isinstance(smoother[lvlNo], tuple): + self.levels[lvlNo].smootherType = (smoother[lvlNo], {}) + else: + self.levels[lvlNo].smootherType = smoother[lvlNo] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef void setup(self, {SCALAR_label}LinearOperator A=None): + cdef: + INDEX_t lvlNo + INDEX_t numLevels = len(self.levels) + {SCALAR_label}levelMemory lvl + tuple smoother + + ######################################################################## + # set smoothers + lvl = self.levels[0] + lvl.allocate() + for lvlNo in range(1, numLevels): + lvl = self.levels[lvlNo] + lvl.allocate() + smoother = lvl.smootherType + smoother[1]['overlaps'] = self.overlap + # get matrix diagonals, accumulate if distributed + if not smoother[0] in ('ilu', 'block_jacobi'): + if self.overlap: + lvl.D = uninitialized((lvl.A.num_rows), dtype={SCALAR}) + self.overlap.accumulate{SCALAR_label}(lvl.A.diagonal, + lvl.D, + level=lvlNo) + else: + lvl.D = lvl.A.diagonal + tempMem = np.array(lvl.temp, copy=False) + if self.overlap is not None: + lvlOverlap = self.overlap.levels[lvlNo] + else: + lvlOverlap = None + if smoother[0] == 'jacobi': + lvl.smoother = {SCALAR_label}jacobiSmoother(lvl.A, lvl.D, smoother[1], tempMem, overlap=lvlOverlap) + elif smoother[0] == 'block_jacobi': + lvl.smoother = {SCALAR_label}blockJacobiSmoother(lvl.A, smoother[1], tempMem, overlap=lvlOverlap) + elif smoother[0] == 'gauss_seidel': + lvl.smoother = gaussSeidelSmoother(lvl.A, lvl.D, smoother[1], tempMem, overlap=lvlOverlap) + elif smoother[0] == 'sor': + lvl.smoother = sorSmoother(lvl.A, lvl.D, smoother[1], tempMem, overlap=lvlOverlap) + elif smoother[0] == 'ssor': + lvl.smoother = ssorSmoother(lvl.A, lvl.D, smoother[1], tempMem, overlap=lvlOverlap) + + elif smoother[0] == 'gmres': + lvl.smoother = {SCALAR_label}gmresSmoother(lvl.A, lvl.D, smoother[1], overlap=lvlOverlap) + elif smoother[0] == 'ilu': + lvl.smoother = iluSmoother(lvl.A, smoother[1], tempMem, overlap=lvlOverlap) + elif smoother[0] == 'flexible': + lvl.smoother = flexibleSmoother(lvl.A, smoother[1], overlap=lvlOverlap) + else: + raise NotImplementedError(smoother[0]) + + ######################################################################## + # set coarse solver + myHierarchyManager = self.hierarchyManager + if len(myHierarchyManager.builtHierarchies) > 1: + coarseHierarchyManager = myHierarchyManager.getSubManager() + coarseHierarchy = coarseHierarchyManager.builtHierarchies[len(coarseHierarchyManager.builtHierarchies)-1] + coarseSolverName = coarseHierarchy.params['solver'] + coarseSolverParams = coarseHierarchy.params['solver_params'] + if coarseSolverName.find('MG') >= 0: + coarseSolverParams['smoother'] = smoother + self.coarse_solver = {SCALAR_label}coarseSolver(coarseHierarchyManager, self.PLogger, coarseSolverName, **coarseSolverParams) + else: + fineHierarchy = myHierarchyManager.builtHierarchies[0] + coarseSolverName = fineHierarchy.params['solver'] + from PyNucleus_base import solverFactory + if solverFactory.isRegistered(coarseSolverName): + self.coarse_solver = solverFactory.build(coarseSolverName, A=self.levels[0].A, setup=True) + else: + raise NotImplementedError("No coarse solver named \"{}\"".format(coarseSolverName)) + self.coarse_solver.setup() + + 
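# Typical use once built (sketch; hM, b and x are assumed to come from
+        # the driver, with b holding the rhs and x receiving the solution):
+        #   ml = multigrid(hM, smoother=('jacobi', {'omega': 2.0/3.0}))
+        #   ml.setup()
+        #   numIter = ml.solve(b, x)  # assuming the iterative_solver base
+        #                             # exposes solve() to Python
+ 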
self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void solveOnLevel(self, int lvlNo, {SCALAR}_t[::1] b, {SCALAR}_t[::1] x, + BOOL_t simpleResidual=False): + cdef: + {SCALAR}_t[::1] res, correction, defect, solcg + INDEX_t i + str label = str(lvlNo) + FakeTimer Timer = self.PLogger.Timer(label, manualDataEntry=True) + {SCALAR_label}levelMemory lvl, lvlCoarse + if lvlNo == 0: + Timer.start() + if isinstance(self.coarse_solver, {SCALAR_label_lc_}iterative_solver): + self.coarse_solver.tolerance = self._tol + self.coarse_solver.maxIter = 1 + self.coarse_solver.solve(b, x) + Timer.end() + else: + Timer.start() + lvl = self.levels[lvlNo] + lvlCoarse = self.levels[lvlNo-1] + solcg = lvlCoarse.sol + defect = lvlCoarse.rhs + res = lvl.temp + correction = lvl.temp + + # apply presmoother to x, result in x + lvl.smoother.eval(b, x, postsmoother=False, + simpleResidual=simpleResidual) + + # get residual in res -> temp + lvl.A.residual(x, b, res) + + # restrict res -> temp to defect -> rhs + lvl.R.matvec(res, defect) + Timer.end() + + # solve on coarser level with rhs defect -> rhs into solcg -> sol + solcg[:] = 0.0 + simpleResidual = True + for i in range(self.cycle): + self.solveOnLevel(lvlNo-1, defect, solcg, + simpleResidual=simpleResidual) + simpleResidual = False + + Timer.start() + # prolong solcg -> sol to correction -> temp + lvl.P.matvec(solcg, correction) + # lvl.P.matvec_no_overwrite(solcg, x) + + # update fine grid solution + update(x, correction) + + # apply postsmoother to x, result in x + lvl.smoother.eval(b, x, postsmoother=True) + Timer.end() + Timer.enterData() + + def asPreconditioner(self, INDEX_t maxIter=1, CycleType cycle=V): + return {SCALAR_label}multigridPreconditioner(self, cycle, maxIter) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef int solve(self, + {SCALAR}_t[::1] b, + {SCALAR}_t[::1] x) except -1: + {SCALAR_label_lc_}iterative_solver.solve(self, b, x) + + # For a distributed solve, b should be distributed, and x accumulated. + # The solution vector is accumulated. + cdef: + INDEX_t iterNo, lvlNo + INDEX_t numLevels = len(self.levels) + REAL_t tol = self.tol + INDEX_t maxiter = self.maxIter + {SCALAR}_t[::1] res + REAL_t n + BOOL_t simpleResidual = False + {SCALAR_label}LinearOperator A, R, P + {SCALAR}_t[::1] b_fine + {SCALAR}_t[::1] b_coarse, x_fine, x_coarse + {SCALAR_label}levelMemory lvl + CycleType cycle + BOOL_t doFMG + list residuals = [] + + lvl = self.levels[numLevels-1] + A = lvl.A + res = lvl.temp + + # if isinstance(self.coarse_solver, coarseSolver_MG): + # self._tol = 0. 
+ # else: + self._tol = tol + + doFMG = False + if self.cycle in (FMG_V, FMG_W): + doFMG = True + cycle = self.cycle + if self.cycle == FMG_V: + self.cycle = V + else: + self.cycle = W + + # coarsen rhs to all levels + b_fine = b + lvl = self.levels[numLevels-1] + for lvlNo in range(numLevels-2, -1, -1): + R = lvl.R + lvl = self.levels[lvlNo] + b_coarse = lvl.rhs + R.matvec(b_fine, b_coarse) + b_fine = lvl.rhs + + # FMG cycle + lvl = self.levels[0] + for lvlNo in range(numLevels-2): + b_coarse = lvl.rhs + x_coarse = lvl.sol + lvl = self.levels[lvlNo+1] + x_fine = lvl.sol + P = lvl.P + self.solveOnLevel(lvlNo, b_coarse, x_coarse) + P.matvec(x_coarse, x_fine) + lvlNo = numLevels-2 + lvl = self.levels[lvlNo] + b_coarse = lvl.rhs + x_coarse = lvl.sol + lvl = self.levels[lvlNo+1] + P = lvl.P + self.solveOnLevel(lvlNo, b_coarse, x_coarse) + P.matvec(x_coarse, x) + lvl.smoother.eval(b, x, postsmoother=True) + + iterNo = 1 + else: + if self.x0 is None: + simpleResidual = True + iterNo = 0 + + A.residual(x, b, res, simpleResidual) + n = self.norm.eval(res, False) + self.PLogger.addValue('residual', n) + residuals.append(n) + + while (residuals[len(residuals)-1] > tol) and (iterNo < maxiter): + iterNo += 1 + self.solveOnLevel(numLevels-1, b, x, simpleResidual) + simpleResidual = False + A.residual(x, b, res, False) + n = self.norm.eval(res, False) + self.PLogger.addValue('residual', n) + residuals.append(n) + if doFMG: + self.cycle = cycle + self.residuals = residuals + return iterNo + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef int solveFMG(self, + {SCALAR}_t[::1] b, + {SCALAR}_t[::1] x): + cdef: + CycleType cycle + cycle = self.cycle + self.cycle = FMG_V + numIter = self.solve(b, x) + self.cycle = cycle + return numIter + + def __str__(self): + cdef: + INDEX_t numLevels = len(self.levels) + if self.overlap is None: + columns = [] + for lvlNo in range(numLevels-1, -1, -1): + lvl = self.levels[lvlNo] + A = lvl.A + row = [lvlNo, '{:,}'.format(A.shape[0])] + if hasattr(A, 'nnz'): + row.append('{:,}'.format(A.nnz)) + row.append('{:,}'.format(A.nnz/A.shape[0])) + else: + row.append('') + row.append('') + row.append(str(lvl.smoother) if lvlNo > 0 else str(self.coarse_solver)) + columns.append(row) + return tabulate(columns, headers=['level', 'unknowns', 'nnz', 'nnz/row', 'solver']) + '\n' + else: + num_dofs = [self.overlap.countDoFs(self.levels[lvlNo].A.shape[0], lvlNo) + for lvlNo in range(numLevels)] + num_nnz = [self.overlap.comm.allreduce(self.levels[lvlNo].A.nnz) + for lvlNo in range(numLevels)] + coarse_solver_descr = str(self.coarse_solver) + if coarse_solver_descr.find('\n') >= 0: + short_coarse_solver_descr = 'Coarse solver' + coarse_solver_descr = '\n\n'+coarse_solver_descr + else: + short_coarse_solver_descr = coarse_solver_descr + coarse_solver_descr = '' + if self.overlap.comm.rank == 0: + return tabulate([[lvlNo, + '{:,}'.format(num_dofs[lvlNo]), + '{:,}'.format(num_dofs[lvlNo]/self.overlap.comm.size), + '{:,}'.format(num_nnz[lvlNo]), + '{:,}'.format(num_nnz[lvlNo]/self.overlap.comm.size), + '{:,}'.format(num_nnz[lvlNo]/num_dofs[lvlNo]), + str(self.levels[lvlNo].smoother) if lvlNo > 0 else short_coarse_solver_descr] + for lvlNo in range(numLevels-1, -1, -1)], + headers=['level', 'unknowns', 'unknowns/rank', 'nnz', 'nnz/rank', 'nnz/row', 'solver']) + coarse_solver_descr + '\n' + else: + return '' + + def iterationMatrix(self): + n = self.num_rows + M = np.zeros((n, n)) + rhs = np.zeros((n), dtype={SCALAR}) + maxiter = self.maxIter + 
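# Probing: with a zero right-hand side and initial guess e_i, a single
+        # cycle returns column i of the error-propagation (iteration) matrix,
+        # which is what gets collected into M below.
+ 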
self.maxIter = 1 + zeroInitialGuess = self.x0 is None + if not zeroInitialGuess: + initGuess = np.array(self.x0, copy=True) + for i, x0 in enumerate(np.eye(n)): + x = np.zeros((n), dtype={SCALAR}) + self.setInitialGuess(x0) + self.solve(b=rhs, x=x) + M[:, i] = x + self.maxIter = maxiter + if zeroInitialGuess: + self.setInitialGuess() + else: + self.setInitialGuess(initGuess) + return M + + def operatorComplexity(self): + return sum([lvl.A.nnz for lvl in self.levels])/self.levels[-1].A.nnz + + +cdef class {SCALAR_label}multigridPreconditioner({SCALAR_label_lc_}preconditioner): + cdef: + {SCALAR_label}multigrid ml + CycleType cycle + INDEX_t numLevels, maxIter + + def __init__(self, {SCALAR_label}multigrid ml, CycleType cycle, INDEX_t maxIter=1): + {SCALAR_label_lc_}preconditioner.__init__(self, ml) + self.ml = ml + self.cycle = cycle + self.maxIter = maxIter + self.numLevels = len(self.ml.levels) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + {SCALAR}_t[::1] x, + {SCALAR}_t[::1] y) except -1: + assert self.ml.initialized, 'solOp not initialized' + cdef: + BOOL_t simpleResidual = True + INDEX_t iterNo + self.cycle, self.ml.cycle = self.ml.cycle, self.cycle + self.ml._tol = 1e-8 + y[:] = 0. + for iterNo in range(self.maxIter): + self.ml.solveOnLevel(self.numLevels-1, x, y, simpleResidual=simpleResidual) + simpleResidual = False + self.cycle, self.ml.cycle = self.ml.cycle, self.cycle + return 0 + + def __str__(self): + return '{} iterations of {}-cycle\n{}'.format(self.maxIter, self.cycle, self.ml) diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx b/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx new file mode 100644 index 0000000..03377f6 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restrictionProlongation.pyx @@ -0,0 +1,159 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_base import uninitialized +import numpy as np +cimport numpy as np +cimport cython +from PyNucleus_fem.meshCy cimport decode_edge +from PyNucleus_base.linear_operators cimport (restrictionOp, + prolongationOp, + sparseGraph, + LinearOperator, + LinearOperator_wrapper, + CSR_LinearOperator, + SSS_LinearOperator) +from PyNucleus_base.sparsityPattern cimport sparsityPattern +from PyNucleus_fem.DoFMaps cimport (DoFMap, + P0_DoFMap, P1_DoFMap, P2_DoFMap, P3_DoFMap) + + + +def buildRestrictionProlongation(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + if isinstance(coarse_DoFMap, P0_DoFMap): + if isinstance(fine_DoFMap, P0_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P0(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P0(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P0(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif isinstance(coarse_DoFMap, P1_DoFMap): + if isinstance(fine_DoFMap, P1_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P1(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P1(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P1(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + elif isinstance(fine_DoFMap, P2_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P1_P2(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P1_P2(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P1_P2(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + elif isinstance(fine_DoFMap, P3_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P1_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P1_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P1_P3(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif isinstance(coarse_DoFMap, P2_DoFMap): + if isinstance(fine_DoFMap, P2_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P2(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P2(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P2(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + elif isinstance(fine_DoFMap, P3_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P2_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P2_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P2_P3(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif isinstance(coarse_DoFMap, P3_DoFMap): + if isinstance(fine_DoFMap, P3_DoFMap): + if coarse_DoFMap.dim == 1: + R = buildRestriction_1D_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 2: + R = buildRestriction_2D_P3(coarse_DoFMap, fine_DoFMap) + elif coarse_DoFMap.dim == 3: + R = buildRestriction_3D_P3(coarse_DoFMap, fine_DoFMap) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + else: + raise 
NotImplementedError('Unknown DoFMap: {}'.format(coarse_DoFMap)) + P = R.transpose() + return R, P + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline void add(sparsityPattern sPat, + const INDEX_t dof, + const INDEX_t dofF): + if dofF >= 0: + sPat.add(dof, dofF) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline void enterData(CSR_LinearOperator R, + const INDEX_t dof, + const INDEX_t dofF, + const REAL_t val): + if dofF >= 0: + R.setEntry(dof, dofF, val) + + +include "restriction_1D_P0.pxi" +include "restriction_2D_P0.pxi" +include "restriction_3D_P0.pxi" + +include "restriction_1D_P1.pxi" +include "restriction_2D_P1.pxi" +include "restriction_3D_P1.pxi" + +include "restriction_1D_P1_P2.pxi" +include "restriction_2D_P1_P2.pxi" +include "restriction_3D_P1_P2.pxi" + +include "restriction_1D_P1_P3.pxi" +include "restriction_2D_P1_P3.pxi" +include "restriction_3D_P1_P3.pxi" + +include "restriction_1D_P2.pxi" +include "restriction_2D_P2.pxi" +include "restriction_3D_P2.pxi" + +include "restriction_1D_P2_P3.pxi" +include "restriction_2D_P2_P3.pxi" +include "restriction_3D_P2_P3.pxi" + +include "restriction_1D_P3.pxi" +include "restriction_2D_P3.pxi" +include "restriction_3D_P3.pxi" diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P0.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P0.pxi new file mode 100644 index 0000000..75b7bfd --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P0.pxi @@ -0,0 +1,41 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_1D_P0(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1.pxi new file mode 100644 index 0000000..41d1510 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1.pxi @@ -0,0 +1,49 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_1D_P1(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P2.pxi new file mode 100644 index 0000000..2d5979a --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P2.pxi @@ -0,0 +1,47 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_1D_P1_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P3.pxi new file mode 100644 index 0000000..f29f182 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P1_P3.pxi @@ -0,0 +1,51 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_1D_P1_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.666666667) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2.pxi new file mode 100644 index 0000000..3732f31 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2.pxi @@ -0,0 +1,63 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_1D_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.375) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.75) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2_P3.pxi new file mode 100644 index 0000000..84547a7 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P2_P3.pxi @@ -0,0 +1,59 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_1D_P2_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.222222222) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.888888889) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P3.pxi new file mode 100644 index 0000000..49f66e0 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_1D_P3.pxi @@ -0,0 +1,81 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_1D_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 2*cellNo+0 + subCellNo1 = 2*cellNo+1 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.3125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), -0.3125) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.9375) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P0.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P0.pxi new file mode 100644 index 0000000..41a5062 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P0.pxi @@ -0,0 +1,49 @@ 
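All of the generated builders in these files follow the same two-pass pattern: a symbolic sweep over the coarse cells collects the sparsity pattern of the restriction operator R, and a second sweep fills in the CSR data. The entries are the coarse-grid nodal basis functions evaluated at the fine-grid interpolation nodes; in the piecewise-constant case below, every fine subcell simply inherits the coarse value, so all weights are 1.0. A minimal pure-Python sketch of the pattern (couplings and the function name are hypothetical stand-ins for illustration; the real sparsityPattern and CSR_LinearOperator classes live in PyNucleus_base):

    import numpy as np

    def build_restriction_sketch(num_coarse_cells, couplings, num_coarse_dofs):
        # pass 1: symbolic -- collect the column indices of every row
        rows = [set() for _ in range(num_coarse_dofs)]
        for cellNo in range(num_coarse_cells):
            for coarse_dof, fine_dof, _ in couplings(cellNo):
                if coarse_dof >= 0:          # skip boundary/removed dofs
                    rows[coarse_dof].add(fine_dof)
        indptr = np.cumsum([0] + [len(r) for r in rows])
        indices = np.array([j for r in rows for j in sorted(r)], dtype=np.int64)
        data = np.zeros(len(indices))
        # pass 2: numeric -- locate each entry in its sorted row, set the weight
        for cellNo in range(num_coarse_cells):
            for coarse_dof, fine_dof, w in couplings(cellNo):
                if coarse_dof >= 0:
                    row = indices[indptr[coarse_dof]:indptr[coarse_dof + 1]]
                    data[indptr[coarse_dof] + np.searchsorted(row, fine_dof)] = w
        return indptr, indices, data

For the P0 builder below, couplings(cellNo) would yield (coarse_DoFMap.cell2dof(cellNo, 0), fine_DoFMap.cell2dof(4*cellNo + i, 0), 1.0) for i = 0..3, since uniform refinement splits each coarse triangle into four children.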
+################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_2D_P0(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 0)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 0), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1.pxi new file mode 100644 index 0000000..3fb17c6 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1.pxi @@ -0,0 +1,67 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_2D_P1(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P2.pxi new file mode 100644 index 0000000..5fc0d7b --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P2.pxi @@ -0,0 +1,61 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_2D_P1_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P3.pxi new file mode 100644 index 0000000..99c40af --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P1_P3.pxi @@ -0,0 +1,79 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_2D_P1_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.333333333) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2.pxi new file mode 
100644 index 0000000..6b83613 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2.pxi @@ -0,0 +1,139 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_2D_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.375) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.75) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.5) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2_P3.pxi new file mode 100644 index 0000000..fbb6b49 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P2_P3.pxi 
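Unlike the h-restrictions above, the *_P1_P2, *_P1_P3 and *_P2_P3 builders map between polynomial degrees on the same mesh (note subCellNo0 = 1*cellNo+0), so their entries are the lower-degree basis functions evaluated at the nodes of the higher-degree element. The repeating decimals are nine-digit truncations of exact rationals; for example, on an edge the quadratic basis evaluated at the two cubic nodes gives 2/9, -1/9 and 8/9. A quick independent check of those numbers (plain Python, not part of this module):

    import numpy as np

    # quadratic Lagrange basis on the reference edge [0, 1], local order
    # (vertex 0, vertex 1, midpoint), as used by the builders in this directory
    def p2(x):
        return np.array([(1 - x)*(1 - 2*x), x*(2*x - 1), 4*x*(1 - x)])

    for x in (1/3, 2/3):        # the two cubic edge nodes
        print(p2(x))            # -> [2/9, -1/9, 8/9] and [-1/9, 2/9, 8/9]

The 0.444444444 entries in the file below are the corresponding interior value 4/9: the quadratic edge bubble 4*lam_i*lam_j evaluated at the cubic face node, where both barycentric coordinates equal 1/3.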
@@ -0,0 +1,109 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_2D_P2_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: 
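+            # (as for dof 0 above: the weights are the coarse P2 basis
+            # evaluated at the P3 nodes of the same cell; the exact values
+            # are 1, 2/9 and -1/9, truncated to nine digits by the generator)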
+ enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.444444444) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P3.pxi new file mode 100644 index 0000000..fd8a529 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_2D_P3.pxi @@ -0,0 +1,307 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_2D_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, 
dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 4*cellNo+0 + subCellNo1 = 4*cellNo+1 + subCellNo2 = 4*cellNo+2 + subCellNo3 = 4*cellNo+3 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.3125) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5625) + 
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), -0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 1.0) + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo2, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 0.5625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.9375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), -0.3125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), -0.1875) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), -0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P0.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P0.pxi new file mode 100644 index 0000000..abdba79 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P0.pxi @@ -0,0 +1,103 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_3D_P0(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3, subCellNo4, subCellNo5, subCellNo6, subCellNo7 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]: + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 0)) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 0)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 1.0) + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]: + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), 1.0) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), 1.0) + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo5, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 0), 1.0) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 0), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1.pxi new file mode 100644 index 0000000..19de3df --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1.pxi @@ -0,0 +1,95 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_3D_P1(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3, subCellNo4, subCellNo5, subCellNo6, subCellNo7 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 3)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo0, 3), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 2), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 1), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 3), 1.0) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P2.pxi new file mode 100644 index 0000000..5daef03 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P2.pxi @@ -0,0 +1,79 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_3D_P1_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P3.pxi new file mode 100644 index 0000000..3d83fd9 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P1_P3.pxi @@ -0,0 +1,127 @@ +################################################################################### +# Copyright 
2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_3D_P1_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = 
coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.333333333) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.666666667) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.333333333) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.333333333) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2.pxi new file mode 100644 index 0000000..a04893c --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2.pxi @@ -0,0 +1,487 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_3D_P2(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3, subCellNo4, subCellNo5, subCellNo6, subCellNo7 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]: + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 7)) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 6)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 6)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 6)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 6)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 9)) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + dof = 
coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 8)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.375) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.375) + 
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.375) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.75) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.75) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.25) + 
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.75) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.75) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.25) + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]: + if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 7), 0.25) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 6), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 6), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 6), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 6), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 9), 0.25) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo5, 7), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 7), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 7), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 7), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 8), 0.25) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2_P3.pxi new file mode 100644 index 0000000..764c725 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P2_P3.pxi @@ -0,0 +1,199 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
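[Editor's note: the numerical weights in buildRestriction_3D_P2 above are the coarse quadratic shape functions evaluated at the positions of the fine P2 nodes; on the uniformly refined tetrahedron these sit at the quarter and half points of the coarse edges. The three-way branch on fine_DoFMap.mesh.cells[subCellNo4, 0] appears to select which diagonal the interior octahedron of the eight-way refinement was split along, since only the fine dof on that middle edge differs between the three cases. A quick check of the constants, assuming the standard barycentric P2 basis (vertex function l*(2l-1), edge function 4*la*lb — these definitions are not spelled out in the patch itself):

    def p2_vertex(l):              # assumed coarse P2 vertex shape function
        return l * (2.0 * l - 1.0)

    def p2_edge(la, lb):           # assumed coarse P2 edge shape function
        return 4.0 * la * lb

    assert p2_vertex(1.0) == 1.0          # coarse vertex at itself
    assert p2_vertex(0.5) == 0.0          # vanishes at coarse edge midpoints
    assert p2_vertex(0.75) == 0.375       # the 0.375 entries
    assert p2_vertex(0.25) == -0.125      # the -0.125 entries
    assert p2_edge(0.5, 0.5) == 1.0       # coarse edge midpoint = fine vertex
    assert p2_edge(0.75, 0.25) == 0.75    # the 0.75 entries
    assert p2_edge(0.5, 0.25) == 0.5      # the 0.5 entries
    assert p2_edge(0.25, 0.25) == 0.25    # the 0.25 entries

All of these values are exact dyadic fractions, which is why the generated file can hard-code them as plain literals.]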
# +################################################################################### + + +#@cython.initializedcheck(False) +#@cython.boundscheck(False) +#@cython.wraparound(False) +cdef buildRestriction_3D_P2_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + dof = 
coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + indptr, indices = sPat.freeze() + del sPat + data = uninitialized((indices.shape[0]), dtype=REAL) + R = CSR_LinearOperator(indices, indptr, data) + R.num_columns = fine_DoFMap.num_dofs + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 1*cellNo+0 + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 1), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 2), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.111111111) + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo0, 18), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.222222222) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.111111111) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.111111111) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.444444444) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.888888889) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.444444444) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.444444444) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P3.pxi b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P3.pxi new file mode 100644 index 0000000..9a23a11 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/restriction_3D_P3.pxi @@ -0,0 +1,1719 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. 
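[Editor's note: in buildRestriction_3D_P2_P3 above, the fine P3 nodes lie at the one-third points of the coarse edges and faces, so the weights are the same coarse P2 basis evaluated there, with the rational values printed rounded to nine decimals: 0.222222222 for 2/9, -0.111111111 for -1/9, 0.888888889 for 8/9, 0.444444444 for 4/9 (and, in restriction_3D_P1_P3.pxi above, 0.666666667 and 0.333333333 for the P1 hat function values 2/3 and 1/3). A quick verification under the same standard-basis assumption as before, with hypothetical helper names:

    p2_vertex = lambda l: l * (2.0 * l - 1.0)    # assumed P2 vertex function
    p2_edge = lambda la, lb: 4.0 * la * lb       # assumed P2 edge function

    assert abs(p2_vertex(2/3) - 2/9) < 1e-12     # printed as 0.222222222
    assert abs(p2_vertex(1/3) + 1/9) < 1e-12     # printed as -0.111111111
    assert abs(p2_edge(2/3, 1/3) - 8/9) < 1e-12  # printed as 0.888888889
    assert abs(p2_edge(1/3, 1/3) - 4/9) < 1e-12  # printed as 0.444444444

The nine-digit rounding perturbs each entry by at most about 5e-10, which is presumably harmless for a multigrid transfer operator but worth knowing when comparing against an exactly rational reference.]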
# +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef buildRestriction_3D_P3(DoFMap coarse_DoFMap, + DoFMap fine_DoFMap): + cdef: + sparsityPattern sPat + INDEX_t cellNo, dof, k, middleEdgeDof + INDEX_t[::1] indptr, indices + REAL_t[::1] data + INDEX_t subCellNo0, subCellNo1, subCellNo2, subCellNo3, subCellNo4, subCellNo5, subCellNo6, subCellNo7 + CSR_LinearOperator R + sPat = sparsityPattern(coarse_DoFMap.num_dofs) + for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]): + subCellNo0 = 8*cellNo+0 + subCellNo1 = 8*cellNo+1 + subCellNo2 = 8*cellNo+2 + subCellNo3 = 8*cellNo+3 + subCellNo4 = 8*cellNo+4 + subCellNo5 = 8*cellNo+5 + subCellNo6 = 8*cellNo+6 + subCellNo7 = 8*cellNo+7 + + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 1)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11)) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, 
fine_DoFMap.cell2dof(subCellNo1, 3)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18)) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17)) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + dof = 
coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 7)) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 2)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + 
add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10)) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 9)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + dof = coarse_DoFMap.cell2dof(cellNo, 10) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 11)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17)) + dof = coarse_DoFMap.cell2dof(cellNo, 11) + if dof >= 0: + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 0)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 10)) + add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19)) + add(sPat, dof, 
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 11))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 12)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 13)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 3))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 14)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19))
+        dof = coarse_DoFMap.cell2dof(cellNo, 15)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 3))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 14))
+        dof = coarse_DoFMap.cell2dof(cellNo, 16)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 7))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 5))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 4))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 6))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 9))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 8))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 17)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 5))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 4))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 11))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 17))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 18)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 13))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 6))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 7))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 12))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
+        dof = coarse_DoFMap.cell2dof(cellNo, 19)
+        if dof >= 0:
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 8))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 11))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 18))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 15))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 10))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 16))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo1, 19))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo0, 14))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo3, 9))
+            add(sPat, dof, fine_DoFMap.cell2dof(subCellNo2, 17))
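+        # Sub-cells 0-3 are the corner children of the coarse tetrahedron and were
+        # handled above; sub-cells 4-7 come from the interior octahedron, which may
+        # have been split along any of its three diagonals.  The vertex comparisons
+        # below appear to distinguish the three possible orientations.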
+        if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]:
+            if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]:
+                dof = coarse_DoFMap.cell2dof(cellNo, 0)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 1)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 2)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 3)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 4)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 5)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 6)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 7)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 8)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 9)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 10)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                dof = coarse_DoFMap.cell2dof(cellNo, 11)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 12)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 13)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 14)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 15)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 16)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 17)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 10))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 18)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 19)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 18))
+            else:
+                dof = coarse_DoFMap.cell2dof(cellNo, 0)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 13))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 1)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 9))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 2)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 13))
+                dof = coarse_DoFMap.cell2dof(cellNo, 3)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 9))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 4)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 5)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 12))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 6)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 12))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 7)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 8)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 13))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 9)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 13))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 10)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 11)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 12))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 12)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 9))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 13)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 9))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 14)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                dof = coarse_DoFMap.cell2dof(cellNo, 15)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 12))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                dof = coarse_DoFMap.cell2dof(cellNo, 16)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 14))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 15))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 17)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 19))
+                dof = coarse_DoFMap.cell2dof(cellNo, 18)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 5))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 4))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                dof = coarse_DoFMap.cell2dof(cellNo, 19)
+                if dof >= 0:
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 14))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 15))
+                    add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+        else:
+            dof = coarse_DoFMap.cell2dof(cellNo, 0)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 14))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+            dof = coarse_DoFMap.cell2dof(cellNo, 1)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+            dof = coarse_DoFMap.cell2dof(cellNo, 2)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+            dof = coarse_DoFMap.cell2dof(cellNo, 3)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 14))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 4)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 12))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+            dof = coarse_DoFMap.cell2dof(cellNo, 5)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+            dof = coarse_DoFMap.cell2dof(cellNo, 6)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+            dof = coarse_DoFMap.cell2dof(cellNo, 7)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 8)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 9)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 12))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+            dof = coarse_DoFMap.cell2dof(cellNo, 10)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+            dof = coarse_DoFMap.cell2dof(cellNo, 11)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 8))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 12)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+            dof = coarse_DoFMap.cell2dof(cellNo, 13)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 12))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+            dof = coarse_DoFMap.cell2dof(cellNo, 14)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+            dof = coarse_DoFMap.cell2dof(cellNo, 15)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 12))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 16)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 14))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 17)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 11))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 17))
+            dof = coarse_DoFMap.cell2dof(cellNo, 18)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 14))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 10))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+            dof = coarse_DoFMap.cell2dof(cellNo, 19)
+            if dof >= 0:
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 18))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo4, 15))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo7, 17))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 19))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo6, 16))
+                add(sPat, dof, fine_DoFMap.cell2dof(subCellNo5, 11))
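+    # The loop above only collected the sparsity pattern of the restriction
+    # operator; freeze() turns it into CSR index arrays.  A second pass over
+    # the coarse cells then fills in the actual interpolation weights via
+    # enterData.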
+    indptr, indices = sPat.freeze()
+    del sPat
+    data = uninitialized((indices.shape[0]), dtype=REAL)
+    R = CSR_LinearOperator(indices, indptr, data)
+    R.num_columns = fine_DoFMap.num_dofs
+    for cellNo in range(coarse_DoFMap.mesh.cells.shape[0]):
+        subCellNo0 = 8*cellNo+0
+        subCellNo1 = 8*cellNo+1
+        subCellNo2 = 8*cellNo+2
+        subCellNo3 = 8*cellNo+3
+        subCellNo4 = 8*cellNo+4
+        subCellNo5 = 8*cellNo+5
+        subCellNo6 = 8*cellNo+6
+        subCellNo7 = 8*cellNo+7
+
+        dof = coarse_DoFMap.cell2dof(cellNo, 0)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 11), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 0), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 1)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 0), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 1), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 13), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 12), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), -0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 2)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 2), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 15), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 14), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 3)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 14), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 3), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 13), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 15), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 11), 0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 12), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), 0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 4)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 5), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), -0.25)
+        dof = coarse_DoFMap.cell2dof(cellNo, 5)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 4), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 5), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 4), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.1875)
+        dof = coarse_DoFMap.cell2dof(cellNo, 6)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 7), 1.0)
+        dof = coarse_DoFMap.cell2dof(cellNo, 7)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 2), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 7), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 6), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 6), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.1875)
+        dof = coarse_DoFMap.cell2dof(cellNo, 8)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 9), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), 0.375)
+        dof = coarse_DoFMap.cell2dof(cellNo, 9)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 8), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 8), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 9), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.1875)
+        dof = coarse_DoFMap.cell2dof(cellNo, 10)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 11), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 11), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 11)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 0), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 10), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 10), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 11), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 12)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 13), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 12), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 13), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 13)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 12), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 3), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 12), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 13), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), -0.25)
+        dof = coarse_DoFMap.cell2dof(cellNo, 14)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 15), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 14), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 15), 1.0)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.0625)
+        dof = coarse_DoFMap.cell2dof(cellNo, 15)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 3), 0.5625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), -0.25)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), -0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 14), -0.3125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 15), 0.9375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), -0.1875)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), -0.0625)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 14), 1.0)
+        dof = coarse_DoFMap.cell2dof(cellNo, 16)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 7), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 5), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 4), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 6), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 9), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 8), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 16), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.375)
+        dof = coarse_DoFMap.cell2dof(cellNo, 17)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 5), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 4), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 10), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 11), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 17), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 12), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 13), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 17), 0.5)
+        dof = coarse_DoFMap.cell2dof(cellNo, 18)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 13), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 15), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 6), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 7), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 14), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 18), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 12), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.375)
+        dof = coarse_DoFMap.cell2dof(cellNo, 19)
+        if dof >= 0:
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 8), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 11), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 18), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 15), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 10), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 16), 0.375)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 19), 0.5)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo1, 19), 0.125)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo0, 14), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo3, 9), 0.75)
+            enterData(R, dof, fine_DoFMap.cell2dof(subCellNo2, 17), 0.375)
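+        # Same orientation handling as in the sparsity-pattern pass above: the
+        # entries for the four octahedron children depend on which diagonal was
+        # used when the interior octahedron was split.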
+        if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo5, 0]:
+            if fine_DoFMap.mesh.cells[subCellNo4, 0] == fine_DoFMap.mesh.cells[subCellNo6, 0]:
+                dof = coarse_DoFMap.cell2dof(cellNo, 0)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.0625)
+                dof = coarse_DoFMap.cell2dof(cellNo, 1)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.0625)
+                dof = coarse_DoFMap.cell2dof(cellNo, 2)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.0625)
+                dof = coarse_DoFMap.cell2dof(cellNo, 3)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.0625)
+                dof = coarse_DoFMap.cell2dof(cellNo, 4)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 5)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 19), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 6)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 19), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 7)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 10), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 8)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 10), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 9)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 10)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 11)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 10), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 19), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 12)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 13)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 10), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 14)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 10), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 15)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 10), -0.0625)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.125)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 19), -0.125)
+                dof = coarse_DoFMap.cell2dof(cellNo, 16)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.25)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.25)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), 0.25)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 10), 0.5)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 16), 1.0)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.5)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 19), 0.5)
+                dof = coarse_DoFMap.cell2dof(cellNo, 17)
+                if dof >= 0:
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.25)
+                    enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.5)
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 10), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 18) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 19), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 19) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 18), 1.0) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 13), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 13), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 9), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 13), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + 
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 13), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 10) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 11) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 12) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 13) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 9), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 14) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 15) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 16) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 14), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 15), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 16), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 17) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 19), 1.0) + dof = coarse_DoFMap.cell2dof(cellNo, 18) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 5), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 4), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 19) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 14), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 1.0) + 
enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 15), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.5) + else: + dof = coarse_DoFMap.cell2dof(cellNo, 0) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 14), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 1) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 2) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 3) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 19), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 14), 0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 4) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 5) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 17), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 6) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 7) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 8) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 9) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 10) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 11) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 8), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 12) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), -0.125) + enterData(R, dof, 
fine_DoFMap.cell2dof(subCellNo6, 16), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 13) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 14) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.0625) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), -0.125) + dof = coarse_DoFMap.cell2dof(cellNo, 15) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 12), -0.125) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), -0.0625) + dof = coarse_DoFMap.cell2dof(cellNo, 16) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 14), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 16), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.5) + dof = coarse_DoFMap.cell2dof(cellNo, 17) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 11), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 17), 1.0) + dof = coarse_DoFMap.cell2dof(cellNo, 18) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 14), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 18), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 10), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 18), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 0.25) + dof = coarse_DoFMap.cell2dof(cellNo, 19) + if dof >= 0: + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 18), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 17), 1.0) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo4, 15), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo7, 17), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 19), 0.25) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo6, 16), 0.5) + enterData(R, dof, fine_DoFMap.cell2dof(subCellNo5, 11), 0.25) + return R diff --git a/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pxd b/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pxd new file mode 100644 index 0000000..16e0868 --- /dev/null +++ b/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pxd @@ -0,0 +1,36 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
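For orientation: the long run of enterData calls above hard-codes the element-level interpolation weights that assemble the sparse restriction matrix R, mapping fine-grid degrees of freedom to coarse-grid ones for the higher-order elements. A minimal SciPy sketch of how a restriction operator returned by such a routine is typically used in a two-grid correction; the names A_fine, b, x are hypothetical, and R is assumed to have been converted to a SciPy CSR matrix:

    from scipy.sparse.linalg import spsolve

    def two_grid_correction(A_fine, R, b, x):
        # Restrict the fine-grid residual to the coarse grid: r_c = R (b - A x).
        r_c = R @ (b - A_fine @ x)
        # Galerkin coarse-grid operator, taking the prolongation to be R^T.
        A_c = (R @ A_fine @ R.T).tocsc()
        # Solve the coarse problem and prolongate the correction back up.
        return x + R.T @ spsolve(A_c, r_c)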
#
+###################################################################################
+
+
+from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, COMPLEX_t, BOOL_t
+from PyNucleus_base.linear_operators cimport LinearOperator, ComplexLinearOperator
+from PyNucleus_fem.algebraicOverlaps cimport algebraicOverlapManager
+from PyNucleus_fem.distributed_operators cimport (DistributedLinearOperator,
+                                                  ComplexDistributedLinearOperator,
+                                                  CSR_DistributedLinearOperator,
+                                                  ComplexCSR_DistributedLinearOperator)
+
+include "smoothers_decl_REAL.pxi"
+include "smoothers_decl_COMPLEX.pxi"
+
+
+
+
+cdef class iluPreconditioner(preconditioner):
+    cdef:
+        REAL_t[::1] temporaryMemory
+        LinearOperator A
+        LinearOperator preconditioner
+    cdef INDEX_t matvec(self, REAL_t[::1] x, REAL_t[::1] y) except -1
+
+
+cdef class iluSmoother(separableSmoother):
+    pass
+
+
+cdef class flexibleSmoother(separableSmoother):
+    pass
diff --git a/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pyx b/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pyx
new file mode 100644
index 0000000..3e4cbdf
--- /dev/null
+++ b/multilevelSolver/PyNucleus_multilevelSolver/smoothers.pyx
@@ -0,0 +1,442 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+import numpy as np
+cimport numpy as np
+cimport cython
+from scipy.sparse import csr_matrix
+from PyNucleus_base.myTypes import INDEX, REAL, COMPLEX
+from PyNucleus_base import uninitialized
+from PyNucleus_base.blas cimport (update, updateScaled,
+                                  assignScaled, assign3)
+from PyNucleus_base.linear_operators cimport (Product_Linear_Operator,
+                                              CSR_LinearOperator,
+                                              ComplexCSR_LinearOperator,
+                                              SSS_LinearOperator,
+                                              TimeStepperLinearOperator)
+from PyNucleus_base.linalg import estimateSpectralRadius
+from PyNucleus_base.linalg import ILU_solver
+import logging
+
+LOGGER = logging.getLogger(__name__)
+
+
+include "smoothers_REAL.pxi"
+include "smoothers_COMPLEX.pxi"
+
+
+######################################################################
+# SOR preconditioner and smoother
+
+# Assumes that the indices of A are ordered
+cdef class sorPreconditioner(preconditioner):
+    cdef:
+        public LinearOperator A
+        public REAL_t[::1] D
+        INDEX_t[::1] A_indptr, A_indices
+        REAL_t[::1] A_data, A_diagonal
+        REAL_t[::1] temp
+        REAL_t omega
+        public BOOL_t presmoother_forwardSweep
+        public BOOL_t postsmoother_forwardSweep
+        public BOOL_t forwardSweep
+
+    def __init__(self,
+                 LinearOperator A,
+                 REAL_t[::1] D,
+                 REAL_t omega,
+                 BOOL_t presmoother_forwardSweep,
+                 BOOL_t postsmoother_forwardSweep):
+        preconditioner.__init__(self, D.shape[0], D.shape[0])
+        self.D = D
+        if isinstance(A, (CSR_LinearOperator, csr_matrix)):
+            self.A = A
+            self.A_indptr = self.A.indptr
+            self.A_indices = self.A.indices
+            self.A_data = self.A.data
+        elif isinstance(A, SSS_LinearOperator):
+            self.A = A
+            self.A_indptr = self.A.indptr
+            self.A_indices = self.A.indices
+            self.A_data = self.A.data
+            self.A_diagonal = self.A.diagonal
+        elif isinstance(A, TimeStepperLinearOperator):
+            assert isinstance(A.M, CSR_LinearOperator)
+            assert isinstance(A.S, CSR_LinearOperator)
+            assert A.M.nnz == A.S.nnz
+            self.A = A
+        else:
+            self.A = A.to_csr_linear_operator()
+            self.A_indptr = self.A.indptr
+            self.A_indices = self.A.indices
+            self.A_data = self.A.data
+        self.omega = omega
+        self.presmoother_forwardSweep = presmoother_forwardSweep
+        self.postsmoother_forwardSweep = postsmoother_forwardSweep
+        self.forwardSweep = False
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] result) except -1:
+        cdef:
+            INDEX_t i, j, k, jj
+            REAL_t t
+            REAL_t facM, facS
+            INDEX_t[::1] Sindptr, Sindices
+            REAL_t[::1] Sdata, Mdata
+
+        result[:] = 0.0
+        if isinstance(self.A, (CSR_LinearOperator, csr_matrix)):
+            if self.forwardSweep:
+                for i in range(self.num_rows):
+                    t = x[i]
+                    for j in range(self.A_indptr[i], self.A_indptr[i+1]):
+                        k = self.A_indices[j]
+                        if k >= i:
+                            break
+                        t -= self.A_data[j]*result[k]
+                    result[i] = self.omega*t/self.D[i]
+            else:
+                for i in range(self.num_rows-1, -1, -1):
+                    t = x[i]
+                    for j in range(self.A_indptr[i+1]-1, self.A_indptr[i]-1, -1):
+                        k = self.A_indices[j]
+                        if k <= i:
+                            break
+                        t -= self.A_data[j]*result[k]
+                    result[i] = self.omega*t/self.D[i]
+        elif isinstance(self.A, SSS_LinearOperator):
+            if self.forwardSweep:
+                for i in range(self.num_rows):
+                    t = x[i]
+                    for jj in range(self.A_indptr[i], self.A_indptr[i+1]):
+                        t -= self.A_data[jj]*result[self.A_indices[jj]]
+                    result[i] = self.omega*t/self.A_diagonal[i]
+            else:
+                result[:] = 0.
+                for i in range(self.num_rows-1, -1, -1):
+                    result[i] = (x[i]-self.omega*result[i])/self.A_diagonal[i]
+                    for jj in range(self.A_indptr[i], self.A_indptr[i+1]):
+                        result[self.A_indices[jj]] += self.A_data[jj]*result[i]
+        elif isinstance(self.A, TimeStepperLinearOperator):
+            Sindptr = self.A.S.indptr
+            Sindices = self.A.S.indices
+            Sdata = self.A.S.data
+            Mdata = self.A.M.data
+            facS = self.A.facS
+            facM = self.A.facM
+            if self.forwardSweep:
+                for i in range(self.num_rows):
+                    t = x[i]
+                    for j in range(Sindptr[i], Sindptr[i+1]):
+                        k = Sindices[j]
+                        if k >= i:
+                            break
+                        t -= (facS*Sdata[j]+facM*Mdata[j])*result[k]
+                    result[i] = self.omega*t/self.D[i]
+            else:
+                for i in range(self.num_rows-1, -1, -1):
+                    t = x[i]
+                    for j in range(Sindptr[i+1]-1, Sindptr[i]-1, -1):
+                        k = Sindices[j]
+                        if k <= i:
+                            break
+                        t -= (facS*Sdata[j]+facM*Mdata[j])*result[k]
+                    result[i] = self.omega*t/self.D[i]
+        else:
+            return -1
+        return 0
+
+    cdef void setPre(self):
+        self.forwardSweep = self.presmoother_forwardSweep
+
+    cdef void setPost(self):
+        self.forwardSweep = self.postsmoother_forwardSweep
+
+
+# Assumes that the indices of A are ordered
+cdef class sorSmoother(separableSmoother):
+    # Needs 2n temporary memory for residual and result of application
+    # of preconditioner
+    def __init__(self, A, D,
+                 dict params,
+                 temporaryMemory=None,
+                 temporaryMemory2=None,
+                 overlap=None):
+        defaults = {'presmootherSweep': 'forward',
+                    'postsmootherSweep': 'backward',
+                    'omega': 1.0}
+        defaults.update(params)
+        prec = sorPreconditioner(A, D,
+                                 defaults['omega'],
+                                 defaults['presmootherSweep'] == 'forward',
+                                 defaults['postsmootherSweep'] == 'forward')
+        super(sorSmoother, self).__init__(A, prec, params, temporaryMemory, overlap)
+
+
+######################################################################
+# SSOR preconditioner and smoother
+
+# FIX: Not sure this really works
+# Assumes that the indices of A are ordered
+cdef class ssorPreconditioner(preconditioner):
+    cdef:
+        REAL_t[::1] D
+        LinearOperator A
+        INDEX_t[::1] A_indptr, A_indices
+        REAL_t[::1] A_data
+        REAL_t[::1] temp
+        REAL_t omega
+
+    def __init__(self,
+                 LinearOperator A,
+                 REAL_t[::1] D,
+                 REAL_t omega):
+        preconditioner.__init__(self, D.shape[0], D.shape[0])
+        self.D = D
+        self.A = A
+        self.A_indptr = A.indptr
+        self.A_indices = A.indices
+        self.A_data = A.data
+        self.omega = omega
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef INDEX_t matvec(self,
+                        REAL_t[::1] x,
+                        REAL_t[::1] result) except -1:
+        cdef:
+            INDEX_t i, j, k
+            REAL_t t
+
+        if isinstance(self.A, (CSR_LinearOperator, csr_matrix)):
+            result[:] = 0.0
+            # forward sweep
+            # x -> result
+            for i in range(self.num_rows):
+                t = x[i]
+                for j in range(self.A_indptr[i], self.A_indptr[i+1]):
+                    k = self.A_indices[j]
+                    if k >= i:
+                        break
+                    t -= self.A_data[j]*result[k]
+                result[i] = self.omega*t/self.D[i]
+            # D/omega
+            for i in range(self.num_rows):
+                result[i] *= self.D[i]/self.omega
+            # backward sweep
+            # result -> result
+            for i in range(self.num_rows-1, -1, -1):
+                t = result[i]
+                for j in range(self.A_indptr[i+1]-1, self.A_indptr[i]-1, -1):
+                    k = self.A_indices[j]
+                    if k <= i:
+                        break
+                    t -= self.A_data[j]*result[k]
+                result[i] = self.omega*t/self.D[i]
+            # correction
+            for i in range(self.num_rows):
+                result[i] *= (2-self.omega)
+        else:
+            return -1
+        return 0
+
+
+cdef class ssorSmoother(separableSmoother):
+    # Needs 2n temporary memory for residual and result of application
+    # of preconditioner
+    def __init__(self, A, D,
+                 dict params,
+                 temporaryMemory=None,
+                 overlap=None):
+        defaults = {'omega': 1.0}
+        defaults.update(params)
+        preconditioner = ssorPreconditioner(A, D, defaults['omega'])
+        super(ssorSmoother, self).__init__(A, preconditioner, params, temporaryMemory, overlap)
+
+
+######################################################################
+# Gauss-Seidel smoother
+
+cdef class gaussSeidelSmoother(smoother):
+    cdef:
+        public REAL_t[::1] D
+        BOOL_t presmoother_forwardSweep, postsmoother_forwardSweep
+        INDEX_t presmoothingSteps, postsmoothingSteps
+        sorPreconditioner prec
+        REAL_t[::1] temporaryMemory, temporaryMemory2
+        INDEX_t[::1] boundaryDofs
+
+    def __init__(self, A,
+                 REAL_t[::1] D,
+                 dict params,
+                 temporaryMemory=None,
+                 temporaryMemory2=None,
+                 overlap=None):
+        defaults = {'presmootherSweep': 'forward',
+                    'postsmootherSweep': 'backward',
+                    'presmoothingSteps': 1,
+                    'postsmoothingSteps': 1}
+        defaults.update(params)
+        super(gaussSeidelSmoother, self).__init__(A)
+        self.overlap = overlap
+        if isinstance(A, (SSS_LinearOperator, TimeStepperLinearOperator)) or overlap is not None:
+            if overlap:
+                self.boundaryDofs = self.overlap.Didx
+            self.prec = sorPreconditioner(A, D, 1.,
+                                          defaults['presmootherSweep'] == 'forward',
+                                          defaults['postsmootherSweep'] == 'forward')
+            if temporaryMemory is not None:
+                self.temporaryMemory = temporaryMemory
+            else:
+                LOGGER.debug(('Allocating temporary memory for ' +
+                              'Gauss-Seidel smoother ({} elements)').format(D.shape[0]))
+                self.temporaryMemory = uninitialized((D.shape[0]), dtype=REAL)
+            if temporaryMemory2 is not None:
+                self.temporaryMemory2 = temporaryMemory2
+            else:
+                LOGGER.debug(('Allocating temporary memory for ' +
+                              'Gauss-Seidel smoother ({} elements)').format(D.shape[0]))
+                self.temporaryMemory2 = uninitialized((D.shape[0]), dtype=REAL)
+        self.setD(D)
+        self.presmoothingSteps = defaults['presmoothingSteps']
+        self.postsmoothingSteps = defaults['postsmoothingSteps']
+        self.presmoother_forwardSweep = (defaults['presmootherSweep'] ==
+                                         'forward')
+        self.postsmoother_forwardSweep = (defaults['postsmootherSweep'] ==
+                                          'forward')
+
+    def setD(self, REAL_t[::1] D):
+        self.D = D
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef void eval(self,
+                   REAL_t[::1] rhs,
+                   REAL_t[::1] y,
+                   BOOL_t postsmoother,
+                   BOOL_t simpleResidual=False):
+        # simpleResidual is ignored, because GS uses updated values
+        cdef:
+            INDEX_t num_rows = self.A.shape[0]
+            INDEX_t[::1] A_indptr, A_indices
+            REAL_t[::1] A_data
+            INDEX_t i, j, steps, k, lv
+            REAL_t t
+            BOOL_t sweep
+
+        if postsmoother:
+            steps = self.postsmoothingSteps
+            sweep = self.postsmoother_forwardSweep
+        else:
+            steps = self.presmoothingSteps
+            sweep = self.presmoother_forwardSweep
+        if isinstance(self.A, (CSR_LinearOperator, csr_matrix)) and self.overlap is None:
+            A_indices = self.A.indices
+            A_indptr = self.A.indptr
+            A_data = self.A.data
+            if sweep:
+                for k in range(steps):
+                    for i in range(num_rows):
+                        t = rhs[i]
+                        for j in range(A_indptr[i], A_indptr[i+1]):
+                            t -= A_data[j]*y[A_indices[j]]
+                        t += self.D[i]*y[i]
+                        y[i] = t/self.D[i]
+            else:
+                for k in range(steps):
+                    for i in range(num_rows-1, -1, -1):
+                        t = rhs[i]
+                        for j in range(A_indptr[i], A_indptr[i+1]):
+                            t -= A_data[j]*y[A_indices[j]]
+                        t += self.D[i]*y[i]
+                        y[i] = t/self.D[i]
+        elif isinstance(self.A, (SSS_LinearOperator, TimeStepperLinearOperator)) or self.overlap is not None:
+            for k in range(steps):
+                self.A.residual(y, rhs, self.temporaryMemory)
+                if self.overlap:
+                    self.overlap.accumulate(self.temporaryMemory)
+                if postsmoother:
+                    self.prec.setPost()
+                else:
+                    self.prec.setPre()
+                self.prec(self.temporaryMemory, self.temporaryMemory2)
+                # FIX: This is not quite correct
+                # I should do Jacobi on crosspoints of subdomains,
+                # then SOR on boundaries,
+                # and then SOR in the interior.
+                # Or at least Jacobi on crosspoints and boundary,
+                # and then SOR in the interior.
+                # Also, this is not very efficient.
+                # Also, this does not take the sweep into account.
+                if self.overlap:
+                    # perform Jacobi on boundary dofs
+                    for lv in self.boundaryDofs:
+                        self.temporaryMemory2[lv] = self.temporaryMemory[lv]/self.prec.D[lv]
+                update(y, self.temporaryMemory2)
+        else:
+            raise NotImplementedError()
+
+    def __repr__(self):
+        return 'Gauss-Seidel ({} {} / {} {} sweeps)'.format(self.presmoothingSteps,
+                                                            'forward' if self.presmoother_forwardSweep else 'backward',
+                                                            self.postsmoothingSteps,
+                                                            'forward' if self.postsmoother_forwardSweep else 'backward',)
+
+
+cdef class iluPreconditioner(preconditioner):
+    def __init__(self,
+                 LinearOperator A,
+                 **kwargs):
+        preconditioner.__init__(self, A.shape[0], A.shape[0])
+        self.A = A
+        ILUS = ILU_solver(self.A.num_rows)
+        if 'fill_factor' in kwargs:
+            fill_factor = kwargs['fill_factor']
+        else:
+            fill_factor = 1.0
+        ILUS.setup(self.A, fill_factor=fill_factor)
+        self.preconditioner = ILUS.asPreconditioner()
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self, REAL_t[::1] x, REAL_t[::1] y) except -1:
+        self.preconditioner(x, y)
+        return 0
+
+
+cdef class iluSmoother(separableSmoother):
+    def __init__(self,
+                 LinearOperator A,
+                 dict params,
+                 np.ndarray[REAL_t, ndim=1] temporaryMemory=None,
+                 overlap=None):
+        defaults = {'fill_factor': 1.0}
+        defaults.update(params)
+        preconditioner = iluPreconditioner(A, **defaults)
+        super(iluSmoother, self).__init__(A, preconditioner, params, temporaryMemory, overlap)
+
+
+cdef class flexibleSmoother(separableSmoother):
+    def __init__(self,
+                 LinearOperator A,
+                 dict params,
+                 overlap=None):
+        preconditioner = params['prec']
+        super(flexibleSmoother, self).__init__(A, preconditioner, params, None, overlap)
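The CSR branch of gaussSeidelSmoother.eval above relaxes each unknown in place, immediately reusing already-updated neighbor values, which is why simpleResidual is ignored there. A dense pure-Python sketch of one such sweep, for illustration only (A, b, y are hypothetical NumPy arrays):

    import numpy as np

    def gauss_seidel_sweep(A, b, y, forward=True):
        # One Gauss-Seidel sweep; the direction mirrors the
        # forward/backward branches in gaussSeidelSmoother.eval.
        n = A.shape[0]
        rows = range(n) if forward else range(n - 1, -1, -1)
        for i in rows:
            # Same update as t = b[i] - A[i, :] @ y + D[i]*y[i]; y[i] = t/D[i].
            y[i] += (b[i] - A[i, :] @ y) / A[i, i]
        return y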
diff --git a/multilevelSolver/PyNucleus_multilevelSolver/smoothers_decl_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/smoothers_decl_{SCALAR}.pxi
new file mode 100644
index 0000000..80a4d79
--- /dev/null
+++ b/multilevelSolver/PyNucleus_multilevelSolver/smoothers_decl_{SCALAR}.pxi
@@ -0,0 +1,65 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from PyNucleus_base.solvers cimport solver, complex_solver
+
+
+cdef class {SCALAR_label}smoother:
+    cdef:
+        {SCALAR_label}LinearOperator _A
+        public algebraicOverlapManager overlap
+    cdef void eval(self,
+                   {SCALAR}_t[::1] b,
+                   {SCALAR}_t[::1] y,
+                   BOOL_t postsmoother,
+                   BOOL_t simpleResidual=*)
+
+
+cdef class {SCALAR_label}preconditioner({SCALAR_label}LinearOperator):
+    cdef void setPre(self)
+    cdef void setPost(self)
+
+
+cdef class {SCALAR_label}separableSmoother({SCALAR_label}smoother):
+    cdef:
+        public {SCALAR_label}preconditioner prec
+        INDEX_t presmoothingSteps, postsmoothingSteps
+        {SCALAR_label}LinearOperator _accA
+        public {SCALAR}_t[::1] temporaryMemory
+        public {SCALAR}_t[::1] temporaryMemory2
+
+
+cdef class {SCALAR_label}jacobiPreconditioner({SCALAR_label}preconditioner):
+    cdef:
+        {SCALAR}_t[::1] invD
+        public {SCALAR}_t omega
+    cdef INDEX_t matvec(self, {SCALAR}_t[::1] x, {SCALAR}_t[::1] y) except -1
+
+
+cdef class {SCALAR_label}jacobiSmoother({SCALAR_label}separableSmoother):
+    pass
+
+
+cdef class {SCALAR_label}blockJacobiPreconditioner({SCALAR_label}preconditioner):
+    cdef:
+        {SCALAR_label_lc_}solver invD
+        public {SCALAR}_t omega
+    cdef INDEX_t matvec(self, {SCALAR}_t[::1] x, {SCALAR}_t[::1] y) except -1
+
+
+cdef class {SCALAR_label}blockJacobiSmoother({SCALAR_label}separableSmoother):
+    pass
+
+
+cdef class {SCALAR_label}gmresSmoother({SCALAR_label}smoother):
+    cdef:
+        public {SCALAR_label_lc_}solver solver
+        INDEX_t presmoothingSteps, postsmoothingSteps
+        {SCALAR_label}LinearOperator _accA
+        public {SCALAR}_t[::1] temporaryMemory
+        public {SCALAR}_t[::1] temporaryMemory2
diff --git a/multilevelSolver/PyNucleus_multilevelSolver/smoothers_{SCALAR}.pxi b/multilevelSolver/PyNucleus_multilevelSolver/smoothers_{SCALAR}.pxi
new file mode 100644
index 0000000..cfb4e7a
--- /dev/null
+++ b/multilevelSolver/PyNucleus_multilevelSolver/smoothers_{SCALAR}.pxi
@@ -0,0 +1,288 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cdef class {SCALAR_label}smoother:
+    def __init__(self, {SCALAR_label}LinearOperator A):
+        self._A = A
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def __call__(self,
+                 {SCALAR}_t[::1] b,
+                 {SCALAR}_t[::1] y,
+                 BOOL_t postsmoother,
+                 BOOL_t simpleResidual=False):
+        self.eval(b, y, postsmoother, simpleResidual)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void eval(self,
+                   {SCALAR}_t[::1] b,
+                   {SCALAR}_t[::1] y,
+                   BOOL_t postsmoother,
+                   BOOL_t simpleResidual=False):
+        raise NotImplementedError()
+
+    @property
+    def A(self):
+        return self._A
+
+    @A.setter
+    def A(self, {SCALAR_label}LinearOperator A):
+        self._A = A
+
+
+cdef class {SCALAR_label}preconditioner({SCALAR_label}LinearOperator):
+    def __init__(self, INDEX_t numRows, INDEX_t numColumns):
+        {SCALAR_label}LinearOperator.__init__(self, numRows, numColumns)
+
+    cdef void setPre(self):
+        pass
+
+    cdef void setPost(self):
+        pass
+
+
+cdef class {SCALAR_label}separableSmoother({SCALAR_label}smoother):
+    def __init__(self,
+                 {SCALAR_label}LinearOperator A,
+                 {SCALAR_label}preconditioner P,
+                 dict params,
+                 np.ndarray[{SCALAR}_t, ndim=1] temporaryMemory=None,
+                 algebraicOverlapManager overlap=None):
+        defaults = {'presmoothingSteps': 1,
+                    'postsmoothingSteps': 1}
+        defaults.update(params)
+        {SCALAR_label}smoother.__init__(self, A)
+        self.overlap = overlap
+        self.prec = P
+        self.A = A
+        if temporaryMemory is not None:
+            self.temporaryMemory = temporaryMemory
+        else:
+            LOGGER.debug(('Allocating temporary memory for ' +
+                          'smoother ({} elements)').format(A.shape[0]))
+            self.temporaryMemory = uninitialized((A.shape[0]), dtype={SCALAR})
+        self.temporaryMemory2 = uninitialized((A.shape[0]), dtype={SCALAR})
+        self.presmoothingSteps = defaults['presmoothingSteps']
+        self.postsmoothingSteps = defaults['postsmoothingSteps']
+
+    @property
+    def A(self):
+        return self._A
+
+    @A.setter
+    def A(self, {SCALAR_label}LinearOperator A):
+        self._A = A
+        if self.overlap is not None:
+            if isinstance(A, {SCALAR_label}CSR_LinearOperator):
+                self._accA = {SCALAR_label}CSR_DistributedLinearOperator(A, self.overlap, doDistribute=False, keepDistributedResult=False)
+            else:
+                self._accA = {SCALAR_label}DistributedLinearOperator(A, self.overlap, doDistribute=False, keepDistributedResult=False)
+        else:
+            self._accA = A
+
+    def setD(self, {SCALAR}_t[::1] D):
+        self.prec.setD(D)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void eval(self,
+                   {SCALAR}_t[::1] b,
+                   {SCALAR}_t[::1] y,
+                   BOOL_t postsmoother,
+                   BOOL_t simpleResidual=False):
+        cdef:
+            INDEX_t k, steps
+            {SCALAR}_t[::1] temp_mem = self.temporaryMemory
+            {SCALAR}_t[::1] temp_mem2 = self.temporaryMemory2
+        if postsmoother:
+            steps = self.postsmoothingSteps
+            self.prec.setPost()
+        else:
+            steps = self.presmoothingSteps
+            self.prec.setPre()
+        for k in range(steps):
+            # In a distributed setup, b is distributed, x is accumulated,
+            # and the solution in x stays accumulated.
+            # Residual is distributed.
+            # prec*residual is distributed.
+            self._accA.residual(y, b, temp_mem, simpleResidual=simpleResidual)
+            self.prec.matvec(temp_mem, temp_mem2)
+            simpleResidual = False
+            update(y, temp_mem2)
+
+
+######################################################################
+# Jacobi preconditioner and smoother
+
+cdef class {SCALAR_label}jacobiPreconditioner({SCALAR_label}preconditioner):
+    def __init__(self, {SCALAR}_t[::1] D, {SCALAR}_t omega):
+        {SCALAR_label}preconditioner.__init__(self, D.shape[0], D.shape[0])
+        self.omega = omega
+        self.setD(D)
+
+    def setD(self, {SCALAR}_t[::1] D):
+        self.invD = self.omega/np.array(D, copy=False)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self, {SCALAR}_t[::1] x, {SCALAR}_t[::1] y) except -1:
+        cdef:
+            INDEX_t i
+        for i in range(self.num_rows):
+            y[i] = x[i]*self.invD[i]
+        return 0
+
+
+cdef class {SCALAR_label}jacobiSmoother({SCALAR_label}separableSmoother):
+    # Needs n temporary memory for residual
+    def __init__(self,
+                 {SCALAR_label}LinearOperator A,
+                 {SCALAR}_t[::1] D,
+                 dict params,
+                 np.ndarray[{SCALAR}_t, ndim=1] temporaryMemory=None,
+                 overlap=None):
+        defaults = {'omega': 2.0/3.0}
+        defaults.update(params)
+        preconditioner = {SCALAR_label}jacobiPreconditioner(D, defaults['omega'])
+        {SCALAR_label}separableSmoother.__init__(self, A, preconditioner, params, temporaryMemory, overlap)
+
+    def __repr__(self):
+        return 'Jacobi ({}/{} sweeps, {:.3} damping)'.format(self.presmoothingSteps, self.postsmoothingSteps, self.prec.omega)
+
+
+######################################################################
+# Block Jacobi preconditioner and smoother
+
+from PyNucleus_base.solvers cimport lu_solver, complex_lu_solver
+from PyNucleus_base.linear_operators cimport sparseGraph
+
+
+cdef class {SCALAR_label}blockJacobiPreconditioner({SCALAR_label}preconditioner):
+    def __init__(self, {SCALAR_label}LinearOperator A, sparseGraph blocks, {SCALAR}_t omega):
+        {SCALAR_label}preconditioner.__init__(self, A.num_rows, A.num_columns)
+        self.omega = omega
+        self.setD(A, blocks)
+
+    def setD(self, {SCALAR_label}LinearOperator A, sparseGraph blocks):
+        cdef:
+            {SCALAR_label}CSR_LinearOperator D
+        if isinstance(A, {SCALAR_label}CSR_LinearOperator):
+            D = A.getBlockDiagonal(blocks)
+        else:
+            D = A.to_csr_linear_operator().getBlockDiagonal(blocks)
+        D.scale(1./self.omega)
+        self.invD = {SCALAR_label_lc_}lu_solver(D)
+        self.invD.setup()
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef INDEX_t matvec(self, {SCALAR}_t[::1] x, {SCALAR}_t[::1] y) except -1:
+        self.invD.solve(x, y)
+        return 0
+
+
+cdef class {SCALAR_label}blockJacobiSmoother({SCALAR_label}separableSmoother):
+    # Needs n temporary memory for residual
+    def __init__(self,
+                 {SCALAR_label}LinearOperator A,
+                 dict params,
+                 np.ndarray[{SCALAR}_t, ndim=1] temporaryMemory=None,
+                 overlap=None):
+        defaults = {'omega': 2.0/3.0}
+        defaults.update(params)
+        preconditioner = {SCALAR_label}blockJacobiPreconditioner(A, defaults['blocks'], defaults['omega'])
+        {SCALAR_label}separableSmoother.__init__(self, A, preconditioner, params, temporaryMemory, overlap)
+
+    def __repr__(self):
+        return 'Block Jacobi ({}/{} sweeps, {:.3} damping)'.format(self.presmoothingSteps, self.postsmoothingSteps, self.prec.omega)
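jacobiPreconditioner above stores invD = omega/D, so each separableSmoother step computes y <- y + omega * D^{-1} (b - A y); the default damping omega = 2/3 is the usual choice for multigrid smoothing. A dense sketch under those assumptions (A, D, b, y are hypothetical NumPy arrays, not the library's operator classes):

    import numpy as np

    def damped_jacobi(A, D, b, y, omega=2.0/3.0, steps=1):
        # One or more damped Jacobi steps, as performed by jacobiSmoother;
        # blockJacobiSmoother replaces the pointwise division by D with
        # a block-diagonal solve.
        for _ in range(steps):
            y = y + omega * (b - A @ y) / D
        return y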
+
+######################################################################
+# GMRES smoother
+
+from PyNucleus_base.solvers cimport {SCALAR_label_lc_}gmres_solver
+from PyNucleus_base.linear_operators cimport {SCALAR_label}diagonalOperator
+
+
+cdef class {SCALAR_label}gmresSmoother({SCALAR_label}smoother):
+    def __init__(self,
+                 {SCALAR_label}LinearOperator A,
+                 {SCALAR}_t[::1] D,
+                 dict params,
+                 algebraicOverlapManager overlap=None):
+        defaults = {'presmoothingSteps': 10,
+                    'postsmoothingSteps': 10}
+        defaults.update(params)
+        {SCALAR_label}smoother.__init__(self, A)
+        self.solver = {SCALAR_label_lc_}gmres_solver(A)
+        self.solver.setPreconditioner({SCALAR_label}diagonalOperator(1./np.array(D, copy=False)))
+        self.solver.maxIter = defaults['presmoothingSteps']
+        self.solver.restarts = 1
+        self.solver.tolerance = 1e-12
+        if overlap:
+            self.solver.setOverlapNormInner(overlap)
+        self.solver.setup()
+        self.overlap = overlap
+        self.A = A
+        self.presmoothingSteps = defaults['presmoothingSteps']
+        self.postsmoothingSteps = defaults['postsmoothingSteps']
+        self.temporaryMemory = uninitialized((A.shape[0]), dtype={SCALAR})
+        self.temporaryMemory2 = uninitialized((A.shape[0]), dtype={SCALAR})
+
+    @property
+    def A(self):
+        return self._A
+
+    @A.setter
+    def A(self, {SCALAR_label}LinearOperator A):
+        self._A = A
+        if self.overlap is not None:
+            if isinstance(A, {SCALAR_label}CSR_LinearOperator):
+                self._accA = {SCALAR_label}CSR_DistributedLinearOperator(A, self.overlap, doDistribute=False, keepDistributedResult=False)
+            else:
+                self._accA = {SCALAR_label}DistributedLinearOperator(A, self.overlap, doDistribute=False, keepDistributedResult=False)
+        else:
+            self._accA = A
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef void eval(self,
+                   {SCALAR}_t[::1] b,
+                   {SCALAR}_t[::1] y,
+                   BOOL_t postsmoother,
+                   BOOL_t simpleResidual=False):
+        cdef:
+            INDEX_t k, steps
+            {SCALAR}_t[::1] temp_mem = self.temporaryMemory
+            {SCALAR}_t[::1] temp_mem2 = self.temporaryMemory2
+        if postsmoother:
+            steps = self.postsmoothingSteps
+            # self.prec.setPost()
+        else:
+            steps = self.presmoothingSteps
+            # self.prec.setPre()
+        for k in range(steps):
+            # In a distributed setup, b is distributed, x is accumulated,
+            # and the solution in x stays accumulated.
+            # Residual is distributed.
+            # prec*residual is distributed.
+            self._A.residual(y, b, temp_mem, simpleResidual=simpleResidual)
+            self.solver.solve(temp_mem, temp_mem2)
+            simpleResidual = False
+            update(y, temp_mem2)
+
+    def __repr__(self):
+        return str(self.solver)
diff --git a/multilevelSolver/setup.cfg b/multilevelSolver/setup.cfg
new file mode 100644
index 0000000..6b45262
--- /dev/null
+++ b/multilevelSolver/setup.cfg
@@ -0,0 +1,7 @@
+
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = PyNucleus_multilevelSolver/_version.py
+tag_prefix =
+parentdir_prefix =
\ No newline at end of file
diff --git a/multilevelSolver/setup.py b/multilevelSolver/setup.py
new file mode 100644
index 0000000..2ede0e6
--- /dev/null
+++ b/multilevelSolver/setup.py
@@ -0,0 +1,53 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia, #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the #
+# U.S. Government retains certain rights in this software. #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +from pathlib import Path +try: + from PyNucleus_base.setupUtils import package +except ImportError as e: + raise ImportError('\'PyNucleus_base\' needs to be installed first.') from e +from PyNucleus_packageTools import fillTemplate + + +p = package('PyNucleus_multilevelSolver') + +p.loadConfig() + +print('Generating templates') +templates = [ + 'smoothers_{SCALAR}.pxi', 'smoothers_decl_{SCALAR}.pxi', + 'coarseSolvers_{SCALAR}.pxi', 'coarseSolvers_decl_{SCALAR}.pxi', + 'multigrid_{SCALAR}.pxi', 'multigrid_decl_{SCALAR}.pxi' +] +replacementGroups = [[('{SCALAR}', 'REAL'), + ('{SCALAR_label}', ''), + ('{SCALAR_label_lc}', ''), + ('{SCALAR_label_lc_}', '')], + [('{SCALAR}', 'COMPLEX'), + ('{SCALAR_label}', 'Complex'), + ('{SCALAR_label_lc}', 'complex'), + ('{SCALAR_label_lc_}', 'complex_'), + # for some reason, complex cannot handle += etc + ('\s([^\s]+\[[^\]]*\])\s([\*\+-])=', ' \\1 = \\1 \\2'), + ('\s([^\s]+)\s([\*\+-])=', ' \\1 = \\1 \\2')]] +fillTemplate(Path(p.folder), templates, replacementGroups) + +p.addExtension("smoothers", + sources=[p.folder+"smoothers.pyx"]) +p.addExtension("restrictionProlongation", + sources=[p.folder+"restrictionProlongation.pyx"]) +p.addExtension("multigrid", + sources=[p.folder+"multigrid.pyx"]) + +p.addExtension("coarseSolvers", + sources=[p.folder+"coarseSolvers.pyx"]) + + +p.setup(description="An implementation of geometric multigrid", + install_requires=['cython', 'mpi4py>=2.0.0', 'numpy', 'scipy', + 'tabulate', 'PyNucleus_fem', 'PyNucleus_metisCy']) diff --git a/multilevelSolver/versioneer.py b/multilevelSolver/versioneer.py new file mode 100644 index 0000000..d9c300b --- /dev/null +++ b/multilevelSolver/versioneer.py @@ -0,0 +1,2116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +# Version: 0.21 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. 
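The replacementGroups passed to fillTemplate in the multilevelSolver setup.py above generate the REAL and COMPLEX variants of each {SCALAR} template: plain placeholders like {SCALAR} are substituted directly, and the trailing regex rules rewrite in-place updates such as `y[i] += x[i]` into `y[i] = y[i] + x[i]` for the COMPLEX build. A rough sketch of that mechanism; the actual fillTemplate implementation lives in PyNucleus_packageTools and is not part of this patch:

    import re

    def fill_template(text, replacements):
        # Entries whose pattern starts with '{' are treated as plain
        # placeholders; the rest are regex rules applied with re.sub
        # (mirroring the replacementGroups above; illustrative only).
        for pattern, repl in replacements:
            if pattern.startswith('{'):
                text = text.replace(pattern, repl)
            else:
                text = re.sub(pattern, repl, text)
        return text

    rules = [('{SCALAR}', 'COMPLEX'),
             (r'\s([^\s]+\[[^\]]*\])\s([\*\+-])=', r' \1 = \1 \2')]
    print(fill_template('    y[i] += x[i]*{SCALAR}(2)', rules))
    # -> '    y[i] = y[i] + x[i]*COMPLEX(2)'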
+ + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. 
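The `git describe --tags --dirty --always` output quoted above, e.g. "0.7-1-g574ab98-dirty", carries exactly the pieces that the version-string flavors below are rendered from. A small illustrative parser, not the vendored implementation:

    import re

    def parse_describe(desc):
        # Split TAG-DISTANCE-gHASH[-dirty] into its components.
        dirty = desc.endswith('-dirty')
        if dirty:
            desc = desc[:-len('-dirty')]
        m = re.match(r'^(.*)-(\d+)-g([0-9a-f]+)$', desc)
        if m is None:
            return None, None, desc, dirty  # tag-less checkout: bare hash
        tag, distance, sha = m.groups()
        return tag, int(distance), sha, dirty

    print(parse_describe('0.7-1-g574ab98-dirty'))  # ('0.7', 1, '574ab98', True)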
+ +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to make it easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ .
The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . + +[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg +[pypi-url]: https://pypi.python.org/pypi/versioneer/ +[travis-image]: +https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg +[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer + +""" +# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring +# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements +# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error +# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with +# pylint:disable=attribute-defined-outside-init,too-many-arguments + +import configparser +import errno +import json +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_root(): + """Get the project root directory. + + We require that all commands are run from the project root, i.e. the + directory that contains setup.py, setup.cfg, and versioneer.py . + """ + root = os.path.realpath(os.path.abspath(os.getcwd())) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + # allow 'python path/to/setup.py COMMAND' + root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0]))) + setup_py = os.path.join(root, "setup.py") + versioneer_py = os.path.join(root, "versioneer.py") + if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)): + err = ("Versioneer was unable to run the project root directory. " + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. 
So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. 
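+
+    Example (editor's note, values assumed): "1.2.post3" splits into
+    ("1.2", 3), while a version without a post-release segment, e.g.
+    "1.2", yields ("1.2", None).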
+ + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. 
+ The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
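+    # Editor's sketch of what the expanded keywords look like in a
+    # git-archive tarball (values assumed, not from a real archive):
+    #     git_refnames = " (HEAD -> master, tag: v1.0.0)"
+    #     git_full = "0123456789abcdef0123456789abcdef01234567"
+    #     git_date = "2021-10-12 11:08:00 -0600"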
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
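+            # Editor's note: the regex above expects the '<tag>-<num>-g<hex>'
+            # shape that 'git describe --long' produces; any other shape
+            # (e.g., presumably, an unusually named tag) lands here.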
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
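+    # Editor's summary of the fallback chain implemented below:
+    #   1. expanded git-archive keywords in _version.py
+    #   2. a _version.py previously generated into an sdist
+    #   3. 'git describe' on a checked-out source tree
+    #   4. the parent directory name (parentdir_prefix)
+    # with "0+unknown" returned if every method fails.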
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ? 
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
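+    # Editor's sketch: with a hypothetical versionfile_source of
+    # "src/myproject/_version.py", do_vcs_install ends up appending
+    #     src/myproject/_version.py export-subst
+    # to .gitattributes and staging the touched files via 'git add'.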
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/nl/.gitattributes b/nl/.gitattributes new file mode 100644 index 0000000..c466cf8 --- /dev/null +++ b/nl/.gitattributes @@ -0,0 +1,2 @@ + +PyNucleus_nl/_version.py export-subst diff --git a/nl/MANIFEST.in b/nl/MANIFEST.in new file mode 100644 index 0000000..50a8f40 --- /dev/null +++ b/nl/MANIFEST.in @@ -0,0 +1,3 @@ + +include versioneer.py +include PyNucleus_nl/_version.py diff --git a/nl/PyNucleus_nl/__init__.py b/nl/PyNucleus_nl/__init__.py new file mode 100644 index 0000000..f34fb8e --- /dev/null +++ b/nl/PyNucleus_nl/__init__.py @@ -0,0 +1,605 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus_base import REAL, INDEX +from PyNucleus_base.linear_operators import (LinearOperator, + diagonalOperator, + multiIntervalInterpolationOperator, + delayedConstructionOperator) +from . twoPointFunctions import constantTwoPoint +from . fractionalOrders import (constantFractionalLaplacianScaling, + variableFractionalLaplacianScaling, + constantIntegrableScaling, + constFractionalOrder, + variableFractionalOrder, + variableConstFractionalOrder, + leftRightFractionalOrder, + smoothedLeftRightFractionalOrder, + innerOuterFractionalOrder, + islandsFractionalOrder, + layersFractionalOrder) +from . kernels2 import (Kernel, + FractionalKernel, + getKernelEnum, + FRACTIONAL, INDICATOR, PERIDYNAMIC) +from . kernels import getKernel, getIntegrableKernel, getFractionalKernel +from . nonlocalLaplacian import (assembleFractionalLaplacian, + assembleNonlocalOperator, + + assembleFractionalLaplacianDiagonal, + nonlocalBuilder) +from . clusterMethodCy import H2Matrix +from . fractionalLaplacian1D import (fractionalLaplacian1D_P1, + fractionalLaplacian1D_P1_boundary) + +from . 
fractionalLaplacian2D import (fractionalLaplacian2D_P1, + fractionalLaplacian2D_P1_boundary) +from . nonlocalLaplacianND import (integrable1D, + integrable2D) + +from PyNucleus_fem import (DIRICHLET, HOMOGENEOUS_DIRICHLET, + NEUMANN, HOMOGENEOUS_NEUMANN, + NORM, boundaryConditions) +from . nonlocalProblems import (fractionalLaplacianProblem, + nonlocalProblem, + fractionalOrderFactory, + interactionFactory, + kernelFactory, + nonlocalMeshFactory) +from PyNucleus_fem import (P0_DoFMap, getSubmesh, + constant, Lambda) +from PyNucleus_fem.DoFMaps import (getSubMapRestrictionProlongation, + getSubMapRestrictionProlongation2) +from PyNucleus_multilevelSolver.levels import (algebraicLevelBase, + SPARSITY_PATTERN, + ASSEMBLY, + NO_BUILD) +from PyNucleus_multilevelSolver import hierarchyManager +from PyNucleus_multilevelSolver.connectors import (inputConnector, + repartitionConnector) +from pathlib import Path +import h5py +import logging +from PyNucleus_base import getLoggingTimer + +LOGGER = logging.getLogger(__name__) + + +class fractionalLevel(algebraicLevelBase): + def __init__(self, meshLevel, buildType): + self.A = None + self.S = None + self.M = None + super(fractionalLevel, self).__init__(meshLevel, buildType) + + def build(self, buildType): + super(fractionalLevel, self).build(buildType) + + # diffusivity = self.params['diffusivity'] + symmetric = self.params.get('symmetric', False) + reorder = self.params.get('reorder', False) + buildMass = self.params.get('buildMass', False) + + if buildType & SPARSITY_PATTERN and buildMass: + # set up sparsity patterns only + DoFMap = self.DoFMap + mesh = self.meshLevel.mesh + self.fullyAssembled = False + with self.Timer('Prepared sparsity patterns'): + self.M = DoFMap.buildSparsityPattern(mesh.cells, + symmetric=symmetric, + reorder=reorder) + + if buildType & ASSEMBLY: + # fully build matrices + DoFMap = self.DoFMap + mesh = self.meshLevel.mesh + self.fullyAssembled = True + with self.Timer('Assembled matrices'): + self.params.pop('mesh', None) + self.S = getFracLapl(mesh, DoFMap, **self.params) + self.A = self.S + # if not s.symmetric: + # from PyNucleus_base.linear_operators import Dense_LinearOperator + # self.A = Dense_LinearOperator(np.ascontiguousarray(self.A.toarray().T)) + if buildMass: + self.M = DoFMap.assembleMass(sss_format=symmetric, + reorder=reorder) + + def buildCoarserMatrices(self): + """ + Recursively build matrices on coarser levels + """ + if self.S is not None and self.P is not None and self.previousLevel.S is not None and not self.previousLevel.fullyAssembled: + assert self.P.shape[0] == self.S.shape[0], (self.P.shape[0], self.S.shape[0]) + assert self.P.shape[1] == self.previousLevel.S.shape[0] + with self.Timer('Restrict stiffness matrix'): + self.P.restrictMatrix(self.S, self.previousLevel.S) + if self.previousLevel.A is None: + self.previousLevel.A = self.previousLevel.S + if self.M is not None and self.P is not None and self.previousLevel.M is not None and not self.previousLevel.fullyAssembled: + assert self.P.shape[0] == self.M.shape[0] + assert self.P.shape[1] == self.previousLevel.M.shape[0] + with self.Timer('Restrict mass matrix'): + self.P.restrictMatrix(self.M, self.previousLevel.M) + if self.previousLevel is not None: + self.previousLevel.fullyAssembled = True + self.previousLevel.buildCoarserMatrices() + + @classmethod + def getKeys(cls): + return algebraicLevelBase.getKeys() + ['A', 'S', 'M'] + + +def paramsForFractionalHierarchy(noRef, global_params): + + noRefCoarse = global_params.get('noRefCoarse', 0) + + 
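+    # Sketch of how the returned (hierarchies, connectors) pair is meant to
+    # be consumed (this mirrors fractionalHierarchy() below):
+    #
+    #   hierarchies, connectors = paramsForFractionalHierarchy(noRef, global_params)
+    #   hM = hierarchyManager(hierarchies, connectors, global_params, comm=comm)
+    #   hM.setup()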
if noRefCoarse > 0: + hierarchies = [ + {'label': 'seed', + 'ranks': set([0]), + 'connectorStart': 'input', + 'connectorEnd': 'breakUp', + 'params': {'noRef': noRefCoarse, + 'assemble': 'dofmaps only'} + }, + {'label': 'fine', + 'ranks': set([0]), + 'connectorStart': 'breakUp', + 'connectorEnd': None, + 'params': {'noRef': noRef-noRefCoarse, + 'keepMeshes': global_params.get('keepMeshes', 'last'), + 'keepAllDoFMaps': global_params.get('keepAllDoFMaps', False), + 'assemble': global_params.get('assemble', 'ALL'), + 'solver': 'LU', + 'kernel': global_params.get('kernel', None), + 'genKernel': global_params.get('genKernel', False), + 'target_order': global_params.get('target_order', None), + 'rangedOpParams': global_params.get('rangedOpParams', {}), + 'cached': global_params.get('cached', False), + 'boundaryCondition': global_params.get('boundaryCondition', HOMOGENEOUS_DIRICHLET), + 'logging': global_params.get('logging', False) + } + }] + connectors = {} + connectors['input'] = {'type': inputConnector, + 'params': {'domain': global_params['domain'], + 'meshParams': global_params.get('meshParams', {}), + 'algebraicLevelType': fractionalLevel}} + connectors['breakUp'] = {'type': repartitionConnector, + 'params': {'partitionerType': global_params.get('coarsePartitioner', global_params.get('partitioner', 'regular')), + 'partitionerParams': global_params.get('coarsePartitionerParams', global_params.get('partitionerParams', {})), + 'debugOverlaps': global_params.get('debugOverlaps', False), + 'algebraicLevelType': fractionalLevel + }} + else: + hierarchies = [ + {'label': 'fine', + 'ranks': set([0]), + 'connectorStart': 'input', + 'connectorEnd': None, + 'params': {'noRef': noRef, + 'keepMeshes': global_params.get('keepMeshes', 'last'), + 'keepAllDoFMaps': global_params.get('keepAllDoFMaps', False), + 'assemble': global_params.get('assemble', 'ALL'), + 'solver': 'LU', + 'kernel': global_params.get('kernel', None), + 'genKernel': global_params.get('genKernel', False), + 'target_order': global_params.get('target_order', None), + 'rangedOpParams': global_params.get('rangedOpParams', {}), + 'cached': global_params.get('cached', False), + 'boundaryCondition': global_params.get('boundaryCondition', HOMOGENEOUS_DIRICHLET), + 'logging': global_params.get('logging', False) + } + }] + connectors = {} + connectors['input'] = {'type': inputConnector, + 'params': {'domain': global_params['domain'], + 'meshParams': global_params.get('meshParams', {}), + 'algebraicLevelType': fractionalLevel}} + + return hierarchies, connectors + + +def fractionalHierarchy(mesh, s, NoRef, tag=None, eta=3., + buildMass=False, dense=False, + driftCoeff=None, + keepMeshes='finest', + keepAllDoFMaps=False, + target_order=None, dataDir='DATA', + boundaryCondition=HOMOGENEOUS_DIRICHLET, + comm=None, + forceRebuild=False, + horizon=np.inf, + errorBound=None): + + global_params = {'domain': mesh, + 'kernel': getFractionalKernel(mesh.dim, s=s, horizon=np.inf), + 'horizon': horizon, + 'tag': tag, + 'boundaryCondition': boundaryCondition, + 'eta': eta, + 'buildMass': buildMass, + 'dense': dense, + 'driftCoeff': driftCoeff, + 'keepMeshes': keepMeshes, + 'keepAllDoFMaps': keepAllDoFMaps, + 'interpolationErrorBound': errorBound, + 'forceRebuild': forceRebuild} + hierarchies, connectors = paramsForFractionalHierarchy(NoRef, global_params) + hM = hierarchyManager(hierarchies, connectors, global_params, comm=comm) + hM.setup() + return hM + + + +def processBC(tag, boundaryCondition, kernel): + if tag is None: + if boundaryCondition == 
HOMOGENEOUS_DIRICHLET: + if kernel is not None: + if kernel.s.max < 0.5: + tag = -1 + else: + tag = 0 + zeroExterior = True + else: + tag = 0 + zeroExterior = -1 + elif boundaryCondition == HOMOGENEOUS_NEUMANN: + tag = -1 + zeroExterior = False + elif boundaryCondition == NORM: + tag = 0 + zeroExterior = kernel.s.max >= 0.5 + else: + raise NotImplementedError('{}, {}, {}'.format(tag, boundaryCondition, kernel)) + else: + if boundaryCondition == HOMOGENEOUS_DIRICHLET: + if kernel is not None: + zeroExterior = True + else: + raise NotImplementedError() + elif boundaryCondition == HOMOGENEOUS_NEUMANN: + zeroExterior = False + elif boundaryCondition == NORM: + zeroExterior = kernel.s.max >= 0.5 + else: + raise NotImplementedError('{}, {}, {}'.format(tag, boundaryCondition, kernel)) + + # variableOrder = isinstance(s, variableFractionalOrder) + # if tag is None: + # if boundaryCondition == 'Dirichlet': + # if isinstance(s, admissibleSet): + # tag = 0 + # zeroExterior = True + # elif (variableOrder and (s.max < 0.5)) or (not variableOrder and (s.value < 0.5)): + # tag = -1 + # zeroExterior = True + # else: + # tag = 0 + # zeroExterior = True + # elif boundaryCondition == 'Neumann': + # tag = -1 + # zeroExterior = False + # elif boundaryCondition == 'norm': + # tag = 0 + # zeroExterior = s >= 0.5 + # else: + # raise NotImplementedError() + # else: + # if boundaryCondition == 'Dirichlet': + # zeroExterior = True + # elif boundaryCondition == 'Neumann': + # zeroExterior = False + # elif boundaryCondition == 'norm': + # zeroExterior = s >= 0.5 + # else: + # raise NotImplementedError() + # if not ((horizon == np.inf) or + # (isinstance(horizon, constant) and horizon.value == np.inf) or + # (isinstance(horizon, admissibleSet) and horizon.getLowerBounds()[0] == np.inf)): + # if isinstance(horizon, admissibleSet): + # tag = 0 + # zeroExterior = True + # else: + # tag = -1 + # zeroExterior = False + return tag, zeroExterior + + +def getFracLapl(mesh, DoFMap, kernel=None, rangedOpParams={}, **kwargs): + + assert kernel is not None or 's' in rangedOpParams, (kernel, rangedOpParams) + + boundaryCondition = kwargs.get('boundaryCondition', 'Dirichlet') + tag = kwargs.get('tag', None) + zeroExterior = kwargs.get('zeroExterior', None) + dense = kwargs.get('dense', False) + diagonal = kwargs.get('diagonal', False) + cached = kwargs.get('cached', False) + trySparsification = kwargs.get('trySparsification', False) + logging = kwargs.get('logging', False) + timer = kwargs.get('timer', None) + + target_order = kwargs.get('target_order', None) + eta = kwargs.get('eta', 3.) 
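+    # When tag/zeroExterior are not supplied, processBC() above resolves them
+    # from the boundary condition; in summary (for tag=None and a given kernel):
+    #   HOMOGENEOUS_DIRICHLET: tag = -1 if kernel.s.max < 0.5 else 0, zeroExterior = True
+    #   HOMOGENEOUS_NEUMANN:   tag = -1, zeroExterior = False
+    #   NORM:                  tag = 0,  zeroExterior = (kernel.s.max >= 0.5)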
+ returnNearField = kwargs.get('returnNearField', False) + + comm = kwargs.get('assemblyComm', None) + + dataDir = kwargs.get('dataDir', 'DATA') + doSave = kwargs.get('doSave', False) + overrideFileName = kwargs.get('overrideFileName', None) + forceRebuild = kwargs.get('forceRebuild', False) + + if timer is None: + timer = getLoggingTimer(LOGGER, comm=comm, rootOutput=True) + kwargs['timer'] = timer + + if kernel is None: + raise NotImplementedError() + else: + horizon = kernel.horizon + scaling = kernel.scaling + normalized = not isinstance(scaling, constantTwoPoint) + + dataDir = Path(dataDir) + dataDir.mkdir(exist_ok=True, parents=True) + + if tag is None or zeroExterior is None: + tag, zeroExterior = processBC(tag, boundaryCondition, kernel) + + if overrideFileName is not None: + filename = overrideFileName + else: + base = mesh.vertices_as_array.min(axis=0) + if diagonal: + sparseDense = 'diagonal' + elif dense: + sparseDense = 'dense' + else: + sparseDense = 'sparse' + filename = dataDir/'{}-{}-{}-{:.5}-{}-{}-{}-{}-{}-{}-{:.5}-{:.5}-{}.hdf5'.format(sparseDense, base, mesh.dim, mesh.diam, mesh.num_vertices, mesh.num_cells, kernel, tag, target_order, eta, mesh.h, mesh.hmin, boundaryCondition) + A = None + Pnear = None + if ((isinstance(kernel, FractionalKernel) and (kernel.s.min == kernel.s.max == 1.)) or + (isinstance(horizon, constant) and (horizon.value == 0.))): + with timer('Sparse matrix'): + if kernel.phi is not None: + kappa = Lambda(lambda x: kernel.phi(x, x)) + else: + kappa = None + A = DoFMap.assembleStiffness(diffusivity=kappa) + elif isinstance(kernel, FractionalKernel) and (kernel.s.min == kernel.s.max == 0.): + with timer('Sparse matrix'): + A = DoFMap.assembleMass() + elif not forceRebuild and filename.exists(): + if comm is None or comm.rank == 0: + f = h5py.File(str(filename), 'r') + if f.attrs['type'] == 'h2': + A = H2Matrix.HDF5read(f) + else: + A = LinearOperator.HDF5read(f) + f.close() + else: + A = None + else: + params = {'target_order': target_order, + 'eta': eta, + 'forceUnsymmetric': kwargs.get('forceUnsymmetric', False)} + if 'genKernel' in kwargs: + params['genKernel'] = kwargs['genKernel'] + if kernel is None: + kernel = getFractionalKernel(mesh.dim, s, constant(horizon.ranges[0, 0]), scaling=scaling, normalized=normalized) + builder = nonlocalBuilder(mesh, DoFMap, kernel, params, zeroExterior=zeroExterior, comm=comm, logging=logging) + if diagonal: + with timer('Assemble diagonal matrix {}, zeroExterior={}'.format(kernel, zeroExterior)): + A = builder.getDiagonal() + elif dense: + with timer('Assemble dense matrix {}, zeroExterior={}'.format(kernel, zeroExterior)): + if cached: + A = builder.getDenseCached() + else: + A = builder.getDense(trySparsification=trySparsification) + else: + with timer('Assemble sparse matrix {}, zeroExterior={}'.format(kernel, zeroExterior)): + if isinstance(horizon, constant): + A, Pnear = builder.getH2(returnNearField=True) + else: + A = builder.getH2FiniteHorizon() + if doSave and (comm is None or (comm and comm.rank == 0)): + if hasattr(A, 'HDF5write'): + with timer('Saving'): + try: + f = h5py.File(str(filename), 'w') + A.HDF5write(f) + f.flush() + f.close() + except OSError as e: + LOGGER.warn('Unable to save to {}, reason: {}'.format(str(filename), e)) + # else: + # LOGGER.warn('Cannot save {}'.format(str(A))) + + if returnNearField: + return A, Pnear + else: + return A + + +class delayedFractionalLaplacianOp(delayedConstructionOperator): + def __init__(self, mesh, dm, kernel, *args, **kwargs): + 
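+        # Only record the inputs here; the actual call to getFracLapl() is
+        # deferred to construct() below, which the delayedConstructionOperator
+        # base class presumably triggers on first use.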
super().__init__(dm.num_dofs, + dm.num_dofs) + self.mesh = mesh + self.dm = dm + self.kernel = kernel + self.args = args + self.kwargs = kwargs + + def construct(self): + from copy import copy + d = copy(self.kwargs) + d.update(self.params) + A = getFracLapl(self.mesh, self.dm, self.kernel, + *self.args, **d) + return A + + + + +NONE = -10 +DIRICHLET_EXTERIOR = 0 +DIRICHLET_INTERIOR = 1 + + +class DirichletCondition: + def __init__(self, fullMesh, fullDoFMap, fullOp, domainIndicator, fluxIndicator): + # The mesh is partitioned into + # * 'domain' (domainIndicator > 0) + # * 'Neumann' (fluxIndicator > 0) + # * 'Dirichlet' (domainIndicator == 0 and fluxIndicator == 0) + # For computations, we keep domain and Neumann together as 'natural'. + + self.fullMesh = fullMesh + self.fullDoFMap = fullDoFMap + self.domainIndicator = domainIndicator + self.fluxIndicator = fluxIndicator + self.fullOp = fullOp + self.setup() + + def setup(self): + # from PyNucleus_fem import constant + + # dmIndicator = P0_DoFMap(self.fullMesh) + dirichletIndicator = constant(1.)-self.domainIndicator-self.fluxIndicator + # dirichletIndicatorVec = dmIndicator.interpolate(dirichletIndicator).toarray() + # naturalCells = np.flatnonzero(dirichletIndicatorVec < 1e-9).astype(INDEX) + + from PyNucleus_fem.splitting import meshSplitter, dofmapSplitter + from PyNucleus_fem.DoFMaps import getSubMapRestrictionProlongation + + split = dofmapSplitter(self.fullDoFMap, {'Dirichlet': dirichletIndicator}) + self.dirichletDoFMap = split.getSubMap('Dirichlet') + self.dirichletR, self.dirichletP = split.getRestrictionProlongation('Dirichlet') + self.naturalDoFMap = self.dirichletDoFMap.getComplementDoFMap() + self.naturalR, self.naturalP = getSubMapRestrictionProlongation(self.fullDoFMap, self.naturalDoFMap) + + # self.naturalMesh = getSubmesh(self.fullMesh, naturalCells) + # self.naturalMesh.replaceBoundaryVertexTags(lambda x: DIRICHLET_EXTERIOR if dirichletIndicator(x) >= 1e-9 else DIRICHLET_INTERIOR, + # set([DIRICHLET_EXTERIOR])) + # self.naturalMesh.replaceBoundaryEdgeTags(lambda x, y: DIRICHLET_EXTERIOR if dirichletIndicator(0.5*(np.array(x)+np.array(y))) >= 1e-9 else DIRICHLET_INTERIOR, + # set([DIRICHLET_EXTERIOR])) + + # self.naturalDoFMap = type(self.fullDoFMap)(self.fullMesh, self.domainIndicator+self.fluxIndicator) + # self.naturalR, self.naturalP = getSubMapRestrictionProlongation(self.fullDoFMap, self.naturalDoFMap) + + # self.dirichletDoFMap = type(self.fullDoFMap)(self.fullMesh, dirichletIndicator) + # self.dirichletR, self.dirichletP = getSubMapRestrictionProlongation(self.fullDoFMap, self.dirichletDoFMap) + + + # import matplotlib.pyplot as plt + # plt.figure() + # self.dirichletDoFMap.plot() + # # self.dirichletMesh.plot(info=True) + # plt.figure() + # self.naturalDoFMap.plot() + # # self.naturalMesh.plot(info=True) + # plt.show() + + assert self.fullDoFMap.num_dofs == self.naturalDoFMap.num_dofs+self.dirichletDoFMap.num_dofs, (self.fullDoFMap.num_dofs, self.naturalDoFMap.num_dofs, self.dirichletDoFMap.num_dofs) + + self.naturalA = self.naturalR*(self.fullOp*self.naturalP) + + self.domainDoFMap = type(self.fullDoFMap)(self.fullMesh, self.domainIndicator) + self.domainR, self.domainP = getSubMapRestrictionProlongation(self.fullDoFMap, self.domainDoFMap) + + def setDirichletData(self, dirichletData): + if self.dirichletDoFMap.num_dofs > 0: + self.dirichletVector = self.dirichletDoFMap.interpolate(dirichletData) + + def applyRHScorrection(self, b): + assert b.shape[0] == self.naturalDoFMap.num_dofs + if 
self.dirichletDoFMap.num_dofs > 0: + b -= self.naturalR*(self.fullOp*(self.dirichletP*self.dirichletVector)) + # b -= self.naturalR*(self.domainP*(self.domainR*(self.fullOp*(self.dirichletP*self.dirichletVector)))) + + def augmentDirichlet(self, u): + return self.naturalP*u + self.dirichletP*self.dirichletVector + + def plot(self): + if self.fullMesh.dim == 1: + x = self.dirichletP*self.dirichletDoFMap.ones() + 2*(self.naturalP*self.naturalDoFMap.ones()) + self.fullMesh.plotFunction(x) + else: + raise NotImplementedError() + + +class multilevelDirichletCondition(DirichletCondition): + def __init__(self, levels, domainIndicator, fluxIndicator): + super(multilevelDirichletCondition, self).__init__(levels[-1]['mesh'], + levels[-1]['DoFMap'], + levels[-1]['A'], + domainIndicator, + fluxIndicator) + self.levels = levels + self.setupHierarchy() + + def setupCoarseOps(self, mesh, dm): + from PyNucleus_fem import constant + + dmIndicator = P0_DoFMap(mesh) + dirichletIndicator = constant(1.)-self.domainIndicator-self.fluxIndicator + dirichletIndicatorVec = dmIndicator.interpolate(dirichletIndicator).toarray() + naturalCells = np.flatnonzero(dirichletIndicatorVec < 1e-9).astype(INDEX) + + naturalMesh = getSubmesh(mesh, naturalCells) + naturalMesh.replaceBoundaryVertexTags(lambda x: DIRICHLET_EXTERIOR if dirichletIndicator(x) >= 1e-9 else DIRICHLET_INTERIOR, + set([DIRICHLET_EXTERIOR])) + naturalMesh.replaceBoundaryEdgeTags(lambda x, y: DIRICHLET_EXTERIOR if dirichletIndicator(0.5*(np.array(x)+np.array(y))) >= 1e-9 else DIRICHLET_INTERIOR, + set([DIRICHLET_EXTERIOR])) + + naturalDoFMap = type(dm)(mesh, self.domainIndicator+self.fluxIndicator) + naturalR, naturalP = getSubMapRestrictionProlongation(dm, naturalDoFMap) + + return naturalMesh, naturalDoFMap, naturalR, naturalP + + def setupHierarchy(self): + levelsNew = [] + prevNaturalR, prevNaturalP = None, None + for lvl in range(len(self.levels)): + levelsNew.append({}) + naturalMesh, naturalDoFMap, naturalR, naturalP = self.setupCoarseOps(self.levels[lvl]['mesh'], + self.levels[lvl]['DoFMap']) + for key in self.levels[lvl]: + if key == 'A': + levelsNew[lvl][key] = naturalR*(self.levels[lvl][key]*naturalP) + levelsNew[lvl][key].diagonal = naturalR*self.levels[lvl][key].diagonal + elif key == 'S': + levelsNew[lvl][key] = naturalR*(self.levels[lvl][key]*naturalP) + levelsNew[lvl][key].diagonal = naturalR*self.levels[lvl][key].diagonal + elif key == 'M': + levelsNew[lvl][key] = naturalR*(self.levels[lvl][key]*naturalP) + levelsNew[lvl][key].diagonal = naturalR*self.levels[lvl][key].diagonal + elif key == 'R': + levelsNew[lvl][key] = (prevNaturalR*(self.levels[lvl][key]*naturalP)).to_csr_linear_operator() + elif key == 'P': + levelsNew[lvl][key] = (naturalR*(self.levels[lvl][key]*prevNaturalP)).to_csr_linear_operator() + elif key == 'DoFMap': + levelsNew[lvl][key] = naturalDoFMap + elif key == 'mesh': + levelsNew[lvl][key] = naturalMesh + else: + levelsNew[lvl][key] = self.levels[lvl][key] + levelsNew[lvl]['naturalR'] = naturalR + levelsNew[lvl]['naturalP'] = naturalP + prevNaturalR, prevNaturalP = naturalR, naturalP + self.naturalLevels = levelsNew + + diff --git a/nl/PyNucleus_nl/_version.py b/nl/PyNucleus_nl/_version.py new file mode 100644 index 0000000..30af9c9 --- /dev/null +++ b/nl/PyNucleus_nl/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_nl/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. 
+ + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". 
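+        # Worked example for this fallback (assumed old-git output, so no
+        # "tag: " prefixes): refs = {"HEAD", "master", "v1.0", "release"}
+        # -> the digit heuristic below keeps tags = {"v1.0"} and drops the
+        # branch-like names.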
+ tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. 
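+            # e.g., assuming 'git branch --contains' printed
+            #   ["* (HEAD detached at 1a2b3c)", "  feature", "  fix"],
+            # the detached-HEAD line was popped and the leading "* "/"  "
+            # stripped above, leaving ["feature", "fix"]; there is no
+            # "master", so branch_name = "feature".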
+ branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 
0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 
0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. 
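+        # e.g. with versionfile_source = "PyNucleus_nl/_version.py" and an
+        # assumed __file__ of "/src/nl/PyNucleus_nl/_version.py", the loop
+        # strips one path component per '/'-separated part: root = "/src/nl".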
+ for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/nl/PyNucleus_nl/adaptiveQuad.pxd b/nl/PyNucleus_nl/adaptiveQuad.pxd new file mode 100644 index 0000000..73ef3a4 --- /dev/null +++ b/nl/PyNucleus_nl/adaptiveQuad.pxd @@ -0,0 +1,64 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t +from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap +from PyNucleus_fem.quadrature cimport simplexQuadratureRule, simplexXiaoGimbutas +from . nonlocalLaplacianBase cimport (double_local_matrix_t, + nonlocalLaplacian1D, + nonlocalLaplacian2D, + panelType, + MASK_t, + specialQuadRule) +from . fractionalOrders cimport (fractionalOrderBase, + constFractionalOrder, + variableFractionalOrder) +from . kernels2 cimport (Kernel, + FractionalKernel) +from . nonlocalLaplacianBase cimport nonlocalLaplacian1D +from . nonlocalLaplacianBase cimport nonlocalLaplacian2D +from . kernelsCy cimport kernelCy + +cdef extern from "kernels.hpp": + cdef cppclass kernel_t: + kernel_t() + REAL_t eval(REAL_t *x, REAL_t *y) nogil + + +cdef class fractionalLaplacian1D_P1_automaticQuadrature(nonlocalLaplacian1D): + cdef: + kernelCy kernel_c + REAL_t abstol, reltol + void *user_ptr + object integrandId + object integrandVertex + object integrandDistant + + +cdef class fractionalLaplacian1D_P1_nonsymAutomaticQuadrature(nonlocalLaplacian1D): + cdef: + kernelCy kernel_c + REAL_t abstol, reltol + void *user_ptr + object integrandId + object integrandVertex1 + object integrandVertex2 + object integrandDistant + REAL_t[::1] temp3 + dict distantPHIx, distantPHIy + + +cdef class fractionalLaplacian2D_P1_automaticQuadrature(nonlocalLaplacian2D): + cdef: + kernelCy kernel_c + REAL_t abstol, reltol + void *user_ptr + object integrandId + object integrandEdge + object integrandVertex + object integrandDistant diff --git a/nl/PyNucleus_nl/adaptiveQuad.pyx b/nl/PyNucleus_nl/adaptiveQuad.pyx new file mode 100644 index 0000000..4ac3b7f --- /dev/null +++ b/nl/PyNucleus_nl/adaptiveQuad.pyx @@ -0,0 +1,1072 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
#
+###################################################################################
+
+
+from libc.math cimport (sqrt, log, ceil, fabs as abs, M_PI as pi, pow)
+import numpy as np
+cimport numpy as np
+cimport cython
+
+from PyNucleus_base.myTypes import INDEX, REAL
+from PyNucleus_base import uninitialized, uninitialized_like
+from PyNucleus_fem.meshCy cimport meshBase
+from PyNucleus_fem.quadrature cimport (simplexQuadratureRule,
+                                       doubleSimplexQuadratureRule,
+                                       GaussJacobi,
+                                       simplexXiaoGimbutas)
+from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap, P2_DoFMap, P0_DoFMap
+from PyNucleus_fem.functions cimport function
+from . nonlocalLaplacianBase import ALL
+from scipy.special import gamma
+from scipy.integrate import nquad
+from cpython.pycapsule cimport PyCapsule_New
+from scipy import LowLevelCallable
+from libc.stdlib cimport malloc
+include "panelTypes.pxi"
+
+cdef enum:
+    OFFSET = sizeof(void*)
+
+cdef enum:
+    NUM_INTEGRAND_PARAMS = 9
+
+
+cdef enum packType:
+    fDOF1
+    fDOF2 = OFFSET
+    fNR1 = 2*OFFSET
+    fNC1 = 3*OFFSET
+    fNR2 = 4*OFFSET
+    fNC2 = 5*OFFSET
+    fSIMPLEX1 = 6*OFFSET
+    fSIMPLEX2 = 7*OFFSET
+    fKERNEL = 8*OFFSET
+
+
+# Typed accessors for the packed integrand parameter block:
+# 'pos' is a byte offset into c_params (see packType above).
+cdef inline INDEX_t getINDEX(void *c_params, size_t pos):
+    return (<INDEX_t*>(c_params+pos))[0]
+
+cdef inline void setINDEX(void *c_params, size_t pos, INDEX_t val):
+    (<INDEX_t*>(c_params+pos))[0] = val
+
+cdef inline REAL_t getREAL(void *c_params, size_t pos):
+    return (<REAL_t*>(c_params+pos))[0]
+
+cdef inline void setREAL(void *c_params, size_t pos, REAL_t val):
+    (<REAL_t*>(c_params+pos))[0] = val
+
+cdef inline REAL_t* getREALArray(void *c_params, size_t pos):
+    return (<REAL_t**>(c_params+pos))[0]
+
+cdef inline void setREALArray(void *c_params, size_t pos, REAL_t[:, ::1] val):
+    (<REAL_t**>(c_params+pos))[0] = &val[0, 0]
+
+cdef inline kernel_t* getKernel(void *c_params, size_t pos):
+    return (<kernel_t**>(c_params+pos))[0]
+
+# cdef inline void setKernel_t(void *c_params, size_t pos, kernel_t *val):
+#     (<kernel_t**>(c_params+pos))[0] = val
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef REAL_t symIntegrandId1D(int n, REAL_t *xx, void *c_params):
+    cdef:
+        REAL_t* lx = xx
+        REAL_t* ly = xx+n//2
+        REAL_t l1x = lx[0]
+        REAL_t l1y = ly[0]
+        REAL_t l0x = 1.-l1x
+        REAL_t l0y = 1.-l1y
+        REAL_t x
+        REAL_t y
+        INDEX_t i = getINDEX(c_params, fDOF1)
+        INDEX_t j = getINDEX(c_params, fDOF2)
+        REAL_t *simplex1 = getREALArray(c_params, fSIMPLEX2)
+        # REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2)
+        REAL_t psi1, psi2
+        kernel_t *kernel = getKernel(c_params, fKERNEL)
+
+    if i == 0:
+        psi1 = l0x-l0y
+    else:
+        psi1 = l1x-l1y
+
+    if j == 0:
+        psi2 = l0x-l0y
+    else:
+        psi2 = l1x-l1y
+
+    x = l0x*simplex1[0]+l1x*simplex1[1]
+    y = l0y*simplex1[0]+l1y*simplex1[1]
+
+    return psi1 * psi2 * kernel.eval(&x, &y)
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.wraparound(False)
+cdef REAL_t symIntegrandVertex1D(int n, REAL_t *xx, void *c_params):
+    cdef:
+        REAL_t l1x = xx[0]
+        REAL_t l1y = xx[n//2]
+        REAL_t l0x = 1.-l1x
+        REAL_t l0y = 1.-l1y
+        REAL_t x
+        REAL_t y
+        INDEX_t i = getINDEX(c_params, fDOF1)
+        INDEX_t j = getINDEX(c_params, fDOF2)
+        # INDEX_t nr1 = getINDEX(c_params, fNR1)
+        # INDEX_t nc1 = getINDEX(c_params, fNC1)
+        # INDEX_t nr2 = getINDEX(c_params, fNR2)
+        # INDEX_t nc2 = getINDEX(c_params, fNC2)
+        REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1)
+        REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2)
+        REAL_t psi1, psi2
+        kernel_t *kernel = getKernel(c_params, fKERNEL)
+
+    if i == 0:
+        psi1 = l0x
+    elif (i == 1) or (i == 2):
+        psi1 = l1x-l0y
+    else:
+        psi1 = 
-l1y + + if j == 0: + psi2 = l0x + elif (j == 1) or (j == 2): + psi2 = l1x-l0y + else: + psi2 = -l1y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex2[0]+l1y*simplex2[1] + + return psi1 * psi2 * kernel.eval(&x, &y) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t symIntegrandDistant1D(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t l1x = xx[0] + REAL_t l1y = xx[n//2] + REAL_t l0x = 1.-l1x + REAL_t l0y = 1.-l1y + REAL_t x + REAL_t y + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t psi1, psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + psi1 = l0x + elif i == 1: + psi1 = l1x + elif i == 2: + psi1 = -l0y + else: + psi1 = -l1y + + if j == 0: + psi2 = l0x + elif j == 1: + psi2 = l1x + elif j == 2: + psi2 = -l0y + else: + psi2 = -l1y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex2[0]+l1y*simplex2[1] + + return psi1 * psi2 * kernel.eval(&x, &y) + + +cdef class fractionalLaplacian1D_P1_automaticQuadrature(nonlocalLaplacian1D): + def __init__(self, + Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + num_dofs=None, + abstol=1e-4, + reltol=1e-4, + target_order=None, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian1D_P1_automaticQuadrature, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + if target_order is None: + if isinstance(self.kernel, FractionalKernel): + smin, smax = self.kernel.s.min, self.kernel.s.max + # this is the desired local quadrature error + target_order = 2.-smin + else: + target_order = 5 + self.target_order = target_order + + self.user_ptr = malloc(NUM_INTEGRAND_PARAMS*OFFSET) + setINDEX(self.user_ptr, fNR1, 2) + setINDEX(self.user_ptr, fNC1, 1) + setINDEX(self.user_ptr, fNR2, 2) + setINDEX(self.user_ptr, fNC2, 1) + c_params = PyCapsule_New(self.user_ptr, NULL, NULL) + func_type = b"double (int, double *, void *)" + func_capsule_id = PyCapsule_New(symIntegrandId1D, func_type, NULL) + func_capsule_vertex = PyCapsule_New(symIntegrandVertex1D, func_type, NULL) + func_capsule_distant = PyCapsule_New(symIntegrandDistant1D, func_type, NULL) + self.integrandId = LowLevelCallable(func_capsule_id, c_params, func_type) + self.integrandVertex = LowLevelCallable(func_capsule_vertex, c_params, func_type) + self.integrandDistant = LowLevelCallable(func_capsule_distant, c_params, func_type) + self.abstol = abstol + self.reltol = reltol + self.kernel.c_kernel.setKernel(self.user_ptr, fKERNEL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + return DISTANT + + cdef void getNearQuadRule(self, panelType panel): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, t = 0 + REAL_t val, err, vol1 = self.vol1, vol2 = self.vol2 + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t horizon = self.kernel.getHorizonValue() + + setREALArray(self.user_ptr, fSIMPLEX1, 
simplex1) + setREALArray(self.user_ptr, fSIMPLEX2, simplex2) + + contrib[:] = 0. + + if panel == COMMON_EDGE: + for I in range(2): + for J in range(I, 2): + k = 4*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandId, + (lambda y: (0., y), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val + val, err = nquad(self.integrandId, + (lambda y: (y, 1.), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] += val + contrib[k] *= vol1*vol1 + elif panel == COMMON_VERTEX: + for i in range(2): + for j in range(2): + if simplex1[i, 0] == simplex2[j, 0]: + if (i == 1) and (j == 0): + t = 2 + break + elif (i == 0) and (j == 1): + t = 3 + break + else: + raise IndexError() + + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + i = 3*(I//t)+(I%t) + j = 3*(J//t)+(J%t) + if j < i: + i, j = j, i + k = 4*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, i) + setINDEX(self.user_ptr, fDOF2, j) + val, err = nquad(self.integrandVertex, + ((0., 1.), (0., 1.)), + opts={'epsabs': self.abstol, 'epsrel': self.reltol}) + contrib[k] = val*vol1*vol2 + elif panel == DISTANT: + k = 0 + for I in range(4): + for J in range(I, 4): + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandDistant, + ((0., 1.), (0., 1.)), + opts=[lambda y: {'points': [((1-y)*simplex2[0, 0]+y*simplex2[1, 0]-horizon-simplex1[0, 0])/(simplex1[1, 0]-simplex1[0, 0]), + ((1-y)*simplex2[0, 0]+y*simplex2[1, 0]+horizon-simplex1[0, 0])/(simplex1[1, 0]-simplex1[0, 0])], + 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val*vol1*vol2 + k += 1 + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t nonsymIntegrandId(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t l1x = xx[0] + REAL_t l1y = xx[1] + REAL_t l0x = 1.-l1x + REAL_t l0y = 1.-l1y + REAL_t x + REAL_t y + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + # REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t phi1x, phi1y + REAL_t psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + phi1x = l0x + phi1y = l0y + else: + phi1x = l1x + phi1y = l1y + + if j == 0: + psi2 = l0x-l0y + else: + psi2 = l1x-l1y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex1[0]+l1y*simplex1[1] + + return (phi1x * kernel.eval(&x, &y) - phi1y * kernel.eval(&y, &x)) * psi2 + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef REAL_t nonsymIntegrandVertex1(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t l1x = xx[0] + REAL_t l1y = xx[1] + REAL_t l0x = 1.-l1x + REAL_t l0y = 1.-l1y + REAL_t x + REAL_t y + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = 
getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t phi1x, phi1y + REAL_t psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + phi1x = l0x + phi1y = 0. + elif (i == 1) or (i == 2): + phi1x = l1x + phi1y = l0y + else: + phi1x = 0. + phi1y = l1y + + if j == 0: + psi2 = l0x + elif (j == 1) or (j == 2): + psi2 = l1x - l0y + else: + psi2 = -l1y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex2[0]+l1y*simplex2[1] + + return (phi1x * kernel.eval(&x, &y) - phi1y * kernel.eval(&y, &x)) * psi2 + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef REAL_t nonsymIntegrandVertex2(int n, REAL_t *xx, void *c_params): + assert n == 2 + cdef: + REAL_t l1x = xx[0] + REAL_t l1y = xx[1] + REAL_t l0x = 1.-l1x + REAL_t l0y = 1.-l1y + REAL_t x + REAL_t y + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t phi1x, phi1y + REAL_t psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if (i == 0) or (i == 3): + phi1x = l0x + phi1y = l1y + elif i == 1: + phi1x = l1x + phi1y = 0. + else: + phi1x = 0. + phi1y = l0y + + if (j == 0) or (j == 3): + psi2 = l0x-l1y + elif j == 1: + psi2 = l1x + else: + psi2 = -l0y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex2[0]+l1y*simplex2[1] + + return (phi1x * kernel.eval(&x, &y) - phi1y * kernel.eval(&y, &x)) * psi2 + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef REAL_t nonsymIntegrandDistant(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t l1x = xx[0] + REAL_t l1y = xx[1] + REAL_t l0x = 1.-l1x + REAL_t l0y = 1.-l1y + REAL_t x + REAL_t y + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t phi1x, phi1y + REAL_t psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + phi1x = l0x + phi1y = 0. + elif i == 1: + phi1x = l1x + phi1y = 0. + elif i == 2: + phi1x = 0. + phi1y = l0y + else: + phi1x = 0. + phi1y = l1y + + if j == 0: + psi2 = l0x + elif j == 1: + psi2 = l1x + elif j == 2: + psi2 = -l0y + else: + psi2 = -l1y + + x = l0x*simplex1[0]+l1x*simplex1[1] + y = l0y*simplex2[0]+l1y*simplex2[1] + + return (phi1x * kernel.eval(&x, &y) - phi1y * kernel.eval(&y, &x)) * psi2 + + +cdef class fractionalLaplacian1D_P1_nonsymAutomaticQuadrature(nonlocalLaplacian1D): + """ + This implements the operator + + \int_{R} (u(x)-u(y)) * k(x,y) + + for unsymmetric k(x,y). 
+ + The adjoint of this operator is + + \int_{R} (u(x) * k(x,y) - u(y) * k(y,x)) + + """ + + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + num_dofs=None, + abstol=1e-4, + reltol=1e-4, + target_order=None, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian1D_P1_nonsymAutomaticQuadrature, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + smin, smax = self.kernel.s.min, self.kernel.s.max + if target_order is None: + # this is the desired local quadrature error + target_order = 2.-smin + self.target_order = target_order + + self.user_ptr = malloc(NUM_INTEGRAND_PARAMS*OFFSET) + setINDEX(self.user_ptr, fNR1, 2) + setINDEX(self.user_ptr, fNC1, 1) + setINDEX(self.user_ptr, fNR2, 2) + setINDEX(self.user_ptr, fNC2, 1) + c_params = PyCapsule_New(self.user_ptr, NULL, NULL) + func_type = b"double (int, double *, void *)" + func_capsule_id = PyCapsule_New(nonsymIntegrandId, func_type, NULL) + func_capsule_vertex1 = PyCapsule_New(nonsymIntegrandVertex1, func_type, NULL) + func_capsule_vertex2 = PyCapsule_New(nonsymIntegrandVertex2, func_type, NULL) + func_capsule_distant = PyCapsule_New(nonsymIntegrandDistant, func_type, NULL) + self.integrandId = LowLevelCallable(func_capsule_id, c_params, func_type) + self.integrandVertex1 = LowLevelCallable(func_capsule_vertex1, c_params, func_type) + self.integrandVertex2 = LowLevelCallable(func_capsule_vertex2, c_params, func_type) + self.integrandDistant = LowLevelCallable(func_capsule_distant, c_params, func_type) + self.abstol = abstol + self.reltol = reltol + self.symmetricCells = False + self.symmetricLocalMatrix = False + self.kernel.c_kernel.setKernel(self.user_ptr, fKERNEL) + + self.x = uninitialized((0, self.dim), dtype=REAL) + self.y = uninitialized((0, self.dim), dtype=REAL) + self.temp = uninitialized((0), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.temp3 = uninitialized_like(self.temp) + self.distantPSI = {} + self.distantPHIx = {} + self.distantPHIy = {} + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t s = self.kernel.sValue + if d < 0.05: + return DISTANT + else: + panel = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h2/self.H0)) - 2.*s*logdh2) / + (max(logdh1, 0) + 0.8)), + 2) + panel2 = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h1/self.H0)) - 2.*s*logdh1) / + (max(logdh2, 0) + 0.8)), + 2) + panel = max(panel, panel2) + try: + self.distantQuadRules[panel] + except KeyError: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + qr = simplexXiaoGimbutas(panel, self.dim) + qr2 = doubleSimplexQuadratureRule(qr, qr) + self.distantQuadRules[panel] = qr2 + PHIx = np.zeros((2*self.DoFMap.dofs_per_element, + qr2.num_nodes), dtype=REAL) + PHIy = np.zeros((2*self.DoFMap.dofs_per_element, + qr2.num_nodes), dtype=REAL) + PSI = uninitialized((2*self.DoFMap.dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) for i = 0,1 + for I in 
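[Editor's note] For context on the PyCapsule/LowLevelCallable plumbing in the constructor above: scipy's nquad accepts a compiled callback with signature double (int, double *, void *). A self-contained sketch using a ctypes callback instead of a PyCapsule-wrapped cdef function (illustrative only; the user_data pointer is omitted here, and a Python-level callback gives none of the speed benefit):

    import ctypes
    from scipy import LowLevelCallable
    from scipy.integrate import nquad

    # signature double (int, double *): n is the number of variables,
    # xx points to the current quadrature point
    @ctypes.CFUNCTYPE(ctypes.c_double, ctypes.c_int, ctypes.POINTER(ctypes.c_double))
    def integrand(n, xx):
        return xx[0]*xx[1]

    val, err = nquad(LowLevelCallable(integrand), ((0., 1.), (0., 1.)))
    assert abs(val - 0.25) < 1e-10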
range(self.DoFMap.dofs_per_element): + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + PSI[I, k] = self.getLocalShapeFunction(I)(qr2.rule1.nodes[:, i]) + PHIx[I, k] = self.getLocalShapeFunction(I)(qr2.rule1.nodes[:, i]) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) for i = 2,3 + for I in range(self.DoFMap.dofs_per_element): + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + PSI[I+self.DoFMap.dofs_per_element, k] = -self.getLocalShapeFunction(I)(qr2.rule2.nodes[:, j]) + PHIy[I+self.DoFMap.dofs_per_element, k] = self.getLocalShapeFunction(I)(qr2.rule2.nodes[:, j]) + k += 1 + self.distantPSI[panel] = PSI + self.distantPHIx[panel] = PHIx + self.distantPHIy[panel] = PHIy + + if qr2.rule1.num_nodes > self.x.shape[0]: + self.x = uninitialized((qr2.rule1.num_nodes, self.dim), dtype=REAL) + if qr2.rule2.num_nodes > self.y.shape[0]: + self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) + if qr2.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr2.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.temp3 = uninitialized_like(self.temp) + + cdef void getNearQuadRule(self, panelType panel): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, t + REAL_t val, vol1 = self.vol1, vol2 = self.vol2 + set K1, K2 + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t[:, ::1] PSI + doubleSimplexQuadratureRule qr2 + kernel_t *kernel + REAL_t horizon = self.kernel.getHorizonValue() + + setREALArray(self.user_ptr, fSIMPLEX1, simplex1) + setREALArray(self.user_ptr, fSIMPLEX2, simplex2) + + contrib[:] = 0. 
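[Editor's note] The PSI/PHIx/PHIy tables built in addQuadRule above implement "extended" shape functions on an element pair: local index I acts as phi_I(x) on the first element and as -phi_{I-2}(y) on the second, so entry (I, J) of the local matrix is the double integral of k(x, y)*psi_I*psi_J. A plain-numpy sketch of the resulting tensor-product quadrature for a well-separated 1D pair; the element positions and the smooth stand-in kernel are assumptions for illustration:

    import numpy as np

    xg, wg = np.polynomial.legendre.leggauss(16)
    to_interval = lambda a, b: (0.5*(b-a)*xg + 0.5*(a+b), 0.5*(b-a)*wg)

    x, wx = to_interval(0., 1.)                      # element 1
    y, wy = to_interval(3., 4.)                      # element 2, well separated
    kern = 1./np.abs(x[:, None] - y[None, :])**2     # smooth stand-in kernel

    hat = lambda t, a, b: np.stack([(b-t)/(b-a), (t-a)/(b-a)])  # P1 shape functions
    # psi_I(x, y) = phi_I(x) for I = 0, 1 and -phi_{I-2}(y) for I = 2, 3
    psi_x = np.vstack([hat(x, 0., 1.), np.zeros((2, x.size))])
    psi_y = np.vstack([np.zeros((2, y.size)), -hat(y, 3., 4.)])

    contrib = np.zeros((4, 4))
    for I in range(4):
        for J in range(4):
            PI = psi_x[I][:, None] + psi_y[I][None, :]
            PJ = psi_x[J][:, None] + psi_y[J][None, :]
            contrib[I, J] = np.einsum('p,q,pq,pq,pq->', wx, wy, kern, PI, PJ)

    # constants lie in the kernel of u(x)-u(y): the rows sum to zero
    assert np.allclose(contrib @ np.ones(4), 0.)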
+ + if panel == COMMON_EDGE: + for I in range(2): + for J in range(2): + # k = 4*I-(I*(I+1) >> 1) + J + k = 4*I+J + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandId, + (lambda y: (0., y), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val + val, err = nquad(self.integrandId, + (lambda y: (y, 1.), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] += val + contrib[k] *= vol1*vol1 + elif panel == COMMON_VERTEX: + K1 = set() + K2 = set() + for i in range(2): + for j in range(2): + if simplex1[i, 0] == simplex2[j, 0]: + K1.add(i) + K2.add(j) + if K1 == set([1]) and K2 == set([0]): + t = 2 + elif K1 == set([0]) and K2 == set([1]): + t = 3 + else: + raise IndexError() + + # loop over all local DoFs + for I in range(3): + if (I == 2) and (t == 2): + I = 3 + for J in range(3): + if (J == 2) and (t == 2): + J = 3 + k = 4*I+J + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + if t == 2: + val, err = nquad(self.integrandVertex1, + ((0., 1.), (0., 1.)), + opts={'epsabs': self.abstol, 'epsrel': self.reltol}) + else: + val, err = nquad(self.integrandVertex2, + ((0., 1.), (0., 1.)), + opts={'epsabs': self.abstol, 'epsrel': self.reltol}) + contrib[k] = val*vol1*vol2 + elif panel == DISTANT: + vol = vol1*vol2 + k = 0 + for I in range(4): + for J in range(4): + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandDistant, + ((0., 1.), (0., 1.)), + opts=[lambda y: {'points': [((1-y)*simplex2[0, 0]+y*simplex2[1, 0]-horizon-simplex1[0, 0])/(simplex1[1, 0]-simplex1[0, 0]), + ((1-y)*simplex2[0, 0]+y*simplex2[1, 0]+horizon-simplex1[0, 0])/(simplex1[1, 0]-simplex1[0, 0])], + 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val*vol + k += 1 + elif panel >= 1: + kernel = getKernel(self.user_ptr, fKERNEL) + qr2 = self.distantQuadRules[panel] + PSI = self.distantPSI[panel] + PHIx = self.distantPHIx[panel] + PHIy = self.distantPHIy[panel] + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + self.temp[k] = kernel.eval(&self.x[i, 0], &self.y[j, 0]) + self.temp3[k] = kernel.eval(&self.y[j, 0], &self.x[i, 0]) + k += 1 + + # ( phi1x * kernel(x, y) - phi1y * kernel(y, x) ) * psi2 + + vol = (vol1 * vol2) + k = 0 + for I in range(4): + for J in range(4): + if mask & (1 << k): + for i in range(qr2.num_nodes): + self.temp2[i] = (PHIx[I, i]*self.temp[i] - PHIy[I, i]*self.temp3[i])*PSI[J, i] + contrib[k] = qr2.eval(self.temp2, vol) + k += 1 + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t symIntegrandId2D(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t* lx = xx + REAL_t* ly = xx+n//2 + REAL_t l1x = lx[0] + REAL_t l2x = lx[1] + REAL_t l1y = ly[0] + REAL_t l2y = ly[1] + REAL_t l0x = 1.-l1x-l2x + REAL_t l0y = 1.-l1y-l2y + REAL_t x[2] + REAL_t y[2] + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + REAL_t *simplex1 = 
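[Editor's note] The COMMON_EDGE branches above integrate over the unit square minus its diagonal by splitting into the triangles {x < y} and {x > y}, and flag the singular location to QUADPACK through nquad's 'points' option. A stand-alone scipy sketch of the same pattern with a model integrand (the actual kernel is not used here):

    from scipy.integrate import nquad

    def f(x, y):
        return abs(x - y)**(-0.5)       # integrable singularity on the diagonal

    # inner integral over x, outer over y; 'points' marks x = y as singular
    opts = [lambda y: {'points': [y]}, {}]
    lower, _ = nquad(f, (lambda y: (0., y), (0., 1.)), opts=opts)
    upper, _ = nquad(f, (lambda y: (y, 1.), (0., 1.)), opts=opts)
    assert abs(lower + upper - 8./3.) < 1e-6    # exact value for this model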
getREALArray(c_params, fSIMPLEX2) + # REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t psi1, psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + psi1 = l0x-l0y + elif i == 1: + psi1 = l1x-l1y + else: + psi1 = l2x-l2y + + if j == 0: + psi2 = l0x-l0y + elif j == 1: + psi2 = l1x-l1y + else: + psi2 = l2x-l2y + + x[0] = l0x*simplex1[0]+l1x*simplex1[2]+l2x*simplex1[4] + x[1] = l0x*simplex1[1]+l1x*simplex1[3]+l2x*simplex1[5] + y[0] = l0y*simplex1[0]+l1y*simplex1[2]+l2y*simplex1[4] + y[1] = l0y*simplex1[1]+l1y*simplex1[3]+l2y*simplex1[5] + + return psi1 * psi2 * kernel.eval(x, y) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t symIntegrandVertex2D(int n, REAL_t *xx, void *c_params): + cdef: + REAL_t* lx = xx + REAL_t* ly = xx+n//2 + REAL_t l1x = lx[0] + REAL_t l2x = lx[1] + REAL_t l1y = ly[0] + REAL_t l2y = ly[1] + REAL_t l0x = 1.-l1x-l2x + REAL_t l0y = 1.-l1y-l2y + REAL_t x[2] + REAL_t y[2] + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t psi1, psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + psi1 = l0x + elif (i == 1) or (i == 2): + psi1 = l1x-l0y + else: + psi1 = -l1y + + if j == 0: + psi2 = l0x + elif (j == 1) or (j == 2): + psi2 = l1x-l0y + else: + psi2 = -l1y + + x[0] = l0x*simplex1[0]+l1x*simplex1[2]+l2x*simplex1[4] + x[1] = l0x*simplex1[1]+l1x*simplex1[3]+l2x*simplex1[5] + y[0] = l0y*simplex2[0]+l1y*simplex2[2]+l2y*simplex2[4] + y[1] = l0y*simplex2[1]+l1y*simplex2[3]+l2y*simplex2[5] + + return psi1 * psi2 * kernel.eval(x, y) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t symIntegrandDistant2D(int n, REAL_t *xx, void *c_params): + cdef: + # REAL_t* lx = xx + # REAL_t* ly = xx[2] + REAL_t l1x = xx[0] + REAL_t l2x = xx[1] + REAL_t l1y = xx[2] + REAL_t l2y = xx[3] + REAL_t l0x = 1.-l1x-l2x + REAL_t l0y = 1.-l1y-l2y + REAL_t x[2] + REAL_t y[2] + INDEX_t i = getINDEX(c_params, fDOF1) + INDEX_t j = getINDEX(c_params, fDOF2) + # INDEX_t nr1 = getINDEX(c_params, fNR1) + # INDEX_t nc1 = getINDEX(c_params, fNC1) + # INDEX_t nr2 = getINDEX(c_params, fNR2) + # INDEX_t nc2 = getINDEX(c_params, fNC2) + REAL_t* simplex1 = getREALArray(c_params, fSIMPLEX1) + REAL_t* simplex2 = getREALArray(c_params, fSIMPLEX2) + REAL_t psi1, psi2 + kernel_t *kernel = getKernel(c_params, fKERNEL) + + if i == 0: + psi1 = l0x + elif i == 1: + psi1 = l1x + elif i == 2: + psi1 = l2x + elif i == 3: + psi1 = -l0y + elif i == 4: + psi1 = -l1y + else: + psi1 = -l2y + + if j == 0: + psi2 = l0x + elif j == 1: + psi2 = l1x + elif j == 2: + psi2 = l2x + elif j == 3: + psi2 = -l0y + elif j == 4: + psi2 = -l1y + else: + psi2 = -l2y + + x[0] = l0x*simplex1[0]+l1x*simplex1[2]+l2x*simplex1[4] + x[1] = l0x*simplex1[1]+l1x*simplex1[3]+l2x*simplex1[5] + y[0] = l0y*simplex2[0]+l1y*simplex2[2]+l2y*simplex2[4] + y[1] = l0y*simplex2[1]+l1y*simplex2[3]+l2y*simplex2[5] + + return psi1 * psi2 * kernel.eval(x, y) + + +cdef class fractionalLaplacian2D_P1_automaticQuadrature(nonlocalLaplacian2D): + def __init__(self, + Kernel kernel, + meshBase mesh, + DoFMap DoFMap, + num_dofs=None, + abstol=1e-4, + reltol=1e-4, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + 
super(fractionalLaplacian2D_P1_automaticQuadrature, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + self.user_ptr = malloc(NUM_INTEGRAND_PARAMS*OFFSET) + setINDEX(self.user_ptr, fNR1, 3) + setINDEX(self.user_ptr, fNC1, 2) + setINDEX(self.user_ptr, fNR2, 3) + setINDEX(self.user_ptr, fNC2, 2) + c_params = PyCapsule_New(self.user_ptr, NULL, NULL) + func_type = b"double (int, double *, void *)" + func_capsule_id = PyCapsule_New(symIntegrandId2D, func_type, NULL) + func_capsule_vertex = PyCapsule_New(symIntegrandVertex2D, func_type, NULL) + func_capsule_distant = PyCapsule_New(symIntegrandDistant2D, func_type, NULL) + self.integrandId = LowLevelCallable(func_capsule_id, c_params, func_type) + self.integrandVertex = LowLevelCallable(func_capsule_vertex, c_params, func_type) + self.integrandDistant = LowLevelCallable(func_capsule_distant, c_params, func_type) + self.abstol = abstol + self.reltol = reltol + self.kernel.c_kernel.setKernel(self.user_ptr, fKERNEL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + return DISTANT + + cdef void getNearQuadRule(self, panelType panel): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, t = 0 + REAL_t val, err, vol1 = self.vol1, vol2 = self.vol2 + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + + setREALArray(self.user_ptr, fSIMPLEX1, simplex1) + setREALArray(self.user_ptr, fSIMPLEX2, simplex2) + + contrib[:] = 0. + + if panel == COMMON_EDGE: + for I in range(2): + for J in range(I, 2): + k = 4*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandId, + (lambda y: (0., y), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val + val, err = nquad(self.integrandId, + (lambda y: (y, 1.), + (0., 1.)), + opts=[lambda y: {'points': [y], 'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}]) + contrib[k] += val + contrib[k] *= vol1*vol1 + elif panel == COMMON_VERTEX: + for i in range(2): + for j in range(2): + if simplex1[i, 0] == simplex2[j, 0]: + if (i == 1) and (j == 0): + t = 2 + break + elif (i == 0) and (j == 1): + t = 3 + break + else: + raise IndexError() + + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + i = 3*(I//t)+(I%t) + j = 3*(J//t)+(J%t) + if j < i: + i, j = j, i + k = 4*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, i) + setINDEX(self.user_ptr, fDOF2, j) + val, err = nquad(self.integrandVertex, + ((0., 1.), (0., 1.)), + opts={'epsabs': self.abstol, 'epsrel': self.reltol}) + contrib[k] = val*vol1*vol2 + elif panel == DISTANT: + k = 0 + for I in range(6): + for J in range(I, 6): + if mask & (1 << k): + setINDEX(self.user_ptr, fDOF1, I) + setINDEX(self.user_ptr, fDOF2, J) + val, err = nquad(self.integrandDistant, + (lambda l2x, l1y, l2y: (0., l2x), (0., 1.), lambda l2y: (0., l2y), (0., 1.)), + opts=[{'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': self.abstol, 'epsrel': self.reltol}, + {'epsabs': 
self.abstol, 'epsrel': self.reltol}]) + contrib[k] = val*vol1*vol2*4. + k += 1 + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) diff --git a/nl/PyNucleus_nl/clusterMethodCy.pxd b/nl/PyNucleus_nl/clusterMethodCy.pxd new file mode 100644 index 0000000..75b55eb --- /dev/null +++ b/nl/PyNucleus_nl/clusterMethodCy.pxd @@ -0,0 +1,73 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cimport numpy as np +from PyNucleus_fem.quadrature cimport (simplexDuffyTransformation, + simplexQuadratureRule, + simplexXiaoGimbutas) +from PyNucleus_fem.functions cimport function +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_base.linear_operators cimport (LinearOperator, + Dense_LinearOperator) +from PyNucleus_base.tupleDict cimport indexSet, indexSetIterator, arrayIndexSet, arrayIndexSetIterator, bitArray +from PyNucleus_fem.DoFMaps cimport DoFMap +from PyNucleus_fem.meshCy cimport meshBase +from . fractionalOrders cimport fractionalOrderBase +from . kernels2 cimport FractionalKernel + +cdef class tree_node: + cdef: + public tree_node parent + public list children + public INDEX_t dim + public INDEX_t id + public INDEX_t distFromRoot + public indexSet _dofs + INDEX_t _num_dofs + public indexSet _cells + public REAL_t[:, ::1] box + public REAL_t[:, ::1] transferOperator + public REAL_t[:, :, ::1] value + public REAL_t[::1] coefficientsUp, coefficientsDown + public BOOL_t mixed_node + public BOOL_t canBeAssembled + cdef indexSet get_dofs(self) + cdef indexSet get_cells(self) + cpdef INDEX_t findCell(self, meshBase mesh, REAL_t[::1] vertex, REAL_t[:, ::1] simplex, REAL_t[::1] bary) + cpdef set findCells(self, meshBase mesh, REAL_t[::1] vertex, REAL_t r, REAL_t[:, ::1] simplex) + cdef tree_node get_node(self, INDEX_t id) + cdef BOOL_t trim(self, bitArray keep) + cdef void upwardPassMatrix(self, dict coefficientsUp) + + +cdef class productIterator: + cdef: + INDEX_t m + INDEX_t dim + INDEX_t[::1] idx + cdef void reset(self) + cdef BOOL_t step(self) + + +cdef class farFieldClusterPair: + cdef: + public tree_node n1, n2 + public REAL_t[:, ::1] kernelInterpolant + cpdef void apply(self, REAL_t[::1] x, REAL_t[::1] y) + + +cdef class H2Matrix(LinearOperator): + cdef: + public LinearOperator Anear + public dict Pfar + public tree_node tree + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1 + + diff --git a/nl/PyNucleus_nl/clusterMethodCy.pyx b/nl/PyNucleus_nl/clusterMethodCy.pyx new file mode 100644 index 0000000..2f67876 --- /dev/null +++ b/nl/PyNucleus_nl/clusterMethodCy.pyx @@ -0,0 +1,2037 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL +from PyNucleus_base import uninitialized +from libc.math cimport sqrt, sin, cos, atan2, M_PI as pi +from itertools import product +from scipy.special import gamma +import numpy as np +cimport cython +from PyNucleus_base.linear_operators cimport (LinearOperator, + sparseGraph, + Multiply_Linear_Operator) +from PyNucleus_base.blas cimport gemv, gemvT, mydot, matmat, norm, assign +from . nonlocalLaplacianBase cimport variableFractionalOrder +from . nonlocalLaplacian cimport nearFieldClusterPair +from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap, P2_DoFMap +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.functions cimport constant +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI + +COMPRESSION = 'gzip' + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline void merge_boxes(REAL_t[:, ::1] box1, + REAL_t[:, ::1] box2, + REAL_t[:, ::1] new_box): + cdef INDEX_t i + for i in range(box1.shape[0]): + new_box[i, 0] = min(box1[i, 0], box2[i, 0]) + new_box[i, 1] = max(box1[i, 1], box2[i, 1]) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline void merge_boxes2(REAL_t[:, ::1] box1, + REAL_t[:, :, ::1] box2, + INDEX_t dof, + REAL_t[:, ::1] new_box): + cdef INDEX_t i + for i in range(box1.shape[0]): + new_box[i, 0] = min(box1[i, 0], box2[dof, i, 0]) + new_box[i, 1] = max(box1[i, 1], box2[dof, i, 1]) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline bint inBox(const REAL_t[:, ::1] box, + const REAL_t[::1] vector): + cdef: + bint t = True + INDEX_t i + for i in range(box.shape[0]): + t = t and (box[i, 0] <= vector[i]) and (vector[i] < box[i, 1]) + return t + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t minDist2FromBox(const REAL_t[:, ::1] box, + const REAL_t[::1] vector): + cdef: + INDEX_t i + REAL_t d2min = 0. + for i in range(box.shape[0]): + if vector[i] <= box[i, 0]: + d2min += (vector[i]-box[i, 0])**2 + elif vector[i] >= box[i, 1]: + d2min += (vector[i]-box[i, 1])**2 + return d2min + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline tuple distsFromBox(const REAL_t[:, ::1] box, + const REAL_t[::1] vector): + cdef: + INDEX_t i + REAL_t d2min = 0., d2max = 0. 
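[Editor's note] A compact numpy restatement of the minDist2FromBox logic above (sketch only, helper name hypothetical): the squared distance from a point to an axis-aligned box accumulates per coordinate and vanishes along coordinates where the point lies inside the slab.

    import numpy as np

    def min_dist2_from_box(box, p):
        # box: (dim, 2) array of rows [lo, hi]; p: point of length dim
        d = np.maximum(box[:, 0] - p, 0.) + np.maximum(p - box[:, 1], 0.)
        return float(d @ d)

    box = np.array([[0., 1.], [0., 1.]])
    assert min_dist2_from_box(box, np.array([.5, .5])) == 0.
    assert abs(min_dist2_from_box(box, np.array([2., 2.])) - 2.) < 1e-14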
+ for i in range(box.shape[0]): + if vector[i] <= box[i, 0]: + d2min += (vector[i]-box[i, 0])**2 + d2max += (vector[i]-box[i, 1])**2 + elif vector[i] >= box[i, 1]: + d2min += (vector[i]-box[i, 1])**2 + d2max += (vector[i]-box[i, 0])**2 + else: + d2max += max((vector[i]-box[i, 0])**2, (vector[i]-box[i, 1])**2) + return sqrt(d2min), sqrt(d2max) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline bint distFromSimplex(const REAL_t[:, ::1] simplex, + const REAL_t[::1] vector, + const REAL_t radius): + cdef: + INDEX_t i, j + REAL_t t, p, q + REAL_t w_mem[2] + REAL_t z_mem[2] + REAL_t[::1] w = w_mem + REAL_t[::1] z = z_mem + for i in range(3): + for j in range(2): + w[j] = simplex[(i+1) % 3, j] - simplex[i, j] + z[j] = simplex[i, j]-vector[j] + t = 1./mydot(w, w) + p = 2*mydot(w, z)*t + q = (mydot(z, z)-radius**2)*t + q = 0.25*p**2-q + if q > 0: + q = sqrt(q) + t = -0.5*p+q + if (0 <= t) and (t <= 1): + return True + t = -0.5*p-q + if (0 <= t) and (t <= 1): + return True + return False + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t distBoxes(REAL_t[:, ::1] box1, REAL_t[:, ::1] box2): + cdef: + REAL_t dist = 0., a2, b1 + INDEX_t i + for i in range(box1.shape[0]): + if box1[i, 0] > box2[i, 0]: + b1 = box2[i, 1] + a2 = box1[i, 0] + else: + b1 = box1[i, 1] + a2 = box2[i, 0] + dist += max(a2-b1, 0)**2 + return sqrt(dist) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t maxDistBoxes(REAL_t[:, ::1] box1, REAL_t[:, ::1] box2): + cdef: + REAL_t dist = 0., a2, b1 + INDEX_t i + for i in range(box1.shape[0]): + if box1[i, 0] > box2[i, 0]: + b1 = box2[i, 0] + a2 = box1[i, 1] + else: + b1 = box1[i, 0] + a2 = box2[i, 1] + dist += max(a2-b1, 0)**2 + return sqrt(dist) + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef REAL_t diamBox(REAL_t[:, ::1] box): + cdef: + REAL_t d = 0. 
+ INDEX_t i + for i in range(box.shape[0]): + d += (box[i, 1]-box[i, 0])**2 + return sqrt(d) + + +cdef class tree_node: + def __init__(self, tree_node parent, indexSet dofs, REAL_t[:, :, ::1] boxes, bint mixed_node=False, bint canBeAssembled=True): + cdef: + INDEX_t dof = -1 + indexSetIterator it + self.parent = parent + self.dim = boxes.shape[1] + self.children = [] + self._num_dofs = -1 + self._dofs = dofs + self.mixed_node = mixed_node + self.canBeAssembled = canBeAssembled + if self.dim > 0: + self.box = uninitialized((self.dim, 2), dtype=REAL) + self.box[:, 0] = np.inf + self.box[:, 1] = -np.inf + it = self.get_dofs().getIter() + while it.step(): + dof = it.i + merge_boxes2(self.box, boxes, dof, self.box) + + cdef indexSet get_dofs(self): + cdef: + indexSet dofs + tree_node c + if self.isLeaf: + return self._dofs + else: + dofs = arrayIndexSet() + for c in self.children: + dofs = dofs.union(c.get_dofs()) + return dofs + # return self.dofs + + @property + def dofs(self): + return self.get_dofs() + + @property + def num_dofs(self): + if self._num_dofs < 0: + self._num_dofs = self.get_dofs().getNumEntries() + return self._num_dofs + + cdef indexSet get_cells(self): + cdef: + indexSet s + tree_node c + if self.isLeaf: + return self._cells + else: + s = arrayIndexSet() + for c in self.children: + s = s.union(c.get_cells()) + return s + + @property + def cells(self): + return self.get_cells() + + def get_nodes(self): + if self.isLeaf: + return 1 + else: + return 1+sum([c.nodes for c in self.children]) + + nodes = property(fget=get_nodes) + + def refine(self, + REAL_t[:, :, ::1] boxes, + REAL_t[:, ::1] centers, + INDEX_t maxLevels=200, + INDEX_t maxLevelsMixed=200, + INDEX_t level=0, + INDEX_t minSize=1, + INDEX_t minMixedSize=1): + cdef: + indexSet dofs = self.get_dofs() + INDEX_t num_initial_dofs = dofs.getNumEntries(), dim, i = -1, j, num_dofs + REAL_t[:, ::1] subbox + indexSet s + indexSetIterator it = dofs.getIter() + set sPre + INDEX_t nD = 0 + if not self.mixed_node: + if (level >= maxLevels) or (num_initial_dofs <= minSize): + return + else: + if (level >= maxLevelsMixed) or (num_initial_dofs <= minMixedSize): + return + dim = self.box.shape[0] + if (not self.mixed_node) or dim == 1: + for idx in product(*([[0, 1]]*dim)): + subbox = uninitialized((dim, 2), dtype=REAL) + for i, j in enumerate(idx): + subbox[i, 0] = self.box[i, 0] + j*(self.box[i, 1]-self.box[i, 0])/2 + subbox[i, 1] = self.box[i, 0] + (j+1)*(self.box[i, 1]-self.box[i, 0])/2 + sPre = set() + it.reset() + while it.step(): + i = it.i + if inBox(subbox, centers[i, :]): + sPre.add(i) + s = arrayIndexSet() + s.fromSet(sPre) + num_dofs = s.getNumEntries() + if num_dofs > 0 and num_dofs < num_initial_dofs: + nD += num_dofs + self.children.append(tree_node(self, s, boxes, mixed_node=self.mixed_node)) + self.children[-1].refine(boxes, centers, maxLevels, maxLevelsMixed, level+1, minSize, minMixedSize) + else: + # split along larger box dimension + for j in range(2): + subbox = uninitialized((dim, 2), dtype=REAL) + if self.box[0, 1]-self.box[0, 0] > self.box[1, 1]-self.box[1, 0]: + subbox[0, 0] = self.box[0, 0] + j*(self.box[0, 1]-self.box[0, 0])/2 + subbox[0, 1] = self.box[0, 0] + (j+1)*(self.box[0, 1]-self.box[0, 0])/2 + subbox[1, 0] = self.box[1, 0] + subbox[1, 1] = self.box[1, 1] + else: + subbox[0, 0] = self.box[0, 0] + subbox[0, 1] = self.box[0, 1] + subbox[1, 0] = self.box[1, 0] + j*(self.box[1, 1]-self.box[1, 0])/2 + subbox[1, 1] = self.box[1, 0] + (j+1)*(self.box[1, 1]-self.box[1, 0])/2 + sPre = set() + it.reset() + 
while it.step(): + i = it.i + if inBox(subbox, centers[i, :]): + sPre.add(i) + s = arrayIndexSet() + s.fromSet(sPre) + num_dofs = s.getNumEntries() + if num_dofs > 0 and num_dofs < num_initial_dofs: + nD += num_dofs + self.children.append(tree_node(self, s, boxes, mixed_node=self.mixed_node)) + self.children[-1].refine(boxes, centers, maxLevels, maxLevelsMixed, level+1, minSize, minMixedSize) + + assert nD == 0 or nD == num_initial_dofs + if nD == num_initial_dofs: + self._dofs = None + else: + assert self.isLeaf + + def get_is_leaf(self): + return len(self.children) == 0 + + isLeaf = property(fget=get_is_leaf) + + def leaves(self): + cdef: + tree_node i, j + if self.isLeaf: + yield self + else: + for i in self.children: + for j in i.leaves(): + yield j + + def get_tree_nodes(self): + cdef: + tree_node i, j + yield self + for i in self.children: + for j in i.get_tree_nodes(): + yield j + + def _getLevels(self): + if self.isLeaf: + return 1 + else: + return 1+max([c._getLevels() for c in self.children]) + + numLevels = property(fget=_getLevels) + + def plot(self, level=0, plotDoFs=False, REAL_t[:, ::1] dofCoords=None): + import matplotlib.pyplot as plt + + cdef: + indexSet dofs + indexSetIterator it + INDEX_t dof, k, j + REAL_t[:, ::1] points + INDEX_t[::1] idx + REAL_t[::1] x, y + + if plotDoFs: + from scipy.spatial import ConvexHull + if self.dim == 2: + assert dofCoords is not None + dofs = self.get_dofs() + points = uninitialized((len(dofs), self.dim), dtype=REAL) + it = dofs.getIter() + k = 0 + while it.step(): + dof = it.i + for j in range(self.dim): + points[k, j] = dofCoords[dof, j] + k += 1 + if len(dofs) > 2: + hull = ConvexHull(points, qhull_options='Qt QJ') + idx = hull.vertices + else: + idx = np.arange(len(dofs), dtype=INDEX) + x = uninitialized((idx.shape[0]+1), dtype=REAL) + y = uninitialized((idx.shape[0]+1), dtype=REAL) + for k in range(idx.shape[0]): + x[k] = points[idx[k], 0] + y[k] = points[idx[k], 1] + x[idx.shape[0]] = points[idx[0], 0] + y[idx.shape[0]] = points[idx[0], 1] + plt.plot(x, y, color='red' if self.mixed_node else 'blue') + else: + raise NotImplementedError() + else: + import matplotlib.patches as patches + if self.dim == 2: + plt.gca().add_patch(patches.Rectangle((self.box[0, 0], self.box[1, 0]), + self.box[0, 1]-self.box[0, 0], + self.box[1, 1]-self.box[1, 0], + fill=False, + color='red' if self.mixed_node else 'blue')) + if not self.isLeaf: + myCenter = np.mean(self.box, axis=1) + for c in self.children: + cCenter = np.mean(c.box, axis=1) + plt.arrow(myCenter[0], myCenter[1], cCenter[0]-myCenter[0], cCenter[1]-myCenter[1]) + plt.text(cCenter[0], cCenter[1], s=str(level+1)) + c.plot(level+1) + else: + raise NotImplementedError() + + def prepareTransferOperators(self, INDEX_t m): + cdef: + tree_node c + if not self.isLeaf: + for c in self.children: + c.prepareTransferOperators(m) + if self.parent is not None: + self.transferOperator = uninitialized((m**self.dim, m**self.dim), + dtype=REAL) + transferMatrix(self.parent.box, self.box, m, + self.transferOperator) + self.coefficientsUp = uninitialized((m**self.dim), dtype=REAL) + self.coefficientsDown = uninitialized((m**self.dim), dtype=REAL) + + def upwardPass(self, REAL_t[::1] x, INDEX_t componentNo=0): + cdef: + INDEX_t i, dof = -1, k = 0 + tree_node c + indexSetIterator it + if self.isLeaf: + self.coefficientsUp[:] = 0.0 + it = self.get_dofs().getIter() + while it.step(): + dof = it.i + for i in range(self.coefficientsUp.shape[0]): + self.coefficientsUp[i] += x[dof]*self.value[componentNo, k, i] + k += 1 
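[Editor's note] tree_node.refine above subdivides a box into 2**dim half-open sub-boxes (matching inBox's lo <= p < hi convention) and keeps a child only when it receives a proper subset of the DoF centers; if one sub-box would swallow everything, the node stays a leaf. A minimal recursive Python sketch of the same strategy (function and dict layout are hypothetical):

    import numpy as np
    from itertools import product

    def refine(box, pts, min_size=1, depth=0, max_depth=200):
        # box: (dim, 2); pts: list of coordinate arrays (DoF centers)
        node = {'box': box, 'pts': pts, 'children': []}
        if len(pts) <= min_size or depth >= max_depth:
            return node
        mid = 0.5*(box[:, 0] + box[:, 1])
        for idx in product(*([[0, 1]]*box.shape[0])):
            lo = np.where(np.array(idx) == 0, box[:, 0], mid)
            hi = np.where(np.array(idx) == 0, mid, box[:, 1])
            sel = [p for p in pts if np.all(lo <= p) and np.all(p < hi)]
            if 0 < len(sel) < len(pts):   # only proper subsets become children
                node['children'].append(refine(np.stack([lo, hi], 1), sel,
                                               min_size, depth+1, max_depth))
        return node

    root = refine(np.array([[0., 1.], [0., 1.]]),
                  [np.random.rand(2) for _ in range(64)])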
+ else: + self.coefficientsUp[:] = 0.0 + for c in self.children: + c.upwardPass(x, componentNo) + gemv(c.transferOperator, c.coefficientsUp, self.coefficientsUp, 1.) + + def resetCoefficientsDown(self): + cdef: + tree_node c + self.coefficientsDown[:] = 0.0 + if not self.isLeaf: + for c in self.children: + c.resetCoefficientsDown() + + def downwardPass(self, REAL_t[::1] y, INDEX_t componentNo=0): + cdef: + INDEX_t i, dof = -1, k = 0 + REAL_t val + tree_node c + indexSetIterator it + if self.isLeaf: + it = self.get_dofs().getIter() + while it.step(): + dof = it.i + val = 0.0 + for i in range(self.coefficientsDown.shape[0]): + val += self.value[componentNo, k, i]*self.coefficientsDown[i] + y[dof] += val + k += 1 + else: + for c in self.children: + gemvT(c.transferOperator, self.coefficientsDown, c.coefficientsDown, 1.) + c.downwardPass(y, componentNo) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.cdivision(True) + def enterLeaveValues(self, + meshBase mesh, + DoFMap DoFMap, + INDEX_t order, + REAL_t[:, :, ::1] boxes, + comm=None): + cdef: + INDEX_t i, k, I, l, j, p, dim, dof = -1, r, start, end + REAL_t[:, ::1] coeff, simplex, local_vals, PHI, xi, x, box + REAL_t[::1] eta, fvals + REAL_t vol, beta, omega + tree_node n + simplexQuadratureRule qr + indexSetIterator it = arrayIndexSetIterator() + dim = mesh.dim + # Sauter Schwab p. 428 + if isinstance(DoFMap, P1_DoFMap): + quadOrder = order+2 + elif isinstance(DoFMap, P2_DoFMap): + quadOrder = order+3 + else: + raise NotImplementedError() + qr = simplexXiaoGimbutas(quadOrder, dim) + + # get values of basis functions at quadrature nodes + PHI = uninitialized((DoFMap.dofs_per_element, qr.num_nodes), dtype=REAL) + for i in range(DoFMap.dofs_per_element): + for j in range(qr.num_nodes): + PHI[i, j] = DoFMap.localShapeFunctions[i](qr.nodes[:, j]) + + coeff = np.zeros((DoFMap.num_dofs, order**dim), dtype=REAL) + simplex = uninitialized((dim+1, dim), dtype=REAL) + local_vals = uninitialized((DoFMap.dofs_per_element, order**dim), dtype=REAL) + + eta = np.cos((2.0*np.arange(order, 0, -1, dtype=REAL)-1.0) / (2.0*order) * np.pi) + xi = uninitialized((order, dim), dtype=REAL) + x = uninitialized((qr.num_nodes, dim), dtype=REAL) + fvals = uninitialized((qr.num_nodes), dtype=REAL) + + if comm: + start = np.ceil(mesh.num_cells*comm.rank/comm.size) + end = np.ceil(mesh.num_cells*(comm.rank+1)/comm.size) + else: + start = 0 + end = mesh.num_cells + + # loop over elements + for i in range(start, end): + mesh.getSimplex(i, simplex) + vol = qr.getSimplexVolume(simplex) + # get quadrature nodes + qr.nodesInGlobalCoords(simplex, x) + + # loop over element dofs + for k in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, k) + if I >= 0: + # get box for dof + # TODO: avoid slicing + box = boxes[I, :, :] + # get Chebyshev nodes of box + for j in range(order): + for l in range(dim): + xi[j, l] = (box[l, 1]-box[l, 0])*0.5 * (eta[j]+1.0) + box[l, 0] + # loop over interpolating polynomial basis + r = 0 + for idx in product(*([range(order)]*dim)): + # evaluations of the idx-th Chebyshev polynomial + # at the quadrature nodes are saved in fvals + fvals[:] = 1.0 + for q in range(dim): + l = idx[q] + beta = 1.0 + for j in range(order): + if j != l: + beta *= xi[l, q]-xi[j, q] + + # loop over quadrature nodes + for j in range(qr.num_nodes): + # evaluate l-th polynomial at j-th quadrature node + if abs(x[j, q]-xi[l, q]) > 1e-9: + omega = 1.0 + for p in range(order): + if p != l: + omega *= x[j, q]-xi[p, q] + fvals[j] *= 
omega/beta + # integrate Chebyshev polynomial * local basis function over element + local_vals[k, r] = 0.0 + for j in range(qr.num_nodes): + local_vals[k, r] += vol*fvals[j]*PHI[k, j]*qr.weights[j] + r += 1 + + # enter data into vector coeff + for k in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, k) + if I >= 0: + for l in range(order**dim): + coeff[I, l] += local_vals[k, l] + if comm and comm.size > 1: + if comm.rank == 0: + comm.Reduce(MPI.IN_PLACE, coeff, root=0) + else: + comm.Reduce(coeff, coeff, root=0) + if comm is None or comm.rank == 0: + # distribute entries of coeff to tree leaves + for n in self.leaves(): + n.value = uninitialized((1, len(n.dofs), order**dim), dtype=REAL) + it.setIndexSet(n.dofs) + k = 0 + while it.step(): + dof = it.i + for i in range(order**dim): + n.value[0, k, i] = coeff[dof, i] + k += 1 + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.cdivision(True) + def enterLeaveValuesGrad(self, + meshBase mesh, + DoFMap DoFMap, + INDEX_t order, + REAL_t[:, :, ::1] boxes, + comm=None): + cdef: + INDEX_t i, k, I, l, j, p, dim, dof, r, start, end + REAL_t[:, ::1] simplex, local_vals, PHI, xi, x, box, gradients + REAL_t[:, :, ::1] coeff + REAL_t[::1] eta, fvals + REAL_t vol, beta, omega + tree_node n + simplexQuadratureRule qr + INDEX_t[:, ::1] cells = mesh.cells + REAL_t[:, ::1] vertices = mesh.vertices + dim = mesh.dim + # Sauter Schwab p. 428 + if isinstance(DoFMap, P1_DoFMap): + quadOrder = order+1 + else: + raise NotImplementedError() + qr = simplexXiaoGimbutas(quadOrder, dim) + + coeff = np.zeros((DoFMap.num_dofs, dim, order**dim), dtype=REAL) + simplex = uninitialized((dim+1, dim), dtype=REAL) + local_vals = uninitialized((DoFMap.dofs_per_element, order**dim), dtype=REAL) + gradients = uninitialized((DoFMap.dofs_per_element, dim), dtype=REAL) + + eta = np.cos((2.0*np.arange(order, 0, -1, dtype=REAL)-1.0) / (2.0*order) * np.pi) + xi = uninitialized((order, dim), dtype=REAL) + x = uninitialized((qr.num_nodes, dim), dtype=REAL) + fvals = uninitialized((qr.num_nodes), dtype=REAL) + + if comm: + start = np.ceil(mesh.num_cells*comm.rank/comm.size) + end = np.ceil(mesh.num_cells*(comm.rank+1)/comm.size) + else: + start = 0 + end = mesh.num_cells + + # loop over elements + for i in range(start, end): + mesh.getSimplex(i, simplex) + vol = qr.getSimplexVolume(simplex) + # get quadrature nodes + qr.nodesInGlobalCoords(simplex, x) + + # loop over element dofs + for k in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, k) + if I >= 0: + # get box for dof + # TODO: avoid slicing + box = boxes[I, :, :] + # get Chebyshev nodes of box + for j in range(order): + for l in range(dim): + xi[j, l] = (box[l, 1]-box[l, 0])*0.5 * (eta[j]+1.0) + box[l, 0] + # loop over interpolating polynomial basis + r = 0 + for idx in product(*([range(order)]*dim)): + # evaluations of the idx-th Chebyshev polynomial + # at the quadrature nodes are saved in fvals + fvals[:] = 1.0 + for q in range(dim): + l = idx[q] + beta = 1.0 + for j in range(order): + if j != l: + beta *= xi[l, q]-xi[j, q] + + # loop over quadrature nodes + for j in range(qr.num_nodes): + # evaluate l-th polynomial at j-th quadrature node + if abs(x[j, q]-xi[l, q]) > 1e-9: + omega = 1.0 + for p in range(order): + if p != l: + omega *= x[j, q]-xi[p, q] + fvals[j] *= omega/beta + # integrate Chebyshev polynomial * local basis function over element + # TODO: deal with multiple components, get gradient + local_vals[k, r] = 0.0 + for j in range(qr.num_nodes): + 
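[Editor's note] The omega/beta loops above evaluate tensor-product Lagrange polynomials on Chebyshev points: per coordinate, the l-th basis function at x is prod_{p != l} (x - xi_p) / prod_{p != l} (xi_l - xi_p). A one-dimensional numpy sketch with the Kronecker and partition-of-unity checks:

    import numpy as np

    m = 5
    eta = np.cos((2.*np.arange(m, 0, -1) - 1.)/(2.*m)*np.pi)   # Chebyshev points on [-1, 1]
    xi = 0.5*(eta + 1.)                                        # mapped to the box [0, 1]

    def lagrange(l, x):
        num = np.prod([x - xi[p] for p in range(m) if p != l])
        den = np.prod([xi[l] - xi[p] for p in range(m) if p != l])
        return num/den

    # the basis interpolates (Kronecker property) and sums to one
    assert abs(lagrange(2, xi[2]) - 1.) < 1e-12 and abs(lagrange(2, xi[1])) < 1e-12
    assert abs(sum(lagrange(l, 0.3) for l in range(m)) - 1.) < 1e-12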
local_vals[k, r] += vol*fvals[j]*qr.weights[j] + r += 1 + # get gradients + if dim == 1: + det = simplex[1, 0]-simplex[0, 0] + gradients[0, 0] = 1./det + gradients[1, 0] = -1./det + elif dim == 2: + det = (simplex[1, 1]-simplex[2, 1])*(simplex[0, 0]-simplex[2, 0])+(simplex[2, 0]-simplex[1, 0])*(simplex[0, 1]-simplex[2, 1]) + gradients[0, 0] = (simplex[1, 1]-simplex[2, 1])/det + gradients[0, 1] = (simplex[2, 0]-simplex[1, 0])/det + gradients[1, 0] = (simplex[2, 1]-simplex[0, 1])/det + gradients[1, 1] = (simplex[0, 0]-simplex[2, 0])/det + gradients[2, 0] = -gradients[0, 0]-gradients[1, 0] + gradients[2, 1] = -gradients[0, 1]-gradients[1, 1] + + # enter data into vector coeff + for k in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, k) + if I >= 0: + for j in range(dim): + for l in range(order**dim): + coeff[I, j, l] += local_vals[k, l]*gradients[k, j] + if comm: + comm.Allreduce(MPI.IN_PLACE, coeff) + # distribute entries of coeff to tree leaves + for n in self.leaves(): + n.value = uninitialized((dim, len(n.dofs), order**dim), dtype=REAL) + for k, dof in enumerate(sorted(n.dofs)): + for j in range(dim): + for i in range(order**dim): + n.value[j, k, i] = coeff[dof, j, i] + + def set_id(self, INDEX_t maxID=0, INDEX_t distFromRoot=0): + self.id = maxID + self.distFromRoot = distFromRoot + maxID += 1 + for c in self.children: + maxID = c.set_id(maxID, distFromRoot+1) + return maxID + + def get_max_id(self): + cdef: + INDEX_t id = self.id + tree_node c + for c in self.children: + id = max(id, c.get_max_id()) + return id + + cdef tree_node get_node(self, INDEX_t id): + cdef: + tree_node c + INDEX_t k + if self.id == id: + return self + else: + for k in range(len(self.children)-1): + if self.children[k].id <= id < self.children[k+1].id: + c = self.children[k] + return c.get_node(id) + if self.children[len(self.children)-1].id <= id: + c = self.children[len(self.children)-1] + return c.get_node(id) + + cdef BOOL_t trim(self, bitArray keep): + cdef: + tree_node c + BOOL_t delNode, c_delNode + list newChildren = [] + delNode = not keep.inSet(self.id) + for c in self.children: + c_delNode = c.trim(keep) + if not c_delNode: + delNode = False + newChildren.append(c) + if not self.isLeaf and len(newChildren) == 0: + self._cells = arrayIndexSet() + for c in self.children: + self._cells = self._cells.union(c._cells) + self.children = [] + return delNode + + def HDF5write(self, node): + myNode = node.create_group(str(self.id)) + if self.parent: + myNode.attrs['parent'] = self.parent.id + else: + myNode.attrs['parent'] = -1 + node.attrs['numNodes'] = self.nodes + myNode.create_dataset('children', + data=[c.id for c in self.children], + compression=COMPRESSION) + for c in self.children: + c.HDF5write(node) + myNode.attrs['dim'] = self.dim + myNode.create_dataset('_dofs', + data=list(self.dofs.toSet()), + compression=COMPRESSION) + if self.isLeaf: + myNode.create_dataset('_cells', + data=list(self._cells), + compression=COMPRESSION) + + try: + myNode.create_dataset('transferOperator', + data=np.array(self.transferOperator, copy=False), + compression=COMPRESSION) + node.attrs['M'] = self.transferOperator.shape[0] + except: + pass + try: + myNode.create_dataset('value', + data=np.array(self.value, copy=False), + compression=COMPRESSION) + except: + pass + myNode.create_dataset('box', data=np.array(self.box, copy=False), + compression=COMPRESSION) + + @staticmethod + def HDF5read(node): + nodes = [] + boxes = uninitialized((0, 0, 0), dtype=REAL) + try: + M = node.attrs['M'] + except: + M = 0 + for _ in 
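[Editor's note] The closed-form gradient block above is the standard P1 formula. A small self-contained check that it reproduces the hat-function gradients on the unit triangle and that the gradients sum to zero:

    import numpy as np

    simplex = np.array([[0., 0.], [1., 0.], [0., 1.]])
    det = ((simplex[1, 1]-simplex[2, 1])*(simplex[0, 0]-simplex[2, 0])
           + (simplex[2, 0]-simplex[1, 0])*(simplex[0, 1]-simplex[2, 1]))
    g = np.empty((3, 2))
    g[0] = [(simplex[1, 1]-simplex[2, 1])/det, (simplex[2, 0]-simplex[1, 0])/det]
    g[1] = [(simplex[2, 1]-simplex[0, 1])/det, (simplex[0, 0]-simplex[2, 0])/det]
    g[2] = -g[0]-g[1]
    # on the unit triangle: grad(1-x-y), grad(x), grad(y)
    assert np.allclose(g, [[-1., -1.], [1., 0.], [0., 1.]])
    assert np.allclose(g.sum(axis=0), 0.)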
range(node.attrs['numNodes']): + n = tree_node(None, set(), boxes) + nodes.append(n) + for id in node: + n = nodes[int(id)] + myNode = node[id] + n.dim = myNode.attrs['dim'] + dofs = arrayIndexSet() + dofs.fromSet(set(myNode['_dofs'])) + n.dofs = dofs + try: + n._cells = set(myNode['_cells']) + n.box = np.array(myNode['box'], dtype=REAL) + except: + pass + try: + n.transferOperator = np.array(myNode['transferOperator'], + dtype=REAL) + except: + pass + try: + n.value = np.array(myNode['value'], dtype=REAL) + except: + pass + n.coefficientsUp = uninitialized((M), dtype=REAL) + n.coefficientsDown = uninitialized((M), dtype=REAL) + n.id = int(id) + nodes.append(n) + for id in node: + myNode = node[id] + if myNode.attrs['parent'] >= 0: + nodes[int(id)].parent = nodes[myNode.attrs['parent']] + else: + root = nodes[int(id)] + for c in list(myNode['children']): + nodes[int(id)].children.append(nodes[c]) + return root, nodes + + def HDF5writeNew(self, node): + cdef: + INDEX_t c = -1 + tree_node n + indexSetIterator it = arrayIndexSetIterator() + INDEX_t dim = self.box.shape[0], i, j + numNodes = self.nodes + indptrChildren = uninitialized((numNodes+1), dtype=INDEX) + boxes = uninitialized((numNodes, dim, 2), dtype=REAL) + for n in self.get_tree_nodes(): + indptrChildren[n.id+1] = len(n.children) + for i in range(dim): + for j in range(2): + boxes[n.id, i, j] = n.box[i, j] + indptrChildren[0] = 0 + for i in range(1, numNodes+1): + indptrChildren[i] += indptrChildren[i-1] + nnz = indptrChildren[numNodes] + indicesChildren = uninitialized((nnz), dtype=INDEX) + for n in self.get_tree_nodes(): + k = indptrChildren[n.id] + for cl in n.children: + indicesChildren[k] = cl.id + k += 1 + children = sparseGraph(indicesChildren, indptrChildren, numNodes, numNodes) + node.create_group('children') + children.HDF5write(node['children']) + node.create_dataset('boxes', data=boxes, compression=COMPRESSION) + del children + + M = self.children[0].transferOperator.shape[0] + transferOperators = uninitialized((numNodes, M, M), dtype=REAL) + for n in self.get_tree_nodes(): + try: + transferOperators[n.id, :, :] = n.transferOperator + except: + pass + node.create_dataset('transferOperators', data=transferOperators, + compression=COMPRESSION) + + indptrDofs = uninitialized((numNodes+1), dtype=INDEX) + for n in self.get_tree_nodes(): + indptrDofs[n.id+1] = len(n.dofs) + indptrDofs[0] = 0 + for i in range(1, numNodes+1): + indptrDofs[i] += indptrDofs[i-1] + nnz = indptrDofs[numNodes] + indicesDofs = uninitialized((nnz), dtype=INDEX) + maxDof = -1 + for n in self.get_tree_nodes(): + k = indptrDofs[n.id] + it.setIndexSet(n.dofs) + while it.step(): + c = it.i + indicesDofs[k] = c + maxDof = max(maxDof, c) + k += 1 + dofs = sparseGraph(indicesDofs, indptrDofs, numNodes, maxDof+1) + node.create_group('dofs') + dofs.HDF5write(node['dofs']) + del dofs + + indptrCells = uninitialized((numNodes+1), dtype=INDEX) + for n in self.get_tree_nodes(): + if n.isLeaf: + indptrCells[n.id+1] = len(n.cells) + else: + indptrCells[n.id+1] = 0 + indptrCells[0] = 0 + for i in range(1, numNodes+1): + indptrCells[i] += indptrCells[i-1] + nnz = indptrCells[numNodes] + indicesCells = uninitialized((nnz), dtype=INDEX) + maxCell = -1 + for n in self.get_tree_nodes(): + if n.isLeaf: + k = indptrCells[n.id] + for c in n.cells: + indicesCells[k] = c + maxCell = max(maxCell, c) + k += 1 + cells = sparseGraph(indicesCells, indptrCells, numNodes, maxCell+1) + node.create_group('cells') + cells.HDF5write(node['cells']) + del cells + + noCoefficients = 
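[Editor's note] HDF5writeNew above flattens the per-node children/dofs/cells lists into indptr/indices pairs, i.e. the CSR layout also used by sparseGraph. A toy sketch of that flattening and of how a node's list is recovered:

    import numpy as np

    children = {0: [1, 2], 1: [], 2: [3], 3: []}    # node id -> child ids
    numNodes = len(children)
    indptr = np.zeros(numNodes+1, dtype=np.int64)
    for n in children:
        indptr[n+1] = len(children[n])
    indptr = np.cumsum(indptr)
    indices = np.array([c for n in range(numNodes) for c in children[n]],
                       dtype=np.int64)
    # children of node n are recovered as indices[indptr[n]:indptr[n+1]]
    assert list(indices[indptr[2]:indptr[3]]) == [3]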
next(self.leaves()).value.shape[0] + values = uninitialized((maxDof+1, noCoefficients, M), dtype=REAL) + mapping = {} + k = 0 + for n in self.leaves(): + mapping[n.id] = k, k+n.value.shape[1] + k += n.value.shape[1] + values[mapping[n.id][0]:mapping[n.id][1], :, :] = np.swapaxes(n.value, 0, 1) + node.create_group('mapping') + keys = uninitialized((len(mapping)), dtype=INDEX) + vals = uninitialized((len(mapping), 2), dtype=INDEX) + k = 0 + for i in mapping: + keys[k] = i + vals[k][0] = mapping[i][0] + vals[k][1] = mapping[i][1] + k += 1 + node['mapping'].create_dataset('keys', data=keys, compression=COMPRESSION) + node['mapping'].create_dataset('vals', data=vals, compression=COMPRESSION) + node.create_dataset('values', data=values, + compression=COMPRESSION) + + node.attrs['dim'] = self.dim + node.attrs['M'] = M + + @staticmethod + def HDF5readNew(node): + cdef: + list nodes + LinearOperator children + REAL_t[:, :, ::1] boxes + INDEX_t M + tree_node n + INDEX_t k + dict mapping + INDEX_t[::1] keys + INDEX_t[:, ::1] vals + indexSet cluster_dofs + LinearOperator dofs + children = LinearOperator.HDF5read(node['children']) + nodes = [0]*children.shape[0] + M = node.attrs['M'] + + transferOperators = np.array(node['transferOperators'], dtype=REAL) + boxes = np.array(node['boxes'], dtype=REAL) + tree = readNode(nodes, 0, None, boxes, children, M, transferOperators) + dofs = LinearOperator.HDF5read(node['dofs']) + cells = LinearOperator.HDF5read(node['cells']) + keys = np.array(node['mapping']['keys'], dtype=INDEX) + vals = np.array(node['mapping']['vals'], dtype=INDEX) + mapping = {} + for k in range(keys.shape[0]): + mapping[keys[k]] = k + values = np.array(node['values'], dtype=REAL) + for n in tree.leaves(): + n._dofs = arrayIndexSet(dofs.indices[dofs.indptr[n.id]:dofs.indptr[n.id+1]], sorted=True) + n._cells = arrayIndexSet(cells.indices[cells.indptr[n.id]:cells.indptr[n.id+1]], sorted=True) + n.value = np.ascontiguousarray(np.swapaxes(np.array(values[vals[mapping[n.id], 0]:vals[mapping[n.id], 1], :, :], dtype=REAL), 0, 1)) + # setDoFsFromChildren(tree) + return tree, nodes + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef INDEX_t findCell(self, meshBase mesh, REAL_t[::1] vertex, REAL_t[:, ::1] simplex, REAL_t[::1] bary): + cdef: + tree_node c + INDEX_t cellNo = -1 + if minDist2FromBox(self.box, vertex) > 0.: + return -1 + if self.isLeaf: + for cellNo in self.cells: + if mesh.vertexInCell(vertex, cellNo, simplex, bary): + return cellNo + return -1 + else: + for c in self.children: + cellNo = c.findCell(mesh, vertex, simplex, bary) + if cellNo >= 0: + break + return cellNo + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef set findCells(self, meshBase mesh, REAL_t[::1] vertex, REAL_t r, REAL_t[:, ::1] simplex): + cdef: + set cells = set() + REAL_t h = mesh.h + REAL_t rmin = r-h + REAL_t rmax = r+h + REAL_t dmin, dmax + tree_node c + INDEX_t cellNo + dmin, dmax = distsFromBox(self.box, vertex) + if (dmax <= rmin) or (dmin >= rmax): + return cells + if self.isLeaf: + for cellNo in self._cells: + mesh.getSimplex(cellNo, simplex) + if distFromSimplex(simplex, vertex, r): + cells.add(cellNo) + else: + for c in self.children: + cells |= c.findCells(mesh, vertex, r, simplex) + return cells + + def __repr__(self): + m = '[' + for i in range(self.box.shape[0]): + if i == 0: + m += '[' + else: + m += ', [' + for j in range(self.box.shape[1]): + if j > 0: + m += ', ' + m += str(self.box[i, j]) + m += ']' 
+ m += ']' + return 'node({})'.format(m) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void upwardPassMatrix(self, dict coefficientsUp): + cdef: + INDEX_t k, i, m, j + INDEX_t[::1] dof, dofs + REAL_t[:, ::1] transfer, transfers + tree_node c + if self.id in coefficientsUp: + return + elif self.isLeaf: + coefficientsUp[self.id] = (self.value[0, :, :], np.array(self.dofs.toArray())) + else: + transfers = np.zeros((self.num_dofs, self.coefficientsUp.shape[0]), dtype=REAL) + dofs = np.empty((self.num_dofs), dtype=INDEX) + k = 0 + for c in self.children: + if c.id not in coefficientsUp: + c.upwardPassMatrix(coefficientsUp) + transfer, dof = coefficientsUp[c.id] + for i in range(dof.shape[0]): + dofs[k] = dof[i] + for m in range(self.coefficientsUp.shape[0]): + for j in range(c.transferOperator.shape[1]): + transfers[k, m] += transfer[i, j]*c.transferOperator[m, j] + k += 1 + coefficientsUp[self.id] = (transfers, dofs) + + +@cython.initializedcheck(False) +@cython.wraparound(False) +@cython.boundscheck(False) +cdef tree_node readNode(list nodes, INDEX_t myId, parent, REAL_t[:, :, ::1] boxes, LinearOperator children, INDEX_t M, REAL_t[:, :, ::1] transferOperators): + cdef: + indexSet bA = arrayIndexSet() + tree_node n = tree_node(parent, bA, boxes) + INDEX_t i, j + n.id = myId + nodes[myId] = n + n.transferOperator = uninitialized((transferOperators.shape[1], + transferOperators.shape[2]), + dtype=REAL) + n.box = uninitialized((boxes.shape[1], + boxes.shape[2]), + dtype=REAL) + for i in range(transferOperators.shape[1]): + for j in range(transferOperators.shape[2]): + n.transferOperator[i, j] = transferOperators[myId, i, j] + for i in range(boxes.shape[1]): + for j in range(boxes.shape[2]): + n.box[i, j] = boxes[myId, i, j] + n.coefficientsUp = uninitialized((M), dtype=REAL) + n.coefficientsDown = uninitialized((M), dtype=REAL) + for i in range(children.indptr[myId], children.indptr[myId+1]): + n.children.append(readNode(nodes, children.indices[i], n, boxes, children, M, transferOperators)) + return n + + +cdef indexSet setDoFsFromChildren(tree_node n): + if n.isLeaf: + return n.dofs + else: + dofs = arrayIndexSet() + for c in n.children: + dofs.union(setDoFsFromChildren(c)) + n.dofs = dofs + return dofs + + +# FIX: move it into tree_node and don't reallocate memory +cdef inline void transferMatrix(REAL_t[:, ::1] boxP, + REAL_t[:, ::1] boxC, + INDEX_t m, + REAL_t[:, ::1] T): + cdef: + INDEX_t dim, i, j, l, k, I, J + REAL_t[:, ::1] omega, beta, xiC, xiP + dim = boxP.shape[0] + omega = uninitialized((m, dim), dtype=REAL) + beta = uninitialized((m, dim), dtype=REAL) + xiC = uninitialized((m, dim), dtype=REAL) + xiP = uninitialized((m, dim), dtype=REAL) + + eta = np.cos((2.0*np.arange(m, 0, -1)-1.0) / (2.0*m) * np.pi) + for i in range(m): + for j in range(dim): + xiC[i, j] = (boxC[j, 1]-boxC[j, 0])/2.*(eta[i]+1.0)+boxC[j, 0] + xiP[i, j] = (boxP[j, 1]-boxP[j, 0])/2.*(eta[i]+1.0)+boxP[j, 0] + for j in range(m): + for l in range(dim): + omega[j, l] = xiC[j, l]-xiP[0, l] + for k in range(1, m): + omega[j, l] *= xiC[j, l]-xiP[k, l] + beta[j, l] = 1.0 + for k in range(m): + if k != j: + beta[j, l] *= xiP[j, l]-xiP[k, l] + T[:, :] = 1.0 + I = 0 + for idxP in product(*([range(m)]*dim)): + J = 0 + for idxC in product(*([range(m)]*dim)): + for k in range(dim): + i = idxP[k] + j = idxC[k] + if abs(xiP[i, k]-xiC[j, k]) > 1e-8: + T[I, J] *= omega[j, k]/(xiC[j, k]-xiP[i, k])/beta[i, k] + J += 1 + I += 1 + + +cdef class farFieldClusterPair: + def __init__(self, 
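[Editor's note] transferMatrix above evaluates the parent's Lagrange-Chebyshev basis at the child's nodes, T[I, J] = L_I^parent(xi_J^child), so pushing nodal values through T^T (the downward pass) re-interpolates exactly for polynomials of degree below the interpolation order. A 1D numpy sketch of that exactness property:

    import numpy as np

    def cheb(a, b, m):
        eta = np.cos((2.*np.arange(m, 0, -1) - 1.)/(2.*m)*np.pi)
        return 0.5*(b - a)*(eta + 1.) + a

    def lagrange_matrix(nodes, x):
        # L[i, j] = i-th Lagrange polynomial on `nodes` evaluated at x[j]
        m = len(nodes)
        L = np.ones((m, len(x)))
        for i in range(m):
            for p in range(m):
                if p != i:
                    L[i] *= (x - nodes[p])/(nodes[i] - nodes[p])
        return L

    m = 6
    xiP, xiC = cheb(0., 2., m), cheb(0., 1., m)   # parent box and one child box
    T = lagrange_matrix(xiP, xiC)                 # T[i, j] = L_i^parent(xiC_j)
    f = lambda t: t**3 - 2.*t                     # degree < m, so exact
    assert np.allclose(T.T @ f(xiP), f(xiC))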
tree_node n1, tree_node n2): + self.n1 = n1 + self.n2 = n2 + + def plot(self, color='blue'): + import matplotlib.pyplot as plt + import matplotlib.patches as patches + dim = self.n1.box.shape[0] + if dim == 1: + box1 = self.n1.box + box2 = self.n2.box + plt.gca().add_patch(patches.Rectangle((box1[0, 0], box2[0, 0]), box1[0, 1]-box1[0, 0], box2[0, 1]-box2[0, 0], fill=True, alpha=0.5, facecolor=color)) + else: + for dof1 in self.n1.dofs: + for dof2 in self.n2.dofs: + plt.gca().add_patch(patches.Rectangle((dof1-0.5, dof2-0.5), 1., 1., fill=True, alpha=0.5, facecolor=color)) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef void apply(farFieldClusterPair self, REAL_t[::1] x, REAL_t[::1] y): + gemv(self.kernelInterpolant, x, y, 1.) + + def __repr__(self): + return 'farFieldClusterPair<{}, {}>'.format(self.n1, self.n2) + + +cdef class productIterator: + def __init__(self, INDEX_t m, INDEX_t dim): + self.m = m + self.dim = dim + self.idx = np.zeros((dim), dtype=INDEX) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void reset(self): + cdef: + INDEX_t i + for i in range(self.dim-1): + self.idx[i] = 0 + self.idx[self.dim-1] = -1 + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef BOOL_t step(self): + cdef: + INDEX_t i + i = self.dim-1 + self.idx[i] += 1 + while self.idx[i] == self.m: + self.idx[i] = 0 + if i>0: + i -= 1 + self.idx[i] += 1 + else: + return False + return True + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def assembleFarFieldInteractions(FractionalKernel kernel, dict Pfar, INDEX_t m, DoFMap dm): + cdef: + INDEX_t lvl + REAL_t[:, ::1] box1, box2, x, y + INDEX_t k, i, j, p + REAL_t[::1] eta + REAL_t eta_p + farFieldClusterPair cP + INDEX_t dim = dm.mesh.dim + REAL_t[:, ::1] dofCoords = None + INDEX_t dof1, dof2 + indexSetIterator it = arrayIndexSetIterator() + INDEX_t kiSize = m**dim + productIterator pit = productIterator(m, dim) + BOOL_t kernel_variable = kernel.variable + + if kernel.variable: + dofCoords = dm.getDoFCoordinates() + eta = np.cos((2.0*np.arange(m, 0, -1)-1.0) / (2.0*m) * np.pi) + + x = uninitialized((kiSize, dim)) + y = uninitialized((kiSize, dim)) + + for lvl in Pfar: + for cP in Pfar[lvl]: + box1 = cP.n1.box + box2 = cP.n2.box + k = 0 + pit.reset() + while pit.step(): + for j in range(dim): + p = pit.idx[j] + eta_p = eta[p]+1.0 + x[k, j] = (box1[j, 1]-box1[j, 0])*0.5 * eta_p + box1[j, 0] + y[k, j] = (box2[j, 1]-box2[j, 0])*0.5 * eta_p + box2[j, 0] + k += 1 + cP.kernelInterpolant = uninitialized((kiSize, kiSize), dtype=REAL) + if kernel_variable: + it.setIndexSet(cP.n1.dofs) + it.step() + dof1 = it.i + + it.setIndexSet(cP.n2.dofs) + it.step() + dof2 = it.i + + kernel.evalParamsPtr(dim, &dofCoords[dof1, 0], &dofCoords[dof2, 0]) + for i in range(kiSize): + for j in range(kiSize): + cP.kernelInterpolant[i, j] = -kernel.evalPtr(dim, &x[i, 0], &y[j, 0]) + cP.kernelInterpolant[i, j] += -kernel.evalPtr(dim, &y[j, 0], &x[i, 0]) + + +cdef class H2Matrix(LinearOperator): + def __init__(self, + tree_node tree, + dict Pfar, + LinearOperator Anear): + self.tree = tree + self.Pfar = Pfar + self.Anear = Anear + LinearOperator.__init__(self, Anear.shape[0], Anear.shape[1]) + + def isSparse(self): + return False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + 
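[Editor's note] assembleFarFieldInteractions above samples the kernel at m**dim Chebyshev nodes per box; this is accurate because the kernel restricted to a well-separated box pair is smooth and hence numerically low-rank. A quick SVD illustration with a model kernel (the intervals and kernel are assumptions for illustration):

    import numpy as np

    x = np.linspace(0., 1., 200)
    y = np.linspace(3., 4., 200)            # well-separated clusters
    K = 1./np.abs(x[:, None] - y[None, :])
    s = np.linalg.svd(K, compute_uv=False)
    # singular values decay geometrically: a handful of terms suffice
    assert s[15]/s[0] < 1e-8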
INDEX_t level, componentNo + tree_node n1, n2 + farFieldClusterPair clusterPair + self.Anear.matvec(x, y) + if len(self.Pfar) > 0: + for componentNo in range(next(self.tree.leaves()).value.shape[0]): + self.tree.upwardPass(x, componentNo) + self.tree.resetCoefficientsDown() + for level in self.Pfar: + for clusterPair in self.Pfar[level]: + n1, n2 = clusterPair.n1, clusterPair.n2 + clusterPair.apply(n2.coefficientsUp, n1.coefficientsDown) + self.tree.downwardPass(y, componentNo) + return 0 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + cdef: + INDEX_t level, componentNo + tree_node n1, n2 + farFieldClusterPair clusterPair + self.Anear.matvec_no_overwrite(x, y) + if len(self.Pfar) > 0: + for componentNo in range(next(self.tree.leaves()).value.shape[0]): + self.tree.upwardPass(x, componentNo) + self.tree.resetCoefficientsDown() + for level in self.Pfar: + for clusterPair in self.Pfar[level]: + n1, n2 = clusterPair.n1, clusterPair.n2 + clusterPair.apply(n2.coefficientsUp, n1.coefficientsDown) + self.tree.downwardPass(y, componentNo) + return 0 + + property diagonal: + def __get__(self): + return self.Anear.diagonal + + property tree_size: + def __get__(self): + try: + md = self.tree.children[0].transferOperator.shape[0] + except (IndexError, AttributeError): + # No children, ie only the root node + # or no far field clusters + md = 0 + nodes = self.tree.nodes + dofs = self.shape[0] + # size of transferMatrix * number of nodes in tree + number of dofs * leaf values + return md**2*nodes + dofs*md + + property num_far_field_clusters: + def __get__(self): + clusters = 0 + for lvl in self.Pfar: + clusters += len(self.Pfar[lvl]) + return clusters + + property cluster_size: + def __get__(self): + try: + md = self.tree.children[0].transferOperator.shape[0] + except (IndexError, AttributeError): + # No children, ie only the root node + # or no far field clusters + md = 0 + # number far field cluster pairs * size of kernel interpolant matrices + return md**2*self.num_far_field_clusters + + property nearField_size: + def __get__(self): + if isinstance(self.Anear, Dense_LinearOperator): + return self.Anear.num_rows*self.Anear.num_columns + elif isinstance(self.Anear, Multiply_Linear_Operator): + return self.Anear.A.nnz + else: + return self.Anear.nnz + + def __repr__(self): + return '<%dx%d %s %f fill from near field, %f fill from tree, %f fill from clusters, %d far-field clusters>' % (self.num_rows, + self.num_columns, + self.__class__.__name__, + self.nearField_size/self.num_rows/self.num_columns, + self.tree_size/self.num_rows/self.num_columns, + self.cluster_size/self.num_rows/self.num_columns, + self.num_far_field_clusters) + + def getMemorySize(self): + return self.Anear.getMemorySize() + self.cluster_size*sizeof(REAL_t) + self.tree_size*sizeof(REAL_t) + + def HDF5write(self, node, version=2, Pnear=None): + cdef: + INDEX_t K, S, j, lvl, d1, d2 + farFieldClusterPair clusterPair + REAL_t[::1] kernelInterpolants + INDEX_t[:, ::1] nodeIds + REAL_t[:, ::1] sVals + node.attrs['type'] = 'h2' + + node.create_group('Anear') + self.Anear.HDF5write(node['Anear']) + + node.create_group('tree') + if version == 2: + self.tree.HDF5writeNew(node['tree']) + elif version == 1: + self.tree.HDF5write(node['tree']) + else: + raise NotImplementedError() + node.attrs['version'] = version + + K = 0 + j = 0 + for lvl in self.Pfar: + for clusterPair in self.Pfar[lvl]: + K += 
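[Editor's note] The matvec above is the standard three-phase H2 scheme: near field, upward pass into cluster coefficients, far-field cluster interactions, downward pass back to DoFs. A flat, single-level numpy sketch of the same data flow (the dict layout is assumed; this is not the class's actual API):

    import numpy as np

    def h2_matvec(Anear, pairs, V, dofs, x):
        # V[c]: (len(dofs[c]), m) leaf values; pairs[(c1, c2)]: (m, m) interpolant
        y = Anear @ x
        up = {c: V[c].T @ x[dofs[c]] for c in V}          # upward pass
        down = {c: np.zeros(V[c].shape[1]) for c in V}
        for (c1, c2), K in pairs.items():                 # far-field interactions
            down[c1] += K @ up[c2]
        for c in V:                                       # downward pass
            y[dofs[c]] += V[c] @ down[c]
        return y

    n, m = 8, 3
    rng = np.random.default_rng(0)
    dofs = {0: np.arange(0, 4), 1: np.arange(4, 8)}
    V = {c: rng.standard_normal((4, m)) for c in dofs}
    pairs = {(0, 1): rng.standard_normal((m, m)), (1, 0): rng.standard_normal((m, m))}
    y = h2_matvec(np.zeros((n, n)), pairs, V, dofs, rng.standard_normal(n))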
clusterPair.kernelInterpolant.shape[0]*clusterPair.kernelInterpolant.shape[1]
+                j += 1
+        kernelInterpolants = uninitialized((K), dtype=REAL)
+        nodeIds = uninitialized((j, 5), dtype=INDEX)
+        K = 0
+        j = 0
+        for lvl in self.Pfar:
+            for clusterPair in self.Pfar[lvl]:
+                S = clusterPair.kernelInterpolant.shape[0]*clusterPair.kernelInterpolant.shape[1]
+                for d1 in range(clusterPair.kernelInterpolant.shape[0]):
+                    for d2 in range(clusterPair.kernelInterpolant.shape[1]):
+                        kernelInterpolants[K] = clusterPair.kernelInterpolant[d1, d2]
+                        K += 1
+                nodeIds[j, 0] = clusterPair.n1.id
+                nodeIds[j, 1] = clusterPair.n2.id
+                nodeIds[j, 2] = clusterPair.kernelInterpolant.shape[0]
+                nodeIds[j, 3] = clusterPair.kernelInterpolant.shape[1]
+                nodeIds[j, 4] = lvl
+                j += 1
+        g = node.create_group('Pfar')
+        g.create_dataset('kernelInterpolants', data=kernelInterpolants, compression=COMPRESSION)
+        g.create_dataset('nodeIds', data=nodeIds, compression=COMPRESSION)
+
+        if Pnear is not None:
+            node2 = node.create_group('Pnear')
+            k = 0
+            for clusterPairNear in Pnear:
+                node2.create_group(str(k))
+                clusterPairNear.HDF5write(node2[str(k)])
+                k += 1
+
+    @staticmethod
+    def HDF5read(node, returnPnear=False):
+        cdef:
+            dict Pfar
+            INDEX_t lvl, K, j, d1, d2
+            farFieldClusterPair cP
+            INDEX_t[:, ::1] nodeIds
+            REAL_t[:, ::1] sVals
+
+        Anear = LinearOperator.HDF5read(node['Anear'])
+
+        try:
+            version = node.attrs['version']
+        except KeyError:
+            version = 1
+        if version == 2:
+            tree, nodes = tree_node.HDF5readNew(node['tree'])
+        else:
+            tree, nodes = tree_node.HDF5read(node['tree'])
+
+        Pfar = {}
+        nodeIds = np.array(node['Pfar']['nodeIds'], dtype=INDEX)
+        kernelInterpolants = np.array(node['Pfar']['kernelInterpolants'], dtype=REAL)
+        K = 0
+        for j in range(nodeIds.shape[0]):
+            lvl = nodeIds[j, 4]
+            if lvl not in Pfar:
+                Pfar[lvl] = []
+            cP = farFieldClusterPair(nodes[nodeIds[j, 0]],
+                                     nodes[nodeIds[j, 1]])
+            d1 = nodeIds[j, 2]
+            d2 = nodeIds[j, 3]
+            cP.kernelInterpolant = uninitialized((d1, d2), dtype=REAL)
+            for d1 in range(cP.kernelInterpolant.shape[0]):
+                for d2 in range(cP.kernelInterpolant.shape[1]):
+                    cP.kernelInterpolant[d1, d2] = kernelInterpolants[K]
+                    K += 1
+            Pfar[lvl].append(cP)
+
+        if returnPnear:
+            Pnear = []
+            for k in node['Pnear']:
+                Pnear.append(nearFieldClusterPair.HDF5read(node['Pnear'][k], nodes))
+            return H2Matrix(tree, Pfar, Anear), Pnear
+        else:
+            return H2Matrix(tree, Pfar, Anear)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    def toarray(self):
+        cdef:
+            INDEX_t minLvl, maxLvl, lvl, i, j, I, J, id
+            farFieldClusterPair cP
+            tree_node n1, n2
+            dict lvlNodes, delNodes
+            dict coefficientsUp
+            REAL_t[:, ::1] dense
+            REAL_t[:, ::1] tr1, tr2, d
+            INDEX_t[::1] dofs1, dofs2
+        dense = self.Anear.toarray()
+        minLvl = min(self.Pfar)
+        maxLvl = max(self.Pfar)
+        lvlNodes = {lvl : set() for lvl in range(minLvl, maxLvl+1)}
+        delNodes = {lvl : set() for lvl in range(minLvl, maxLvl+1)}
+        for lvl in self.Pfar:
+            for cP in self.Pfar[lvl]:
+                lvlNodes[lvl].add(cP.n1.id)
+                lvlNodes[lvl].add(cP.n2.id)
+        for lvl in range(minLvl+1, maxLvl+1):
+            lvlNodes[lvl] |= lvlNodes[lvl-1]
+        lvlNodes[maxLvl] = set(list(range(self.tree.get_max_id()+1)))
+        for lvl in range(minLvl, maxLvl):
+            delNodes[lvl] = lvlNodes[lvl+1]-lvlNodes[lvl]
+        del lvlNodes
+
+        coefficientsUp = {}
+        for lvl in reversed(sorted(self.Pfar.keys())):
+            for cP in self.Pfar[lvl]:
+                n1 = cP.n1
+                n2 = cP.n2
+                n1.upwardPassMatrix(coefficientsUp)
+                n2.upwardPassMatrix(coefficientsUp)
+                tr1, dofs1 = coefficientsUp[n1.id]
+                tr2, dofs2 =
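# Sketch of the serialization layout used by HDF5write/HDF5read above: all
# kernel interpolants are concatenated into one flat array, plus an index
# table (n1, n2, rows, cols, lvl) from which the matrices are re-cut on
# read. Pure NumPy stand-in; the real code writes h5py datasets.
import numpy as np

mats = {(0, 1, 0): np.arange(6.).reshape(2, 3), (2, 3, 1): np.eye(2)}
flat = np.concatenate([M.ravel() for M in mats.values()])
table = np.array([[n1, n2, M.shape[0], M.shape[1], lvl]
                  for (n1, n2, lvl), M in mats.items()])

# read back: walk the table and consume rows*cols entries per cluster pair
K, restored = 0, {}
for n1, n2, r, c, lvl in table:
    restored[(n1, n2, lvl)] = flat[K:K+r*c].reshape(r, c)
    K += r*c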
coefficientsUp[n2.id] + d = np.dot(tr1, np.dot(cP.kernelInterpolant, tr2.T)) + for i in range(dofs1.shape[0]): + I = dofs1[i] + for j in range(dofs2.shape[0]): + J = dofs2[j] + dense[I, J] = d[i, j] + + for id in delNodes[lvl]: + if id in coefficientsUp: + del coefficientsUp[id] + del coefficientsUp + return np.array(dense, copy=False) + + def plot(self, Pnear=[], fill='box', nearFieldColor='red', farFieldColor='blue', kernelApproximationColor='yellow', shiftCoefficientColor='red', printRank=False): + import matplotlib.pyplot as plt + if self.tree.dim == 1: + if fill == 'box': + for c in Pnear: + c.plot() + for lvl in self.Pfar: + for c in self.Pfar[lvl]: + c.plot() + plt.xlim([self.tree.box[0, 0], self.tree.box[0, 1]]) + plt.ylim([self.tree.box[0, 0], self.tree.box[0, 1]]) + elif fill == 'dof': + import matplotlib.patches as patches + nd = self.shape[0] + for c in Pnear: + box1 = [min(c.n1.dofs), max(c.n1.dofs)] + box2 = [nd-max(c.n2.dofs), nd-min(c.n2.dofs)] + plt.gca().add_patch(patches.Rectangle((box1[0], box2[0]), box1[1]-box1[0], box2[1]-box2[0], fill=True, facecolor=nearFieldColor)) + for lvl in self.Pfar: + for c in self.Pfar[lvl]: + box1 = [min(c.n1.dofs), max(c.n1.dofs)] + box2 = [nd-max(c.n2.dofs), nd-min(c.n2.dofs)] + + plt.gca().add_patch(patches.Rectangle((box1[0], box2[0]), box1[1]-box1[0], box2[1]-box2[0], fill=True, facecolor=farFieldColor)) + k = c.kernelInterpolant.shape[0] + + if shiftCoefficientColor is not None: + box1 = [min(c.n1.dofs), min(c.n1.dofs)+k-1] + box2 = [nd-min(c.n2.dofs), nd-max(c.n2.dofs)] + plt.gca().add_patch(patches.Rectangle((box1[0], box2[0]), box1[1]-box1[0], box2[1]-box2[0], fill=True, facecolor=shiftCoefficientColor)) + + box1 = [min(c.n1.dofs), max(c.n1.dofs)] + box2 = [nd-min(c.n2.dofs), nd-min(c.n2.dofs)-k+1] + plt.gca().add_patch(patches.Rectangle((box1[0], box2[0]), box1[1]-box1[0], box2[1]-box2[0], fill=True, facecolor=shiftCoefficientColor)) + + if kernelApproximationColor is not None: + box1 = [min(c.n1.dofs), min(c.n1.dofs)+k-1] + box2 = [nd-min(c.n2.dofs), nd-min(c.n2.dofs)-k+1] + plt.gca().add_patch(patches.Rectangle((box1[0], box2[0]), box1[1]-box1[0], box2[1]-box2[0], fill=True, facecolor=kernelApproximationColor)) + + if printRank: + plt.text(0.5*(min(c.n1.dofs)+max(c.n1.dofs)), nd-0.5*(min(c.n2.dofs)+max(c.n2.dofs)), str(k), + horizontalalignment='center', + verticalalignment='center') + + plt.xlim([0, nd]) + plt.ylim([0, nd]) + plt.axis('equal') + else: + raise NotImplementedError(fill) + elif self.tree.dim == 2: + Z = np.zeros((self.num_rows, self.num_columns), dtype=INDEX) + for lvl in self.Pfar: + for c in self.Pfar[lvl]: + for dof1 in c.n1.dofs: + for dof2 in c.n2.dofs: + Z[dof1, dof2] = 1 + plt.pcolormesh(Z) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def getDoFBoxesAndCells(meshBase mesh, DoFMap DoFMap, comm=None): + cdef: + INDEX_t i, j, I, k, start, end, dim = mesh.dim + REAL_t[:, :, ::1] boxes = uninitialized((DoFMap.num_dofs, dim, 2), dtype=REAL) + REAL_t[:, ::1] boxes2 + REAL_t[:, ::1] simplex = uninitialized((dim+1, dim), dtype=REAL) + REAL_t[::1] m = uninitialized((dim), dtype=REAL), M = uninitialized((dim), dtype=REAL) + list cells + + boxes[:, :, 0] = np.inf + boxes[:, :, 1] = -np.inf + + cells = [set() for i in range(DoFMap.num_dofs)] + + if comm: + start = np.ceil(mesh.num_cells*comm.rank/comm.size) + end = np.ceil(mesh.num_cells*(comm.rank+1)/comm.size) + else: + start = 0 + end = mesh.num_cells + for i in range(start, end): + mesh.getSimplex(i, simplex) + + for k 
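# The densification step in toarray above, in two NumPy lines: each far-field
# pair contributes the low-rank block tr1 @ K @ tr2.T, scattered into the
# dense matrix at its DoF indices (sizes here are illustrative).
import numpy as np

dense = np.zeros((8, 8))
dofs1, dofs2 = np.array([0, 1, 2]), np.array([5, 6])
tr1, tr2, K = np.random.rand(3, 2), np.random.rand(2, 2), np.random.rand(2, 2)
dense[np.ix_(dofs1, dofs2)] = tr1 @ K @ tr2.T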
in range(dim): + m[k] = simplex[0, k] + M[k] = simplex[0, k] + for j in range(dim): + for k in range(dim): + m[k] = min(m[k], simplex[j+1, k]) + M[k] = max(M[k], simplex[j+1, k]) + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + if I >= 0: + for k in range(dim): + boxes[I, k, 0] = min(boxes[I, k, 0], m[k]) + boxes[I, k, 1] = max(boxes[I, k, 1], M[k]) + if comm: + boxes2 = uninitialized((DoFMap.num_dofs, dim), dtype=REAL) + for i in range(DoFMap.num_dofs): + for j in range(dim): + boxes2[i, j] = boxes[i, j, 0] + comm.Allreduce(MPI.IN_PLACE, boxes2, op=MPI.MIN) + for i in range(DoFMap.num_dofs): + for j in range(dim): + boxes[i, j, 0] = boxes2[i, j] + boxes2[i, j] = boxes[i, j, 1] + comm.Allreduce(MPI.IN_PLACE, boxes2, op=MPI.MAX) + for i in range(DoFMap.num_dofs): + for j in range(dim): + boxes[i, j, 1] = boxes2[i, j] + for i in range(mesh.num_cells): + for j in range(DoFMap.dofs_per_element): + I = DoFMap.cell2dof(i, j) + if I >= 0: + cells[I].add(i) + return np.array(boxes, copy=False), cells + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def getFractionalOrders(variableFractionalOrder s, meshBase mesh): + cdef: + REAL_t[:, ::1] centers + INDEX_t numCells = mesh.num_cells + INDEX_t cellNo1, cellNo2 + REAL_t[:, ::1] orders = uninitialized((numCells, numCells), dtype=REAL) + + centers = mesh.getCellCenters() + + if s.symmetric: + for cellNo1 in range(numCells): + for cellNo2 in range(cellNo1, numCells): + orders[cellNo1, cellNo2] = s.eval(centers[cellNo1, :], + centers[cellNo2, :]) + orders[cellNo2, cellNo1] = orders[cellNo1, cellNo2] + else: + for cellNo1 in range(numCells): + for cellNo2 in range(numCells): + orders[cellNo1, cellNo2] = s.eval(centers[cellNo1, :], + centers[cellNo2, :]) + return orders + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +def getFractionalOrdersDiagonal(variableFractionalOrder s, meshBase mesh): + cdef: + REAL_t[:, ::1] centers + INDEX_t numCells = mesh.num_cells + INDEX_t cellNo1 + REAL_t[::1] orders = uninitialized((numCells), dtype=REAL) + + centers = mesh.getCellCenters() + + for cellNo1 in range(numCells): + orders[cellNo1] = s.eval(centers[cellNo1, :], + centers[cellNo1, :]) + return orders + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cpdef BOOL_t getAdmissibleClusters(FractionalKernel kernel, tree_node n1, tree_node n2, INDEX_t farFieldInteractionSize, REAL_t eta=1., dict Pfar=None, list Pnear=None, INDEX_t level=0, INDEX_t maxLevels=200): + cdef: + tree_node t1, t2 + bint seemsAdmissible + REAL_t dist, diam1, diam2, maxDist + function horizon + farFieldClusterPair cp + BOOL_t addedFarFieldClusters = False + INDEX_t lenNearField + REAL_t[:, ::1] boxUnion = np.empty((n1.box.shape[0], 2)) + REAL_t diamUnion = 0. + dist = distBoxes(n1.box, n2.box) + diam1 = diamBox(n1.box) + diam2 = diamBox(n2.box) + + seemsAdmissible = eta*dist >= max(diam1, diam2) and not n1.mixed_node and not n2.mixed_node and (farFieldInteractionSize <= n1.num_dofs*n2.num_dofs) and n1.canBeAssembled and n2.canBeAssembled + + if kernel.finiteHorizon: + horizon = kernel.horizon + assert isinstance(horizon, constant) + maxDist = maxDistBoxes(n1.box, n2.box) + if not kernel.complement: + if dist > horizon.value: + # return True, since we don't want fully ignored cluster pairs to be merged into near field ones. 
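# The geometric admissibility test at the heart of getAdmissibleClusters
# above, assuming distBoxes/diamBox carry their usual meaning for
# axis-aligned boxes box[i] = [lo_i, hi_i]: a pair is a far-field candidate
# when eta * dist(box1, box2) >= max(diam(box1), diam(box2)).
import numpy as np

def dist_boxes(b1, b2):
    # componentwise gap between the intervals, zero where they overlap
    gap = np.maximum(0.0, np.maximum(b1[:, 0]-b2[:, 1], b2[:, 0]-b1[:, 1]))
    return np.linalg.norm(gap)

def diam_box(b):
    return np.linalg.norm(b[:, 1]-b[:, 0])

def admissible(b1, b2, eta=1.0):
    return eta*dist_boxes(b1, b2) >= max(diam_box(b1), diam_box(b2))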
+ return True + else: + if maxDist <= horizon.value: + # same + return True + if dist <= horizon.value and horizon.value <= maxDist: + seemsAdmissible = False + merge_boxes(n1.box, n2.box, boxUnion) + diamUnion = diamBox(boxUnion) + lenNearField = len(Pnear) + if seemsAdmissible: + cp = farFieldClusterPair(n1, n2) + try: + Pfar[level].append(cp) + except KeyError: + Pfar[level] = [cp] + return True + elif (n1.isLeaf and n2.isLeaf) or (level == maxLevels): + Pnear.append(nearFieldClusterPair(n1, n2)) + if kernel.finiteHorizon and len(Pnear[len(Pnear)-1].cellsInter) > 0: + if diamUnion > kernel.horizon.value: + print("Near field cluster pairs need to fit within horizon.\nBox1 {}\nBox2 {}\n{}, {} -> {}".format(np.array(n1.box), + np.array(n2.box), + diam1, + diam2, + diamUnion)) + + elif (farFieldInteractionSize >= n1.num_dofs*n2.num_dofs) and (diamUnion < kernel.horizon.value): + Pnear.append(nearFieldClusterPair(n1, n2)) + return False + elif n1.isLeaf: + for t2 in n2.children: + addedFarFieldClusters |= getAdmissibleClusters(kernel, n1, t2, farFieldInteractionSize, eta, + Pfar, Pnear, + level+1, maxLevels) + elif n2.isLeaf: + for t1 in n1.children: + addedFarFieldClusters |= getAdmissibleClusters(kernel, t1, n2, farFieldInteractionSize, eta, + Pfar, Pnear, + level+1, maxLevels) + else: + for t1 in n1.children: + for t2 in n2.children: + addedFarFieldClusters |= getAdmissibleClusters(kernel, t1, t2, farFieldInteractionSize, eta, + Pfar, Pnear, + level+1, maxLevels) + if not addedFarFieldClusters: + if diamUnion < kernel.horizon.value: + del Pnear[lenNearField:] + Pnear.append(nearFieldClusterPair(n1, n2)) + return addedFarFieldClusters + + +def symmetrizeNearFieldClusters(list Pnear): + cdef: + set clusters = set() + nearFieldClusterPair cpNear + farFieldClusterPair cpFar + INDEX_t id1, id2 + dict lookup = {} + for cpNear in Pnear: + clusters.add((cpNear.n1.id, cpNear.n2.id)) + lookup[cpNear.n1.id] = cpNear.n1 + lookup[cpNear.n2.id] = cpNear.n2 + while len(clusters) > 0: + id1, id2 = clusters.pop() + if id1 != id2: + if (id2, id1) not in clusters: + Pnear.append(nearFieldClusterPair(lookup[id2], lookup[id1])) + else: + clusters.remove((id2, id1)) + + +def trimTree(tree_node tree, list Pnear, dict Pfar): + cdef: + nearFieldClusterPair cpNear + farFieldClusterPair cpFar + bitArray used = bitArray(maxElement=tree.get_max_id()) + for cpNear in Pnear: + used.set(cpNear.n1.id) + used.set(cpNear.n2.id) + for lvl in Pfar: + for cpFar in Pfar[lvl]: + used.set(cpFar.n1.id) + used.set(cpFar.n2.id) + print(used.getNumEntries(), tree.get_max_id(), tree.nodes) + tree.trim(used) + tree.set_id() + print(used.getNumEntries(), tree.get_max_id(), tree.nodes) + used.empty() + for cpNear in Pnear: + used.set(cpNear.n1.id) + used.set(cpNear.n2.id) + for lvl in Pfar: + for cpFar in Pfar[lvl]: + used.set(cpFar.n1.id) + used.set(cpFar.n2.id) + print(used.getNumEntries(), tree.get_max_id(), tree.nodes) + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef void insertion_sort(REAL_t[::1] a, INDEX_t start, INDEX_t end): + cdef: + INDEX_t i, j + REAL_t v + for i in range(start, end): + v = a[i] + j = i-1 + while j >= start: + if a[j] <= v: + break + a[j+1] = a[j] + j -= 1 + a[j+1] = v + + +cpdef INDEX_t[:, ::1] checkNearFarFields(DoFMap dm, list Pnear, dict Pfar): + cdef: + INDEX_t[:, ::1] S = np.zeros((dm.num_dofs, dm.num_dofs), dtype=INDEX) + nearFieldClusterPair cNear + farFieldClusterPair cFar + INDEX_t i, j + + for cNear in Pnear: + for i in cNear.n1.dofs: + for j in 
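# Usage sketch for the checkNearFarFields diagnostic that follows: for an
# infinite-horizon kernel every DoF pair should be covered by exactly one
# cluster pair, near field (1) or far field (2), so a complete partition
# leaves no zeros in S; with a finite horizon, pairs beyond the horizon
# legitimately remain 0.
import numpy as np

S = np.asarray(checkNearFarFields(dm, Pnear, Pfar))  # dm, Pnear, Pfar from the assembly
assert (S > 0).all(), "some DoF pairs are covered by neither near nor far field"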
cNear.n2.dofs:
+                S[i, j] = 1
+    for lvl in Pfar:
+        for cFar in Pfar[lvl]:
+            for i in cFar.n1.dofs:
+                for j in cFar.n2.dofs:
+                    S[i, j] = 2
+    return S
+
+
+cdef class exactSphericalIntegral2D(function):
+    cdef:
+        REAL_t[::1] u
+        P1_DoFMap dm
+        REAL_t radius
+        REAL_t[:, ::1] simplex
+        REAL_t[::1] u_local, w, z, bary
+        public tree_node root
+        INDEX_t numThetas
+        REAL_t[::1] thetas
+
+    def __init__(self, REAL_t[::1] u, P1_DoFMap dm, REAL_t radius):
+        cdef:
+            meshBase mesh = dm.mesh
+            REAL_t[:, :, ::1] boxes = None
+            list cells = []
+            REAL_t[:, ::1] centers = None
+            INDEX_t i, j, maxLevels, dof, I
+        self.u = u
+        self.dm = dm
+        assert self.u.shape[0] == self.dm.num_dofs
+        assert mesh.dim == 2
+        self.radius = radius
+        self.simplex = uninitialized((3, 2))
+        self.u_local = uninitialized(3)
+        boxes, cells = getDoFBoxesAndCells(mesh, dm, None)
+        centers = uninitialized((dm.num_dofs, mesh.dim), dtype=REAL)
+        for i in range(dm.num_dofs):
+            for j in range(mesh.dim):
+                centers[i, j] = 0.5*(boxes[i, j, 0]+boxes[i, j, 1])
+        root = tree_node(None, set(np.arange(dm.num_dofs)), boxes)
+        maxLevels = int(np.floor(0.5*np.log2(mesh.num_vertices)))
+        root.refine(boxes, centers, maxLevels=maxLevels)
+        root.set_id()
+        # enter cells in leaf nodes
+        for n in root.leaves():
+            n._cells = set()
+            for dof in n.dofs:
+                n._cells |= cells[dof]
+        # update boxes (if we stopped at maxLevels; up to this point each
+        # DoF's box is just its support)
+        for n in root.leaves():
+            for I in n.dofs:
+                box = n.box
+                for i in range(mesh.dim):
+                    for j in range(2):
+                        boxes[I, i, j] = box[i, j]
+        self.root = root
+        self.w = uninitialized(2)
+        self.z = uninitialized(2)
+        self.bary = uninitialized(3)
+        self.thetas = uninitialized(6)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef void findThetas(self, INDEX_t cellNo, REAL_t[::1] x):
+        cdef:
+            INDEX_t i, j
+            REAL_t p, q, t, theta
+        self.numThetas = 0
+        for i in range(3):
+            for j in range(2):
+                self.w[j] = self.simplex[(i+1) % 3, j] - self.simplex[i, j]
+                self.z[j] = self.simplex[i, j]-x[j]
+            t = 1./mydot(self.w, self.w)
+            p = 2*mydot(self.w, self.z)*t
+            q = (mydot(self.z, self.z)-self.radius**2)*t
+            q = 0.25*p**2-q
+            if q > 0:
+                q = sqrt(q)
+                t = -0.5*p+q
+                if (0 <= t) and (t <= 1):
+                    theta = atan2(t*self.w[1]+self.z[1], t*self.w[0]+self.z[0])
+                    if theta < 0:
+                        theta += 2*pi
+                    self.thetas[self.numThetas] = theta
+                    self.numThetas += 1
+                t = -0.5*p-q
+                if (0 <= t) and (t <= 1):
+                    theta = atan2(t*self.w[1]+self.z[1], t*self.w[0]+self.z[0])
+                    if theta < 0:
+                        theta += 2*pi
+                    self.thetas[self.numThetas] = theta
+                    self.numThetas += 1
+        insertion_sort(self.thetas, 0, self.numThetas)
+
+        theta = 0.5*(self.thetas[0]+self.thetas[1])
+        self.w[0] = x[0]+self.radius*cos(theta)
+        self.w[1] = x[1]+self.radius*sin(theta)
+        if not self.dm.mesh.vertexInCell(self.w, cellNo, self.simplex, self.bary):
+            theta = self.thetas[0]
+            for i in range(self.numThetas-1):
+                self.thetas[i] = self.thetas[i+1]
+            self.thetas[self.numThetas-1] = theta+2*pi
+        if self.numThetas == 1:
+            self.numThetas = 0
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    @cython.cdivision(True)
+    cdef REAL_t eval(self, REAL_t[::1] x):
+        cdef:
+            REAL_t I = 0.
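# The edge/circle intersection solved in findThetas above: with w = v2 - v1
# and z = v1 - x, the points of |z + t*w| = radius satisfy
# t**2 (w.w) + 2 t (w.z) + (z.z - radius**2) = 0; roots with t in [0, 1]
# lie on the edge and their angles around x feed the sorted theta list.
import numpy as np

def edge_circle_params(x, v1, v2, radius):
    w, z = v2 - v1, v1 - x
    roots = np.roots([w @ w, 2.0*(w @ z), z @ z - radius**2])
    return [t.real for t in roots if abs(t.imag) < 1e-14 and 0.0 <= t.real <= 1.0]

x, v1, v2 = np.zeros(2), np.array([0.5, -1.0]), np.array([0.5, 1.0])
for t in edge_circle_params(x, v1, v2, 1.0):
    p = v1 + t*(v2 - v1)
    theta = np.arctan2(p[1]-x[1], p[0]-x[0]) % (2.0*np.pi)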
+ meshBase mesh = self.dm.mesh + P1_DoFMap dm = self.dm + INDEX_t i, j, cellNo + REAL_t vol, ax, ay, b, theta0, theta1 + for cellNo in self.root.findCells(mesh, x, self.radius, self.simplex): + mesh.getSimplex(cellNo, self.simplex) + + self.findThetas(cellNo, x) + if self.numThetas == 0: + continue + assert self.numThetas % 2 == 0, (np.array(self.thetas), self.numThetas) + + for i in range(dm.dofs_per_element): + dof = dm.cell2dof(cellNo, i) + if dof >= 0: + self.u_local[i] = self.u[dof] + else: + self.u_local[i] = 0 + vol = ((self.simplex[0, 0]-self.simplex[1, 0])*(self.simplex[2, 1]-self.simplex[1, 1]) - + (self.simplex[0, 1]-self.simplex[1, 1])*(self.simplex[2, 0]-self.simplex[1, 0])) + ax = 0 + ay = 0 + b = 0 + for i in range(dm.dofs_per_element): + ax += self.u_local[i]*(self.simplex[(i+2) % 3, 1]-self.simplex[(i+1) % 3, 1]) + ay -= self.u_local[i]*(self.simplex[(i+2) % 3, 0]-self.simplex[(i+1) % 3, 0]) + b -= self.u_local[i]*(self.simplex[(i+1) % 3, 0]*(self.simplex[(i+2) % 3, 1]-self.simplex[(i+1) % 3, 1]) - + self.simplex[(i+1) % 3, 1]*(self.simplex[(i+2) % 3, 0]-self.simplex[(i+1) % 3, 0])) + ax /= vol + ay /= vol + b /= vol + + j = 0 + while j < self.numThetas: + theta0, theta1 = self.thetas[j], self.thetas[j+1] + j += 2 + if theta1-theta0 > theta0 + 2*pi-theta1: + theta0 += 2*pi + theta0, theta1 = theta1, theta0 + # print(theta0, theta1, j, cellNo) + assert theta0 <= theta1, (theta0, theta1) + + I += self.radius**2 * (ax*(sin(theta1)-sin(theta0)) - ay*(cos(theta1)-cos(theta0))) + (b*self.radius + self.radius*ax*x[0] + self.radius*ay*x[1])*(theta1-theta0) + return I + + diff --git a/nl/PyNucleus_nl/fractionalLaplacian1D.pxd b/nl/PyNucleus_nl/fractionalLaplacian1D.pxd new file mode 100644 index 0000000..a58e0a6 --- /dev/null +++ b/nl/PyNucleus_nl/fractionalLaplacian1D.pxd @@ -0,0 +1,51 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadQuadratureRule, + doubleSimplexQuadratureRule, GaussJacobi, + simplexDuffyTransformation, simplexXiaoGimbutas) +from PyNucleus_fem.DoFMaps cimport DoFMap, P0_DoFMap, P1_DoFMap, P2_DoFMap +from PyNucleus_fem.functions cimport function +from . nonlocalLaplacianBase cimport (double_local_matrix_t, + nonlocalLaplacian1D, + panelType, + MASK_t, + specialQuadRule) +from . interactionDomains cimport CUT +from . fractionalOrders cimport fractionalOrderBase +from . 
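# Numeric cross-check of the closed-form arc integral used in eval above:
# for the affine integrand u(p) = ax*p[0] + ay*p[1] + b on the arc
# p(theta) = x + r*(cos theta, sin theta), theta in [t0, t1],
#   int u r dtheta = r**2*(ax*(sin t1 - sin t0) - ay*(cos t1 - cos t0))
#                  + r*(ax*x[0] + ay*x[1] + b)*(t1 - t0).
import numpy as np
from scipy.integrate import quad

ax, ay, b, r = 0.7, -1.3, 0.4, 2.0
x, t0, t1 = np.array([0.2, -0.5]), 0.3, 1.9
u = lambda th: (ax*(x[0]+r*np.cos(th)) + ay*(x[1]+r*np.sin(th)) + b)*r
closed = (r**2*(ax*(np.sin(t1)-np.sin(t0)) - ay*(np.cos(t1)-np.cos(t0)))
          + r*(ax*x[0] + ay*x[1] + b)*(t1-t0))
assert abs(quad(u, t0, t1)[0] - closed) < 1e-10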
kernels2 cimport (Kernel, + FractionalKernel) + + +cdef class fractionalLaplacian1DZeroExterior(nonlocalLaplacian1D): + cdef: + public quadQuadratureRule qrVertex + public REAL_t[:, ::1] PHI_dist, PHI_sep, PHI_vertex + dict distantPHI + + +cdef class fractionalLaplacian1D_P1(nonlocalLaplacian1D): + cdef: + public quadQuadratureRule qrId, qrVertex + REAL_t[:, ::1] PSI_id, PSI_vertex + + +cdef class fractionalLaplacian1D_P1_boundary(fractionalLaplacian1DZeroExterior): + pass + + + +cdef class fractionalLaplacian1D_P0(nonlocalLaplacian1D): + cdef: + public quadQuadratureRule qrId, qrVertex0, qrVertex1 + REAL_t[:, ::1] PSI_id, PSI_vertex0, PSI_vertex1 + + +cdef class fractionalLaplacian1D_P0_boundary(fractionalLaplacian1DZeroExterior): + pass diff --git a/nl/PyNucleus_nl/fractionalLaplacian1D.pyx b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx new file mode 100644 index 0000000..297efda --- /dev/null +++ b/nl/PyNucleus_nl/fractionalLaplacian1D.pyx @@ -0,0 +1,757 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from libc.math cimport (log, ceil, fabs as abs, pow) +import numpy as np +cimport numpy as np +cimport cython + +from PyNucleus_base.myTypes import INDEX, REAL +from PyNucleus_base import uninitialized, uninitialized_like +from PyNucleus_fem.meshCy cimport meshBase +from . nonlocalLaplacianBase import ALL +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, + transformQuadratureRule, + doubleSimplexQuadratureRule, + GaussJacobi, + simplexXiaoGimbutas) +from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap, P2_DoFMap, P0_DoFMap, shapeFunction +include "panelTypes.pxi" + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max + + +cdef class fractionalLaplacian1DZeroExterior(nonlocalLaplacian1D): + def __init__(self, FractionalKernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, **kwargs): + manifold_dim2 = mesh.dim-1 + super(fractionalLaplacian1DZeroExterior, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2=manifold_dim2, **kwargs) + self.symmetricCells = False + + +cdef class fractionalLaplacian1D_P1(nonlocalLaplacian1D): + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + cdef: + REAL_t smin, smax + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian1D_P1, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + smin, smax = self.kernel.s.min, self.kernel.s.max + + if target_order is None: + # this is the desired local quadrature error + target_order = 2.-smin + self.target_order = target_order + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil(((target_order+2.)*log(self.num_dofs*self.H0) + (2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + self.x = uninitialized((0, self.dim)) + self.y = uninitialized((0, self.dim)) + self.temp = uninitialized((0), dtype=REAL) + self.temp2 = uninitialized((0), dtype=REAL) + + self.idx = uninitialized((3), dtype=INDEX) + self.distantPSI = {} + + if not self.kernel.variableOrder: + 
self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t s = (self.kernel).getsValue() + panel = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h2/self.H0)) - 2.*s*logdh2) / + (max(logdh1, 0) + 0.8)), + 2) + panel2 = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h1/self.H0)) - 2.*s*logdh1) / + (max(logdh2, 0) + 0.8)), + 2) + panel = max(panel, panel2) + try: + self.distantQuadRules[panel] + except KeyError: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + qr = simplexXiaoGimbutas(panel, self.dim) + qr2 = doubleSimplexQuadratureRule(qr, qr) + numQuadNodes0 = qr2.rule1.num_nodes + numQuadNodes1 = qr2.rule2.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + self.distantQuadRules[panel] = qr2 + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) for i = 0,1 + for I in range(dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I, k] = sf.evalStrided(&qr2.rule1.nodes[0, i], numQuadNodes0) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) for i = 2,3 + for I in range(dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I+dofs_per_element, k] = -sf.evalStrided(&qr2.rule2.nodes[0, j], numQuadNodes1) + k += 1 + self.distantPSI[panel] = PSI + + if qr2.rule1.num_nodes > self.x.shape[0]: + self.x = uninitialized((qr2.rule1.num_nodes, self.dim), dtype=REAL) + if qr2.rule2.num_nodes > self.y.shape[0]: + self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) + if qr2.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr2.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t s = (self.kernel).getsValue() + REAL_t eta0, eta1 + specialQuadRule sQR0 + + if panel == COMMON_EDGE: + try: + sQR0 = self.specialQuadRules[(s, panel)] + except KeyError: + qrId = GaussJacobi(((1, 1, 1-2*s), + (1, 0, 0))) + + PSI_id = uninitialized((self.DoFMap.dofs_per_element, qrId.num_nodes), dtype=REAL) + # COMMON_FACE panels + for i in range(qrId.num_nodes): + eta0 = qrId.nodes[0, i] + eta1 = qrId.nodes[1, i] + + # x = 1-eta0+eta0*eta1 + # PHI_id[0, 0, i] = 1.-x + # PHI_id[0, 1, i] = x + + # y = eta0*eta1 + # PHI_id[2, i] = 1.-y + # PHI_id[3, i] = y + + PSI_id[0, i] = -1 # ((1-x)-(1-y))/(1-eta0) + PSI_id[1, i] = 1 # (x-y)/(1-eta0) + sQR0 = specialQuadRule(qrId, PSI_id) + self.specialQuadRules[(s, panel)] = sQR0 + if qrId.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrId.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.qrId = sQR0.qr + self.PSI_id 
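# Why Gauss-Jacobi rules appear above: after the Duffy-type change of
# variables the kernel singularity becomes a one-sided algebraic weight,
# which Jacobi quadrature integrates exactly. Generic sketch on [0, 1] for
# the weight t**beta (beta > -1), built from SciPy; the patch uses its own
# GaussJacobi class, so this only illustrates the principle.
import numpy as np
from scipy.special import roots_jacobi

def gauss_jacobi_01(n, beta):
    # nodes/weights for int_0^1 t**beta f(t) dt
    x, w = roots_jacobi(n, 0.0, beta)      # weight (1-x)^0 (1+x)^beta on [-1, 1]
    return (x+1.0)/2.0, w/2.0**(beta+1.0)

t, w = gauss_jacobi_01(5, -0.5)            # e.g. beta = 1 - 2*s for s = 0.75
assert abs(w @ t**2 - 0.4) < 1e-12         # int_0^1 t**(-1/2) t**2 dt = 2/5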
= sQR0.PSI + elif panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(s, panel)] + except KeyError: + + qrVertex0 = GaussJacobi(((1, 2-2*s, 0), + (self.quad_order_diagonal, 0, 0))) + qrVertex1 = GaussJacobi(((self.quad_order_diagonal, 1, 0), + (self.quad_order_diagonal, 0, 0))) + + PSI_vertex0 = uninitialized((2*self.DoFMap.dofs_per_element - self.DoFMap.dofs_per_vertex, qrVertex0.num_nodes), dtype=REAL) + PSI_vertex1 = uninitialized((2*self.DoFMap.dofs_per_element - self.DoFMap.dofs_per_vertex, qrVertex1.num_nodes), dtype=REAL) + + # panels with common vertex + # first integral + for i in range(qrVertex0.num_nodes): + eta0 = qrVertex0.nodes[0, i] + eta1 = qrVertex0.nodes[1, i] + # x = eta0*eta1 + # PHI_vertex0[0, i] = x + # PHI_vertex0[1, i] = 1.-x + + # y = eta0*(1.-eta1) + # PHI_vertex0[2, i] = 1.-y + # PHI_vertex0[3, i] = y + + PSI_vertex0[0, i] = eta1 # (x)/eta0 + PSI_vertex0[1, i] = 1.-2.*eta1 # ((1-x)-(1-y))/eta0 + PSI_vertex0[2, i] = eta1-1. # (-y)/eta0 + # second integral + for i in range(qrVertex1.num_nodes): + eta0 = qrVertex1.nodes[0, i] + eta1 = qrVertex1.nodes[1, i] + # x = 1-eta0+eta0*eta1 + # PHI_vertex1[0, i] = x + # PHI_vertex1[1, i] = 1.-x + + # y = 1.-eta0*eta1 + # PHI_vertex1[2, i] = 1.-y + # PHI_vertex1[3, i] = y + + PSI_vertex1[0, i] = 1.-eta0+eta0*eta1 # x + PSI_vertex1[1, i] = eta0*(1.-2.*eta1) # (1-x)-(1-y) + PSI_vertex1[2, i] = eta0*eta1-1. # -y + + qrVertex = qrVertex0+qrVertex1 + sQR0 = specialQuadRule(qrVertex, np.hstack((PSI_vertex0, PSI_vertex1))) + self.specialQuadRules[(s, panel)] = sQR0 + if qrVertex.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.qrVertex = sQR0.qr + self.PSI_vertex = sQR0.PSI + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(fractionalLaplacian1D_P1, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, t, m + REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 + INDEX_t[::1] idx = self.idx + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t s = (self.kernel).getsValue() + REAL_t horizon2, horizon, c1, c2, PSI_I, PSI_J, l, r + transformQuadratureRule qr0, qr1 + INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 + REAL_t a_b1[2] + REAL_t a_b2[2] + REAL_t a_A1[2][2] + REAL_t a_A2[2][2] + REAL_t intervals[3] + REAL_t[::1] b1, b2 + REAL_t[:, ::1] A1, A2 + BOOL_t cutElements = False, lr + + if self.kernel.finiteHorizon and panel >= 1 : + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + if panel == COMMON_EDGE: + # # exact value: + # val = scaling * vol1**(1.-2.*s)/(1.-s)/(3.-2.*s) + # contrib[0] = 0. + # contrib[1] = 0. + # contrib[2] = 0. + # contrib[3] = 0. + # contrib[4] = 0. 
+ # contrib[5] = 0. + # contrib[6] = 0. + # contrib[7] = val + # contrib[8] = -val + # contrib[9] = val + + # factor 2 comes from symmetric contributions + vol = self.kernel.getScalingValue() * 2.0*vol1**2 + + contrib[:] = 0. + # distance between x and y quadrature nodes + for i in range(self.qrId.num_nodes): + self.temp[i] = (simplex1[0, 0]*self.PSI_id[0, i] + + simplex1[1, 0]*self.PSI_id[1, i])**2 + self.temp[i] = self.qrId.weights[i]*pow(self.temp[i], -0.5-s) + for I in range(2): + for J in range(I, 2): + k = 4*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + val = 0. + for i in range(self.qrId.num_nodes): + val += (self.temp[i] * + self.PSI_id[I, i] * + self.PSI_id[J, i]) + contrib[k] += val*vol + elif panel == COMMON_VERTEX: + vol = self.kernel.getScalingValue() * vol1*vol2 + + contrib[:] = 0. + + i = 0 + j = 0 + for k in range(4): + if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]: + break + elif j == 1: + j = 0 + i += 1 + else: + j += 1 + if i == 1 and j == 0: + idx[0], idx[1], idx[2] = 0, 1, 2 + t = 2 + elif i == 0 and j == 1: + idx[0], idx[1], idx[2] = 1, 0, 2 + t = 3 + else: + raise IndexError('COMMON_VERTEX') + + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + i = 3*(I//t)+(I%t) + j = 3*(J//t)+(J%t) + if j < i: + i, j = j, i + k = 4*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + val = 0. + for i in range(self.qrVertex.num_nodes): + val += (self.qrVertex.weights[i] * + self.PSI_vertex[idx[I], i] * + self.PSI_vertex[idx[J], i] * + pow(vol1*self.PSI_vertex[0, i]-vol2*self.PSI_vertex[2, i], -1.-2.*s)) + contrib[k] += val*vol + elif panel >= 1 and not cutElements: + qr2 = self.distantQuadRules[panel] + PSI = self.distantPSI[panel] + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(1, + &self.x[i, 0], + &self.y[j, 0]) + k += 1 + + vol = vol1*vol2 + k = 0 + for I in range(4): + for J in range(I, 4): + if mask & (1 << k): + val = 0. + for i in range(qr2.num_nodes): + val += self.temp[i]*PSI[I, i]*PSI[J, i] + contrib[k] = val*vol + k += 1 + elif panel >= 1 and cutElements: + qr2 = self.distantQuadRules[panel] + qr0 = transformQuadratureRule(qr2.rule1) + qr1 = transformQuadratureRule(qr2.rule2) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + + contrib[:] = 0. 
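# The packing scheme for `contrib` used above: the symmetric local matrix
# over n = 2*dofs_per_element basis functions is stored as its upper
# triangle in a flat vector, entry (I, J) with I <= J living at
# k = n*I - I*(I+1)//2 + J; for n = 4 this is exactly the
# 4*I - (I*(I+1) >> 1) + J in the code.
n = 4
k = 0
for I in range(n):
    for J in range(I, n):
        assert n*I - (I*(I+1) >> 1) + J == k
        k += 1
assert k == n*(n+1)//2  # 10 packed entries for n = 4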
+ + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + b1 = a_b1 + A2 = a_A2 + b2 = a_b2 + + self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): + qr0.setBaryTransform(A1, b1) + qr0.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0.num_nodes): + self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) + while self.kernel.interaction.nextSubSimplex_Node(A2, b2, &c2): + qr1.setBaryTransform(A2, b2) + qr1.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1.num_nodes): + val = qr0.weights[i]*qr1.weights[j]*self.kernel.evalPtr(1, &self.x[i, 0], &self.y[j, 0]) + val *= c1 * c2 * vol + k = 0 + for I in range(2*dofs_per_element): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + for J in range(I, 2*dofs_per_element): + if mask & (1 << k): + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +cdef class fractionalLaplacian1D_P1_boundary(fractionalLaplacian1DZeroExterior): + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian1D_P1_boundary, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + if self.kernel.variableOrder: + smin, smax = self.kernel.s.min, self.kernel.s.max + else: + smin, smax = self.kernel.sValue, self.kernel.sValue + if target_order is None: + # this is the desired local quadrature error + target_order = 2.-smin + self.target_order = target_order + + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.4 + quad_order_diagonal = max(np.ceil(((target_order+1.)*log(self.num_dofs*self.H0)+(2.*smax-1.)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + if not self.kernel.variableOrder: + self.getNearQuadRule(COMMON_VERTEX) + + self.x = uninitialized((0, self.dim), dtype=REAL) + self.temp = uninitialized((0), dtype=REAL) + self.temp2 = uninitialized((0), dtype=REAL) + self.distantPHI = {} + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = max(log(d/h1), 0.), logdh2 = max(log(d/h2), 0.) 
+ REAL_t s = self.kernel.sValue + REAL_t h + panel = max(ceil(((self.target_order+1.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h2/self.H0)) - 2.*s*log(d/h2)) / + (logdh1 + 0.8)), + 2) + panel2 = max(ceil(((self.target_order+1.)*log(self.num_dofs*self.H0) + (2.*s-1.)*abs(log(h1/self.H0)) - 2.*s*log(d/h1)) / + (logdh2 + 0.8)), + 2) + panel = max(panel, panel2) + if self.kernel.finiteHorizon: + # check if the horizon might cut the elements + h = 0.5*max(h1, h2) + if (d-h < self.kernel.horizonValue) and (self.kernel.horizonValue < d+h): + panel *= 3 + try: + self.distantQuadRules[panel] + except KeyError: + self.addQuadRule(panel) + return panel + + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0 + REAL_t[:, ::1] PHI + INDEX_t i + qr0 = simplexXiaoGimbutas(panel, self.dim) + self.distantQuadRules[panel] = qr0 + PHI = uninitialized((2, qr0.num_nodes), dtype=REAL) + for i in range(qr0.num_nodes): + PHI[0, i] = self.getLocalShapeFunction(0)(qr0.nodes[:, i]) + PHI[1, i] = self.getLocalShapeFunction(1)(qr0.nodes[:, i]) + self.distantPHI[panel] = PHI + + if qr0.num_nodes > self.x.shape[0]: + self.x = uninitialized((qr0.num_nodes, self.dim), dtype=REAL) + if qr0.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr0.num_nodes), dtype=REAL) + self.temp2 = uninitialized((qr0.num_nodes), dtype=REAL) + + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t s = self.kernel.sValue + REAL_t eta + specialQuadRule sQR0 + if panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(s, panel)] + except KeyError: + + if s < 0.5: + qrVertex = GaussJacobi(((self.quad_order_diagonal, -2*s, 0), )) + PHI_vertex = uninitialized((2, qrVertex.num_nodes), dtype=REAL) + for i in range(qrVertex.num_nodes): + eta = qrVertex.nodes[0, i] + PHI_vertex[0, i] = 1.-eta + PHI_vertex[1, i] = eta + else: + qrVertex = GaussJacobi(((self.quad_order_diagonal, 2-2*s, 0), )) + PHI_vertex = uninitialized((2, qrVertex.num_nodes), dtype=REAL) + for i in range(qrVertex.num_nodes): + eta = qrVertex.nodes[0, i] + PHI_vertex[0, i] = 1. # unused + PHI_vertex[1, i] = 1. + + sQR0 = specialQuadRule(qrVertex, PHI=PHI_vertex) + self.specialQuadRules[(s, panel)] = sQR0 + self.qrVertex = sQR0.qr + self.PHI_vertex = sQR0.PHI + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(fractionalLaplacian1D_P1_boundary, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + REAL_t vol = self.vol1, val + INDEX_t i, j, k, m + INDEX_t[::1] idx = uninitialized((2), dtype=INDEX) + simplexQuadratureRule qr + REAL_t[:, ::1] PHI + REAL_t[:, ::1] simplex = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t s = self.kernel.sValue + REAL_t scaling = self.kernel.scalingValue + if panel == COMMON_VERTEX: + # For s >= 0.5, we have also an exact expression + # if s >= 0.5: + # if abs(simplex[0, 0]-simplex2[0, 0]) < 1e-12: + # contrib[0] = contrib[1] = 0. 
+ # contrib[2] = scaling*(vol**(1.-2.*s)/s/(3.-2.*s)) + # else: + # contrib[0] = scaling*(vol**(1.-2.*s)/s/(3.-2.*s)) + # contrib[1] = contrib[2] = 0. + # else: + # if abs(simplex[0, 0]-simplex2[0, 0]) < 1e-12: + # idx[0], idx[1] = 0, 1 + # else: + # idx[0], idx[1] = 1, 0 + # k = 0 + # for i in range(2): + # for j in range(i, 2): + # s = 0. + # for m in range(self.qrVertex.num_nodes): + # s += self.PHI_vertex[idx[i], m]*self.PHI_vertex[idx[j], m]*self.qrVertex.weights[m] + # s /= abs(simplex[1, 0]-simplex[0, 0])**(2.*s) * s + # contrib[k] = s*vol*scaling + # k += 1 + if abs(simplex[0, 0]-simplex2[0, 0]) < 1e-12: + idx[0], idx[1] = 0, 1 + else: + idx[0], idx[1] = 1, 0 + k = 0 + for i in range(2): + for j in range(i, 2): + val = 0. + for m in range(self.qrVertex.num_nodes): + val += self.PHI_vertex[idx[i], m]*self.PHI_vertex[idx[j], m]*self.qrVertex.weights[m] + val /= abs(simplex[1, 0]-simplex[0, 0])**(2.*s) * s + contrib[k] = val*vol*scaling + k += 1 + elif panel >= 1: + qr = self.distantQuadRules[panel] + PHI = self.distantPHI[panel] + qr.nodesInGlobalCoords(simplex, self.x) + for j in range(qr.num_nodes): + self.temp[j] = (1./abs(self.x[j, 0]-simplex2[0, 0])**(2.*s)) / s + k = 0 + for i in range(2): + for j in range(i, 2): + for m in range(qr.num_nodes): + self.temp2[m] = self.temp[m]*PHI[i, m]*PHI[j, m] + contrib[k] = scaling*qr.eval(self.temp2, vol) + k += 1 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + + +cdef class fractionalLaplacian1D_P0(nonlocalLaplacian1D): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + return 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + pass + + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None): + assert isinstance(DoFMap, P0_DoFMap) + super(fractionalLaplacian1D_P0, self).__init__(kernel, mesh, DoFMap, num_dofs=num_dofs) + + def __repr__(self): + return (super(fractionalLaplacian1D_P0, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + REAL_t T + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + if panel == COMMON_EDGE: + contrib[:] = 0. 
+ else: + T = -((pow(abs(simplex1[1, 0]-simplex2[1, 0]), 1.-2.*self.s) - + pow(abs(simplex1[1, 0]-simplex2[0, 0]), 1.-2.*self.s) - + pow(abs(simplex1[0, 0]-simplex2[1, 0]), 1.-2.*self.s) + + pow(abs(simplex1[0, 0]-simplex2[0, 0]), 1.-2.*self.s)) / + ((2.*self.s)*(2.*self.s-1.))) + + contrib[0] = self.scaling*T + contrib[1] = -self.scaling*T + contrib[2] = self.scaling*T + # else: + # print(np.array(simplex1), np.array(simplex2)) + # raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +cdef class fractionalLaplacian1D_P0_boundary(fractionalLaplacian1DZeroExterior): + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + return 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addQuadRule(self, panelType panel): + pass + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + pass + + + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + quad_order_diagonal=None, + target_order=None, + num_dofs=None): + assert isinstance(DoFMap, P0_DoFMap) + super(fractionalLaplacian1D_P0_boundary, self).__init__(kernel, mesh, DoFMap, num_dofs=num_dofs) + + def __repr__(self): + return (super(fractionalLaplacian1D_P0_boundary, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + REAL_t vol = self.vol1, T + REAL_t[:, ::1] simplex = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + T = -((pow(abs(simplex[1, 0]-simplex2[0, 0]), 1.-2.*self.s) - + pow(abs(simplex[0, 0]-simplex2[0, 0]), 1.-2.*self.s)) / + ((2.*self.s)*(2.*self.s-1.))) * (-1)**(simplex[0, 0] < simplex2[0, 0])*2. + + contrib[0] = self.scaling*T diff --git a/nl/PyNucleus_nl/fractionalLaplacian2D.pxd b/nl/PyNucleus_nl/fractionalLaplacian2D.pxd new file mode 100644 index 0000000..c806cc6 --- /dev/null +++ b/nl/PyNucleus_nl/fractionalLaplacian2D.pxd @@ -0,0 +1,43 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadQuadratureRule, + doubleSimplexQuadratureRule, GaussJacobi, + simplexDuffyTransformation, simplexXiaoGimbutas) +from PyNucleus_fem.DoFMaps cimport DoFMap +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.functions cimport function +from . nonlocalLaplacianBase cimport (double_local_matrix_t, + nonlocalLaplacian2D, + specialQuadRule, + panelType, MASK_t) +from . 
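# Numeric check of the closed-form P0 panel integral above: for disjoint
# intervals [a1, b1], [a2, b2] and the kernel |x-y|**(-1-2s),
#   T = -(|b1-b2|**p - |b1-a2|**p - |a1-b2|**p + |a1-a2|**p) / ((2s)(2s-1)),
# with p = 1-2s, valid for s != 1/2 (scaling and s come from the base class).
import numpy as np
from scipy.integrate import dblquad

s, (a1, b1), (a2, b2) = 0.3, (0.0, 1.0), (2.0, 3.5)
p = 1.0 - 2.0*s
T = -(abs(b1-b2)**p - abs(b1-a2)**p - abs(a1-b2)**p + abs(a1-a2)**p) / ((2*s)*(2*s-1))
num = dblquad(lambda y, x: abs(x-y)**(-1-2*s), a1, b1, a2, b2)[0]
assert abs(num - T) < 1e-6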
interactionDomains cimport CUT +from . fractionalOrders cimport fractionalOrderBase +from . kernels2 cimport (Kernel, + FractionalKernel) + + +cdef class fractionalLaplacian2DZeroExterior(nonlocalLaplacian2D): + cdef: + public REAL_t[:, :, ::1] PHI_edge, PSI_edge, PHI_vertex, PSI_vertex + dict distantPHI + public REAL_t[::1] n, w + + +cdef class fractionalLaplacian2D_P1(nonlocalLaplacian2D): + cdef: + public quadQuadratureRule qrEdge0, qrEdge1, qrVertex, qrId + REAL_t[:, :, ::1] PSI_edge, PSI_id, PSI_vertex + + +cdef class fractionalLaplacian2D_P1_boundary(fractionalLaplacian2DZeroExterior): + cdef: + public quadQuadratureRule qrEdge, qrVertex0, qrVertex1 + + diff --git a/nl/PyNucleus_nl/fractionalLaplacian2D.pyx b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx new file mode 100644 index 0000000..72c9b8d --- /dev/null +++ b/nl/PyNucleus_nl/fractionalLaplacian2D.pyx @@ -0,0 +1,1182 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from libc.math cimport (sqrt, log, ceil, fabs as abs, pow) +import numpy as np +cimport numpy as np +cimport cython +from libc.stdlib cimport malloc + +from PyNucleus_base.myTypes import INDEX, REAL, BOOL +from PyNucleus_base import uninitialized, uninitialized_like +from PyNucleus_base.blas cimport mydot +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, + transformQuadratureRule, + doubleSimplexQuadratureRule, GaussJacobi, + simplexDuffyTransformation, simplexXiaoGimbutas) +from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap, shapeFunction +from scipy.special import gamma +from . 
nonlocalLaplacianBase import ALL + +include "panelTypes.pxi" + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.cdivision(True) +@cython.wraparound(False) +cdef inline REAL_t findIntersection(REAL_t[::1] x, REAL_t[::1] y1, REAL_t[::1] y2, REAL_t horizon2): + cdef: + REAL_t nn = 0., p = 0., q = 0., A, B, c + INDEX_t k + for k in range(2): + A = y2[k]-y1[k] + B = y1[k]-x[k] + nn += A**2 + p += A*B + q += B**2 + nn = 1./nn + p *= 2.*nn + q = (q-horizon2)*nn + A = -p*0.5 + B = sqrt(A**2-q) + c = A+B + if (c < 0) or (c > 1): + c = A-B + return c + + +cdef class fractionalLaplacian2DZeroExterior(nonlocalLaplacian2D): + def __init__(self, FractionalKernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, **kwargs): + manifold_dim2 = mesh.dim-1 + super(fractionalLaplacian2DZeroExterior, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2=manifold_dim2, **kwargs) + self.symmetricCells = False + + +cdef class fractionalLaplacian2D_P1(nonlocalLaplacian2D): + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + target_order=None, + quad_order_diagonal=None, + num_dofs=None, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian2D_P1, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + if target_order is None: + # this is the desired local quadrature error + # target_order = (2.-s)/self.dim + target_order = 0.5 + self.target_order = target_order + + smax = self.kernel.s.max + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil((target_order+1.+smax)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) + # measured log(2 rho_2) = 0.7 + quad_order_diagonalV = max(np.ceil((target_order+1.+smax)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) + else: + quad_order_diagonalV = quad_order_diagonal + self.quad_order_diagonal = quad_order_diagonal + self.quad_order_diagonalV = quad_order_diagonalV + + self.x = uninitialized((0, self.dim), dtype=REAL) + self.y = uninitialized((0, self.dim), dtype=REAL) + self.temp = uninitialized((0), dtype=REAL) + self.idx1 = uninitialized((self.dim+1), dtype=INDEX) + self.idx2 = uninitialized((self.dim+1), dtype=INDEX) + self.idx3 = uninitialized((2*(self.dim+1)), dtype=INDEX) + self.idx4 = uninitialized(((2*self.DoFMap.dofs_per_element)*(2*self.DoFMap.dofs_per_element+1)//2), dtype=INDEX) + + if not self.kernel.variableOrder: + self.getNearQuadRule(COMMON_FACE) + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t c = (0.5*self.target_order+0.5)*log(self.num_dofs*self.H0**2) #-4. 
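# Cross-check for findIntersection above: c parametrizes the point
# y1 + c*(y2 - y1) at squared distance horizon2 from x; the larger root is
# preferred when it lies in [0, 1] (the caller guarantees an intersection
# exists, so both roots are real here).
import numpy as np

def find_intersection(x, y1, y2, horizon2):
    A, B = y2 - y1, y1 - x
    lo, hi = sorted(np.roots([A @ A, 2.0*(A @ B), B @ B - horizon2]).real)
    return hi if 0.0 <= hi <= 1.0 else lo

y1, y2 = np.array([0.0, 0.5]), np.array([2.0, 0.5])
c = find_intersection(np.zeros(2), y1, y2, 1.0)
assert abs(np.linalg.norm(y1 + c*(y2-y1)) - 1.0) < 1e-9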
+ REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) + REAL_t loghminH0 = max(logh1H0, logh2H0) + REAL_t s = (self.kernel).getsValue() + panel = max(ceil((c + (s-1.)*logh2H0 + loghminH0 - s*logdh2) / + (max(logdh1, 0) + 0.4)), + 2) + panel2 = max(ceil((c + (s-1.)*logh1H0 + loghminH0 - s*logdh1) / + (max(logdh2, 0) + 0.4)), + 2) + panel = max(panel, panel2) + if self.distantQuadRulesPtr[panel] == NULL: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + specialQuadRule sQR + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + qr0 = simplexXiaoGimbutas(panel, self.dim) + qr1 = qr0 + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) for i = 0,1,2 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I, k] = sf.evalStrided(&qr0.nodes[0, i], numQuadNodes0) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) for i = 3,4,5 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I+dofs_per_element, k] = -sf.evalStrided(&qr1.nodes[0, j], numQuadNodes1) + k += 1 + sQR = specialQuadRule(qr2, PSI) + self.distantQuadRules[panel] = sQR + self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + + if numQuadNodes0 > self.x.shape[0]: + self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) + if numQuadNodes1 > self.y.shape[0]: + self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) + if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: + self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t s = self.kernel.sValue + REAL_t eta0, eta1, eta2, eta3 + specialQuadRule sQR0, sQR1 + quadQuadratureRule qrId, qrEdge0, qrEdge1, qrVertex + REAL_t[:, :, ::1] PSI_id, PSI_edge, PSI_vertex + if panel == COMMON_FACE: + try: + sQR0 = self.specialQuadRules[(s, panel, 0)] + except KeyError: + # COMMON_FACE panels have 3 integral contributions. + # Each integral is over a 1D domain. + qrId = GaussJacobi(((1, 3-2*s, 0), + (1, 2-2*s, 0), + (1, 1-2*s, 0), + (self.quad_order_diagonal, 0, 0))) + PSI_id = uninitialized((3, + self.DoFMap.dofs_per_element, + qrId.num_nodes), + dtype=REAL) + for i in range(qrId.num_nodes): + eta0 = qrId.nodes[0, i] + eta1 = qrId.nodes[1, i] + eta2 = qrId.nodes[2, i] + eta3 = qrId.nodes[3, i] + + PSI_id[0, 0, i] = -eta3 + PSI_id[0, 1, i] = eta3-1. + PSI_id[0, 2, i] = 1. + + PSI_id[1, 0, i] = -1. + PSI_id[1, 1, i] = 1.-eta3 + PSI_id[1, 2, i] = eta3 + + PSI_id[2, 0, i] = eta3 + PSI_id[2, 1, i] = -1. 
+ PSI_id[2, 2, i] = 1.-eta3 + sQR0 = specialQuadRule(qrId, PSI3=PSI_id) + self.specialQuadRules[(s, panel, 0)] = sQR0 + if qrId.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrId.num_nodes), dtype=REAL) + self.qrId = sQR0.qr + self.PSI_id = sQR0.PSI3 + elif panel == COMMON_EDGE: + try: + sQR0 = self.specialQuadRules[(s, panel, 0)] + sQR1 = self.specialQuadRules[(s, panel, 1)] + except KeyError: + qrEdge0 = GaussJacobi(((1, 3-2*s, 0), + (1, 2-2*s, 0), + (self.quad_order_diagonal, 0, 0), + (self.quad_order_diagonal, 0, 0))) + qrEdge1 = GaussJacobi(((1, 3-2*s, 0), + (1, 2-2*s, 0), + (self.quad_order_diagonal, 1, 0), + (self.quad_order_diagonal, 0, 0))) + PSI_edge = uninitialized((5, + 2*self.DoFMap.dofs_per_element-2*self.DoFMap.dofs_per_vertex-self.DoFMap.dofs_per_edge, + qrEdge0.num_nodes), + dtype=REAL) + for i in range(qrEdge0.num_nodes): + eta0 = qrEdge0.nodes[0, i] + eta1 = qrEdge0.nodes[1, i] + eta2 = qrEdge0.nodes[2, i] + eta3 = qrEdge0.nodes[3, i] + + PSI_edge[0, 0, i] = -eta2 + PSI_edge[0, 1, i] = 1.-eta3 + PSI_edge[0, 2, i] = eta3 + PSI_edge[0, 3, i] = eta2-1. + + eta0 = qrEdge1.nodes[0, i] + eta1 = qrEdge1.nodes[1, i] + eta2 = qrEdge1.nodes[2, i] + eta3 = qrEdge1.nodes[3, i] + + PSI_edge[1, 0, i] = -eta2*eta3 + PSI_edge[1, 1, i] = eta2-1. + PSI_edge[1, 2, i] = 1. + PSI_edge[1, 3, i] = eta2*(eta3-1.) + + PSI_edge[2, 0, i] = eta2 + PSI_edge[2, 1, i] = eta2*eta3-1. + PSI_edge[2, 2, i] = 1.-eta2 + PSI_edge[2, 3, i] = -eta2*eta3 + + PSI_edge[3, 0, i] = eta2*eta3 + PSI_edge[3, 1, i] = 1.-eta2 + PSI_edge[3, 2, i] = eta2*(1.-eta3) + PSI_edge[3, 3, i] = -1. + + PSI_edge[4, 0, i] = eta2*eta3 + PSI_edge[4, 1, i] = eta2-1. + PSI_edge[4, 2, i] = 1.-eta2*eta3 + PSI_edge[4, 3, i] = -eta2 + + sQR0 = specialQuadRule(qrEdge0, PSI3=PSI_edge) + sQR1 = specialQuadRule(qrEdge1, PSI3=PSI_edge) + self.specialQuadRules[(s, panel, 0)] = sQR0 + self.specialQuadRules[(s, panel, 1)] = sQR1 + if qrEdge0.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrEdge0.num_nodes), dtype=REAL) + if qrEdge1.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrEdge1.num_nodes), dtype=REAL) + self.qrEdge0 = sQR0.qr + self.qrEdge1 = sQR1.qr + self.PSI_edge = sQR0.PSI3 + elif panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(s, panel, 0)] + except KeyError: + qrVertex = GaussJacobi(((1, 3-2*s, 0), + (self.quad_order_diagonalV, 0, 0), + (self.quad_order_diagonalV, 1, 0), + (self.quad_order_diagonalV, 0, 0))) + PSI_vertex = uninitialized((2, + 2*self.DoFMap.dofs_per_element-self.DoFMap.dofs_per_vertex, + qrVertex.num_nodes), + dtype=REAL) + for i in range(qrVertex.num_nodes): + eta0 = qrVertex.nodes[0, i] + eta1 = qrVertex.nodes[1, i] + eta2 = qrVertex.nodes[2, i] + eta3 = qrVertex.nodes[3, i] + + PSI_vertex[0, 0, i] = eta2-1. + PSI_vertex[0, 1, i] = 1.-eta1 + PSI_vertex[0, 2, i] = eta1 + PSI_vertex[0, 3, i] = eta2*(eta3-1.) + PSI_vertex[0, 4, i] = -eta2*eta3 + + PSI_vertex[1, 0, i] = 1.-eta2 + PSI_vertex[1, 1, i] = eta2*(1.-eta3) + PSI_vertex[1, 2, i] = eta2*eta3 + PSI_vertex[1, 3, i] = eta1-1. 
+ PSI_vertex[1, 4, i] = -eta1 + + sQR0 = specialQuadRule(qrVertex, PSI3=PSI_vertex) + self.specialQuadRules[(s, panel, 0)] = sQR0 + if qrVertex.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex.num_nodes), dtype=REAL) + self.qrVertex = sQR0.qr + self.PSI_vertex = sQR0.PSI3 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(fractionalLaplacian2D_P1, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal: {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, l, m, I, J, k2 + REAL_t vol, val, temp + REAL_t vol1 = self.vol1, vol2 = self.vol2 + INDEX_t[::1] idx1, idx2, idx3, idx4 + INDEX_t numQuadNodes, numQuadNodes0, numQuadNodes1, dofs_per_element + specialQuadRule sQR + doubleSimplexQuadratureRule qr2 + quadQuadratureRule qrEdge + REAL_t[:, ::1] PSI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t s = (self.kernel).getsValue() + REAL_t scaling = self.kernel.getScalingValue() + BOOL_t cutElements = False + REAL_t horizon2 + simplexQuadratureRule qr0, qr1 + transformQuadratureRule qr1trans + INDEX_t numInside + INDEX_t outside, inside1, inside2 + INDEX_t inside, outside1, outside2 + REAL_t vol3 = np.nan, vol4 = np.nan, d1, d2, c1, c2 + REAL_t PSI_I, PSI_J + REAL_t a_b1[3] + REAL_t a_b2[3] + REAL_t a_A1[3][3] + REAL_t a_A2[3][3] + REAL_t[:, ::1] A1, A2 + REAL_t[::1] b1, b2 + BOOL_t a_ind[3] + BOOL_t[::1] ind + + if self.kernel.finiteHorizon and panel >= 1 : + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + contrib[:] = 0. + + if panel >= 1 and not cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PSI = sQR.PSI + numQuadNodes0 = qr2.rule1.num_nodes + numQuadNodes1 = qr2.rule2.num_nodes + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + self.temp[k] = (qr2.weights[k] * + self.kernel.evalPtr(2, + &self.x[i, 0], + &self.y[j, 0])) + k += 1 + vol = vol1 * vol2 + # loop over all local DoFs + k = 0 + for I in range(6): + for J in range(I, 6): + if mask & (1 << k): + val = 0. + for l in range(numQuadNodes0*numQuadNodes1): + val += self.temp[l]*PSI[I, l]*PSI[J, l] + contrib[k] = val*vol + k += 1 + elif panel >= 1 and cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + qr0 = qr2.rule1 + qr1 = qr2.rule2 + qr1trans = transformQuadratureRule(qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + + horizon2 = self.kernel.getHorizonValue2() + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + A2 = a_A2 + b1 = a_b1 + b2 = a_b2 + + ind = a_ind + qr0.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0.num_nodes): + numInside = 0 + for j in range(3): + d2 = 0. 
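+                    # accumulate the squared distance from quadrature node x_i to vertex j of simplex2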
+                for k in range(2):
+                    d2 += (simplex2[j, k]-self.x[i, k])**2
+                ind[j] = (d2 <= horizon2)
+                numInside += ind[j]
+            if numInside == 0:
+                continue
+            elif numInside == 1:
+                inside = 0
+                while not ind[inside]:
+                    inside += 1
+                outside1 = (inside+1)%3
+                outside2 = (inside+2)%3
+                c1 = findIntersection(self.x[i, :], simplex2[inside, :], simplex2[outside1, :], horizon2)
+                c2 = findIntersection(self.x[i, :], simplex2[inside, :], simplex2[outside2, :], horizon2)
+                A1[:, :] = 0.
+                b1[:] = 0.
+                A1[inside,inside] = c1+c2
+                A1[inside,outside1] = c2
+                A1[inside,outside2] = c1
+                A1[outside1,outside1] = c1
+                A1[outside2,outside2] = c2
+                b1[inside] = 1-c1-c2
+                vol3 = c1*c2
+                qr1trans.setBaryTransform(A1, b1)
+                qr1 = qr1trans
+            elif numInside == 2:
+                # outside = np.where(ind == False)[0][0]
+                outside = 0
+                while ind[outside]:
+                    outside += 1
+                inside1 = (outside+1)%3
+                inside2 = (outside+2)%3
+                c1 = findIntersection(self.x[i, :], simplex2[outside, :], simplex2[inside1, :], horizon2)
+                c2 = findIntersection(self.x[i, :], simplex2[outside, :], simplex2[inside2, :], horizon2)
+                d1 = 0.
+                d2 = 0.
+                for k in range(2):
+                    d1 += (simplex2[outside, k]
+                           + c1*(simplex2[inside1, k]-simplex2[outside, k])
+                           - simplex2[inside2, k])**2
+                    d2 += (simplex2[outside, k]
+                           + c2*(simplex2[inside2, k]-simplex2[outside, k])
+                           - simplex2[inside1, k])**2
+                A1[:, :] = 0.
+                b1[:] = 0.
+                A2[:, :] = 0.
+                b2[:] = 0.
+
+                if d1 < d2:
+                    A1[outside,outside] = 1-c1
+                    A1[inside1,inside1] = 1-c1
+                    A1[inside1,inside2] = -c1
+                    A1[inside2,inside2] = 1.
+                    b1[inside1] = c1
+                    vol3 = 1-c1
+
+                    A2[outside,outside] = 1-c2
+                    A2[inside2,inside2] = 1
+                    A2[inside2,outside] = c2
+                    A2[outside,inside1] = 1-c1
+                    A2[inside1,inside1] = c1
+                    vol4 = c1*(1-c2)
+                else:
+                    A1[outside,outside] = 1-c2
+                    A1[inside2,inside2] = 1-c2
+                    A1[inside2,inside1] = -c2
+                    A1[inside1,inside1] = 1.
+                    b1[inside2] = c2
+                    vol3 = 1-c2
+
+                    A2[outside,outside] = 1-c1
+                    A2[inside1,inside1] = 1
+                    A2[inside1,outside] = c1
+                    A2[outside,inside2] = 1-c2
+                    A2[inside2,inside2] = c2
+                    vol4 = c2*(1-c1)
+
+                qr1trans.setBaryTransform(A1, b1)
+                qr1 = qr1trans
+            else:
+                qr1 = qr2.rule2
+                vol3 = 1.
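+                # map the (possibly barycentrically transformed) y-rule onto simplex2
+                # and accumulate the kernel contributions for this x-node below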
+ + qr1.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1.num_nodes): + val = qr0.weights[i]*qr1.weights[j]*self.kernel.evalPtr(2, &self.x[i, 0], &self.y[j, 0]) + val *= vol*vol3 + + k = 0 + for I in range(6): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + for J in range(I, 6): + if mask & (1 << k): + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + if numInside == 2: + qr1trans.setBaryTransform(A2, b2) + qr1.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1.num_nodes): + val = qr0.weights[i]*qr1.weights[j]*self.kernel.evalPtr(2, &self.x[i, 0], &self.y[j, 0]) + val *= vol*vol4 + + k = 0 + for I in range(6): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + for J in range(I, 6): + if mask & (1 << k): + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + + elif panel == COMMON_FACE: + # factor 2 comes from symmetric contributions + vol = scaling*4.0*2.0*vol1**2 + + # three different integrals + numQuadNodes = self.qrId.num_nodes + for l in range(3): + # distance between x and y quadrature nodes + for i in range(numQuadNodes): + temp = 0. + for j in range(2): + temp += (simplex1[0, j]*self.PSI_id[l, 0, i] + + simplex1[1, j]*self.PSI_id[l, 1, i] + + simplex1[2, j]*self.PSI_id[l, 2, i])**2 + self.temp[i] = self.qrId.weights[i]*pow(temp, -1.-s) + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + k = 6*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + val = 0. 
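+                            # quadrature sum for local matrix entry (I, J) of singular integral l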
+                            for i in range(numQuadNodes):
+                                val += (self.temp[i] *
+                                        self.PSI_id[l, I, i] *
+                                        self.PSI_id[l, J, i])
+                            contrib[k] += val*vol
+        elif panel == COMMON_EDGE:
+            # reorder so that the common edge matches up: the first triangle
+            # keeps its usual orientation, while the second one is traversed
+            # in the opposite orientation
+
+            idx1 = self.idx1
+            idx2 = self.idx2
+            idx3 = self.idx3
+            idx4 = self.idx4
+
+            k = 0
+            for i in range(3):
+                for j in range(3):
+                    if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]:
+                        idx3[k] = i
+                        idx4[k] = j
+                        k += 1
+                        break
+
+            if idx3[0] > idx3[1]:
+                idx3[1], idx3[0] = idx3[0], idx3[1]
+
+            if idx3[0] == 0:
+                if idx3[1] == 1:
+                    idx1[0], idx1[1], idx1[2] = 0, 1, 2
+                elif idx3[1] == 2:
+                    idx1[0], idx1[1], idx1[2] = 2, 0, 1
+                else:
+                    raise NotImplementedError("Something went wrong for COMMON_EDGE 1")
+            elif idx3[0] == 1 and idx3[1] == 2:
+                idx1[0], idx1[1], idx1[2] = 1, 2, 0
+            else:
+                raise NotImplementedError("Something went wrong for COMMON_EDGE 1")
+
+            if idx4[0] > idx4[1]:
+                idx4[1], idx4[0] = idx4[0], idx4[1]
+
+            if idx4[0] == 0:
+                if idx4[1] == 1:
+                    idx2[0], idx2[1], idx2[2] = 1, 0, 2
+                elif idx4[1] == 2:
+                    idx2[0], idx2[1], idx2[2] = 0, 2, 1
+                else:
+                    raise NotImplementedError("Something went wrong for COMMON_EDGE 2")
+            elif idx4[0] == 1 and idx4[1] == 2:
+                idx2[0], idx2[1], idx2[2] = 2, 1, 0
+            else:
+                raise NotImplementedError("Something went wrong for COMMON_EDGE 2")
+
+            idx3[0], idx3[1], idx3[2], idx3[3] = idx1[0], idx1[1], idx1[2], 3+idx2[2]
+
+            vol = scaling*4.0*vol1*vol2
+
+            # loop over all local DoFs
+            m = 0
+            for I in range(4):
+                for J in range(I, 4):
+                    i = idx3[I]
+                    j = idx3[J]
+                    if j < i:
+                        i, j = j, i
+                    idx4[m] = 6*i-(i*(i+1) >> 1) + j
+                    m += 1
+
+            # five different integrals
+            for l in range(5):
+                if l == 0:
+                    qrEdge = self.qrEdge0
+                else:
+                    qrEdge = self.qrEdge1
+                numQuadNodes = qrEdge.num_nodes
+                # distance between x and y quadrature nodes
+                for i in range(numQuadNodes):
+                    temp = 0.
+                    for j in range(2):
+                        temp += (simplex1[idx1[0], j]*self.PSI_edge[l, 0, i] +
+                                 simplex1[idx1[1], j]*self.PSI_edge[l, 1, i] +
+                                 simplex1[idx1[2], j]*self.PSI_edge[l, 2, i] +
+                                 simplex2[idx2[2], j]*self.PSI_edge[l, 3, i])**2
+                    self.temp[i] = qrEdge.weights[i]*pow(temp, -1.-s)
+
+                # loop over all local DoFs
+                m = 0
+                for I in range(4):
+                    for J in range(I, 4):
+                        k = idx4[m]
+                        m += 1
+                        if mask & (1 << k):
+                            val = 0.
+                            for i in range(numQuadNodes):
+                                val += (self.temp[i] *
+                                        self.PSI_edge[l, I, i] *
+                                        self.PSI_edge[l, J, i])
+                            contrib[k] += val*vol
+        elif panel == COMMON_VERTEX:
+            # Find vertex that matches
+            i = 0
+            j = 0
+            while True:
+                if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]:
+                    break
+                if j == 2:
+                    i += 1
+                    j = 0
+                else:
+                    j += 1
+
+            idx1 = self.idx1
+            idx2 = self.idx2
+            idx3 = self.idx3
+
+            if i == 0:
+                idx1[0], idx1[1], idx1[2] = 0, 1, 2
+            elif i == 1:
+                idx1[0], idx1[1], idx1[2] = 1, 2, 0
+            else:
+                idx1[0], idx1[1], idx1[2] = 2, 0, 1
+            if j == 0:
+                idx2[0], idx2[1], idx2[2] = 0, 1, 2
+            elif j == 1:
+                idx2[0], idx2[1], idx2[2] = 1, 2, 0
+            else:
+                idx2[0], idx2[1], idx2[2] = 2, 0, 1
+            idx3[0], idx3[1], idx3[2], idx3[3], idx3[4] = idx1[0], idx1[1], idx1[2], 3+idx2[1], 3+idx2[2]
+
+            # factor 4. comes from inverse square of volume of standard simplex
+            vol = scaling*4.0*vol1*vol2
+
+            # two different integrals
+            numQuadNodes = self.qrVertex.num_nodes
+            for l in range(2):
+                # distance between x and y quadrature nodes
+                for i in range(numQuadNodes):
+                    temp = 0.
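+                    # temp accumulates |x-y|^2 at quadrature node i via the PSI_vertex factors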
+ for j in range(2): + temp += (simplex1[idx1[0], j]*self.PSI_vertex[l, 0, i] + + simplex1[idx1[1], j]*self.PSI_vertex[l, 1, i] + + simplex1[idx1[2], j]*self.PSI_vertex[l, 2, i] + + simplex2[idx2[1], j]*self.PSI_vertex[l, 3, i] + + simplex2[idx2[2], j]*self.PSI_vertex[l, 4, i])**2 + self.temp[i] = self.qrVertex.weights[i]*pow(temp, -1.-s) + + # loop over all local DoFs + for I in range(5): + i = idx3[I] + for J in range(I, 5): + j = idx3[J] + if j < i: + k = 6*j-(j*(j+1) >> 1) + i + else: + k = 6*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + val = 0. + for k2 in range(numQuadNodes): + val += (self.temp[k2] * + self.PSI_vertex[l, I, k2] * + self.PSI_vertex[l, J, k2]) + contrib[k] += val*vol + else: + raise NotImplementedError('Panel type unknown: {}'.format(panel)) + + +cdef class fractionalLaplacian2D_P1_boundary(fractionalLaplacian2DZeroExterior): + def __init__(self, + FractionalKernel kernel, + meshBase mesh, + DoFMap DoFMap, + target_order=None, + quad_order_diagonal=None, + num_dofs=None, + **kwargs): + assert isinstance(DoFMap, P1_DoFMap) + super(fractionalLaplacian2D_P1_boundary, self).__init__(kernel, mesh, DoFMap, num_dofs, **kwargs) + + smax = self.kernel.s.max + if target_order is None: + # this is the desired global order wrt to the number of DoFs + # target_order = (2.-s)/self.dim + target_order = 0.5 + self.target_order = target_order + self.distantPHI = {} + + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.4 + quad_order_diagonal = max(np.ceil((target_order+0.5+smax)/(0.35)*abs(np.log(self.hmin/self.H0))), 2) + self.quad_order_diagonal = quad_order_diagonal + + self.x = uninitialized((0, self.dim), dtype=REAL) + self.y = uninitialized((0, self.dim), dtype=REAL) + self.temp = uninitialized((0), dtype=REAL) + + self.n = uninitialized((self.dim), dtype=REAL) + self.w = uninitialized((self.dim), dtype=REAL) + + self.idx1 = uninitialized((self.dim+1), dtype=INDEX) + self.idx2 = uninitialized((self.dim), dtype=INDEX) + + if not self.kernel.variableOrder: + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = max(log(d/h1), 0.), logdh2 = max(log(d/h2), 0.) 
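+            # clipped logarithmic distance/size ratios that drive the order estimate below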
+ REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) + REAL_t loghminH0 = max(logh1H0, logh2H0) + REAL_t s = self.kernel.sValue + REAL_t h + panel = max(ceil(((0.5*self.target_order+0.25)*log(self.num_dofs*self.H0**2) + loghminH0 + (s-1.)*logh2H0 - s*logdh2) / + (max(logdh1, 0) + 0.35)), + 2) + panel2 = max(ceil(((0.5*self.target_order+0.25)*log(self.num_dofs*self.H0**2) + loghminH0 + (s-1.)*logh1H0 - s*logdh1) / + (max(logdh2, 0) + 0.35)), + 2) + panel = max(panel, panel2) + if self.kernel.finiteHorizon: + # check if the horizon might cut the elements + h = 0.5*max(h1, h2) + if (d-h < self.kernel.horizonValue) and (self.kernel.horizonValue < d+h): + panel *= 3 + try: + self.distantQuadRules[panel] + except KeyError: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PHI + INDEX_t i, j, k, l + qr0 = simplexXiaoGimbutas(panel, self.dim) + qr1 = simplexDuffyTransformation(panel, self.dim, self.dim-1) + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + self.distantQuadRules[panel] = qr2 + PHI = uninitialized((3, qr2.num_nodes), dtype=REAL) + for i in range(3): + for j in range(qr2.rule1.num_nodes): + for k in range(qr2.rule2.num_nodes): + l = j*qr2.rule2.num_nodes+k + PHI[i, l] = self.getLocalShapeFunction(i)(qr2.rule1.nodes[:, j]) + self.distantPHI[panel] = PHI + + if qr2.rule1.num_nodes > self.x.shape[0]: + self.x = uninitialized((qr2.rule1.num_nodes, self.dim), dtype=REAL) + if qr2.rule2.num_nodes > self.y.shape[0]: + self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) + if qr2.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr2.num_nodes), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t s = self.kernel.sValue + REAL_t eta0, eta1, eta2, x, y + specialQuadRule sQR0, sQR1 + if panel == COMMON_EDGE: + try: + sQR0 = self.specialQuadRules[(s, panel, 0)] + except KeyError: + if s < 0.5: + qrEdge = GaussJacobi(((2, -2.*s, 1.), + (self.quad_order_diagonal, 0., 0.), + (2, 0., 0.))) + PHI_edge = uninitialized((3, 3, qrEdge.num_nodes), dtype=REAL) + PSI_edge = uninitialized((3, 3, qrEdge.num_nodes), dtype=REAL) + for i in range(qrEdge.num_nodes): + eta0 = qrEdge.nodes[0, i] + eta1 = qrEdge.nodes[1, i] + eta2 = qrEdge.nodes[2, i] + + # int 0 + x = eta0 + (1.-eta0)*eta2 + y = eta0*eta1 + + PHI_edge[0, 0, i] = 1.-x + PHI_edge[0, 1, i] = x-y + PHI_edge[0, 2, i] = y + + PSI_edge[0, 0, i] = -1. + PSI_edge[0, 1, i] = 1.-eta1 + PSI_edge[0, 2, i] = eta1 + + # int 1 + x = eta0 + (1.-eta0)*eta2 + y = eta0 + + PHI_edge[1, 0, i] = 1.-x + PHI_edge[1, 1, i] = x-y + PHI_edge[1, 2, i] = y + + PSI_edge[1, 0, i] = -eta1 + PSI_edge[1, 1, i] = eta1-1. + PSI_edge[1, 2, i] = 1. + + # int 2 + x = eta0*eta1 + (1.-eta0)*eta2 + y = eta0*eta1 + + PHI_edge[2, 0, i] = 1.-x + PHI_edge[2, 1, i] = x-y + PHI_edge[2, 2, i] = y + + PSI_edge[2, 0, i] = 1.-eta1 + PSI_edge[2, 1, i] = -1. 
+ PSI_edge[2, 2, i] = eta1 + else: + qrEdge = GaussJacobi(((2, 2.-2.*s, 1.), + (self.quad_order_diagonal, 0., 0.), + (2, 0., 0.))) + PHI_edge = uninitialized((3, 3, qrEdge.num_nodes), dtype=REAL) + PSI_edge = uninitialized((3, 3, qrEdge.num_nodes), dtype=REAL) + for i in range(qrEdge.num_nodes): + eta0 = qrEdge.nodes[0, i] + eta1 = qrEdge.nodes[1, i] + eta2 = qrEdge.nodes[2, i] + + # int 0 + x = eta0 + (1.-eta0)*eta2 + y = eta1 + + PHI_edge[0, 0, i] = 0. + PHI_edge[0, 1, i] = 0. + PHI_edge[0, 2, i] = y + + PSI_edge[0, 0, i] = -1. + PSI_edge[0, 1, i] = 1.-eta1 + PSI_edge[0, 2, i] = eta1 + + # int 1 + x = eta0 + (1.-eta0)*eta2 + y = 1. + + PHI_edge[1, 0, i] = 0. + PHI_edge[1, 1, i] = 0. + PHI_edge[1, 2, i] = y + + PSI_edge[1, 0, i] = -eta1 + PSI_edge[1, 1, i] = eta1-1. + PSI_edge[1, 2, i] = 1. + + # int 2 + x = eta0*eta1 + (1.-eta0)*eta2 + y = eta1 + + PHI_edge[2, 0, i] = 0. + PHI_edge[2, 1, i] = 0. + PHI_edge[2, 2, i] = y + + PSI_edge[2, 0, i] = 1.-eta1 + PSI_edge[2, 1, i] = -1. + PSI_edge[2, 2, i] = eta1 + + sQR0 = specialQuadRule(qrEdge, PSI3=PSI_edge, PHI3=PHI_edge) + self.specialQuadRules[(s, panel, 0)] = sQR0 + if qrEdge.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrEdge.num_nodes), dtype=REAL) + self.qrEdge = sQR0.qr + self.PSI_edge = sQR0.PSI3 + self.PHI_edge = sQR0.PHI3 + elif panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(s, panel, 0)] + sQR1 = self.specialQuadRules[(s, panel, 1)] + except KeyError: + qrVertex0 = GaussJacobi(((2, 1.0-2.0*s, 0), + (self.quad_order_diagonal, 0, 0), + (self.quad_order_diagonal, 0, 0))) + qrVertex1 = GaussJacobi(((2, 1.0-2.0*s, 0), + (self.quad_order_diagonal, 1.0, 0), + (self.quad_order_diagonal, 0, 0))) + PHI_vertex = uninitialized((2, 3, qrVertex0.num_nodes), dtype=REAL) + PSI_vertex = uninitialized((2, 4, qrVertex0.num_nodes), dtype=REAL) + for i in range(qrVertex0.num_nodes): + eta0 = qrVertex0.nodes[0, i] + eta1 = qrVertex0.nodes[1, i] + eta2 = qrVertex0.nodes[2, i] + + # int 0 + x = eta0 + y = eta0*eta1 + + PHI_vertex[0, 0, i] = 1.-x + PHI_vertex[0, 1, i] = x-y + PHI_vertex[0, 2, i] = y + + PSI_vertex[0, 0, i] = eta2-1. + PSI_vertex[0, 1, i] = 1.-eta1 + PSI_vertex[0, 2, i] = eta1 + PSI_vertex[0, 3, i] = -eta2 + + # int 1 + eta0 = qrVertex1.nodes[0, i] + eta1 = qrVertex1.nodes[1, i] + eta2 = qrVertex1.nodes[2, i] + + x = eta0*eta1 + y = eta0*eta1*eta2 + + PHI_vertex[1, 0, i] = 1.-x + PHI_vertex[1, 1, i] = x-y + PHI_vertex[1, 2, i] = y + + PSI_vertex[1, 0, i] = 1.-eta1 + PSI_vertex[1, 1, i] = eta1*(1.-eta2) + PSI_vertex[1, 2, i] = eta1*eta2 + PSI_vertex[1, 3, i] = -1. 
+ + sQR0 = specialQuadRule(qrVertex0, PSI3=PSI_vertex, PHI3=PHI_vertex) + sQR1 = specialQuadRule(qrVertex1, PSI3=PSI_vertex, PHI3=PHI_vertex) + self.specialQuadRules[(s, panel, 0)] = sQR0 + self.specialQuadRules[(s, panel, 1)] = sQR1 + if qrVertex0.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex0.num_nodes), dtype=REAL) + if qrVertex1.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex1.num_nodes), dtype=REAL) + self.qrVertex0 = sQR0.qr + self.qrVertex1 = sQR1.qr + self.PSI_vertex = sQR0.PSI3 + self.PHI_vertex = sQR0.PHI3 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(fractionalLaplacian2D_P1_boundary, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order) + + 'quad_order_diagonal: {}\n'.format(self.quad_order_diagonal) + + 'quad_order_off_diagonal {}\n'.format(list(self.distantQuadRules.keys()))) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + REAL_t vol1 = self.vol1, vol2 = self.vol2, vol + INDEX_t l, i, j, k, m, I, J + set K1, K2 + INDEX_t[::1] idx1 = self.idx1, idx2 = self.idx2 + doubleSimplexQuadratureRule qr2 + quadQuadratureRule qrVertex + REAL_t[:, ::1] PHI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + REAL_t s = self.kernel.sValue + REAL_t scaling = self.kernel.scalingValue + self.n[0] = simplex2[1, 1] - simplex2[0, 1] + self.n[1] = simplex2[0, 0] - simplex2[1, 0] + # F is same as vol2 + val = 1./sqrt(mydot(self.n, self.n)) + self.n[0] *= val + self.n[1] *= val + + contrib[:] = 0. + + if panel == COMMON_EDGE: + # find reordering of cell and edge so that the singularity + # is on the first edge of the cell + K1 = set() + for i in range(3): + for j in range(2): + if simplex1[i, 0] == simplex2[j, 0] and simplex1[i, 1] == simplex2[j, 1]: + K1.add(i) + if K1 == set([0, 1]): + idx1[0], idx1[1], idx1[2] = 0, 1, 2 + elif K1 == set([1, 2]): + idx1[0], idx1[1], idx1[2] = 1, 2, 0 + elif K1 == set([2, 0]): + idx1[0], idx1[1], idx1[2] = 2, 0, 1 + else: + raise NotImplementedError("Something went wrong for COMMON_EDGE") + + vol = -scaling*2.0*vol1*vol2/s + + # We need to calculate 3 integrals + for l in range(3): + for i in range(self.qrEdge.num_nodes): + for j in range(2): + self.w[j] = (simplex1[idx1[0], j]*self.PSI_edge[l, 0, i] + + simplex1[idx1[1], j]*self.PSI_edge[l, 1, i] + + simplex1[idx1[2], j]*self.PSI_edge[l, 2, i]) + self.temp[i] = self.qrEdge.weights[i] * mydot(self.n, self.w) * pow(mydot(self.w, self.w), -1.-s) + for I in range(3): + for J in range(I, 3): + val = 0. 
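+                        # quadrature sum of the PHI_edge trial/test products for entry (I, J)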
+ for i in range(self.qrEdge.num_nodes): + val += (self.temp[i] * + self.PHI_edge[l, I, i] * + self.PHI_edge[l, J, i]) + i = idx1[I] + j = idx1[J] + if j < i: + i, j = j, i + k = 4*i-(i*(i+1))//2 + j-i + contrib[k] += val*vol + elif panel == COMMON_VERTEX: + K1 = set() + K2 = set() + i = 0 + j = 0 + while True: + if simplex1[i, 0] == simplex2[j, 0] and simplex1[i, 1] == simplex2[j, 1]: + break + if j == 1: + i += 1 + j = 0 + else: + j += 1 + if i == 0: + idx1[0], idx1[1], idx1[2] = 0, 1, 2 + elif i == 1: + idx1[0], idx1[1], idx1[2] = 1, 2, 0 + else: + idx1[0], idx1[1], idx1[2] = 2, 0, 1 + + if j == 0: + idx2[0], idx2[1] = 0, 1 + else: + idx2[0], idx2[1] = 1, 0 + + vol = -scaling*2.0*vol1*vol2/s + + for l in range(2): + if l == 0: + qrVertex = self.qrVertex0 + else: + qrVertex = self.qrVertex1 + + for i in range(qrVertex.num_nodes): + for j in range(2): + self.w[j] = (simplex1[idx1[0], j]*self.PSI_vertex[l, 0, i] + + simplex1[idx1[1], j]*self.PSI_vertex[l, 1, i] + + simplex1[idx1[2], j]*self.PSI_vertex[l, 2, i] + + simplex2[idx2[1], j]*self.PSI_vertex[l, 3, i]) + self.temp[i] = qrVertex.weights[i] * mydot(self.n, self.w) * pow(mydot(self.w, self.w), -1.-s) + for I in range(3): + for J in range(I, 3): + val = 0. + for i in range(qrVertex.num_nodes): + val += (self.temp[i] * + self.PHI_vertex[l, I, i] * + self.PHI_vertex[l, J, i]) + i = idx1[I] + j = idx1[J] + if j < i: + i, j = j, i + k = 4*i-(i*(i+1))//2 + j-i + contrib[k] += val*vol + elif panel >= 1: + qr2 = self.distantQuadRules[panel] + PHI = self.distantPHI[panel] + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + for k in range(qr2.rule1.num_nodes): + for m in range(qr2.rule2.num_nodes): + for j in range(2): + self.w[j] = self.y[m, j]-self.x[k, j] + i = k*qr2.rule2.num_nodes+m + self.temp[i] = qr2.weights[i] * mydot(self.n, self.w) * pow(mydot(self.w, self.w), -1.-s) + vol = scaling*vol1*vol2/s + k = 0 + for i in range(3): + for j in range(i, 3): + val = 0. + for m in range(qr2.num_nodes): + val += self.temp[m]*PHI[i, m]*PHI[j, m] + contrib[k] = val*vol + k += 1 + else: + raise NotImplementedError('Panel type unknown: {}.'.format(panel)) + + diff --git a/nl/PyNucleus_nl/fractionalOrders.pxd b/nl/PyNucleus_nl/fractionalOrders.pxd new file mode 100644 index 0000000..de83610 --- /dev/null +++ b/nl/PyNucleus_nl/fractionalOrders.pxd @@ -0,0 +1,60 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +cimport numpy as np +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_fem.functions cimport function +from . twoPointFunctions cimport (twoPointFunction, + constantTwoPoint, + parametrizedTwoPointFunction) +from . 
interactionDomains cimport interactionDomain + +include "kernel_params_decl.pxi" + + +cdef class constantFractionalLaplacianScaling(constantTwoPoint): + cdef: + INDEX_t dim + REAL_t s, horizon + + +cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): + cdef: + INDEX_t dim + fractionalOrderBase sFun + function horizonFun + REAL_t facInfinite, facFinite + twoPointFunction phi + BOOL_t normalized + +cdef class variableFractionalLaplacianScalingWithDifferentHorizon(variableFractionalLaplacianScaling): + pass + + +cdef class fractionalOrderBase(twoPointFunction): + cdef: + public REAL_t min, max + + +cdef class constFractionalOrder(fractionalOrderBase): + cdef: + public REAL_t value + + +cdef class variableFractionalOrder(fractionalOrderBase): + cdef: + void *c_params + cdef void setFractionalOrderFun(self, void* params) + + +cdef class constantIntegrableScaling(constantTwoPoint): + cdef: + kernelType kType + INDEX_t dim + REAL_t horizon + interactionDomain interaction diff --git a/nl/PyNucleus_nl/fractionalOrders.pyx b/nl/PyNucleus_nl/fractionalOrders.pyx new file mode 100644 index 0000000..c31d90e --- /dev/null +++ b/nl/PyNucleus_nl/fractionalOrders.pyx @@ -0,0 +1,685 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan, atan2, + log, ceil, + fabs as abs, M_PI as pi, pow, + tgamma as gamma, exp) +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_fem.functions cimport constant +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.DoFMaps cimport DoFMap +from libc.stdlib cimport malloc +from libc.string cimport memcpy +import warnings +from . 
interactionDomains cimport ball1, ball2, ballInf + +include "kernel_params.pxi" + +cdef REAL_t inf = np.inf + +###################################################################### + +cdef enum: + NUM_FRAC_ORDER_PARAMS = 10 + + +cdef enum fracOrderParams: + fSFUN = 0 + fDIM = 2*OFFSET + # + fLAMBDA = 3*OFFSET + # + fSL = 3*OFFSET + fSR = 4*OFFSET + fR = 7*OFFSET + fSLOPE = 8*OFFSET + fFAC = 9*OFFSET + # + fSLL = 3*OFFSET + fSRR = 4*OFFSET + fSLR = 5*OFFSET + fSRL = 6*OFFSET + + +cdef class fractionalOrderBase(twoPointFunction): + def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric): + super(fractionalOrderBase, self).__init__(symmetric) + self.min = smin + self.max = smax + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + raise NotImplementedError() + + def __getstate__(self): + return (self.min, self.max, self.symmetric) + + def __setstate__(self, state): + fractionalOrderBase.__init__(self, state[0], state[1], state[2]) + + +cdef class constFractionalOrder(fractionalOrderBase): + def __init__(self, REAL_t s): + super(constFractionalOrder, self).__init__(s, s, True) + self.value = s + + def __getstate__(self): + return self.value + + def __setstate__(self, state): + constFractionalOrder.__init__(self, state) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.value + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return self.value + + def __repr__(self): + return '{}'.format(self.value) + + +cdef class variableFractionalOrder(fractionalOrderBase): + def __init__(self, REAL_t smin, REAL_t smax, BOOL_t symmetric): + super(variableFractionalOrder, self).__init__(smin, smax, symmetric) + self.c_params = malloc(NUM_FRAC_ORDER_PARAMS*OFFSET) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + fun_t sFun = getFun(self.c_params, fSFUN) + return sFun(&x[0], &y[0], self.c_params) + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + fun_t sFun = getFun(self.c_params, fSFUN) + return sFun(x, y, self.c_params) + + cdef void setFractionalOrderFun(self, void* params): + memcpy(params, self.c_params, NUM_FRAC_ORDER_PARAMS*OFFSET) + + def __repr__(self): + return '{}({})'.format(self.__class__.__name__, self.symmetric) + + def __add__(self, variableFractionalOrder other): + return sumFractionalOrder(self, 1., other, 1.) 
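All of the variable-order classes above dispatch through a C function pointer
stored in c_params. Stripped of that machinery, the two-point contract they
implement is small; here is a minimal pure-Python sketch of it (the Py* names
are illustrative stand-ins, not part of this patch):

    # Pure-Python model of the two-point fractional order contract:
    # a callable s(x, y) together with bounds and a symmetry flag.
    class PyFractionalOrderBase:
        def __init__(self, smin, smax, symmetric):
            self.min, self.max, self.symmetric = smin, smax, symmetric

        def __call__(self, x, y):  # mirrors eval(x, y)
            raise NotImplementedError()

    class PyConstOrder(PyFractionalOrderBase):
        def __init__(self, s):
            super().__init__(s, s, True)  # a constant order is symmetric
            self.value = s

        def __call__(self, x, y):
            return self.value

    class PySumOrder(PyFractionalOrderBase):
        # mirrors sumFractionalOrder further below: bounds combine,
        # and the sum is symmetric only if both terms are
        def __init__(self, s1, s2):
            super().__init__(min(s1.min, s2.min), max(s1.max, s2.max),
                             s1.symmetric and s2.symmetric)
            self.s1, self.s2 = s1, s2

        def __call__(self, x, y):
            return self.s1(x, y) + self.s2(x, y)

    s = PySumOrder(PyConstOrder(0.25), PyConstOrder(0.5))
    assert s((0.,), (1.,)) == 0.75 and s.symmetric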
+
+
+cdef REAL_t lambdaFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        INDEX_t dim = getINDEX(c_params, fDIM)
+        REAL_t[::1] xA = <REAL_t[:dim]> x
+        REAL_t[::1] yA = <REAL_t[:dim]> y
+    return (<object>((<void**>(c_params+fLAMBDA))[0]))(xA, yA)
+
+
+cdef class lambdaFractionalOrder(variableFractionalOrder):
+    cdef:
+        tuple fun
+
+    def __init__(self, INDEX_t dim, REAL_t smin, REAL_t smax, BOOL_t symmetric, fun):
+        super(lambdaFractionalOrder, self).__init__(smin, smax, symmetric)
+        self.fun = (fun, )
+        setINDEX(self.c_params, fDIM, dim)
+        (<void**>(self.c_params+fLAMBDA))[0] = <void*>fun
+        setFun(self.c_params, fSFUN, &lambdaFractionalOrderFun)
+
+
+cdef REAL_t constFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t s = getREAL(c_params, fSL)
+    return s
+
+
+cdef class variableConstFractionalOrder(variableFractionalOrder):
+    cdef:
+        public REAL_t value
+
+    def __init__(self, REAL_t s):
+        super(variableConstFractionalOrder, self).__init__(s, s, True)
+        self.value = s
+        setREAL(self.c_params, fSL, self.value)
+        setFun(self.c_params, fSFUN, &constFractionalOrderFun)
+
+    def __repr__(self):
+        return '{}(s={},sym={})'.format(self.__class__.__name__, self.value, self.symmetric)
+
+    def __getstate__(self):
+        return self.value
+
+    def __setstate__(self, state):
+        variableConstFractionalOrder.__init__(self, state)
+
+
+cdef REAL_t leftRightFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t sll, srr, slr, srl
+        REAL_t interface = getREAL(c_params, fR)
+    if x[0] < interface:
+        if y[0] < interface:
+            sll = getREAL(c_params, fSLL)
+            return sll
+        else:
+            slr = getREAL(c_params, fSLR)
+            return slr
+    else:
+        if y[0] < interface:
+            srl = getREAL(c_params, fSRL)
+            return srl
+        else:
+            srr = getREAL(c_params, fSRR)
+            return srr
+
+
+cdef class leftRightFractionalOrder(variableFractionalOrder):
+    def __init__(self, REAL_t sll, REAL_t srr, REAL_t slr=np.nan, REAL_t srl=np.nan, REAL_t interface=0.):
+        if not np.isfinite(slr):
+            slr = 0.5*(sll+srr)
+        if not np.isfinite(srl):
+            srl = 0.5*(sll+srr)
+        symmetric = (slr == srl)
+        super(leftRightFractionalOrder, self).__init__(min([sll, srr, slr, srl]), max([sll, srr, slr, srl]), symmetric)
+
+        setFun(self.c_params, fSFUN, &leftRightFractionalOrderFun)
+        setREAL(self.c_params, fSLL, sll)
+        setREAL(self.c_params, fSRR, srr)
+        setREAL(self.c_params, fSLR, slr)
+        setREAL(self.c_params, fSRL, srl)
+        setREAL(self.c_params, fR, interface)
+
+    def __getstate__(self):
+        sll = getREAL(self.c_params, fSLL)
+        srr = getREAL(self.c_params, fSRR)
+        slr = getREAL(self.c_params, fSLR)
+        srl = getREAL(self.c_params, fSRL)
+        interface = getREAL(self.c_params, fR)
+        return (sll, srr, slr, srl, interface)
+
+    def __setstate__(self, state):
+        leftRightFractionalOrder.__init__(self, state[0], state[1], state[2], state[3], state[4])
+
+    def __repr__(self):
+        sll = getREAL(self.c_params, fSLL)
+        srr = getREAL(self.c_params, fSRR)
+        slr = getREAL(self.c_params, fSLR)
+        srl = getREAL(self.c_params, fSRL)
+        interface = getREAL(self.c_params, fR)
+        return '{}(ll={},rr={},lr={},rl={},interface={},sym={})'.format(self.__class__.__name__, sll, srr, slr, srl, interface, self.symmetric)
+
+
+cdef REAL_t smoothedLeftRightFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t sl = getREAL(c_params, fSL)
+        REAL_t sr = getREAL(c_params, fSR)
+        REAL_t r = getREAL(c_params, fR)
+        REAL_t slope, fac
+    if x[0] < -r:
+        return sl
+    elif x[0] > r:
+        return sr
+    slope = getREAL(c_params, fSLOPE)
+    fac = getREAL(c_params, fFAC)
+    return 0.5*(sl+sr)+0.5*(sr-sl)*atan(x[0]*slope) * fac
+
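The arctan blend above is continuous at x = +-r because fac is chosen as
1./atan(r*slope) in the constructor below. A small pure-Python check of the
same formula (using the constructor's default r and slope; illustrative only):

    from math import atan

    def smoothed_order(x, sl=0.25, sr=0.75, r=0.1, slope=200.):
        # mirrors smoothedLeftRightFractionalOrderFun for scalar x
        if x < -r:
            return sl
        if x > r:
            return sr
        fac = 1. / atan(r * slope)
        return 0.5 * (sl + sr) + 0.5 * (sr - sl) * atan(x * slope) * fac

    assert abs(smoothed_order(-0.1) - 0.25) < 1e-12  # hits sl exactly at -r
    assert abs(smoothed_order(0.1) - 0.75) < 1e-12   # hits sr exactly at +r
    assert abs(smoothed_order(0.0) - 0.5) < 1e-12    # midpoint of sl and sr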
+cdef class smoothedLeftRightFractionalOrder(variableFractionalOrder): + def __init__(self, REAL_t sl, REAL_t sr, REAL_t r=0.1, REAL_t slope=200.): + super(smoothedLeftRightFractionalOrder, self).__init__(min(sl, sr), max(sl, sr), False) + fac = 1./atan(r*slope) + setFun(self.c_params, fSFUN, &smoothedLeftRightFractionalOrderFun) + setREAL(self.c_params, fSL, sl) + setREAL(self.c_params, fSR, sr) + setREAL(self.c_params, fR, r) + setREAL(self.c_params, fSLOPE, slope) + setREAL(self.c_params, fFAC, fac) + + def __getstate__(self): + sll = getREAL(self.c_params, fSL) + srr = getREAL(self.c_params, fSR) + r = getREAL(self.c_params, fR) + slope = getREAL(self.c_params, fSLOPE) + return (sll, srr, r, slope) + + def __setstate__(self, state): + smoothedLeftRightFractionalOrder.__init__(self, state[0], state[1], state[2], state[3]) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return smoothedLeftRightFractionalOrderFun(&x[0], &y[0], self.c_params) + + def __repr__(self): + sl = getREAL(self.c_params, fSL) + sr = getREAL(self.c_params, fSR) + r = getREAL(self.c_params, fR) + slope = getREAL(self.c_params, fSLOPE) + return '{}(l={},r={},r={},slope={},sym={})'.format(self.__class__.__name__, sl, sr, r, slope, self.symmetric) + + +cdef REAL_t linearLeftRightFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params): + cdef: + REAL_t sl = getREAL(c_params, fSL) + REAL_t sr = getREAL(c_params, fSR) + REAL_t r = getREAL(c_params, fR) + REAL_t slope + if x[0] < -r: + return sl + elif x[0] > r: + return sr + slope = getREAL(c_params, fSLOPE) + return sl + slope*(x[0]+r) + + +cdef class linearLeftRightFractionalOrder(variableFractionalOrder): + def __init__(self, INDEX_t dim, REAL_t sl, REAL_t sr, REAL_t r=0.1): + super(linearLeftRightFractionalOrder, self).__init__(min(sl, sr), max(sl, sr), False) + slope = (sr-sl)/(2.0*r) + setINDEX(self.c_params, fDIM, dim) + setFun(self.c_params, fSFUN, &linearLeftRightFractionalOrderFun) + setREAL(self.c_params, fSL, sl) + setREAL(self.c_params, fSR, sr) + setREAL(self.c_params, fR, r) + setREAL(self.c_params, fSLOPE, slope) + + def __getstate__(self): + dim = getINDEX(self.c_params, fDIM) + sll = getREAL(self.c_params, fSL) + srr = getREAL(self.c_params, fSR) + r = getREAL(self.c_params, fR) + slope = getREAL(self.c_params, fSLOPE) + return (dim, sll, srr, r, slope) + + def __setstate__(self, state): + linearLeftRightFractionalOrder.__init__(self, state[0], state[1], state[2], state[3], state[4]) + + def __repr__(self): + sl = getREAL(self.c_params, fSL) + sr = getREAL(self.c_params, fSR) + r = getREAL(self.c_params, fR) + slope = getREAL(self.c_params, fSLOPE) + return '{}(l={},r={},r={},slope={},sym={})'.format(self.__class__.__name__, sl, sr, r, slope, self.symmetric) + + +cdef REAL_t innerOuterFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params): + cdef: + INDEX_t dim = getINDEX(c_params, fDIM) + REAL_t sii = getREAL(c_params, fSLL) + REAL_t soo = getREAL(c_params, fSRR) + REAL_t sio = getREAL(c_params, fSLR) + REAL_t soi = getREAL(c_params, fSRL) + REAL_t r2 = getREAL(c_params, fR) + REAL_t r2x = 0., r2y = 0. 
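+        # r2x, r2y: squared distances of x and y from the center point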
+ INDEX_t i + REAL_t[::1] center = ((c_params+fSLOPE)) + for i in range(dim): + r2x += (x[i]-center[i])**2 + r2y += (y[i]-center[i])**2 + if r2x < r2 and r2y < r2: + return sii + elif r2x >= r2 and r2y >= r2: + return soo + elif r2x < r2 and r2y >= r2: + return sio + elif r2x >= r2 and r2y < r2: + return soi + else: + raise NotImplementedError() + + +cdef class innerOuterFractionalOrder(variableFractionalOrder): + cdef: + REAL_t sii, soo, sio, soi + REAL_t r2 + REAL_t[::1] center + + def __init__(self, INDEX_t dim, REAL_t sii, REAL_t soo, REAL_t r, REAL_t[::1] center, REAL_t sio=np.nan, REAL_t soi=np.nan): + if not np.isfinite(sio): + sio = 0.5*(sii+soo) + if not np.isfinite(soi): + soi = 0.5*(sii+soo) + super(innerOuterFractionalOrder, self).__init__(min([sii, soo, sio, soi]), max([sii, soo, sio, soi]), sio == soi) + setINDEX(self.c_params, fDIM, dim) + setFun(self.c_params, fSFUN, &innerOuterFractionalOrderFun) + setREAL(self.c_params, fSLL, sii) + setREAL(self.c_params, fSRR, soo) + setREAL(self.c_params, fSLR, sio) + setREAL(self.c_params, fSRL, soi) + setREAL(self.c_params, fR, r*r) + setREAL(self.c_params, fSLOPE, center[0]) + + def __getstate__(self): + dim = getINDEX(self.c_params, fDIM) + sii = getREAL(self.c_params, fSLL) + soo = getREAL(self.c_params, fSRR) + sio = getREAL(self.c_params, fSLR) + soi = getREAL(self.c_params, fSRL) + r = sqrt(getREAL(self.c_params, fR)) + center = ((self.c_params+fSLOPE)) + return (dim, sii, soo, r, np.array(center), sio, soi) + + def __setstate__(self, state): + innerOuterFractionalOrder.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6]) + + def __repr__(self): + return '{}(ii={},oo={},io={},oi={},r={},sym={})'.format(self.__class__.__name__, self.sii, self.soo, self.sio, self.soi, np.sqrt(self.r2), self.symmetric) + + +cdef class sumFractionalOrder(variableFractionalOrder): + cdef: + variableFractionalOrder s1, s2 + REAL_t fac1, fac2 + + def __init__(self, variableFractionalOrder s1, REAL_t fac1, variableFractionalOrder s2, REAL_t fac2): + super(sumFractionalOrder, self).__init__(min(s1.min, s2.min), max(s1.max, s2.max), s1.symmetric and s2.symmetric) + self.s1 = s1 + self.fac1 = fac1 + self.s2 = s2 + self.fac2 = fac2 + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.s1.eval(x, y) + self.s2.eval(x, y) + + +cdef REAL_t islandsFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params): + cdef: + INDEX_t dim = getINDEX(c_params, fDIM) + REAL_t sii = getREAL(c_params, fSLL) + REAL_t soo = getREAL(c_params, fSRR) + REAL_t sio = getREAL(c_params, fSLR) + REAL_t soi = getREAL(c_params, fSRL) + REAL_t r = getREAL(c_params, fR) + REAL_t r2 = getREAL(c_params, fSLOPE) + REAL_t p + INDEX_t i + BOOL_t xInIsland = True, yInIsland = True + for i in range(dim): + p = abs(x[i]) + if not(p >= r and p <= r2): + xInIsland = False + break + + for i in range(dim): + p = abs(y[i]) + if not(p >= r and p <= r2): + yInIsland = False + break + + if xInIsland: + if yInIsland: + return sii + else: + return sio + else: + if yInIsland: + return soi + else: + return soo + + +cdef class islandsFractionalOrder(variableFractionalOrder): + def __init__(self, REAL_t sii, REAL_t soo, REAL_t r, REAL_t r2, REAL_t sio=np.nan, REAL_t soi=np.nan): + if not np.isfinite(sio): + sio = 0.5*(sii+soo) + if not np.isfinite(soi): + soi = 0.5*(sii+soo) + super(islandsFractionalOrder, self).__init__(min([sii, soo, sio, soi]), max([sii, soo, sio, soi]), sio == soi) + 
setINDEX(self.c_params, fDIM, 2) + setFun(self.c_params, fSFUN, &islandsFractionalOrderFun) + setREAL(self.c_params, fSLL, sii) + setREAL(self.c_params, fSRR, soo) + setREAL(self.c_params, fSLR, sio) + setREAL(self.c_params, fSRL, soi) + setREAL(self.c_params, fR, r) + setREAL(self.c_params, fSLOPE, r2) + + def __getstate__(self): + sii = getREAL(self.c_params, fSLL) + soo = getREAL(self.c_params, fSRR) + sio = getREAL(self.c_params, fSLR) + soi = getREAL(self.c_params, fSRL) + r = getREAL(self.c_params, fR) + r2 = getREAL(self.c_params, fSLOPE) + return (sii, soo, r, r2, sio, soi) + + def __setstate__(self, state): + islandsFractionalOrder.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5]) + + def __repr__(self): + sii = getREAL(self.c_params, fSLL) + soo = getREAL(self.c_params, fSRR) + sio = getREAL(self.c_params, fSLR) + soi = getREAL(self.c_params, fSRL) + r = getREAL(self.c_params, fR) + r2 = getREAL(self.c_params, fSLOPE) + return '{}(ii={},oo={},io={},oi={},r={},r2={},sym={})'.format(self.__class__.__name__, sii, soo, sio, soi, r, r2, self.symmetric) + + +cdef REAL_t layersFractionalOrderFun(REAL_t *x, REAL_t *y, void *c_params): + cdef: + INDEX_t dim = getINDEX(c_params, fDIM) + INDEX_t numLayers = getINDEX(c_params, fR) + REAL_t* layerBoundaries = getREALArray1D(c_params, fSL) + REAL_t* layerOrders = getREALArray2D(c_params, fSR) + INDEX_t i, j, I = 0, J = 0 + REAL_t c + + c = x[dim-1] + if c <= layerBoundaries[0]: + I = 0 + elif c >= layerBoundaries[numLayers]: + I = numLayers-1 + else: + for i in range(numLayers): + if (layerBoundaries[i] <= c) and (c <= layerBoundaries[i+1]): + I = i + break + c = y[dim-1] + if c <= layerBoundaries[0]: + J = 0 + elif c >= layerBoundaries[numLayers]: + J = numLayers-1 + else: + for j in range(numLayers): + if (layerBoundaries[j] <= c) and (c <= layerBoundaries[j+1]): + J = j + break + return layerOrders[I*numLayers+J] + + +cdef class layersFractionalOrder(variableFractionalOrder): + cdef: + public REAL_t[::1] layerBoundaries + public REAL_t[:, ::1] layerOrders + + def __init__(self, INDEX_t dim, REAL_t[::1] layerBoundaries, REAL_t[:, ::1] layerOrders): + cdef: + REAL_t smin, smax + INDEX_t i, j, numLayers + BOOL_t sym + + smin = np.inf + smax = 0. 
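+        # scan all layer pairs for the global order bounds and symmetry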
+ sym = True + numLayers = layerBoundaries.shape[0]-1 + assert layerOrders.shape[0] == numLayers + assert layerOrders.shape[1] == numLayers + for i in range(numLayers): + for j in range(numLayers): + smin = min(smin, layerOrders[i, j]) + smax = max(smax, layerOrders[i, j]) + if layerOrders[i, j] != layerOrders[j, i]: + sym = False + super(layersFractionalOrder, self).__init__(smin, smax, sym) + self.layerBoundaries = layerBoundaries + self.layerOrders = layerOrders + setINDEX(self.c_params, fDIM, dim) + setFun(self.c_params, fSFUN, &layersFractionalOrderFun) + setINDEX(self.c_params, fR, numLayers) + setREALArray1D(self.c_params, fSL, self.layerBoundaries) + setREALArray2D(self.c_params, fSR, self.layerOrders) + + def __getstate__(self): + dim = getINDEX(self.c_params, fDIM) + return (dim, np.array(self.layerBoundaries), np.array(self.layerOrders)) + + def __setstate__(self, state): + layersFractionalOrder.__init__(self, state[0], state[1], state[2]) + + def __repr__(self): + numLayers = getINDEX(self.c_params, fR) + return '{}(numLayers={})'.format(self.__class__.__name__, numLayers) + + +###################################################################### + +cdef class constantFractionalLaplacianScaling(constantTwoPoint): + def __init__(self, INDEX_t dim, REAL_t s, REAL_t horizon): + self.dim = dim + self.s = s + self.horizon = horizon + if dim == 1: + if horizon < inf: + value = (2.-2*s) * pow(horizon, 2*s-2.) * 0.5 + else: + value = 2.0**(2.0*s) * s * gamma(s+0.5)/sqrt(pi)/gamma(1.0-s) * 0.5 + elif dim == 2: + if horizon < inf: + value = (2.-2*s)*pow(horizon, 2*s-2.) * 2./pi * 0.5 + else: + value = 2.0**(2.0*s) * s * gamma(s+1.0)/pi/gamma(1.-s) * 0.5 + else: + raise NotImplementedError() + super(constantFractionalLaplacianScaling, self).__init__(value) + + def __getstate__(self): + return (self.dim, self.s, self.horizon) + + def __setstate__(self, state): + constantFractionalLaplacianScaling.__init__(self, state[0], state[1], state[2]) + + def __repr__(self): + return '{}({},{} -> {})'.format(self.__class__.__name__, self.s, self.horizon, self.value) + + +cdef class constantIntegrableScaling(constantTwoPoint): + def __init__(self, kernelType kType, interactionDomain interaction, INDEX_t dim, REAL_t horizon): + self.kType = kType + self.dim = dim + self.interaction = interaction + self.horizon = horizon + if kType == INDICATOR: + if dim == 1: + value = 3./horizon**3 / 2. + elif dim == 2: + if isinstance(self.interaction, ball2): + value = 8./pi/horizon**4 / 2. + elif isinstance(self.interaction, ballInf): + value = 3./4./horizon**4 / 2. + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif kType == PERIDYNAMIC: + if dim == 1: + value = 2./horizon**2 / 2. + elif dim == 2: + if isinstance(self.interaction, ball2): + value = 6./pi/horizon**3 / 2. 
+ else: + raise NotImplementedError() + else: + raise NotImplementedError() + else: + raise NotImplementedError() + super(constantIntegrableScaling, self).__init__(value) + + def __getstate__(self): + return (self.kType, self.interaction, self.dim, self.horizon) + + def __setstate__(self, state): + constantIntegrableScaling.__init__(self, state[0], state[1], state[2], state[3]) + + def __repr__(self): + return '{}({} -> {})'.format(self.__class__.__name__, self.horizon, self.value) + + +cdef class variableFractionalLaplacianScaling(parametrizedTwoPointFunction): + def __init__(self, BOOL_t symmetric): + super(variableFractionalLaplacianScaling, self).__init__(symmetric) + + cdef void setParams(self, void *params): + parametrizedTwoPointFunction.setParams(self, params) + self.dim = getINDEX(self.params, fKDIM) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t s = getREAL(self.params, fS) + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + + if self.dim == 1: + if horizon2 < inf: + return (2.-2*s) * pow(horizon2, s-1.) * 0.5 + else: + return 2.0**(2.0*s) * s * gamma(s+0.5)/sqrt(pi)/gamma(1.0-s) * 0.5 + elif self.dim == 2: + if horizon2 < inf: + return (2.-2*s)*pow(horizon2, s-1.) * 2./pi * 0.5 + else: + return 2.0**(2.0*s) * s * gamma(s+1.0)/pi/gamma(1.-s) * 0.5 + else: + raise NotImplementedError() + + def getScalingWithDifferentHorizon(self): + cdef: + variableFractionalLaplacianScalingWithDifferentHorizon scaling + function horizonFun + BOOL_t horizonFunNull = isNull(self.params, fHORIZONFUN) + if not horizonFunNull: + horizonFun = (((self.params+fHORIZONFUN))[0]) + else: + horizonFun = constant(sqrt(getREAL(self.params, fHORIZON2))) + scaling = variableFractionalLaplacianScalingWithDifferentHorizon(self.symmetric, horizonFun) + return scaling + + +###################################################################### + + +cdef class variableFractionalLaplacianScalingWithDifferentHorizon(variableFractionalLaplacianScaling): + def __init__(self, BOOL_t symmetric, function horizonFun): + super(variableFractionalLaplacianScalingWithDifferentHorizon, self).__init__(symmetric) + self.horizonFun = horizonFun + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.cdivision(True) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + void* params + void* paramsModified = malloc(NUM_KERNEL_PARAMS*OFFSET) + REAL_t horizon + horizon = self.horizonFun.eval(x) + params = self.getParams() + memcpy(paramsModified, params, NUM_KERNEL_PARAMS*OFFSET) + setREAL(paramsModified, fHORIZON2, horizon**2) + self.setParams(paramsModified) + scalingValue = variableFractionalLaplacianScaling.eval(self, x, y) + self.setParams(params) + return scalingValue diff --git a/nl/PyNucleus_nl/interactionDomains.pxd b/nl/PyNucleus_nl/interactionDomains.pxd new file mode 100644 index 0000000..fdb54c9 --- /dev/null +++ b/nl/PyNucleus_nl/interactionDomains.pxd @@ -0,0 +1,48 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from . 
twoPointFunctions cimport parametrizedTwoPointFunction + +cdef enum RELATIVE_POSITION_t: + INTERACT + REMOTE + CUT + + +cdef class interactionDomain(parametrizedTwoPointFunction): + cdef: + public RELATIVE_POSITION_t relPos + public BOOL_t complement + REAL_t[::1] intervals1, intervals2 + REAL_t[:, :, ::1] A_Simplex + REAL_t[:, ::1] b_Simplex + REAL_t[::1] vol_Simplex + REAL_t[:, :, ::1] A_Node + REAL_t[:, ::1] b_Node + REAL_t[::1] vol_Node + INDEX_t iter_Simplex, iterEnd_Simplex + INDEX_t iter_Node, iterEnd_Node + cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:,::1] simplex1, REAL_t[:,::1] simplex2) + cdef void startLoopSubSimplices_Simplex(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2) + cdef BOOL_t nextSubSimplex_Simplex(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t *vol) + cdef void startLoopSubSimplices_Node(self, REAL_t[::1] node1, REAL_t[:, ::1] simplex2) + cdef BOOL_t nextSubSimplex_Node(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t *vol) + + +cdef class ball1(interactionDomain): + pass + + +cdef class ball2(interactionDomain): + pass + + +cdef class ballInf(interactionDomain): + pass diff --git a/nl/PyNucleus_nl/interactionDomains.pyx b/nl/PyNucleus_nl/interactionDomains.pyx new file mode 100644 index 0000000..eb5e9e3 --- /dev/null +++ b/nl/PyNucleus_nl/interactionDomains.pyx @@ -0,0 +1,890 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +cimport cython +import numpy as np +cimport numpy as np +from libc.math cimport sqrt, M_PI as pi, pow +import warnings +from PyNucleus_base.myTypes import REAL + +include "kernel_params.pxi" + + +cdef REAL_t inf = np.inf + + +cdef class interactionDomain(parametrizedTwoPointFunction): + def __init__(self, BOOL_t isComplement): + super(interactionDomain, self).__init__(True) + self.complement = isComplement + + def getComplement(self): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void startLoopSubSimplices_Simplex(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef BOOL_t nextSubSimplex_Simplex(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t * vol): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void startLoopSubSimplices_Node(self, REAL_t[::1] node1, REAL_t[:, ::1] simplex2): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef BOOL_t nextSubSimplex_Node(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t * vol): + raise NotImplementedError() + + def startLoopSubSimplices_Simplex_py(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + self.startLoopSubSimplices_Simplex(simplex1, simplex2) + + def 
nextSubSimplex_Simplex_py(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t[::1] vol): + return self.nextSubSimplex_Simplex(A, b, &vol[0]) + + def startLoopSubSimplices_Node_py(self, REAL_t[::1] node1, REAL_t[:, ::1] simplex2): + self.startLoopSubSimplices_Node(node1, simplex2) + + def nextSubSimplex_Node_py(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t[::1] vol): + return self.nextSubSimplex_Node(A, b, &vol[0]) + + def plot_Simplex(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + import matplotlib.pyplot as plt + cdef: + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + REAL_t horizon = sqrt(horizon2) + INDEX_t dim = getINDEX(self.params, fKDIM) + REAL_t vol + + if dim == 2: + def plotSimplex(simplex, doFill=False): + if doFill: + plt.fill(np.concatenate((simplex[:, 0], [simplex[0, 0]])), + np.concatenate((simplex[:, 1], [simplex[0, 1]]))) + else: + plt.plot(np.concatenate((simplex[:, 0], [simplex[0, 0]])), + np.concatenate((simplex[:, 1], [simplex[0, 1]]))) + + A = np.empty((dim+1, dim+1), dtype=REAL) + b = np.empty((dim+1, 1), dtype=REAL) + plotSimplex(simplex1) + plotSimplex(simplex2) + t = np.linspace(0, 2*pi, 101) + for j in range(dim+1): + plt.plot(simplex2[j, 0]+horizon*np.cos(t), simplex2[j, 1]+horizon*np.sin(t)) + self.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.nextSubSimplex_Simplex(A, b[:, 0], &vol): + plotSimplex(A.T.dot(simplex1)+b.T.dot(simplex1), True) + c = np.vstack((simplex1, simplex2)) + mins = c.min(axis=0) + maxs = c.max(axis=0) + plt.axis('equal') + plt.xlim([mins[0], maxs[0]]) + plt.ylim([mins[1], maxs[1]]) + + def plot_Node(self, REAL_t[::1] node1, REAL_t[:, ::1] simplex2): + import matplotlib.pyplot as plt + cdef: + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + REAL_t horizon = sqrt(horizon2) + INDEX_t dim = getINDEX(self.params, fKDIM) + REAL_t vol + + if dim == 2: + def plotSimplex(simplex, doFill=False): + if doFill: + plt.fill(np.concatenate((simplex[:, 0], [simplex[0, 0]])), + np.concatenate((simplex[:, 1], [simplex[0, 1]]))) + else: + plt.plot(np.concatenate((simplex[:, 0], [simplex[0, 0]])), + np.concatenate((simplex[:, 1], [simplex[0, 1]]))) + + A = np.empty((dim+1, dim+1), dtype=REAL) + b = np.empty((dim+1, 1), dtype=REAL) + # plotSimplex(simplex1) + plotSimplex(simplex2) + plt.scatter(node1[0], node1[1]) + t = np.linspace(0, 2*pi, 101) + plt.plot(node1[0]+horizon*np.cos(t), node1[1]+horizon*np.sin(t)) + self.startLoopSubSimplices_Node(node1, simplex2) + while self.nextSubSimplex_Node(A, b[:, 0], &vol): + plotSimplex(A.T.dot(simplex2)+b.T.dot(simplex2), True) + c = np.vstack((node1, simplex2)) + mins = c.min(axis=0) + maxs = c.max(axis=0) + plt.axis('equal') + plt.xlim([mins[0], maxs[0]]) + plt.ylim([mins[1], maxs[1]]) + + +cdef class fullSpace(interactionDomain): + def __init__(self): + super(fullSpace, self).__init__(False) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + return INTERACT + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return 1. + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + return 1. 
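+    # the full space imposes no interaction cutoff, so both eval variants are identically one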
+
+    def __repr__(self):
+        dim = getINDEX(self.params, fKDIM)
+        return 'R^{}'.format(dim)
+
+
+cdef class ball1(interactionDomain):
+    def __init__(self):
+        super(ball1, self).__init__(False)
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2):
+        cdef:
+            INDEX_t i, k, j
+            INDEX_t noSimplex1 = simplex1.shape[0]
+            INDEX_t noSimplex2 = simplex2.shape[0]
+            REAL_t d2
+            REAL_t dmin2 = inf
+            REAL_t dmax2 = 0.
+            REAL_t horizon2 = getREAL(self.params, fHORIZON2)
+            INDEX_t dim = getINDEX(self.params, fKDIM)
+        for i in range(noSimplex1):
+            for k in range(noSimplex2):
+                d2 = 0.
+                for j in range(dim):
+                    d2 += abs(simplex1[i, j] - simplex2[k, j])
+                # square the 1-norm distance so it can be compared against horizon2
+                d2 = d2*d2
+                dmin2 = min(dmin2, d2)
+                dmax2 = max(dmax2, d2)
+        if dmin2 >= horizon2:
+            self.relPos = REMOTE
+        elif dmax2 <= horizon2:
+            self.relPos = INTERACT
+        else:
+            self.relPos = CUT
+        return self.relPos
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        cdef:
+            REAL_t s = 0.
+            INDEX_t i
+            REAL_t horizon2 = getREAL(self.params, fHORIZON2)
+        for i in range(x.shape[0]):
+            s += abs(x[i]-y[i])
+        s = s*s
+        if s <= horizon2:
+            return 1.
+        else:
+            return 0.
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            REAL_t s = 0.
+            INDEX_t i
+            REAL_t horizon2 = getREAL(self.params, fHORIZON2)
+        for i in range(dim):
+            s += abs(x[i]-y[i])
+        s = s*s
+        if s <= horizon2:
+            return 1.
+        else:
+            return 0.
+
+    def __repr__(self):
+        horizon2 = getREAL(self.params, fHORIZON2)
+        return '|x-y|_1 <= {}'.format(sqrt(horizon2))
+
+
+@cython.initializedcheck(False)
+@cython.boundscheck(False)
+@cython.cdivision(True)
+@cython.wraparound(False)
+cdef inline REAL_t findIntersection(REAL_t[::1] x, REAL_t[::1] y1, REAL_t[::1] y2, REAL_t horizon2):
+    # Solve |y1 + c*(y2-y1) - x|^2 = horizon2 for c in [0, 1], i.e. find where
+    # the segment y1 -> y2 crosses the interaction sphere around x.
+    cdef:
+        REAL_t nn = 0., p = 0., q = 0., A, B, c
+        INDEX_t k
+    for k in range(2):
+        A = y2[k]-y1[k]
+        B = y1[k]-x[k]
+        nn += A**2
+        p += A*B
+        q += B**2
+    # normalized quadratic c^2 + p*c + q = 0
+    nn = 1./nn
+    p *= 2.*nn
+    q = (q-horizon2)*nn
+    A = -p*0.5
+    B = sqrt(A**2-q)
+    c = A+B
+    # pick the root that lies in the unit interval
+    if (c < 0) or (c > 1):
+        c = A-B
+    return c
+
+
+cdef class ball2(interactionDomain):
+    def __init__(self):
+        super(ball2, self).__init__(False)
+        self.intervals1 = np.empty((4), dtype=REAL)
+        self.intervals2 = np.empty((3), dtype=REAL)
+        self.A_Simplex = np.empty((2, 3, 3), dtype=REAL)
+        self.b_Simplex = np.empty((2, 3), dtype=REAL)
+        self.vol_Simplex = np.empty((2), dtype=REAL)
+        self.A_Node = np.empty((2, 3, 3), dtype=REAL)
+        self.b_Node = np.empty((2, 3), dtype=REAL)
+        self.vol_Node = np.empty((2), dtype=REAL)
+
+    def getComplement(self):
+        return ball2Complement()
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2):
+        cdef:
+            INDEX_t i, k, j
+            INDEX_t noSimplex1 = simplex1.shape[0]
+            INDEX_t noSimplex2 = simplex2.shape[0]
+            REAL_t d2
+            REAL_t dmin2 = inf
+            REAL_t dmax2 = 0.
+            REAL_t horizon2 = getREAL(self.params, fHORIZON2)
+            INDEX_t dim = getINDEX(self.params, fKDIM)
+        for i in range(noSimplex1):
+            for k in range(noSimplex2):
+                d2 = 0.
+ for j in range(dim): + d2 += (simplex1[i, j] - simplex2[k, j])**2 + dmin2 = min(dmin2, d2) + dmax2 = max(dmax2, d2) + if dmin2 >= horizon2: + self.relPos = REMOTE + elif dmax2 <= horizon2: + self.relPos = INTERACT + else: + self.relPos = CUT + return self.relPos + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.cdivision(True) + cdef void startLoopSubSimplices_Simplex(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + cdef: + INDEX_t dim = getINDEX(self.params, fKDIM) + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + REAL_t horizon = sqrt(horizon2) + REAL_t vol1, invVol1 + BOOL_t lr + BOOL_t insideIJ[3][3] + BOOL_t insideI[3] + BOOL_t isInside + INDEX_t numInside + INDEX_t outside, inside1, inside2 + INDEX_t inside, outside1, outside2 + REAL_t d1, d2 + INDEX_t i, k, j + REAL_t c1, c2 + if dim == 1: + lr = simplex1[0, 0] < simplex2[0, 0] + vol1 = abs(simplex1[0, 0]-simplex1[1, 0]) + invVol1 = 1./vol1 + if lr: + self.intervals1[0] = simplex1[0,0]*invVol1 + self.intervals1[1] = max(simplex1[0,0], simplex2[0,0]-horizon)*invVol1 + self.intervals1[2] = min(simplex1[1,0], simplex2[1,0]-horizon)*invVol1 + self.intervals1[3] = simplex1[1,0]*invVol1 + self.iter_Simplex = 1 + self.iterEnd_Simplex = 3 + else: + self.intervals1[0] = simplex1[0,0]*invVol1 + self.intervals1[1] = max(simplex1[0,0], simplex2[0,0]+horizon)*invVol1 + self.intervals1[2] = min(simplex1[1,0], simplex2[1,0]+horizon)*invVol1 + self.intervals1[3] = simplex1[1,0]*invVol1 + self.iter_Simplex = 0 + self.iterEnd_Simplex = 2 + elif dim == 2: + # self.b_Simplex[0, :] = 0. + # self.A_Simplex[0, :, :] = 0 + # self.A_Simplex[0, 0, 0] = 1 + # self.A_Simplex[0, 1, 1] = 1 + # self.A_Simplex[0, 2, 2] = 1 + # self.vol_Simplex[0] = 1 + # self.iter_Simplex = 0 + # self.iterEnd_Simplex = 1 + + numInside = 0 + for i in range(3): + isInside = False + for k in range(3): + d2 = 0. + for j in range(2): + d2 += (simplex1[i, j] - simplex2[k, j])**2 + insideIJ[i][k] = d2 <= horizon2 + isInside |= insideIJ[i][k] + insideI[i] = isInside + numInside += isInside + if numInside == 0: + raise NotImplementedError() + elif numInside == 1: + inside = 0 + while not insideI[inside]: + inside += 1 + outside1 = (inside+1)%3 + outside2 = (inside+2)%3 + c1 = 0 + c2 = 0 + for j in range(3): + if insideIJ[inside][j]: + c1 = max(c1, findIntersection(simplex2[j, :], simplex1[inside, :], simplex1[outside1, :], horizon2)) + c2 = max(c2, findIntersection(simplex2[j, :], simplex1[inside, :], simplex1[outside2, :], horizon2)) + self.iter_Simplex = 0 + if c1*c2 > 0: + self.A_Simplex[0, :, :] = 0. + self.b_Simplex[0, :] = 0. + self.A_Simplex[0, inside, inside] = c1+c2 + self.A_Simplex[0, inside, outside1] = c2 + self.A_Simplex[0, inside, outside2] = c1 + self.A_Simplex[0, outside1, outside1] = c1 + self.A_Simplex[0, outside2, outside2] = c2 + self.b_Simplex[0, inside] = 1-c1-c2 + self.vol_Simplex[0] = c1*c2 + + self.iterEnd_Simplex = 1 + else: + self.iterEnd_Simplex = 0 + elif numInside == 2: + outside = 0 + while insideI[outside]: + outside += 1 + inside1 = (outside+1)%3 + inside2 = (outside+2)%3 + c1 = 1 + c2 = 1 + for j in range(3): + if insideIJ[inside1][j]: + c1 = min(c1, findIntersection(simplex2[j, :], simplex1[outside, :], simplex1[inside1, :], horizon2)) + if insideIJ[inside2][j]: + c2 = min(c2, findIntersection(simplex2[j, :], simplex1[outside, :], simplex1[inside2, :], horizon2)) + d1 = 0. + d2 = 0. 
+                for k in range(2):
+                    d1 += (simplex2[outside, k]
+                           + c1*(simplex2[inside1, k]-simplex2[outside, k])
+                           - simplex2[inside2, k])**2
+                    d2 += (simplex2[outside, k]
+                           + c2*(simplex2[inside2, k]-simplex2[outside, k])
+                           - simplex2[inside1, k])**2
+                self.A_Simplex[:, :, :] = 0.
+                self.b_Simplex[:, :] = 0.
+
+                self.iter_Simplex = 0
+                self.iterEnd_Simplex = 0
+                if d1 < d2:
+                    if 1-c1 > 0:
+                        self.A_Simplex[self.iterEnd_Simplex, outside, outside] = 1-c1
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, inside1] = 1-c1
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, inside2] = -c1
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, inside2] = 1.
+                        self.b_Simplex[self.iterEnd_Simplex, inside1] = c1
+                        self.vol_Simplex[self.iterEnd_Simplex] = 1-c1
+                        self.iterEnd_Simplex += 1
+
+                    if c1*(1-c2) > 0.:
+                        self.A_Simplex[self.iterEnd_Simplex, outside, outside] = 1-c2
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, inside2] = 1
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, outside] = c2
+                        self.A_Simplex[self.iterEnd_Simplex, outside, inside1] = 1-c1
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, inside1] = c1
+                        self.vol_Simplex[self.iterEnd_Simplex] = c1*(1-c2)
+                        self.iterEnd_Simplex += 1
+                else:
+                    if 1-c2 > 0:
+                        self.A_Simplex[self.iterEnd_Simplex, outside, outside] = 1-c2
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, inside2] = 1-c2
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, inside1] = -c2
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, inside1] = 1.
+                        self.b_Simplex[self.iterEnd_Simplex, inside2] = c2
+                        self.vol_Simplex[self.iterEnd_Simplex] = 1-c2
+                        self.iterEnd_Simplex += 1
+
+                    if c2*(1-c1) > 0.:
+                        self.A_Simplex[self.iterEnd_Simplex, outside, outside] = 1-c1
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, inside1] = 1
+                        self.A_Simplex[self.iterEnd_Simplex, inside1, outside] = c1
+                        self.A_Simplex[self.iterEnd_Simplex, outside, inside2] = 1-c2
+                        self.A_Simplex[self.iterEnd_Simplex, inside2, inside2] = c2
+                        self.vol_Simplex[self.iterEnd_Simplex] = c2*(1-c1)
+                        self.iterEnd_Simplex += 1
+            else:
+                self.b_Simplex[0, :] = 0.
+                self.A_Simplex[0, :, :] = 0
+                self.A_Simplex[0, 0, 0] = 1
+                self.A_Simplex[0, 1, 1] = 1
+                self.A_Simplex[0, 2, 2] = 1
+                self.vol_Simplex[0] = 1
+                self.iter_Simplex = 0
+                self.iterEnd_Simplex = 1
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef BOOL_t nextSubSimplex_Simplex(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t *vol):
+        cdef:
+            INDEX_t dim, i, j
+            REAL_t l, r, v0, v1
+        if self.iter_Simplex == self.iterEnd_Simplex:
+            return False
+        dim = getINDEX(self.params, fKDIM)
+        if dim == 1:
+            l = self.intervals1[self.iter_Simplex]
+            r = self.intervals1[self.iter_Simplex+1]
+            if r-l <= 0:
+                self.iter_Simplex += 1
+                return self.nextSubSimplex_Simplex(A, b, vol)
+            v0 = self.intervals1[0]
+            v1 = self.intervals1[3]
+            A[0, 0] = r-l
+            A[0, 1] = 0.
+            A[1, 0] = 0.
+            A[1, 1] = r-l
+            b[0] = v1-r
+            b[1] = l-v0
+            vol[0] = r-l
+            self.iter_Simplex += 1
+            return True
+        elif dim == 2:
+            for i in range(3):
+                b[i] = self.b_Simplex[self.iter_Simplex, i]
+                for j in range(3):
+                    A[i, j] = self.A_Simplex[self.iter_Simplex, i, j]
+            vol[0] = self.vol_Simplex[self.iter_Simplex]
+            self.iter_Simplex += 1
+            return True
+
+    @cython.initializedcheck(False)
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    @cython.cdivision(True)
+    cdef void startLoopSubSimplices_Node(self, REAL_t[::1] node1, REAL_t[:, ::1] simplex2):
+        cdef:
+            INDEX_t dim = getINDEX(self.params, fKDIM)
+            REAL_t horizon2 = getREAL(self.params, fHORIZON2)
+            REAL_t horizon = sqrt(horizon2)
+            REAL_t vol2, invVol2
+            BOOL_t lr
+            INDEX_t numInside
+            BOOL_t ind[3]
+            INDEX_t j, k
+            REAL_t d1, d2
+            INDEX_t inside, outside1, outside2
+            INDEX_t outside, inside1, inside2
+            REAL_t c1, c2
+        if dim == 1:
+            lr = node1[0] < simplex2[0, 0]
+            vol2 = abs(simplex2[0, 0]-simplex2[1, 0])
+            invVol2 = 1./vol2
+            if lr:
+                self.intervals2[0] = simplex2[0, 0] * invVol2
+                self.intervals2[1] = min(simplex2[1, 0], node1[0]+horizon) * invVol2
+                self.intervals2[2] = simplex2[1, 0] * invVol2
+                self.iter_Node = 0
+                self.iterEnd_Node = 1
+            else:
+                self.intervals2[0] = simplex2[0, 0] * invVol2
+                self.intervals2[1] = max(simplex2[0, 0], node1[0]-horizon) * invVol2
+                self.intervals2[2] = simplex2[1, 0] * invVol2
+                self.iter_Node = 1
+                self.iterEnd_Node = 2
+        elif dim == 2:
+            numInside = 0
+            for j in range(3):
+                d2 = 0.
+                for k in range(2):
+                    d2 += (simplex2[j, k]-node1[k])**2
+                ind[j] = (d2 <= horizon2)
+                numInside += ind[j]
+            if numInside == 0:
+                self.iter_Node = 0
+                self.iterEnd_Node = 0
+            elif numInside == 1:
+                inside = 0
+                while not ind[inside]:
+                    inside += 1
+                outside1 = (inside+1)%3
+                outside2 = (inside+2)%3
+                c1 = findIntersection(node1, simplex2[inside, :], simplex2[outside1, :], horizon2)
+                c2 = findIntersection(node1, simplex2[inside, :], simplex2[outside2, :], horizon2)
+
+                self.iter_Node = 0
+                if c1+c2 > 0:
+                    self.A_Node[0, :, :] = 0.
+                    self.b_Node[0, :] = 0.
+                    self.A_Node[0, inside, inside] = c1+c2
+                    self.A_Node[0, inside, outside1] = c2
+                    self.A_Node[0, inside, outside2] = c1
+                    self.A_Node[0, outside1, outside1] = c1
+                    self.A_Node[0, outside2, outside2] = c2
+                    self.b_Node[0, inside] = 1-c1-c2
+                    self.vol_Node[0] = c1*c2
+
+                    self.iterEnd_Node = 1
+                else:
+                    self.iterEnd_Node = 0
+
+            elif numInside == 2:
+                outside = 0
+                while ind[outside]:
+                    outside += 1
+                inside1 = (outside+1)%3
+                inside2 = (outside+2)%3
+                c1 = findIntersection(node1, simplex2[outside, :], simplex2[inside1, :], horizon2)
+                c2 = findIntersection(node1, simplex2[outside, :], simplex2[inside2, :], horizon2)
+                d1 = 0.
+                d2 = 0.
+                for k in range(2):
+                    d1 += (simplex2[outside, k]
+                           + c1*(simplex2[inside1, k]-simplex2[outside, k])
+                           - simplex2[inside2, k])**2
+                    d2 += (simplex2[outside, k]
+                           + c2*(simplex2[inside2, k]-simplex2[outside, k])
+                           - simplex2[inside1, k])**2
+                self.A_Node[:, :, :] = 0.
+                self.b_Node[:, :] = 0.
+
+                self.iter_Node = 0
+                self.iterEnd_Node = 0
+                if d1 < d2:
+                    if 1-c1 > 0:
+                        self.A_Node[self.iterEnd_Node, outside, outside] = 1-c1
+                        self.A_Node[self.iterEnd_Node, inside1, inside1] = 1-c1
+                        self.A_Node[self.iterEnd_Node, inside1, inside2] = -c1
+                        self.A_Node[self.iterEnd_Node, inside2, inside2] = 1.
+ self.b_Node[self.iterEnd_Node, inside1] = c1 + self.vol_Node[self.iterEnd_Node] = 1-c1 + self.iterEnd_Node += 1 + + if c1*(1-c2) > 0.: + self.A_Node[self.iterEnd_Node, outside, outside] = 1-c2 + self.A_Node[self.iterEnd_Node, inside2, inside2] = 1 + self.A_Node[self.iterEnd_Node, inside2, outside] = c2 + self.A_Node[self.iterEnd_Node, outside, inside1] = 1-c1 + self.A_Node[self.iterEnd_Node, inside1, inside1] = c1 + self.vol_Node[self.iterEnd_Node] = c1*(1-c2) + self.iterEnd_Node += 1 + else: + if 1-c2 > 0: + self.A_Node[self.iterEnd_Node, outside, outside] = 1-c2 + self.A_Node[self.iterEnd_Node, inside2, inside2] = 1-c2 + self.A_Node[self.iterEnd_Node, inside2, inside1] = -c2 + self.A_Node[self.iterEnd_Node, inside1, inside1] = 1. + self.b_Node[self.iterEnd_Node, inside2] = c2 + self.vol_Node[self.iterEnd_Node] = 1-c2 + self.iterEnd_Node += 1 + + if c2*(1-c1) > 0.: + self.A_Node[self.iterEnd_Node, outside, outside] = 1-c1 + self.A_Node[self.iterEnd_Node, inside1, inside1] = 1 + self.A_Node[self.iterEnd_Node, inside1, outside] = c1 + self.A_Node[self.iterEnd_Node, outside, inside2] = 1-c2 + self.A_Node[self.iterEnd_Node, inside2, inside2] = c2 + self.vol_Node[self.iterEnd_Node] = c2*(1-c1) + self.iterEnd_Node += 1 + else: + self.A_Node[0, :, :] = 0. + self.b_Node[0, :] = 0. + self.A_Node[0, 0, 0] = 1. + self.A_Node[0, 1, 1] = 1. + self.A_Node[0, 2, 2] = 1. + self.vol_Node[0] = 1. + self.iter_Node = 0 + self.iterEnd_Node = 1 + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef BOOL_t nextSubSimplex_Node(self, REAL_t[:, ::1] A, REAL_t[::1] b, REAL_t *vol): + cdef: + INDEX_t dim, i, j + REAL_t l, r, v0, v1 + if self.iter_Node == self.iterEnd_Node: + return False + dim = getINDEX(self.params, fKDIM) + if dim == 1: + l = self.intervals2[self.iter_Node] + r = self.intervals2[self.iter_Node+1] + if r-l <= 0: + self.iter_Node += 1 + return self.nextSubSimplex_Node(A, b, vol) + v0 = self.intervals2[0] + v1 = self.intervals2[2] + A[0, 0] = r-l + A[0, 1] = 0. + A[1, 0] = 0. + A[1, 1] = r-l + b[0] = v1-r + b[1] = l-v0 + vol[0] = r-l + self.iter_Node += 1 + return True + elif dim == 2: + for i in range(3): + b[i] = self.b_Node[self.iter_Node, i] + for j in range(3): + A[i, j] = self.A_Node[self.iter_Node, i, j] + vol[0] = self.vol_Node[self.iter_Node] + self.iter_Node += 1 + return True + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t s = 0. + INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(x.shape[0]): + s += (x[i]-y[i])**2 + if s <= horizon2: + return 1. + else: + return 0. + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t s = 0. + INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(dim): + s += (x[i]-y[i])**2 + if s <= horizon2: + return 1. + else: + return 0. 
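For review context: the startLoop/next pairs above enumerate the intersection of an element with the interaction ball as a short sequence of affine sub-simplices (A, b, vol). A rough sketch of the intended driver loop, using the Python wrappers startLoopSubSimplices_Node_py/nextSubSimplex_Node_py defined earlier in this file; `interaction`, `node`, `simplex` and `dim` are hypothetical locals, and the owning kernel is assumed to have already set the horizon in the parameter block:

    A = np.empty((dim+1, dim+1), dtype=REAL)
    b = np.empty((dim+1,), dtype=REAL)
    vol = np.empty((1,), dtype=REAL)
    interaction.startLoopSubSimplices_Node_py(node, simplex)
    while interaction.nextSubSimplex_Node_py(A, b, vol):
        pass  # integrate over the sub-simplex mapped by (A, b), weighted by vol[0]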
+ + def __repr__(self): + horizon2 = getREAL(self.params, fHORIZON2) + return '|x-y|_2 <= {}'.format(sqrt(horizon2)) + + def __setstate__(self, state): + ball2.__init__(self) + + +cdef class ballInf(interactionDomain): + def __init__(self): + super(ballInf, self).__init__(False) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + cdef: + INDEX_t i, k, j + INDEX_t noSimplex1 = simplex1.shape[0] + INDEX_t noSimplex2 = simplex2.shape[0] + REAL_t d2 + REAL_t dmin2 = inf + REAL_t dmax2 = 0. + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + INDEX_t dim = getINDEX(self.params, fKDIM) + for i in range(noSimplex1): + for k in range(noSimplex2): + d2 = 0. + for j in range(dim): + d2 = max(d2, (simplex1[i, j] - simplex2[k, j])**2) + dmin2 = min(dmin2, d2) + dmax2 = max(dmax2, d2) + if dmin2 >= horizon2: + self.relPos = REMOTE + elif dmax2 <= horizon2: + self.relPos = INTERACT + else: + self.relPos = CUT + return self.relPos + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t s = 0. + INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(x.shape[0]): + s = max(s, (x[i]-y[i])**2) + if s <= horizon2: + return 1. + else: + return 0. + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t s = 0. + INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(dim): + s = max(s, (x[i]-y[i])**2) + if s <= horizon2: + return 1. + else: + return 0. + + def __repr__(self): + horizon2 = getREAL(self.params, fHORIZON2) + return '|x-y|_inf <= {}'.format(sqrt(horizon2)) + + +cdef class ball2Complement(interactionDomain): + def __init__(self): + super(ball2Complement, self).__init__(True) + + cdef void setParams(self, void *params): + cdef: + INDEX_t dim, k + interactionDomain.setParams(self, params) + warnings.warn('cut elements are currently not implemented for \'ball2Complement\', expect quadrature errors') + dim = getINDEX(self.params, fKDIM) + self.A_Simplex = np.zeros((1, dim+1, dim+1), dtype=REAL) + for k in range(dim+1): + self.A_Simplex[0, k, k] = 1. + self.b_Simplex = np.zeros((1, dim+1), dtype=REAL) + self.vol_Simplex = np.ones((1), dtype=REAL) + self.A_Node = np.zeros((1, dim+1, dim+1), dtype=REAL) + for k in range(dim+1): + self.A_Node[0, k, k] = 1. + self.b_Node = np.zeros((1, dim+1), dtype=REAL) + self.vol_Node = np.ones((1), dtype=REAL) + + def getComplement(self): + return ball2() + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef RELATIVE_POSITION_t getRelativePosition(self, REAL_t[:, ::1] simplex1, REAL_t[:, ::1] simplex2): + cdef: + INDEX_t i, k, j + INDEX_t noSimplex1 = simplex1.shape[0] + INDEX_t noSimplex2 = simplex2.shape[0] + REAL_t d2 + REAL_t dmin2 = inf + REAL_t dmax2 = 0. + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + INDEX_t dim = getINDEX(self.params, fKDIM) + for i in range(noSimplex1): + for k in range(noSimplex2): + d2 = 0. + for j in range(dim): + d2 += (simplex1[i, j] - simplex2[k, j])**2 + dmin2 = min(dmin2, d2) + dmax2 = max(dmax2, d2) + if dmin2 >= horizon2: + self.relPos = INTERACT + elif dmax2 <= horizon2: + self.relPos = REMOTE + else: + self.relPos = CUT + return self.relPos + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t s = 0. 
+ INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(x.shape[0]): + s += (x[i]-y[i])**2 + if s > horizon2: + return 1. + else: + return 0. + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t s = 0. + INDEX_t i + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + for i in range(dim): + s += (x[i]-y[i])**2 + if s > horizon2: + return 1. + else: + return 0. + + def __repr__(self): + horizon2 = getREAL(self.params, fHORIZON2) + return '|x-y|_2 > {}'.format(sqrt(horizon2)) + + +cdef class ellipse(interactionDomain): + cdef: + public REAL_t aFac2 + public REAL_t bFac2 + + def __init__(self, REAL_t aFac, REAL_t bFac): + super(ellipse, self).__init__() + assert 0 < aFac <= 1. + assert 0 < bFac <= 1. + self.aFac2 = aFac**2 + self.bFac2 = bFac**2 + + @cython.wraparound(False) + @cython.boundscheck(False) + @cython.cdivision(True) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + REAL_t s + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + s = (x[0]-y[0])**2/(self.aFac2*horizon2) + (x[1]-y[1])**2/(self.bFac2*horizon2) + if s <= 1.: + return 1. + else: + return 0. + + @cython.cdivision(True) + cdef REAL_t evalPtr(ellipse self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + REAL_t s + REAL_t horizon2 = getREAL(self.params, fHORIZON2) + s = (x[0]-y[0])**2/(self.aFac2*horizon2) + (x[1]-y[1])**2/(self.bFac2*horizon2) + if s <= 1.: + return 1. + else: + return 0. + + def __getstate__(self): + return (sqrt(self.aFac2), sqrt(self.bFac2)) + + def __setstate__(self, state): + ellipse.__init__(self, state[0], state[1]) diff --git a/nl/PyNucleus_nl/kernel_params.pxi b/nl/PyNucleus_nl/kernel_params.pxi new file mode 100644 index 0000000..23bf2fc --- /dev/null +++ b/nl/PyNucleus_nl/kernel_params.pxi @@ -0,0 +1,67 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
#
+###################################################################################
+
+
+cdef enum:
+    OFFSET = sizeof(void*)
+
+cdef enum:
+    NUM_KERNEL_PARAMS = 10
+
+cdef enum kernelParams:
+    fS = 0*OFFSET
+    fSINGULARITY = 1*OFFSET
+    fHORIZON2 = 2*OFFSET
+    fSCALING = 3*OFFSET
+    fKDIM = 4*OFFSET
+    fORDERFUN = 5*OFFSET
+    fHORIZONFUN = 6*OFFSET
+    fSCALINGFUN = 7*OFFSET
+    fEVAL = 8*OFFSET
+    fINTERACTION = 9*OFFSET
+
+
+cdef inline BOOL_t isNull(void *c_params, size_t pos):
+    return (<void**>(c_params+pos))[0] == NULL
+
+cdef inline INDEX_t getINDEX(void *c_params, size_t pos):
+    return (<INDEX_t*>(c_params+pos))[0]
+
+cdef inline void setINDEX(void *c_params, size_t pos, INDEX_t val):
+    (<INDEX_t*>(c_params+pos))[0] = val
+
+cdef inline REAL_t getREAL(void *c_params, size_t pos):
+    return (<REAL_t*>(c_params+pos))[0]
+
+cdef inline void setREAL(void *c_params, size_t pos, REAL_t val):
+    (<REAL_t*>(c_params+pos))[0] = val
+
+ctypedef REAL_t (*fun_t)(REAL_t *x, REAL_t *y, void *c_params)
+
+cdef inline void setFun(void *c_params, size_t pos, fun_t val):
+    (<fun_t*>(c_params+pos))[0] = val
+
+cdef inline fun_t getFun(void *c_params, size_t pos):
+    return (<fun_t*>(c_params+pos))[0]
+
+cdef inline REAL_t* getREALArray1D(void *c_params, size_t pos):
+    return (<REAL_t**>(c_params+pos))[0]
+
+cdef inline void setREALArray1D(void *c_params, size_t pos, REAL_t[::1] val):
+    (<REAL_t**>(c_params+pos))[0] = &val[0]
+
+cdef inline REAL_t* getREALArray2D(void *c_params, size_t pos):
+    return (<REAL_t**>(c_params+pos))[0]
+
+cdef inline void setREALArray2D(void *c_params, size_t pos, REAL_t[:, ::1] val):
+    (<REAL_t**>(c_params+pos))[0] = &val[0, 0]
+
+
+cpdef enum:
+    FRACTIONAL = 0
+    INDICATOR = 1
+    PERIDYNAMIC = 2
diff --git a/nl/PyNucleus_nl/kernel_params_decl.pxi b/nl/PyNucleus_nl/kernel_params_decl.pxi
new file mode 100644
index 0000000..0785072
--- /dev/null
+++ b/nl/PyNucleus_nl/kernel_params_decl.pxi
@@ -0,0 +1,9 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+ctypedef INDEX_t kernelType
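The parameter block manipulated by these accessors is a raw buffer of NUM_KERNEL_PARAMS pointer-sized slots, addressed by the byte offsets fS through fINTERACTION. A minimal sketch of populating and reading back such a block (the allocation mirrors what Kernel.__init__ does below; `params` is a hypothetical local):

    from libc.stdlib cimport malloc, free

    cdef void *params = malloc(NUM_KERNEL_PARAMS*OFFSET)
    setINDEX(params, fKDIM, 2)          # spatial dimension goes into slot 4
    setREAL(params, fHORIZON2, 0.5**2)  # the horizon is stored squared
    assert getREAL(params, fHORIZON2) == 0.25
    free(params)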
diff --git a/nl/PyNucleus_nl/kernels.cpp b/nl/PyNucleus_nl/kernels.cpp
new file mode 100644
index 0000000..14f02c1
--- /dev/null
+++ b/nl/PyNucleus_nl/kernels.cpp
@@ -0,0 +1,41 @@
+/////////////////////////////////////////////////////////////////////////////////////
+// Copyright 2021 National Technology & Engineering Solutions of Sandia,            //
+// LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the            //
+// U.S. Government retains certain rights in this software.                         //
+// If you want to use this code, please refer to the README.rst and LICENSE files.  //
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+#include "kernels.hpp"
+#include "math.h"
+#include <iostream>
+
+kernel_t::kernel_t() {
+  // pass
+}
+
+REAL_t kernel_t::eval(REAL_t *x, REAL_t *y) {
+  std::cout << "Calling base\n";
+  return 0.;
+};
+
+fractional_kernel_t::fractional_kernel_t(REAL_t s_,
+                                         REAL_t C_) {
+  s = s_;
+  C = C_;
+}
+
+REAL_t fractional_kernel_t::eval(REAL_t *x, REAL_t *y){
+  // use fabs, not the integer abs, for the floating point distance
+  return C * pow(fabs(*x-*y), -1.-2.*s);
+}
+
+
+callback_kernel_t::callback_kernel_t(kernel_callback_t kernel_callback_, void *user_data_) {
+  kernel_callback = kernel_callback_;
+  user_data = user_data_;
+}
+
+REAL_t callback_kernel_t::eval(REAL_t *x, REAL_t *y){
+  return kernel_callback(x, y, user_data);
+}
+
diff --git a/nl/PyNucleus_nl/kernels.hpp b/nl/PyNucleus_nl/kernels.hpp
new file mode 100644
index 0000000..5c7d276
--- /dev/null
+++ b/nl/PyNucleus_nl/kernels.hpp
@@ -0,0 +1,44 @@
+/////////////////////////////////////////////////////////////////////////////////////
+// Copyright 2021 National Technology & Engineering Solutions of Sandia,            //
+// LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the            //
+// U.S. Government retains certain rights in this software.                         //
+// If you want to use this code, please refer to the README.rst and LICENSE files.  //
+/////////////////////////////////////////////////////////////////////////////////////
+
+
+#ifndef KERNELS_HPP
+#define KERNELS_HPP
+
+#include
+#include "myTypes.h"
+
+typedef REAL_t (*kernel_callback_t)(REAL_t *x, REAL_t *y, void *user_data);
+
+class kernel_t{
+public:
+  kernel_t();
+  virtual REAL_t eval(REAL_t *x, REAL_t *y);
+};
+
+
+class fractional_kernel_t : public kernel_t {
+public:
+  fractional_kernel_t(REAL_t s_, REAL_t C_);
+  REAL_t eval(REAL_t *x, REAL_t *y);
+private:
+  REAL_t s;
+  REAL_t C;
+};
+
+
+class callback_kernel_t : public kernel_t {
+public:
+  callback_kernel_t(kernel_callback_t kernel_callback_, void* user_data_);
+  REAL_t eval(REAL_t *x, REAL_t *y);
+private:
+  kernel_callback_t kernel_callback;
+  void* user_data;
+};
+
+
+#endif
diff --git a/nl/PyNucleus_nl/kernels.py b/nl/PyNucleus_nl/kernels.py
new file mode 100644
index 0000000..5b7816a
--- /dev/null
+++ b/nl/PyNucleus_nl/kernels.py
@@ -0,0 +1,164 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+import numpy as np
+from PyNucleus_base.myTypes import INDEX, REAL
+from PyNucleus_fem.functions import function, constant
+from PyNucleus_fem.mesh import meshNd
+from PyNucleus_fem.DoFMaps import DoFMap
+from . twoPointFunctions import constantTwoPoint
+from . interactionDomains import (interactionDomain,
+                                  fullSpace,
+                                  ball1,
+                                  ball2,
+                                  ballInf)
+from . fractionalOrders import (fractionalOrderBase,
+                                constFractionalOrder,
+                                constantFractionalLaplacianScaling,
+                                variableFractionalLaplacianScaling,
+                                constantIntegrableScaling)
+from . kernels2 import (Kernel,
+                        FractionalKernel,
+                        FRACTIONAL, INDICATOR, PERIDYNAMIC,
+                        getKernelEnum)
+
+
+def _getDim(dim):
+    if isinstance(dim, meshNd):
+        return dim.dim
+    elif isinstance(dim, (INDEX, int)):
+        return dim
+    else:
+        raise NotImplementedError('Dim: {}'.format(dim))
+
+
+def _getKernelType(kernel):
+    if isinstance(kernel, str):
+        kType = getKernelEnum(kernel)
+    elif isinstance(kernel, int):
+        kType = kernel
+    else:
+        raise NotImplementedError('Kernel type: {}'.format(kernel))
+    return kType
+
+
+def _getFractionalOrder(s):
+    if isinstance(s, fractionalOrderBase):
+        sFun = s
+    elif isinstance(s, (REAL, float)):
+        sFun = constFractionalOrder(s)
+    else:
+        raise NotImplementedError('Fractional order: {}'.format(s))
+    return sFun
+
+
+def _getHorizon(horizon):
+    if isinstance(horizon, function):
+        horizonFun = horizon
+    elif isinstance(horizon, (REAL, float, int)):
+        horizonFun = constant(horizon)
+    elif horizon is None:
+        horizonFun = constant(np.inf)
+    else:
+        raise NotImplementedError('Horizon: {}'.format(horizon))
+    return horizonFun
+
+
+def _getInteraction(interaction, horizon):
+    if isinstance(interaction, interactionDomain):
+        pass
+    elif isinstance(horizon, constant) and horizon.value == np.inf:
+        interaction = fullSpace()
+    elif interaction is None:
+        interaction = ball2()
+    elif isinstance(interaction, str):
+        if interaction == 'fullSpace':
+            interaction = fullSpace()
+        elif interaction == 'ball1':
+            interaction = ball1()
+        elif interaction == 'ball2':
+            interaction = ball2()
+        elif interaction == 'ballInf':
+            interaction = ballInf()
+        else:
+            raise NotImplementedError('Interaction: {}'.format(interaction))
+    else:
+        raise NotImplementedError('Interaction: {}'.format(interaction))
+    return interaction
+
+
+def getFractionalKernel(dim,
+                        s,
+                        horizon=None,
+                        interaction=None,
+                        scaling=None,
+                        normalized=True,
+                        piecewise=True,
+                        phi=None):
+    dim_ = _getDim(dim)
+    sFun = _getFractionalOrder(s)
+    horizonFun = _getHorizon(horizon)
+    interaction = _getInteraction(interaction, horizonFun)
+
+    if scaling is None:
+        if normalized:
+            if isinstance(sFun, constFractionalOrder) and isinstance(horizonFun, constant):
+                # use the normalized dimension, since dim may be a mesh
+                scaling = constantFractionalLaplacianScaling(dim_, sFun.value, horizonFun.value)
+            else:
+                symmetric = sFun.symmetric and isinstance(horizonFun, constant)
+                scaling = variableFractionalLaplacianScaling(symmetric)
+        else:
+            scaling = constantTwoPoint(0.5)
+    kernel = FractionalKernel(dim_, sFun, horizonFun, interaction, scaling, phi, piecewise=piecewise)
+    return kernel
+
+
+def getIntegrableKernel(dim,
+                        kernel,
+                        horizon,
+                        scaling=None,
+                        interaction=None,
+                        normalized=True,
+                        piecewise=True,
+                        phi=None):
+    dim_ = _getDim(dim)
+    kType = _getKernelType(kernel)
+    horizonFun = _getHorizon(horizon)
+    interaction = _getInteraction(interaction, horizonFun)
+
+    if scaling is None:
+        if normalized:
+            if isinstance(horizonFun, constant):
+                scaling = constantIntegrableScaling(kType, interaction, dim_, horizonFun.value)
+            else:
+                raise NotImplementedError()
+        else:
+            scaling = constantTwoPoint(0.5)
+    return Kernel(dim_, kType=kType, horizon=horizonFun, interaction=interaction, scaling=scaling, phi=phi, piecewise=piecewise)
+
+
+def getKernel(dim,
+              s=None,
+              horizon=None,
+              scaling=None,
+              interaction=None,
+              normalized=True,
+              piecewise=True,
+              phi=None,
+              kernel=FRACTIONAL):
+    kType = _getKernelType(kernel)
+    if kType == FRACTIONAL:
+        return getFractionalKernel(dim, s, horizon, interaction, scaling, normalized, piecewise, phi)
+    else:
+        return getIntegrableKernel(dim,
kernel=kType, + horizon=horizon, + scaling=scaling, + interaction=interaction, + normalized=normalized, + piecewise=piecewise, phi=phi) diff --git a/nl/PyNucleus_nl/kernels2.pxd b/nl/PyNucleus_nl/kernels2.pxd new file mode 100644 index 0000000..a97dcd1 --- /dev/null +++ b/nl/PyNucleus_nl/kernels2.pxd @@ -0,0 +1,51 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t +from PyNucleus_fem.functions cimport function +from . twoPointFunctions cimport twoPointFunction, constantTwoPoint, parametrizedTwoPointFunction +from . interactionDomains cimport interactionDomain +from . fractionalOrders cimport fractionalOrderBase +from . kernelsCy cimport kernelCy + +include "kernel_params_decl.pxi" + + +cdef class Kernel(twoPointFunction): + cdef: + public INDEX_t dim + public kernelType kernelType + public REAL_t min_singularity + public REAL_t max_singularity + public function horizon + public interactionDomain interaction + public twoPointFunction scaling + public twoPointFunction phi + public BOOL_t variableSingularity + public BOOL_t variableHorizon + public BOOL_t finiteHorizon + public BOOL_t complement + public BOOL_t variableScaling + public BOOL_t variable + public BOOL_t piecewise + kernelCy c_kernel + void *c_kernel_params + cdef REAL_t getSingularityValue(self) + cdef REAL_t getHorizonValue(self) + cdef REAL_t getHorizonValue2(self) + cdef REAL_t getScalingValue(self) + cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y) + cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y) + + +cdef class FractionalKernel(Kernel): + cdef: + public fractionalOrderBase s + public BOOL_t variableOrder + cdef REAL_t getsValue(self) diff --git a/nl/PyNucleus_nl/kernels2.pyx b/nl/PyNucleus_nl/kernels2.pyx new file mode 100644 index 0000000..3748207 --- /dev/null +++ b/nl/PyNucleus_nl/kernels2.pyx @@ -0,0 +1,566 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from libc.stdlib cimport malloc +from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan, atan2, + log, ceil, + fabs as abs, M_PI as pi, pow, + tgamma as gamma, exp) +cimport cython +import numpy as np +cimport numpy as np +from PyNucleus_base.myTypes import REAL +from PyNucleus_fem.functions cimport constant +from . interactionDomains cimport ball1, ball2, ballInf +from . 
fractionalOrders cimport (variableFractionalOrder,
+                                 constantFractionalLaplacianScaling,
+                                 variableFractionalLaplacianScaling)
+
+include "kernel_params.pxi"
+
+
+def getKernelEnum(str kernelTypeString):
+    if kernelTypeString.upper() == "FRACTIONAL":
+        return FRACTIONAL
+    elif kernelTypeString.upper() == "INDICATOR":
+        return INDICATOR
+    elif kernelTypeString.upper() == "PERIDYNAMIC":
+        return PERIDYNAMIC
+    else:
+        raise NotImplementedError(kernelTypeString)
+
+
+cdef REAL_t fracKernelFinite1D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t s, C, d2
+        twoPointFunction interaction = <twoPointFunction>((<void**>(c_params+fINTERACTION))[0])
+    if interaction.evalPtr(1, x, y) != 0.:
+        s = getREAL(c_params, fS)
+        C = getREAL(c_params, fSCALING)
+        d2 = (x[0]-y[0])*(x[0]-y[0])
+        return C*pow(d2, -0.5-s)
+    else:
+        return 0.
+
+
+cdef REAL_t fracKernelFinite2D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t s, C, d2
+        twoPointFunction interaction = <twoPointFunction>((<void**>(c_params+fINTERACTION))[0])
+    if interaction.evalPtr(2, x, y) != 0.:
+        s = getREAL(c_params, fS)
+        C = getREAL(c_params, fSCALING)
+        d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1])
+        return C*pow(d2, -1.-s)
+    else:
+        return 0.
+
+
+cdef REAL_t fracKernelInfinite1D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t s = getREAL(c_params, fS)
+        REAL_t C = getREAL(c_params, fSCALING)
+        REAL_t d2
+    d2 = (x[0]-y[0])*(x[0]-y[0])
+    return C*pow(d2, -0.5-s)
+
+
+cdef REAL_t fracKernelInfinite2D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        REAL_t s = getREAL(c_params, fS)
+        REAL_t C = getREAL(c_params, fSCALING)
+        REAL_t d2
+    d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1])
+    return C*pow(d2, -1.-s)
+
+
+cdef REAL_t indicatorKernel1D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        twoPointFunction interaction = <twoPointFunction>((<void**>(c_params+fINTERACTION))[0])
+        REAL_t C
+    if interaction.evalPtr(1, x, y) != 0.:
+        C = getREAL(c_params, fSCALING)
+        return C
+    else:
+        return 0.
+
+
+cdef REAL_t indicatorKernel2D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        twoPointFunction interaction = <twoPointFunction>((<void**>(c_params+fINTERACTION))[0])
+        REAL_t C
+    if interaction.evalPtr(2, x, y) != 0.:
+        C = getREAL(c_params, fSCALING)
+        return C
+    else:
+        return 0.
+
+
+@cython.cdivision(True)
+cdef REAL_t peridynamicKernel1D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        interactionDomain interaction = <interactionDomain>((<void**>(c_params+fINTERACTION))[0])
+        REAL_t C
+        REAL_t d2
+    if interaction.evalPtr(1, x, y) != 0.:
+        d2 = (x[0]-y[0])*(x[0]-y[0])
+        C = getREAL(c_params, fSCALING)
+        return C/sqrt(d2)
+    else:
+        return 0.
+
+
+@cython.cdivision(True)
+cdef REAL_t peridynamicKernel2D(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        interactionDomain interaction = <interactionDomain>((<void**>(c_params+fINTERACTION))[0])
+        REAL_t C
+        REAL_t d2
+    if interaction.evalPtr(2, x, y) != 0.:
+        d2 = (x[0]-y[0])*(x[0]-y[0]) + (x[1]-y[1])*(x[1]-y[1])
+        C = getREAL(c_params, fSCALING)
+        return C/sqrt(d2)
+    else:
+        return 0.
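Each callback above implements the fun_t signature from kernel_params.pxi and reads all of its state (order, scaling, horizon, interaction) from the parameter block rather than from Python attributes, so it can be invoked from tight quadrature loops. A minimal sketch of a callback following the same contract (hypothetical, not part of the module):

    cdef REAL_t constantKernel1D(REAL_t *x, REAL_t *y, void *c_params):
        # ignores x and y; returns the scaling constant stored in the block
        return getREAL(c_params, fSCALING)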
+
+
+cdef REAL_t updateAndEvalIntegrable(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        INDEX_t dim = getINDEX(c_params, fKDIM)
+        REAL_t[::1] xA
+        REAL_t[::1] yA
+        function horizonFun
+        twoPointFunction scalingFun
+        REAL_t horizon, C
+        fun_t kernel = getFun(c_params, fEVAL)
+        BOOL_t horizonFunNull = isNull(c_params, fHORIZONFUN)
+        BOOL_t scalingFunNull = isNull(c_params, fSCALINGFUN)
+    if not horizonFunNull or not scalingFunNull:
+        xA = <REAL_t[:dim]>x
+        if not horizonFunNull:
+            horizonFun = <function>((<void**>(c_params+fHORIZONFUN))[0])
+            horizon = horizonFun.eval(xA)
+            setREAL(c_params, fHORIZON2, horizon*horizon)
+        if not scalingFunNull:
+            yA = <REAL_t[:dim]>y
+            scalingFun = <twoPointFunction>((<void**>(c_params+fSCALINGFUN))[0])
+            C = scalingFun.eval(xA, yA)
+            setREAL(c_params, fSCALING, C)
+    return kernel(x, y, c_params)
+
+
+cdef REAL_t updateAndEvalFractional(REAL_t *x, REAL_t *y, void *c_params):
+    cdef:
+        INDEX_t dim = getINDEX(c_params, fKDIM)
+        REAL_t[::1] xA
+        REAL_t[::1] yA
+        fractionalOrderBase sFun
+        function horizonFun
+        twoPointFunction scalingFun
+        REAL_t s, horizon, C
+        fun_t kernel = getFun(c_params, fEVAL)
+    xA = <REAL_t[:dim]>x
+    yA = <REAL_t[:dim]>y
+
+    if not isNull(c_params, fORDERFUN):
+        sFun = <fractionalOrderBase>((<void**>(c_params+fORDERFUN))[0])
+        s = sFun.eval(xA, yA)
+        setREAL(c_params, fS, s)
+    if not isNull(c_params, fHORIZONFUN):
+        horizonFun = <function>((<void**>(c_params+fHORIZONFUN))[0])
+        horizon = horizonFun.eval(xA)
+        setREAL(c_params, fHORIZON2, horizon*horizon)
+    if not isNull(c_params, fSCALINGFUN):
+        scalingFun = <twoPointFunction>((<void**>(c_params+fSCALINGFUN))[0])
+        C = scalingFun.eval(xA, yA)
+        setREAL(c_params, fSCALING, C)
+    return kernel(x, y, c_params)
+
+
+cdef class Kernel(twoPointFunction):
+    def __init__(self, INDEX_t dim, kernelType kType, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi, BOOL_t piecewise=True):
+        cdef:
+            parametrizedTwoPointFunction parametrizedScaling
+            int i
+
+        self.dim = dim
+        self.kernelType = kType
+        self.piecewise = piecewise
+
+        self.c_kernel = kernelCy()
+        self.c_kernel_params = malloc(NUM_KERNEL_PARAMS*OFFSET)
+        for i in range(NUM_KERNEL_PARAMS):
+            (<void**>(self.c_kernel_params+i*OFFSET))[0] = NULL
+        self.c_kernel.setParams(self.c_kernel_params)
+        setINDEX(self.c_kernel_params, fKDIM, dim)
+
+        symmetric = isinstance(horizon, constant) and scaling.symmetric
+        super(Kernel, self).__init__(symmetric)
+
+        if self.kernelType == INDICATOR:
+            self.min_singularity = 0.
+            self.max_singularity = 0.
+            self.singularityValue = 0.
+        elif self.kernelType == PERIDYNAMIC:
+            self.min_singularity = -1.
+            self.max_singularity = -1.
+            self.singularityValue = -1.
+
+        self.horizon = horizon
+        self.variableHorizon = not isinstance(self.horizon, constant)
+        if self.variableHorizon:
+            self.horizonValue2 = np.nan
+            self.finiteHorizon = True
+            (<void**>(self.c_kernel_params+fHORIZONFUN))[0] = <void*>horizon
+        else:
+            self.horizonValue = self.horizon.value
+            self.finiteHorizon = self.horizon.value != np.inf
+
+        self.interaction = interaction
+        self.complement = self.interaction.complement
+        (<void**>(self.c_kernel_params+fINTERACTION))[0] = <void*>self.interaction
+        self.interaction.setParams(self.c_kernel_params)
+
+        self.phi = phi
+        if phi is not None:
+            scaling = phi*scaling
+        self.scaling = scaling
+        self.variableScaling = not isinstance(self.scaling, (constantFractionalLaplacianScaling, constantTwoPoint))
+        if self.variableScaling:
+            if isinstance(self.scaling, parametrizedTwoPointFunction):
+                parametrizedScaling = self.scaling
+                parametrizedScaling.setParams(self.c_kernel_params)
+            self.scalingValue = np.nan
+            (<void**>(self.c_kernel_params+fSCALINGFUN))[0] = <void*>self.scaling
+        else:
+            self.scalingValue = self.scaling.value
+
+        self.variable = self.variableHorizon or self.variableScaling
+
+        if self.piecewise:
+            if dim == 1:
+                if self.kernelType == INDICATOR:
+                    self.c_kernel.setCallback(indicatorKernel1D)
+                elif self.kernelType == PERIDYNAMIC:
+                    self.c_kernel.setCallback(peridynamicKernel1D)
+            elif dim == 2:
+                if self.kernelType == INDICATOR:
+                    self.c_kernel.setCallback(indicatorKernel2D)
+                elif self.kernelType == PERIDYNAMIC:
+                    self.c_kernel.setCallback(peridynamicKernel2D)
+            else:
+                raise NotImplementedError()
+        else:
+            self.c_kernel.setCallback(updateAndEvalIntegrable)
+
+            if dim == 1:
+                if self.kernelType == INDICATOR:
+                    setFun(self.c_kernel_params, fEVAL, indicatorKernel1D)
+                elif self.kernelType == PERIDYNAMIC:
+                    setFun(self.c_kernel_params, fEVAL, peridynamicKernel1D)
+            elif dim == 2:
+                if self.kernelType == INDICATOR:
+                    setFun(self.c_kernel_params, fEVAL, indicatorKernel2D)
+                elif self.kernelType == PERIDYNAMIC:
+                    setFun(self.c_kernel_params, fEVAL, peridynamicKernel2D)
+            else:
+                raise NotImplementedError()
+
+    @property
+    def singularityValue(self):
+        return getREAL(self.c_kernel_params, fSINGULARITY)
+
+    @singularityValue.setter
+    def singularityValue(self, REAL_t singularity):
+        setREAL(self.c_kernel_params, fSINGULARITY, singularity)
+
+    cdef REAL_t getSingularityValue(self):
+        return getREAL(self.c_kernel_params, fSINGULARITY)
+
+    @property
+    def horizonValue(self):
+        return sqrt(getREAL(self.c_kernel_params, fHORIZON2))
+
+    @horizonValue.setter
+    def horizonValue(self, REAL_t horizon):
+        setREAL(self.c_kernel_params, fHORIZON2, horizon**2)
+
+    cdef REAL_t getHorizonValue(self):
+        return sqrt(getREAL(self.c_kernel_params, fHORIZON2))
+
+    @property
+    def horizonValue2(self):
+        return getREAL(self.c_kernel_params, fHORIZON2)
+
+    cdef REAL_t getHorizonValue2(self):
+        return getREAL(self.c_kernel_params, fHORIZON2)
+
+    @horizonValue2.setter
+    def horizonValue2(self, REAL_t horizon2):
+        setREAL(self.c_kernel_params, fHORIZON2, horizon2)
+
+    @property
+    def scalingValue(self):
+        return getREAL(self.c_kernel_params, fSCALING)
+
+    @scalingValue.setter
+    def scalingValue(self, REAL_t scaling):
+        setREAL(self.c_kernel_params, fSCALING, scaling)
+
+    cdef REAL_t getScalingValue(self):
+        return getREAL(self.c_kernel_params, fSCALING)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y):
+        if self.piecewise:
+            if self.variableHorizon:
+                self.horizonValue = self.horizon.eval(x)
+            if self.variableScaling:
+                self.scalingValue = self.scaling.eval(x, y)
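For orientation, the horizon properties above keep the squared horizon in the fHORIZON2 slot; only the square root is exposed via horizonValue. A small usage sketch via the factory from kernels.py (import path assumes the PyNucleus_nl package layout; values follow directly from the setters):

    from PyNucleus_nl.kernels import getIntegrableKernel

    k = getIntegrableKernel(dim=1, kernel='indicator', horizon=0.3)
    k.horizonValue    # 0.3
    k.horizonValue2   # 0.09, the value actually held in the parameter block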
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            REAL_t[::1] xA
+        if self.piecewise:
+            if self.variableHorizon:
+                xA = <REAL_t[:dim]>x
+                self.horizonValue = self.horizon.eval(xA)
+            if self.variableScaling:
+                self.scalingValue = self.scaling.evalPtr(dim, x, y)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.c_kernel.eval(x, y)
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        return self.c_kernel.evalPtr(x, y)
+
+    def __call__(self, REAL_t[::1] x, REAL_t[::1] y, BOOL_t callEvalParams=True):
+        if self.piecewise and callEvalParams:
+            self.evalParams(x, y)
+        return self.c_kernel.eval(x, y)
+
+    def getModifiedKernel(self,
+                          function horizon=None,
+                          twoPointFunction scaling=None):
+        if horizon is None:
+            horizon = self.horizon
+            interaction = self.interaction
+        else:
+            if scaling is None and isinstance(self.scaling, variableFractionalLaplacianScaling):
+                scaling = self.scaling.getScalingWithDifferentHorizon()
+            interaction = type(self.interaction)()
+        if scaling is None:
+            scaling = self.scaling
+        from . kernels import getKernel
+        newKernel = getKernel(dim=self.dim, kernel=self.kernelType, horizon=horizon, interaction=interaction, scaling=scaling, piecewise=self.piecewise)
+        return newKernel
+
+    def getComplementKernel(self):
+        raise NotImplementedError()
+        from . kernels import getKernel
+        newKernel = getKernel(dim=self.dim, kernel=self.kernelType, horizon=self.horizon, interaction=self.interaction.getComplement(), scaling=self.scaling, piecewise=self.piecewise)
+        return newKernel
+
+    def __repr__(self):
+        if self.kernelType == INDICATOR:
+            kernelName = 'indicator'
+        elif self.kernelType == PERIDYNAMIC:
+            kernelName = 'peridynamic'
+        else:
+            raise NotImplementedError()
+        return "{}({}, {}, {})".format(self.__class__.__name__, kernelName, repr(self.interaction), self.scaling)
+
+    def __getstate__(self):
+        return (self.dim, self.kernelType, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise)
+
+    def __setstate__(self, state):
+        Kernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6])
+
+    def plot(self, x0=None):
+        from matplotlib import ticker
+        import matplotlib.pyplot as plt
+        if self.finiteHorizon:
+            delta = self.horizonValue
+        else:
+            delta = 2.
+        x = np.linspace(-1.1*delta, 1.1*delta, 201)
+        if x0 is None:
+            x0 = np.zeros((self.dim), dtype=REAL)
+        if self.dim == 1:
+            vals = np.zeros_like(x)
+            for i in range(x.shape[0]):
+                y = x0+np.array([x[i]], dtype=REAL)
+                if np.linalg.norm(x0-y) > 1e-9 or self.singularityValue >= 0:
+                    vals[i] = self(x0, y)
+                else:
+                    vals[i] = np.nan
+            plt.plot(x, vals)
+            plt.yscale('log')
+            if not self.finiteHorizon:
+                plt.xlim([x[0], x[-1]])
+            if self.singularityValue < 0:
+                plt.ylim(top=np.nanmax(vals))
+            plt.xlabel('$x-y$')
+        elif self.dim == 2:
+            X, Y = np.meshgrid(x, x)
+            Z = np.zeros_like(X)
+            for i in range(x.shape[0]):
+                for j in range(x.shape[0]):
+                    y = x0+np.array([x[i], x[j]], dtype=REAL)
+                    if np.linalg.norm(x0-y) > 1e-9 or self.singularityValue >= 0:
+                        Z[i, j] = self(x0, y)
+                    else:
+                        Z[i, j] = np.nan
+            levels = np.logspace(np.log10(Z[np.absolute(Z) > 0].min()),
+                                 np.log10(Z[np.absolute(Z) > 0].max()), 10)
+            if levels[0] < levels[-1]:
+                plt.contourf(X, Y, Z, locator=ticker.LogLocator(),
+                             levels=levels)
+            else:
+                plt.contourf(X, Y, Z)
+            plt.axis('equal')
+            plt.colorbar()
+            plt.xlabel('$x_1-y_1$')
+            plt.ylabel('$x_2-y_2$')
+
+
+cdef class FractionalKernel(Kernel):
+    def __init__(self, INDEX_t dim, fractionalOrderBase s, function horizon, interactionDomain interaction, twoPointFunction scaling, twoPointFunction phi=None, BOOL_t piecewise=True):
+        super(FractionalKernel, self).__init__(dim, FRACTIONAL, horizon, interaction, scaling, phi, piecewise)
+
+        self.symmetric = s.symmetric and isinstance(horizon, constant) and scaling.symmetric
+
+        self.s = s
+        self.variableOrder = isinstance(self.s, variableFractionalOrder)
+        self.variableSingularity = self.variableOrder
+        if self.variableOrder:
+            self.sValue = np.nan
+            (<void**>(self.c_kernel_params+fORDERFUN))[0] = <void*>s
+            self.singularityValue = np.nan
+            self.min_singularity = -self.dim-2*self.s.min
+            self.max_singularity = -self.dim-2*self.s.max
+        else:
+            self.sValue = self.s.value
+            self.singularityValue = -self.dim-2*self.sValue
+            self.min_singularity = self.singularityValue
+            self.max_singularity = self.singularityValue
+
+        self.variable = self.variableOrder or self.variableHorizon or self.variableScaling
+
+        if self.piecewise:
+            if isinstance(self.horizon, constant) and self.horizon.value == np.inf:
+                if dim == 1:
+                    self.c_kernel.setCallback(fracKernelInfinite1D)
+                elif dim == 2:
+                    self.c_kernel.setCallback(fracKernelInfinite2D)
+                else:
+                    raise NotImplementedError()
+            else:
+                if dim == 1:
+                    self.c_kernel.setCallback(fracKernelFinite1D)
+                elif dim == 2:
+                    self.c_kernel.setCallback(fracKernelFinite2D)
+                else:
+                    raise NotImplementedError()
+        else:
+            self.c_kernel.setCallback(updateAndEvalFractional)
+
+            if isinstance(self.horizon, constant) and self.horizon.value == np.inf:
+                if dim == 1:
+                    setFun(self.c_kernel_params, fEVAL, fracKernelInfinite1D)
+                elif dim == 2:
+                    setFun(self.c_kernel_params, fEVAL, fracKernelInfinite2D)
+                else:
+                    raise NotImplementedError()
+            else:
+                if dim == 1:
+                    setFun(self.c_kernel_params, fEVAL, fracKernelFinite1D)
+                elif dim == 2:
+                    setFun(self.c_kernel_params, fEVAL, fracKernelFinite2D)
+                else:
+                    raise NotImplementedError()
+
+    @property
+    def sValue(self):
+        return getREAL(self.c_kernel_params, fS)
+
+    @sValue.setter
+    def sValue(self, REAL_t s):
+        setREAL(self.c_kernel_params, fS, s)
+
+    cdef REAL_t getsValue(self):
+        return getREAL(self.c_kernel_params, fS)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef void evalParams(self, REAL_t[::1] x, REAL_t[::1] y):
+        if self.piecewise:
+            if self.variableOrder:
+                self.sValue = self.s.eval(x, y)
+                self.singularityValue = -self.dim-2*self.sValue
+            if self.variableHorizon:
+                self.horizonValue = self.horizon.eval(x)
+            if self.variableScaling:
+                self.scalingValue = self.scaling.eval(x, y)
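As a numerical cross-check of the fractional callbacks: fracKernelInfinite1D evaluates C*|x-y|^(-1-2s), so with s = 0.5 and an infinite horizon the value at distance 0.5 should be C*0.5^(-2) = 4*C, with C the constant produced by constantFractionalLaplacianScaling. A sketch (import path assumes the PyNucleus_nl package layout):

    import numpy as np
    from PyNucleus_base.myTypes import REAL
    from PyNucleus_nl.kernels import getFractionalKernel

    k = getFractionalKernel(dim=1, s=0.5, horizon=np.inf)
    x = np.array([0.0], dtype=REAL)
    y = np.array([0.5], dtype=REAL)
    k(x, y)  # == 4*C, where C = k.scalingValue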
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef void evalParamsPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            REAL_t[::1] xA
+        if self.piecewise:
+            if self.variableOrder:
+                self.sValue = self.s.evalPtr(dim, x, y)
+                self.singularityValue = -self.dim-2*self.sValue
+            if self.variableHorizon:
+                xA = <REAL_t[:dim]>x
+                self.horizonValue = self.horizon.eval(xA)
+            if self.variableScaling:
+                self.scalingValue = self.scaling.evalPtr(dim, x, y)
+
+    def getModifiedKernel(self,
+                          fractionalOrderBase s=None,
+                          function horizon=None,
+                          twoPointFunction scaling=None):
+        if s is None:
+            s = self.s
+        else:
+            if scaling is None and isinstance(self.scaling, variableFractionalLaplacianScaling):
+                raise NotImplementedError()
+        if horizon is None:
+            horizon = self.horizon
+            interaction = self.interaction
+        else:
+            if scaling is None and isinstance(self.scaling, variableFractionalLaplacianScaling):
+                scaling = self.scaling.getScalingWithDifferentHorizon()
+            interaction = type(self.interaction)()
+        if scaling is None:
+            scaling = self.scaling
+        from . kernels import getFractionalKernel
+        newKernel = getFractionalKernel(dim=self.dim, s=s, horizon=horizon, interaction=interaction, scaling=scaling, piecewise=self.piecewise)
+        return newKernel
+
+    def getComplementKernel(self):
+        from . kernels import getFractionalKernel
+        newKernel = getFractionalKernel(dim=self.dim, s=self.s, horizon=self.horizon, interaction=self.interaction.getComplement(), scaling=self.scaling, piecewise=self.piecewise)
+        return newKernel
+
+    def __repr__(self):
+        return "kernel(fractional, {}, {}, {})".format(self.s, repr(self.interaction), self.scaling)
+
+    def __getstate__(self):
+        return (self.dim, self.s, self.horizon, self.interaction, self.scaling, self.phi, self.piecewise)
+
+    def __setstate__(self, state):
+        FractionalKernel.__init__(self, state[0], state[1], state[2], state[3], state[4], state[5], state[6])
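Both kernel classes implement __getstate__/__setstate__, so a configured kernel should round-trip through pickle, e.g. for distributed assembly. A quick sanity sketch (same import assumptions as above):

    import pickle
    import numpy as np
    from PyNucleus_nl.kernels import getFractionalKernel

    k = getFractionalKernel(dim=2, s=0.75, horizon=np.inf)
    k2 = pickle.loads(pickle.dumps(k))
    assert repr(k2) == repr(k)  # same order, interaction and scaling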
diff --git a/nl/PyNucleus_nl/kernelsCy.pxd b/nl/PyNucleus_nl/kernelsCy.pxd
new file mode 100644
index 0000000..6e8e8e9
--- /dev/null
+++ b/nl/PyNucleus_nl/kernelsCy.pxd
@@ -0,0 +1,22 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from PyNucleus_base.myTypes cimport REAL_t
+
+ctypedef REAL_t (*kernel_callback_t)(REAL_t *x, REAL_t *y, void* user_data)
+
+
+cdef class kernelCy:
+    cdef:
+        kernel_callback_t callback
+        void *params
+    cdef void setCallback(self, kernel_callback_t callback)
+    cdef void setParams(self, void* params)
+    cdef void setKernel(self, void *user_data, size_t pos)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y)
+    cdef REAL_t evalPtr(self, REAL_t* x, REAL_t* y)
diff --git a/nl/PyNucleus_nl/kernelsCy.pyx b/nl/PyNucleus_nl/kernelsCy.pyx
new file mode 100644
index 0000000..78cbd27
--- /dev/null
+++ b/nl/PyNucleus_nl/kernelsCy.pyx
@@ -0,0 +1,48 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cimport cython
+
+cdef extern from "kernels.hpp":
+    cdef cppclass kernel_t:
+        kernel_t()
+        REAL_t eval(REAL_t *x, REAL_t *y) nogil
+
+    cdef cppclass fractional_kernel_t(kernel_t):
+        fractional_kernel_t(REAL_t s_, REAL_t C_)
+
+    # ctypedef REAL_t (*kernel_callback_t)(REAL_t *x, REAL_t *y, void* user_data)
+
+    cdef cppclass callback_kernel_t(kernel_t):
+        callback_kernel_t(kernel_callback_t kernel_callback_, void* user_data)
+
+
+cdef class kernelCy:
+    def __init__(self):
+        pass
+
+    cdef void setCallback(self, kernel_callback_t callback):
+        self.callback = callback
+
+    cdef void setParams(self, void* params):
+        self.params = params
+
+    cdef void setKernel(self, void *user_data, size_t pos):
+        (<kernel_t**>(user_data+pos))[0] = new callback_kernel_t(self.callback, self.params)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.callback(&x[0], &y[0], self.params)
+
+    @cython.initializedcheck(False)
+    @cython.boundscheck(False)
+    @cython.wraparound(False)
+    cdef REAL_t evalPtr(self, REAL_t* x, REAL_t* y):
+        return self.callback(x, y, self.params)
diff --git a/nl/PyNucleus_nl/nonlocalLaplacian.pxd b/nl/PyNucleus_nl/nonlocalLaplacian.pxd
new file mode 100644
index 0000000..6474932
--- /dev/null
+++ b/nl/PyNucleus_nl/nonlocalLaplacian.pxd
@@ -0,0 +1,72 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t
+from PyNucleus_base.tupleDict cimport tupleDictMASK, indexSet, indexSetIterator, arrayIndexSet, arrayIndexSetIterator
+from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadQuadratureRule,
+                                       doubleSimplexQuadratureRule, GaussJacobi,
+                                       simplexDuffyTransformation, simplexXiaoGimbutas)
+from PyNucleus_fem.DoFMaps cimport DoFMap
+from . clusterMethodCy cimport tree_node, farFieldClusterPair, H2Matrix
+from . 
nonlocalLaplacianBase cimport (double_local_matrix_t, + nonlocalLaplacian, + panelType, + MASK_t) +from . fractionalLaplacian1D cimport (fractionalLaplacian1D_P1, + fractionalLaplacian1D_P1_boundary, + + fractionalLaplacian1D_P0, + fractionalLaplacian1D_P0_boundary) +from . fractionalLaplacian2D cimport (fractionalLaplacian2D_P1, + fractionalLaplacian2D_P1_boundary, + + ) +from . nonlocalLaplacianND cimport integrable1D, integrable2D + + +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI +from PyNucleus_base.performanceLogger cimport PLogger, FakePLogger, LoggingPLogger +from PyNucleus_base.linear_operators cimport LinearOperator +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.DoFMaps cimport DoFMap +from . kernels2 cimport (Kernel, + FractionalKernel) + + +include "config.pxi" + + +cdef class nonlocalBuilder: + cdef: + meshBase mesh + public DoFMap dm + public DoFMap dm2 + public Kernel kernel + public double_local_matrix_t local_matrix + public double_local_matrix_t local_matrix_zeroExterior + public double_local_matrix_t local_matrix_surface + bint zeroExterior + REAL_t[::1] contrib, contribZeroExterior + list _d2c + MPI.Comm comm + public FakePLogger PLogger + dict params + cdef inline double_local_matrix_t getLocalMatrix(self, dict params) + cdef inline double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon) + cpdef REAL_t getEntry(self, INDEX_t I, INDEX_t J) + cpdef REAL_t getEntryCluster(self, INDEX_t I, INDEX_t J) + cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=*, LinearOperator Anear=*, dict jumps=*, BOOL_t forceSymmetric=*, indexSet myDofs=*, str prefix=*) + + +cdef class nearFieldClusterPair: + cdef: + public tree_node n1, n2 + public indexSet cellsUnion, cellsInter diff --git a/nl/PyNucleus_nl/nonlocalLaplacian.pyx b/nl/PyNucleus_nl/nonlocalLaplacian.pyx new file mode 100644 index 0000000..e4e2fd2 --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacian.pyx @@ -0,0 +1,2567 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from libc.math cimport ceil +import numpy as np +cimport numpy as np +cimport cython + +include "config.pxi" + +from libc.math cimport sin, cos, M_PI as pi +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_base import uninitialized +from PyNucleus_base.intTuple cimport intTuple +from PyNucleus_fem.mesh import mesh0d, mesh1d +from PyNucleus_fem.functions cimport function, constant +from PyNucleus_fem.DoFMaps cimport P0_DoFMap, P1_DoFMap, P2_DoFMap +from PyNucleus_fem.meshCy cimport sortEdge, encode_edge, decode_edge, encode_face +from PyNucleus_fem.femCy cimport local_matrix_t +from PyNucleus_fem.femCy import assembleMatrix, mass_1d_sym_scalar_anisotropic, mass_2d_sym_scalar_anisotropic +from PyNucleus_fem.quadrature import simplexXiaoGimbutas +from PyNucleus_base.sparsityPattern cimport sparsityPattern +from PyNucleus_base.linear_operators cimport (CSR_LinearOperator, + SSS_LinearOperator, + Dense_LinearOperator, + Dense_SubBlock_LinearOperator, + diagonalOperator, + TimeStepperLinearOperator, + nullOperator) +from . nonlocalLaplacianBase import MASK +from . twoPointFunctions cimport constantTwoPoint +from . fractionalOrders cimport (fractionalOrderBase, + constFractionalOrder, + variableFractionalOrder, + variableFractionalLaplacianScaling) +from . kernels import getFractionalKernel + +from . clusterMethodCy import (assembleFarFieldInteractions, + getDoFBoxesAndCells, + getFractionalOrders, + getFractionalOrdersDiagonal, + getAdmissibleClusters, + symmetrizeNearFieldClusters, + trimTree) +import logging +from logging import INFO +import warnings +import mpi4py.rc +mpi4py.rc.initialize = False +from mpi4py import MPI +from mpi4py cimport MPI +include "panelTypes.pxi" + +LOGGER = logging.getLogger(__name__) + + +cdef class IndexManager: + cdef: + DoFMap dm + indexSet myDofs + public INDEX_t[::1] localDoFs + INDEX_t[::1] permutedDoFsLocal + INDEX_t[:, ::1] idxCellFlip + LinearOperator A + sparsityPattern sP + public dict cache + intTuple hv + + def __init__(self, DoFMap dm, LinearOperator A=None, cellPairIdentifierSize=1, indexSet myDofs=None, sparsityPattern sP=None): + cdef: + INDEX_t[:, ::1] idxCellFlip + INDEX_t j, offset + self.dm = dm + self.myDofs = myDofs + self.localDoFs = uninitialized((2*self.dm.dofs_per_element), dtype=INDEX) + self.permutedDoFsLocal = uninitialized((2*self.dm.dofs_per_element), dtype=INDEX) + self.hv = intTuple.create(uninitialized(cellPairIdentifierSize, dtype=INDEX)) + self.A = A + self.sP = sP + if self.dm.mesh.dim == 1: + idxCellFlip = uninitialized((2, self.dm.dofs_per_element), dtype=INDEX) + for j in range(self.dm.dofs_per_vertex): + idxCellFlip[0, j] = j + idxCellFlip[0, self.dm.dofs_per_vertex+j] = self.dm.dofs_per_vertex+j + + idxCellFlip[1, j] = self.dm.dofs_per_vertex+j + idxCellFlip[1, self.dm.dofs_per_vertex+j] = j + offset = 2*self.dm.dofs_per_vertex + for j in range(self.dm.dofs_per_cell): + idxCellFlip[0, offset+j] = offset+j + idxCellFlip[1, offset+self.dm.dofs_per_cell-1-j] = offset+j + + elif self.dm.mesh.dim == 2: + idxCellFlip = uninitialized((3, self.dm.dofs_per_element), dtype=INDEX) + for j in range(self.dm.dofs_per_vertex): + idxCellFlip[0, j] = j + idxCellFlip[0, self.dm.dofs_per_vertex+j] = self.dm.dofs_per_vertex+j + idxCellFlip[0, 2*self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j + + idxCellFlip[1, j] = self.dm.dofs_per_vertex+j + idxCellFlip[1, self.dm.dofs_per_vertex+j] = 2*self.dm.dofs_per_vertex+j + 
idxCellFlip[1, 2*self.dm.dofs_per_vertex+j] = j + + idxCellFlip[2, j] = 2*self.dm.dofs_per_vertex+j + idxCellFlip[2, self.dm.dofs_per_vertex+j] = j + idxCellFlip[2, 2*self.dm.dofs_per_vertex+j] = self.dm.dofs_per_vertex+j + else: + raise NotImplementedError() + self.idxCellFlip = idxCellFlip + self.cache = {} + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void getDoFsElem(self, INDEX_t cellNo): + cdef: + INDEX_t p, dof + for p in range(self.dm.dofs_per_element): + self.localDoFs[p] = self.dm.cell2dof(cellNo, p) + if self.myDofs is not None: + for p in range(self.dm.dofs_per_element): + dof = self.localDoFs[p] + if not self.myDofs.inSet(dof): + self.localDoFs[p] = -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline BOOL_t getDoFsElemElem(self, INDEX_t cellNo1, INDEX_t cellNo2): + cdef: + INDEX_t p, dof + BOOL_t canSkip = True + for p in range(self.dm.dofs_per_element): + dof = self.dm.cell2dof(cellNo1, p) + self.localDoFs[p] = dof + canSkip = canSkip and dof < 0 + for p in range(self.dm.dofs_per_element): + dof = self.dm.cell2dof(cellNo2, p) + self.localDoFs[self.dm.dofs_per_element+p] = dof + canSkip = canSkip and dof < 0 + return canSkip + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToMatrixElemSym(self, const REAL_t[::1] contrib, REAL_t fac): + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(self.dm.dofs_per_element): + I = self.localDoFs[p] + if I >= 0: + self.A.addToEntry(I, I, fac*contrib[k]) + k += 1 + for q in range(p+1, self.dm.dofs_per_element): + J = self.localDoFs[q] + if J >= 0: + self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(J, I, fac*contrib[k]) + k += 1 + else: + k += self.dm.dofs_per_element-p + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToSparsityElemElemSym(self): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + if I >= 0: + self.sP.add(I, I) + k += 1 + for q in range(p+1, 2*self.dm.dofs_per_element): + J = self.localDoFs[q] + if J >= 0: + self.sP.add(I, J) + self.sP.add(J, I) + k += 1 + else: + k += 2*self.dm.dofs_per_element-p + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToMatrixElemElemSym(self, const REAL_t[::1] contrib, REAL_t fac): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + if I >= 0: + self.A.addToEntry(I, I, fac*contrib[k]) + k += 1 + for q in range(p+1, 2*self.dm.dofs_per_element): + J = self.localDoFs[q] + if J >= 0: + self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(J, I, fac*contrib[k]) + k += 1 + else: + k += 2*self.dm.dofs_per_element-p + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToSparsityElemElem(self): + # Add general 'contrib' to elements i and j + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + if I >= 0: + for q in range(2*self.dm.dofs_per_element): + J = self.localDoFs[q] + if J >= 0: + self.sP.add(I, J) + k += 1 + else: + k += 2*self.dm.dofs_per_element + + @cython.initializedcheck(False) + 
@cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToMatrixElemElem(self, const REAL_t[::1] contrib, REAL_t fac): + # Add general 'contrib' to elements i and j + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + if I >= 0: + for q in range(2*self.dm.dofs_per_element): + J = self.localDoFs[q] + if J >= 0: + self.A.addToEntry(I, J, fac*contrib[k]) + k += 1 + else: + k += 2*self.dm.dofs_per_element + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef tupleDictMASK buildMasksForClusters(self, list clusterList, bint symmetricCells, INDEX_t *startCluster): + cdef: + nearFieldClusterPair cluster = clusterList[0] + MASK_t cellMask1, cellMask2 + indexSet cellsUnion = cluster.cellsUnion + indexSetIterator it = cellsUnion.getIter(), it2 = cellsUnion.getIter() + indexSet clusterDofs1, clusterDofs2 + INDEX_t cellNo1 = -1, cellNo2 = -1 + INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) + INDEX_t[::1] cellPair2 = uninitialized((2), dtype=INDEX) + tupleDictMASK masks = tupleDictMASK(self.dm.mesh.num_cells, deleteHits=False, logicalAndHits=True, length_inc=20) + INDEX_t p, I + dict cellMasks1, cellMasks2 + MASK_t mask, mask1, mask2, cellMask11, cellMask12, cellMask21, cellMask22, k + INDEX_t dofs_per_element = self.dm.dofs_per_element + + cellMask1, cellMask2 = 0, 0 + for cluster in clusterList[startCluster[0]:]: + startCluster[0] += 1 + cellsUnion = cluster.cellsUnion + cellMasks1 = {} + cellMasks2 = {} + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + + it.setIndexSet(cellsUnion) + + while it.step(): + cellNo1 = it.i + mask1 = 0 + mask2 = 0 + k = 1 + for p in range(dofs_per_element): + I = self.dm.cell2dof(cellNo1, p) + if I >= 0: + if clusterDofs1.inSet(I): + mask1 |= k + if clusterDofs2.inSet(I): + mask2 |= k + k = k << 1 + cellMasks1[cellNo1] = mask1 + cellMasks2[cellNo1] = mask2 + + if not symmetricCells: + # TODO: Think some more about this branch, maybe this can be improved. 
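+ # Informal overview of this pass: for every cell in cellsUnion we
+ # precompute two DoF bitmasks; bit p of mask1 (resp. mask2) is set iff
+ # the p-th local DoF of the cell belongs to cluster side n1 (resp. n2).
+ # For each cell pair, the two 2*dofs_per_element wide DoF masks are
+ # expanded by getElemElemSymMask into a mask over the packed local
+ # matrix entries, and 'masks' keeps one combined entry mask per cell
+ # pair, so that each element x element interaction is evaluated at most
+ # once in assembleClusters.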
+ it.reset() + it2.setIndexSet(cellsUnion) + while it.step(): + cellNo1 = it.i + cellPair[0] = cellNo1 + cellMask11 = cellMasks1[cellNo1] + cellMask12 = cellMasks2[cellNo1] + it2.reset() + while it2.step(): + cellNo2 = it2.i + if ((cellNo1 > cellNo2) and symmetricCells): + continue + cellMask21 = cellMasks1[cellNo2] + cellMask22 = cellMasks2[cellNo2] + cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) + cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) + if (cellMask1 == 0) or (cellMask2 == 0): + continue + cellPair[1] = cellNo2 + mask = self.getElemElemSymMask(cellMask1, cellMask2) + # does a logical "and" if there already is an entry + masks.enterValue(cellPair, mask) + else: + it.setIndexSet(cluster.n1.cells) + it2.setIndexSet(cluster.n2.cells) + while it.step(): + cellNo1 = it.i + cellPair[0] = cellNo1 + cellPair2[1] = cellNo1 + cellMask11 = cellMasks1[cellNo1] + cellMask12 = cellMasks2[cellNo1] + it2.reset() + while it2.step(): + cellNo2 = it2.i + cellMask21 = cellMasks1[cellNo2] + cellMask22 = cellMasks2[cellNo2] + if cellNo1 > cellNo2: + cellMask1 = cellMask21 | (cellMask11 << dofs_per_element) + cellMask2 = cellMask22 | (cellMask12 << dofs_per_element) + if (cellMask1 == 0) or (cellMask2 == 0): + continue + cellPair2[0] = cellNo2 + mask = self.getElemElemSymMask(cellMask1, cellMask2) + # does a logical "and" if there already is an entry + masks.enterValue(cellPair2, mask) + else: + cellMask1 = cellMask11 | (cellMask21 << dofs_per_element) + cellMask2 = cellMask12 | (cellMask22 << dofs_per_element) + if (cellMask1 == 0) or (cellMask2 == 0): + continue + cellPair[1] = cellNo2 + mask = self.getElemElemSymMask(cellMask1, cellMask2) + # does a logical "and" if there already is an entry + masks.enterValue(cellPair, mask) + + if masks.nnz > 10000000: + break + + return masks + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline MASK_t getElemSymEntryMask(self, INDEX_t cellNo1, INDEX_t I, INDEX_t J): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t p, q, K, L + MASK_t k = 1 + MASK_t mask = 0 + for p in range(self.dm.dofs_per_element): + K = self.dm.cell2dof(cellNo1, p) + for q in range(p, self.dm.dofs_per_element): + L = self.dm.cell2dof(cellNo1, q) + if (I == K and J == L) or (J == K and I == L): + mask |= k + k = k << 1 + return mask + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline MASK_t getElemElemSymMask(self, MASK_t mask_dofs1, MASK_t mask_dofs2): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t p, q + MASK_t k = 1 + MASK_t mask = 0 + for p in range(2*self.dm.dofs_per_element): + if (mask_dofs1 & (1 << p)): + for q in range(p, 2*self.dm.dofs_per_element): + if (mask_dofs2 & (1 << q)): + mask |= k + k = k << 1 + else: + k = k << (2*self.dm.dofs_per_element-p) + return mask + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline MASK_t getElemElemSymEntryMask(self, INDEX_t cellNo1, INDEX_t cellNo2, INDEX_t I, INDEX_t J): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t p, q, K, L + MASK_t k = 1 + MASK_t mask = 0 + for p in range(2*self.dm.dofs_per_element): + if p < self.dm.dofs_per_element: + K = self.dm.cell2dof(cellNo1, p) + else: + K = self.dm.cell2dof(cellNo2, p-self.dm.dofs_per_element) + + for q in range(p, 2*self.dm.dofs_per_element): + if q < self.dm.dofs_per_element: + L = 
self.dm.cell2dof(cellNo1, q) + else: + L = self.dm.cell2dof(cellNo2, q-self.dm.dofs_per_element) + if (I == K and J == L) or (J == K and I == L): + mask |= k + k = k << 1 + return mask + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToMatrixElemElemSymMasked(self, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + if mask & (1 << k): + self.A.addToEntry(I, I, fac*contrib[k]) + k += 1 + for q in range(p+1, 2*self.dm.dofs_per_element): + if mask & (1 << k): + J = self.localDoFs[q] + self.A.addToEntry(I, J, fac*contrib[k]) + self.A.addToEntry(J, I, fac*contrib[k]) + k += 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToMatrixElemElemMasked(self, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q, I, J + k = 0 + for p in range(2*self.dm.dofs_per_element): + I = self.localDoFs[p] + for q in range(2*self.dm.dofs_per_element): + J = self.localDoFs[q] + if mask & (1 << k): + self.A.addToEntry(I, J, fac*contrib[k]) + k += 1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToCache(self, REAL_t[::1] contrib, INDEX_t[::1] ID, INDEX_t perm, BOOL_t inv=False): + cdef: + intTuple hv = intTuple.create(ID) + contribNew = uninitialized((contrib.shape[0]), dtype=REAL) + self.permute(contrib, contribNew, perm, inv) + self.cache[hv] = contribNew + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void permute(self, REAL_t[::1] contrib, REAL_t[::1] contribNew, INDEX_t perm, BOOL_t inv=False): + cdef: + INDEX_t K, p, q + INDEX_t k, i, j + INDEX_t dofs_per_element = self.dm.dofs_per_element + INDEX_t dofs_per_element2 = 2*dofs_per_element + BOOL_t perm0 = perm & 1 + INDEX_t perm1 = (perm >> 1) & 3 + INDEX_t perm2 = (perm >> 3) & 3 + INDEX_t[::1] permutedDoFsLocal = self.permutedDoFsLocal + if inv and self.dm.dim == 2: + if perm1 == 1: + perm1 = 2 + elif perm1 == 2: + perm1 = 1 + + if perm2 == 1: + perm2 = 2 + elif perm2 == 2: + perm2 = 1 + if perm0: + perm1, perm2 = perm2, perm1 + + for p in range(dofs_per_element2): + if perm0: + i = p+dofs_per_element + if i >= dofs_per_element2: + i -= dofs_per_element2 + else: + i = p + if (i < dofs_per_element): + i = self.idxCellFlip[perm1, i] + else: + i = dofs_per_element + self.idxCellFlip[perm2, i-dofs_per_element] + permutedDoFsLocal[p] = i + + K = 0 + for p in range(dofs_per_element2): + i = permutedDoFsLocal[p] + + k = 2*dofs_per_element*i-(i*(i+1) >> 1) + i + contribNew[K] = contrib[k] + K += 1 + + for q in range(p+1, dofs_per_element2): + j = permutedDoFsLocal[q] + + if i > j: + k = dofs_per_element2*j-(j*(j+1) >> 1) + i + else: + k = dofs_per_element2*i-(i*(i+1) >> 1) + j + contribNew[K] = contrib[k] + K += 1 + + + def __repr__(self): + s = '' + s += 'Cache size: {}'.format(len(self.cache)) + return s + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline MASK_t getElemSymMask(DoFMap DoFMap, INDEX_t cellNo1, INDEX_t I, INDEX_t J): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t p, q, K, L + MASK_t k = 1 + MASK_t mask = 0 + for p in range(DoFMap.dofs_per_element): 
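+ # Note on the bit layout: bit k of the mask enumerates the packed upper
+ # triangle (p, q), q >= p, in row-major order, matching the layout of
+ # symmetric 'contrib' arrays, i.e. k = n*p - p*(p+1)//2 + q with
+ # n = dofs_per_element. E.g. for n = 3:
+ # (0,0)->0, (0,1)->1, (0,2)->2, (1,1)->3, (1,2)->4, (2,2)->5.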
+ K = DoFMap.cell2dof(cellNo1, p) + for q in range(p, DoFMap.dofs_per_element): + L = DoFMap.cell2dof(cellNo1, q) + if (I == K and J == L) or (J == K and I == L): + mask |= k + k = k << 1 + return mask + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline MASK_t getElemElemSymMask(DoFMap DoFMap, INDEX_t cellNo1, INDEX_t cellNo2, INDEX_t I, INDEX_t J): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t p, q, K, L + MASK_t k = 1 + MASK_t mask = 0 + for p in range(2*DoFMap.dofs_per_element): + if p < DoFMap.dofs_per_element: + K = DoFMap.cell2dof(cellNo1, p) + else: + K = DoFMap.cell2dof(cellNo2, p-DoFMap.dofs_per_element) + + for q in range(p, 2*DoFMap.dofs_per_element): + if q < DoFMap.dofs_per_element: + L = DoFMap.cell2dof(cellNo1, q) + else: + L = DoFMap.cell2dof(cellNo2, q-DoFMap.dofs_per_element) + if (I == K and J == L) or (J == K and I == L): + mask |= k + k = k << 1 + return mask + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t extractElemSymMasked(DoFMap DoFMap, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q + REAL_t s = 0. + k = 0 + for p in range(DoFMap.dofs_per_element): + for q in range(p, DoFMap.dofs_per_element): + if mask & (1 << k): + s += fac*contrib[k] + k += 1 + return s + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef inline REAL_t extractElemElemSymMasked(DoFMap DoFMap, const REAL_t[::1] contrib, REAL_t fac, MASK_t mask): + # Add symmetric 'contrib' to elements i and j in symmetric fashion + cdef: + INDEX_t k, p, q + REAL_t s = 0. + k = 0 + for p in range(2*DoFMap.dofs_per_element): + for q in range(p, 2*DoFMap.dofs_per_element): + if mask & (1 << k): + s += fac*contrib[k] + k += 1 + return s + + +cdef class nonlocalBuilder: + def __init__(self, + meshBase mesh, + DoFMap dm, + Kernel kernel, + dict params={}, + bint zeroExterior=True, + MPI.Comm comm=None, + double_local_matrix_t lm_interior=None, + double_local_matrix_t lm_zeroExterior=None, + FakePLogger PLogger=None, + DoFMap dm2=None, + **kwargs): + if 'boundary' in kwargs: + warnings.warn('"boundary" parameter deprecated', DeprecationWarning) + zeroExterior = kwargs['boundary'] + + self.mesh = mesh + self.dm = dm + assert self.dm.mesh == self.mesh + if dm2 is None: + pass + else: + self.dm2 = dm2 + assert self.dm.mesh == self.dm2.mesh + self.kernel = kernel + if self.kernel.finiteHorizon: + self.zeroExterior = False + else: + self.zeroExterior = zeroExterior + self.comm = comm + self.params = params + + assert isinstance(self.kernel.horizon, constant) + assert kernel.dim == mesh.dim + assert kernel.dim == dm.mesh.dim + + if lm_interior is not None: + self.local_matrix = lm_interior + else: + self.local_matrix = self.getLocalMatrix(params) + + if self.local_matrix.symmetricLocalMatrix: + self.contrib = uninitialized(((2*dm.dofs_per_element)*(2*dm.dofs_per_element+1)//2), dtype=REAL) + else: + self.contrib = uninitialized(((2*dm.dofs_per_element)**2), dtype=REAL) + + LOGGER.debug(self.local_matrix) + + if lm_zeroExterior is not None: + self.local_matrix_zeroExterior = lm_zeroExterior + else: + self.local_matrix_zeroExterior = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=True) + self.local_matrix_surface = self.getLocalMatrixBoundaryZeroExterior(params, infHorizon=False) + + if self.local_matrix_zeroExterior 
is not None: + self.local_matrix_zeroExterior.setMesh1(mesh) + self.local_matrix_surface.setMesh1(mesh) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + self.contribZeroExterior = uninitialized((dm.dofs_per_element*(dm.dofs_per_element+1)//2), dtype=REAL) + else: + self.contribZeroExterior = uninitialized(((dm.dofs_per_element)**2), dtype=REAL) + LOGGER.debug(self.local_matrix_zeroExterior) + LOGGER.debug(self.local_matrix_surface) + else: + self.contribZeroExterior = uninitialized((0), dtype=REAL) + + self.local_matrix.setMesh1(mesh) + self.local_matrix.setMesh2(mesh) + + if PLogger is not None: + self.PLogger = PLogger + else: + self.PLogger = FakePLogger() + + @property + def d2c(self): + if self._d2c is None: + self._d2c = self.dm.getPatchLookup() + return self._d2c + + cdef inline double_local_matrix_t getLocalMatrix(self, dict params): + cdef: + bint symmetric, genKernel, forceNonSym + fractionalOrderBase s + target_order = params.get('target_order', None) + genKernel = params.get('genKernel', False) + forceNonSym = params.get('forceNonSym', False) + autoQuad = params.get('automaticQuadrature', False) + symmetric = not forceNonSym and self.kernel.symmetric + if genKernel: + LOGGER.warning('General kernel not implemented for boundary term') + elif isinstance(self.kernel, FractionalKernel): + s = self.kernel.s + assert ((s.min < 1.) and (s.max < 1.)) or ((s.min > 1.) and (s.max > 1.)), "smin={}, smax={} not supported".format(s.min, s.max) + + if isinstance(self.dm, P0_DoFMap): + if self.mesh.dim == 1: + if s.min > 0. and s.max < 0.5: + local_matrix = fractionalLaplacian1D_P0(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif isinstance(self.dm, P1_DoFMap): + if self.mesh.dim == 1: + if s.min > 0. and s.max < 1.: + if symmetric: + if autoQuad: + raise NotImplementedError() + else: + local_matrix = fractionalLaplacian1D_P1(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + raise NotImplementedError() + + else: + raise NotImplementedError(self.kernel) + elif self.mesh.dim == 2: + if s.min > 0. and s.max < 1.: + if symmetric: + if autoQuad: + raise NotImplementedError() + else: + local_matrix = fractionalLaplacian2D_P1(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + raise NotImplementedError(self.kernel) + + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + else: + raise NotImplementedError() + else: + if self.mesh.dim == 1: + local_matrix = integrable1D(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + elif self.mesh.dim == 2: + local_matrix = integrable2D(self.kernel, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + raise NotImplementedError() + return local_matrix + + cdef inline double_local_matrix_t getLocalMatrixBoundaryZeroExterior(self, dict params, BOOL_t infHorizon): + cdef: + bint genKernel + fractionalOrderBase s + target_order = params.get('target_order', None) + genKernel = params.get('genKernel', False) + if isinstance(self.kernel, FractionalKernel): + s = self.kernel.s + assert ((s.min < 1.) and (s.max < 1.)) or ((s.min > 1.) 
and (s.max > 1.)) + assert isinstance(self.kernel.horizon, constant) + if infHorizon: + kernelInfHorizon = self.kernel.getModifiedKernel(horizon=constant(np.inf)) + else: + kernelInfHorizon = self.kernel + if genKernel: + LOGGER.warning('General kernel not implemented for boundary term') + if isinstance(self.dm, P0_DoFMap): + if self.mesh.dim == 1: + if s.min > 0. and s.max < 0.5: + local_matrix = fractionalLaplacian1D_P0_boundary(kernelInfHorizon, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + elif isinstance(self.dm, P1_DoFMap): + if self.mesh.dim == 1: + if s.min > 0. and s.max < 1.: + local_matrix = fractionalLaplacian1D_P1_boundary(kernelInfHorizon, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + + else: + raise NotImplementedError() + elif self.mesh.dim == 2: + if s.min > 0. and s.max < 1.: + local_matrix = fractionalLaplacian2D_P1_boundary(kernelInfHorizon, + mesh=self.mesh, + DoFMap=self.dm, + target_order=target_order) + + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + else: + raise NotImplementedError() + else: + local_matrix = None + return local_matrix + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def getDense(self, BOOL_t trySparsification=False): + cdef: + INDEX_t cellNo1, cellNo2 + LinearOperator A = None + REAL_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior + INDEX_t start, end + meshBase surface + IndexManager iM + INDEX_t i, j, explicitZerosRow + np.int64_t explicitZeros + REAL_t[:, ::1] data + REAL_t sparsificationThreshold = 0.8 + BOOL_t symmetricLocalMatrix = self.local_matrix.symmetricLocalMatrix + BOOL_t symmetricCells = self.local_matrix.symmetricCells + + if self.comm: + start = np.ceil(self.mesh.num_cells*self.comm.rank/self.comm.size) + end = np.ceil(self.mesh.num_cells*(self.comm.rank+1)/self.comm.size) + else: + start = 0 + end = self.mesh.num_cells + + if (trySparsification + and (self.comm is None or self.comm.size == 1) + and not self.zeroExterior + and self.dm2 is None + and self.kernel.finiteHorizon + and (self.mesh.volume*(1.-sparsificationThreshold) > self.kernel.getHorizonValue()**self.mesh.dim)): + + with self.PLogger.Timer('build sparsity pattern'): + + sP = sparsityPattern(self.dm.num_dofs) + iM = IndexManager(self.dm, None, sP=sP) + + for cellNo1 in range(start, end): + self.local_matrix.setCell1(cellNo1) + for cellNo2 in range(cellNo1, self.mesh.num_cells): + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + else: + if self.local_matrix.symmetricCells: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + else: + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + if self.local_matrix.symmetricLocalMatrix: + iM.addToSparsityElemElemSym() + else: + iM.addToSparsityElemElem() + self.local_matrix.swapCells() + indptr, indices = sP.freeze() + useSymmetricMatrix = 
self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells + if useSymmetricMatrix: + A = SSS_LinearOperator(indices, indptr, + np.zeros((indices.shape[0]), dtype=REAL), + np.zeros((self.dm.num_dofs), dtype=REAL)) + ratio = ((A.nnz+A.num_rows)/REAL(A.num_rows))/REAL(A.num_columns) + else: + A = CSR_LinearOperator(indices, indptr, + np.zeros((indices.shape[0]), dtype=REAL)) + ratio = (A.nnz/REAL(A.num_rows))/REAL(A.num_columns) + LOGGER.warning('Assembling into sparse{} matrix, since {}% of entries are zero.'.format(', symmetric' if useSymmetricMatrix else '', + 100.*(1.-ratio))) + trySparsification = False + else: + if self.dm2 is None: + A = Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm.num_dofs), dtype=REAL)) + else: + A = Dense_LinearOperator(np.zeros((self.dm.num_dofs, self.dm2.num_dofs), dtype=REAL)) + + if self.dm2 is None: + iM = IndexManager(self.dm, A) + else: + LOGGER.warning('Efficiency of assembly with 2 DoFMaps is bad.') + dmCombined = self.dm.combine(self.dm2) + B = SubMatrixAssemblyOperator(A, + np.arange(self.dm.num_dofs, dtype=INDEX), + np.arange(self.dm.num_dofs, self.dm.num_dofs+self.dm2.num_dofs, dtype=INDEX)) + iM = IndexManager(dmCombined, B) + + # Omega x Omega + with self.PLogger.Timer('interior'): + for cellNo1 in range(start, end): + self.local_matrix.setCell1(cellNo1) + for cellNo2 in range(cellNo1, self.mesh.num_cells): + self.local_matrix.setCell2(cellNo2) + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + else: + if symmetricCells: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + # If the kernel is symmetric, the contributions from (cellNo1, cellNo2) and (cellNo2, cellNo1) + # are the same. We multiply by 2 to account for the contribution from cells (cellNo2, cellNo1). + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 2.) + else: + iM.addToMatrixElemElem(contrib, 2.) + else: + if panel != IGNORED: + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + self.local_matrix.swapCells() + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if iM.getDoFsElemElem(cellNo2, cellNo1): + continue + self.local_matrix.eval(contrib, panel) + if symmetricLocalMatrix: + iM.addToMatrixElemElemSym(contrib, 1.) + else: + iM.addToMatrixElemElem(contrib, 1.) + self.local_matrix.swapCells() + + # Omega x Omega^C + if self.zeroExterior: + with self.PLogger.Timer('zeroExterior'): + surface = self.mesh.get_surface_mesh() + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(start, end): + iM.getDoFsElem(cellNo1) + self.local_matrix_zeroExterior.setCell1(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel) + # if local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) 
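+ # Sketch of what happens here: the Omega x Omega^C term is assembled as
+ # a surface integral over the boundary mesh of Omega; the corresponding
+ # kernel identity is spelled out in the 'cluster zeroExterior' block of
+ # assembleClusters below.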
+ # else: + # raise NotImplementedError() + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, A.data) + if trySparsification: + explicitZeros = 0 + data = A.data + nr = A.num_rows + for i in range(A.num_rows): + explicitZerosRow = 0 + for j in range(A.num_columns): + if data[i, j] == 0.: + explicitZerosRow += 1 + explicitZeros += explicitZerosRow + if not (explicitZerosRow > sparsificationThreshold*A.num_columns): + nr = i+1 + break + ratio = (explicitZeros/REAL(nr))/REAL(A.num_columns) + if ratio > sparsificationThreshold: + LOGGER.warning('Converting dense to sparse matrix, since {}% of entries are zero.'.format(100.*ratio)) + return CSR_LinearOperator.from_dense(A) + else: + LOGGER.warning('Not converting dense to sparse matrix, since only {}% of entries are zero.'.format(100.*ratio)) + return A + + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef REAL_t getEntryCluster(self, INDEX_t I, INDEX_t J): + cdef: + tree_node n1, n2, n3 + list clusters = [] + nearFieldClusterPair c1, c2, c3 + arrayIndexSet aI1, aI2, aI3 + REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) + INDEX_t[::1] I_view = np.array([I], dtype=INDEX) + INDEX_t[::1] J_view = np.array([J], dtype=INDEX) + arrayIndexSetIterator it = arrayIndexSetIterator() + list d2c = self.d2c + LinearOperator A + REAL_t[:, ::1] mat = np.zeros((1, 1), dtype=REAL) + if I == J: + aI3 = arrayIndexSet(I_view) + n3 = tree_node(None, aI3, fake_boxes) + + cells = set() + it.setIndexSet(aI3) + while it.step(): + cells |= d2c[it.i] + n3._cells = arrayIndexSet() + n3._cells.fromSet(cells) + + c3 = nearFieldClusterPair(n3, n3) + clusters.append(c3) + else: + aI1 = arrayIndexSet(I_view) + aI2 = arrayIndexSet(J_view) + n1 = tree_node(None, aI1, fake_boxes) + n2 = tree_node(None, aI2, fake_boxes) + + cells = set() + it.setIndexSet(aI1) + while it.step(): + cells |= d2c[it.i] + n1._cells = arrayIndexSet() + n1._cells.fromSet(cells) + + cells = set() + it.setIndexSet(aI2) + while it.step(): + cells |= d2c[it.i] + n2._cells = arrayIndexSet() + n2._cells.fromSet(cells) + + c1 = nearFieldClusterPair(n1, n2) + c2 = nearFieldClusterPair(n2, n1) + clusters.append(c1) + clusters.append(c2) + A = Dense_SubBlock_LinearOperator(I_view, + J_view, + self.dm.num_dofs, + self.dm.num_dofs, + mat) + self.assembleClusters(clusters, Anear=A) + return mat[0, 0] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cpdef REAL_t getEntry(self, INDEX_t I, INDEX_t J): + cdef: + INDEX_t cellNo1, cellNo2 + INDEX_t[:,::1] surface_cells + MASK_t mask + indexSet cellsUnion = arrayIndexSet() + indexSet cellsInter = arrayIndexSet() + indexSetIterator it1 = arrayIndexSetIterator() + indexSetIterator it2 = arrayIndexSetIterator() + dm = self.dm + REAL_t entry = 0. 
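+ # Strategy (summary): the single entry (I, J) is recomputed from
+ # scratch by looping over all cell pairs in the union of the supports
+ # of phi_I and phi_J, using entry masks to extract only the (I, J)
+ # component of each local element matrix, plus the exterior correction
+ # terms below. getDiagonal() takes this entry-wise path when the
+ # kernel is constant.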
+ cellsUnion.fromSet(self.d2c[I] | self.d2c[J]) + cellsInter.fromSet(self.d2c[I] & self.d2c[J]) + + assert isinstance(self.kernel.horizon, constant) and self.kernel.horizon.value == np.inf + + # (supp phi_I \cup supp phi_J)^2 + it1.setIndexSet(cellsUnion) + it2.setIndexSet(cellsUnion) + while it1.step(): + cellNo1 = it1.i + self.local_matrix.setCell1(cellNo1) + it2.reset() + while it2.step(): + cellNo2 = it2.i + if cellNo2 < cellNo1: + continue + mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) + if mask == 0: + continue + self.local_matrix.setCell2(cellNo2) + panel = self.local_matrix.getPanelType() + if cellNo1 == cellNo2: + self.local_matrix.eval(self.contrib, panel, mask) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + else: + if self.local_matrix.symmetricCells: + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + # multiply by 2 to account for the contribution from cells (cellNo2, cellNo1) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked(dm, self.contrib, 2., mask) + else: + raise NotImplementedError() + else: + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + # multiply by 2 to account for the contribution from cells (cellNo2, cellNo1) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + self.local_matrix.swapCells() + mask = getElemElemSymMask(dm, cellNo2, cellNo1, I, J) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + self.local_matrix.eval(self.contrib, panel, mask) + if self.local_matrix.symmetricLocalMatrix: + entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + # (supp phi_I \cup supp phi_J) x (supp phi_I \cup supp phi_J)^C + if not self.kernel.variable: + if self.zeroExterior: + # zeroExterior of (supp phi_I \cup supp phi_J) + if self.mesh.dim == 1: + surface_cells = boundaryVertices(self.mesh.cells, cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(self.mesh.cells, cellsUnion) + else: + raise NotImplementedError() + + self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) + + it1.setIndexSet(cellsInter) + while it1.step(): + cellNo1 = it1.i + self.local_matrix_zeroExterior.setCell1(cellNo1) + mask = getElemSymMask(dm, cellNo1, I, J) + for cellNo2 in range(surface_cells.shape[0]): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel) + entry += extractElemSymMasked(dm, self.contribZeroExterior, 1., mask) + else: + # (supp phi_I \cup supp phi_J) x (Omega \ (supp phi_I \cup supp phi_J)) + # TODO: This can be done using surface integrals instead + it1.setIndexSet(cellsUnion) + while it1.step(): + cellNo1 = it1.i + self.local_matrix.setCell1(cellNo1) + + for cellNo2 in set(range(self.mesh.num_cells))-cellsUnion.toSet(): + self.local_matrix.setCell2(cellNo2) + mask = getElemElemSymMask(dm, cellNo1, cellNo2, I, J) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if self.local_matrix.symmetricLocalMatrix: + # multiply by 2 to account for the 2 symmetric contributions + self.local_matrix.eval(self.contrib, panel) + entry += extractElemElemSymMasked(dm, self.contrib, 1., mask) + else: + raise NotImplementedError() + + if self.zeroExterior: + # (supp 
phi_I \cup supp phi_J) x Omega^C
+ surface = self.mesh.get_surface_mesh()
+ self.local_matrix_zeroExterior.setMesh2(surface)
+
+ it1.setIndexSet(cellsInter)
+ while it1.step():
+ cellNo1 = it1.i
+ self.local_matrix_zeroExterior.setCell1(cellNo1)
+ mask = getElemSymMask(dm, cellNo1, I, J)
+ for cellNo2 in range(surface.num_cells):
+ self.local_matrix_zeroExterior.setCell2(cellNo2)
+ panel = self.local_matrix_zeroExterior.getPanelType()
+ self.local_matrix_zeroExterior.eval(self.contribZeroExterior, panel)
+ entry += extractElemSymMasked(dm, self.contribZeroExterior, 1., mask)
+ return entry
+
+ @cython.initializedcheck(False)
+ @cython.boundscheck(False)
+ @cython.wraparound(False)
+ cpdef LinearOperator assembleClusters(self, list Pnear, bint forceUnsymmetric=False, LinearOperator Anear=None, dict jumps={}, BOOL_t forceSymmetric=False, indexSet myDofs=None, str prefix=''):
+ cdef:
+ INDEX_t cellNo1, cellNo2, cellNo3
+ REAL_t fac
+ REAL_t[::1] contrib = self.contrib, contribZeroExterior = self.contribZeroExterior
+ meshBase surface
+ INDEX_t[:, ::1] cells = self.mesh.cells, surface_cells, fake_cells
+ indexSet cellsInter
+ indexSet clusterDofs1, clusterDofs2
+ FilteredAssemblyOperator Anear_filtered = None
+ INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX)
+ nearFieldClusterPair cluster
+ panelType panel
+ tupleDictMASK masks = None
+ ENCODE_t hv, hv2
+ MASK_t mask = 0
+ # INDEX_t vertex1, vertex2
+ bint useSymmetricMatrix
+ bint useSymmetricCells
+ INDEX_t vertexNo, i
+ INDEX_t[::1] edge = uninitialized((2), dtype=INDEX)
+ REAL_t evalShift = 1e-9
+ local_matrix_t mass
+ indexSetIterator it = arrayIndexSetIterator()
+ INDEX_t startCluster
+
+ if Anear is None:
+ useSymmetricMatrix = self.local_matrix.symmetricLocalMatrix and self.local_matrix.symmetricCells and not forceUnsymmetric
+ with self.PLogger.Timer(prefix+'build near field sparsity pattern'):
+ Anear = getSparseNearField(self.dm, Pnear, symmetric=useSymmetricMatrix)
+ LOGGER.info('Anear: {}'.format(Anear))
+ else:
+ useSymmetricMatrix = isinstance(Anear, (SSS_LinearOperator, diagonalOperator)) or forceSymmetric
+
+ Anear_filtered = FilteredAssemblyOperator(Anear)
+
+ useSymmetricCells = self.local_matrix.symmetricCells
+
+ iM = IndexManager(self.dm, Anear)
+
+ use_masks = self.params.get('use_masks', True)
+
+ with self.PLogger.Timer(prefix+'interior'):
+ # This corresponds to
+ # C(d,s) \int_D \int_D (u(x)-u(y)) (v(x)-v(y)) /|x-y|^{d+2s}
+ # where
+ # D = (supp u) \cup (supp v).
+ # We only update unknowns that are in the cluster pair.
+
+ if not use_masks:
+ # This loop does the correct thing, but we are wasting a lot of
+ # element x element evaluations.
+ for cluster in Pnear:
+ cellsUnion = cluster.cellsUnion
+
+ clusterDofs1 = cluster.n1.get_dofs()
+ clusterDofs2 = cluster.n2.get_dofs()
+ Anear_filtered.setFilter(clusterDofs1, clusterDofs2)
+ iM = IndexManager(self.dm, Anear_filtered)
+
+ for cellNo1 in cellsUnion:
+ self.local_matrix.setCell1(cellNo1)
+ for cellNo2 in cellsUnion:
+ self.local_matrix.setCell2(cellNo2)
+ panel = self.local_matrix.getPanelType()
+ if panel != IGNORED:
+ if useSymmetricCells and (cellNo1 != cellNo2):
+ fac = 2.
+ else:
+ fac = 1.
+ if iM.getDoFsElemElem(cellNo1, cellNo2):
+ continue
+ self.local_matrix.eval(contrib, panel)
+ if useSymmetricCells:
+ iM.addToMatrixElemElemSym(contrib, fac)
+ else:
+ raise NotImplementedError()
+ else:
+ # Pre-record all element x element contributions.
+ # This way, we only assemble over each element x element pair once.
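+ # The cell pair -> mask map is built in batches (tracked by
+ # startCluster) so the tupleDictMASK stays bounded in memory. MASK_t is
+ # a 64-bit mask type, hence the assert below: e.g. for P1 elements in
+ # 2D, 2*3 local DoFs give 21 packed symmetric entries, well within 64.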
+ # We load balance the cells and only get the list for the local rank. + assert contrib.shape[0] <= 64, "Mask type is not large enough for {} entries".format(contrib.shape[0]) + startCluster = 0 + while startCluster < len(Pnear): + with self.PLogger.Timer(prefix+'interior - build masks'): + masks = iM.buildMasksForClusters(Pnear, self.local_matrix.symmetricCells, &startCluster) + + if (masks.getSizeInBytes() >> 20) > 20: + LOGGER.info('element x element pairs {}, {} MB'.format(masks.nnz, masks.getSizeInBytes() >> 20)) + # Compute all element x element contributions + with self.PLogger.Timer(prefix+'interior - compute'): + masks.startIter() + while masks.next(cellPair, &mask): + # decode_edge(hv, cellPair) + cellNo1 = cellPair[0] + cellNo2 = cellPair[1] + self.local_matrix.setCell1(cellNo1) + self.local_matrix.setCell2(cellNo2) + panel = self.local_matrix.getPanelType() + if panel != IGNORED: + if useSymmetricCells and (cellNo1 != cellNo2): + fac = 2. + else: + fac = 1. + if iM.getDoFsElemElem(cellNo1, cellNo2): + continue + self.local_matrix.eval(contrib, panel, mask) + if useSymmetricCells: + iM.addToMatrixElemElemSymMasked(contrib, fac, mask) + else: + raise NotImplementedError() + masks = None + + if not self.kernel.variable: + if not self.kernel.complement: + with self.PLogger.Timer(prefix+'cluster zeroExterior'): + # This corresponds to + # C(d,s)/(2s) \int_D u(x) v(x) \int_E n.(x-y)/|x-y|^{d+2s} + # where + # D = (supp u) \cap (supp v) \subset E, + # E = \partial((supp u) \cap (supp v)). + # We only update unknows that are in the cluster pair. + + iM = IndexManager(self.dm, Anear_filtered) + + for cluster in Pnear: + + cellsInter = cluster.cellsInter + if len(cellsInter) == 0: + continue + + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + + # surface of the union of clusters n1 and n2 + if self.mesh.dim == 1: + surface_cells = boundaryVertices(cells, cluster.cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(cells, cluster.cellsUnion) + else: + raise NotImplementedError() + + Anear_filtered.setFilter(clusterDofs1, clusterDofs2) + + self.local_matrix_zeroExterior.setVerticesCells2(self.mesh.vertices, surface_cells) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo1 = it.i + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + for cellNo2 in range(surface_cells.shape[0]): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) + else: + raise NotImplementedError() + if not self.zeroExterior and not self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. 
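+ # The per-cluster 'cluster zeroExterior' loop above introduced boundary
+ # terms that the regional fractional Laplacian (zeroExterior == False,
+ # infinite horizon) should not carry; they are removed here by
+ # assembling the same surface integrals over the global surface mesh
+ # with factor -1.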
+ surface = self.mesh.get_surface_mesh() + iM = IndexManager(self.dm, Anear, myDofs=myDofs) + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -1.) + else: + raise NotImplementedError() + elif not self.zeroExterior and self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. + + if self.mesh.dim == 1: + vol = 2 + elif self.mesh.dim == 2: + vol = 2*np.pi * self.kernel.horizonValue + else: + raise NotImplementedError() + coeff = constant(-vol*self.kernel.scalingValue*pow(self.kernel.horizonValue, 1-self.mesh.dim-2*self.kernel.sValue)/self.kernel.sValue) + qr = simplexXiaoGimbutas(2, self.mesh.dim) + if self.mesh.dim == 1: + mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) + elif self.mesh.dim == 2: + mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + + if myDofs is not None: + Anear_filtered2 = LeftFilteredAssemblyOperator(Anear) + Anear_filtered2.setFilter(myDofs) + assembleMatrix(self.mesh, self.dm, mass, A=Anear_filtered2) + else: + assembleMatrix(self.mesh, self.dm, mass, A=Anear) + + elif self.zeroExterior and not self.kernel.complement: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Add the zeroExterior contribution for Omega x Omega^C. + surface = self.mesh.get_surface_mesh() + iM = IndexManager(self.dm, Anear, myDofs=myDofs) + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel) + iM.addToMatrixElemSym(contribZeroExterior, 1.) + + else: + # This corresponds to + # \int_D \int_E u(x) v(x) C(d, s) / |x-y|^{d+2s} + # where + # D = (supp u) \cap (supp v) \subset E, + # E = Omega \ ((supp u) \cup (supp v)). + # We only update unknows that are in the cluster pair. 
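+ # For variable kernels this exterior term is handled cluster by
+ # cluster: first a surface integral over the boundary of the cluster's
+ # cellsUnion (with the quadrature center nudged off the surface by
+ # evalShift to select the interior side), then corrections across the
+ # interfaces recorded in 'jumps', where the fractional order is
+ # discontinuous, evaluated from both sides with opposite signs.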
+ with self.PLogger.Timer(prefix+'cluster exterior'): + iM = IndexManager(self.dm, Anear_filtered) + + fake_cells = np.empty((1, self.mesh.dim), dtype=INDEX) + for cluster in Pnear: + + cellsInter = cluster.cellsInter + if len(cellsInter) == 0: + continue + + clusterDofs1 = cluster.n1.get_dofs() + clusterDofs2 = cluster.n2.get_dofs() + + Anear_filtered.setFilter(clusterDofs1, clusterDofs2) + + if not self.kernel.complement: + + # surface of the union of clusters n1 and n2 + if self.mesh.dim == 1: + surface_cells = boundaryVertices(cells, cluster.cellsUnion) + elif self.mesh.dim == 2: + surface_cells = boundaryEdges(cells, cluster.cellsUnion) + else: + raise NotImplementedError() + self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, surface_cells) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo1 = it.i + self.local_matrix_surface.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + for cellNo2 in range(surface_cells.shape[0]): + self.local_matrix_surface.setCell2(cellNo2) + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + self.local_matrix_surface.center2[0] += evalShift + else: + self.local_matrix_surface.center2[0] -= evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] -= evalShift*(self.local_matrix_surface.simplex2[1, 0]-self.local_matrix_surface.simplex2[0, 0]) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + self.local_matrix_surface.eval(contribZeroExterior, panel) + if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, 1.) + else: + raise NotImplementedError() + + for hv in jumps: + decode_edge(hv, cellPair) + if not (cluster.cellsUnion.inSet(cellPair[0]) or + cluster.cellsUnion.inSet(cellPair[1])): + if self.mesh.dim == 1: + fake_cells[0, 0] = jumps[hv] + else: + hv2 = jumps[hv] + decode_edge(hv2, edge) + for vertexNo in range(self.mesh.dim): + fake_cells[0, vertexNo] = edge[vertexNo] + self.local_matrix_surface.setVerticesCells2(self.mesh.vertices, fake_cells) + self.local_matrix_surface.setCell2(0) + if self.mesh.dim == 1: + self.local_matrix_surface.center2[0] += evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] += evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] += evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) + + it.setIndexSet(cellsInter) + while it.step(): + cellNo3 = it.i + self.local_matrix_surface.setCell1(cellNo3) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + fac = 1. + else: + fac = -1. + else: + fac = 1. 
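+ # First pass: the jump interface is approached from the +evalShift
+ # side; the second pass below shifts center2 back by 2*evalShift and
+ # flips the sign of fac, so that the two passes together capture the
+ # difference of the kernel across the interface.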
+ self.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, fac) + else: + raise NotImplementedError() + + if self.mesh.dim == 1: + self.local_matrix_surface.center2[0] -= 2.*evalShift + elif self.mesh.dim == 2: + self.local_matrix_surface.center2[0] -= 2.*evalShift*(self.local_matrix_surface.simplex2[1, 1]-self.local_matrix_surface.simplex2[0, 1]) + self.local_matrix_surface.center2[1] -= 2.*evalShift*(self.local_matrix_surface.simplex2[0, 0]-self.local_matrix_surface.simplex2[1, 0]) + + it.reset() + while it.step(): + cellNo3 = it.i + self.local_matrix_surface.setCell1(cellNo3) + panel = self.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if self.local_matrix_surface.center1[0] < self.local_matrix_surface.center2[0]: + fac = -1. + else: + fac = 1. + else: + fac = -1. + self.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + if self.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, fac) + else: + raise NotImplementedError() + if not self.zeroExterior and not self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the zeroExterior contribution for Omega x Omega^C that was added in the previous loop. + # This is for the regional fractional Laplacian. + surface = self.mesh.get_surface_mesh() + iM = IndexManager(self.dm, Anear, myDofs=myDofs) + + self.local_matrix_zeroExterior.setMesh2(surface) + + for cellNo1 in range(self.mesh.num_cells): + self.local_matrix_zeroExterior.setCell1(cellNo1) + iM.getDoFsElem(cellNo1) + for cellNo2 in range(surface.num_cells): + self.local_matrix_zeroExterior.setCell2(cellNo2) + if self.mesh.dim == 1: + if self.local_matrix_zeroExterior.center1[0] < self.local_matrix_zeroExterior.center2[0]: + self.local_matrix_zeroExterior.center2[0] += evalShift + else: + self.local_matrix_zeroExterior.center2[0] -= evalShift + elif self.mesh.dim == 2: + self.local_matrix_zeroExterior.center2[0] += evalShift*(self.local_matrix_zeroExterior.simplex2[1, 1]-self.local_matrix_zeroExterior.simplex2[0, 1]) + self.local_matrix_zeroExterior.center2[1] -= evalShift*(self.local_matrix_zeroExterior.simplex2[1, 0]-self.local_matrix_zeroExterior.simplex2[0, 0]) + panel = self.local_matrix_zeroExterior.getPanelType() + self.local_matrix_zeroExterior.eval(contribZeroExterior, panel) + if self.local_matrix_zeroExterior.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -1.) 
+ else: + raise NotImplementedError() + elif not self.zeroExterior and self.kernel.finiteHorizon: + with self.PLogger.Timer(prefix+'zeroExterior'): + # Subtract the contribution for Omega x (\partial B_\delta(x)) + assert isinstance(self.kernel.horizon, constant) + self.local_matrix_zeroExterior.center2 = uninitialized((self.mesh.dim), dtype=REAL) + coeff = horizonSurfaceIntegral(self.local_matrix_zeroExterior, self.kernel.horizon.value) + qr = simplexXiaoGimbutas(2, self.mesh.dim) + if self.mesh.dim == 1: + mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) + elif self.mesh.dim == 2: + mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + assembleMatrix(self.mesh, self.dm, mass, A=Anear) + + return Anear + + def reduceNearOp(self, LinearOperator Anear, indexSet myDofs): + cdef: + INDEX_t k = -1, kk, jj + INDEX_t[::1] A_indptr = Anear.indptr, A_indices = Anear.indices + REAL_t[::1] A_data = Anear.data, A_diagonal = None + INDEX_t[::1] indptr, indices + REAL_t[::1] data, diagonal = None + LinearOperator Aother + INDEX_t I, nnz + indexSetIterator it = myDofs.getIter() + # drop entries that are not in rows of myRoot.dofs + indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) + while it.step(): + k = it.i + indptr[k+1] = A_indptr[k+1]-A_indptr[k] + for k in range(self.dm.num_dofs): + indptr[k+1] += indptr[k] + indices = np.empty((indptr[self.dm.num_dofs]), dtype=INDEX) + data = np.empty((indptr[self.dm.num_dofs]), dtype=REAL) + it.reset() + while it.step(): + k = it.i + kk = indptr[k] + for jj in range(A_indptr[k], A_indptr[k+1]): + indices[kk] = A_indices[jj] + data[kk] = A_data[jj] + kk += 1 + if isinstance(Anear, SSS_LinearOperator): + A_diagonal = Anear.diagonal + diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) + it.reset() + while it.step(): + k = it.i + diagonal[k] = A_diagonal[k] + Anear = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + Anear = CSR_LinearOperator(indices, indptr, data) + + A_indptr = Anear.indptr + + # sum distribute matrices by stacking rows + indptr = np.zeros((self.dm.num_dofs+1), dtype=INDEX) + for k in range(self.dm.num_dofs): + indptr[k+1] = A_indptr[k+1]-A_indptr[k] + if self.comm.rank == 0: + self.comm.Reduce(MPI.IN_PLACE, indptr, root=0) + else: + self.comm.Reduce(indptr, indptr, root=0) + + if self.comm.rank == 0: + for k in range(self.dm.num_dofs): + indptr[k+1] += indptr[k] + nnz = indptr[self.dm.num_dofs] + + indices = np.empty((nnz), dtype=INDEX) + data = np.empty((nnz), dtype=REAL) + if isinstance(Anear, SSS_LinearOperator): + diagonal = np.zeros((self.dm.num_dofs), dtype=REAL) + + for p in range(self.comm.size): + if p == 0: + Aother = Anear + else: + Aother = self.comm.recv(source=p) + + A_indptr = Aother.indptr + A_indices = Aother.indices + A_data = Aother.data + + for I in range(self.dm.num_dofs): + kk = indptr[I] + for jj in range(A_indptr[I], A_indptr[I+1]): + indices[kk] = A_indices[jj] + data[kk] = A_data[jj] + kk += 1 + + if isinstance(Aother, SSS_LinearOperator): + A_diagonal = Aother.diagonal + for I in range(self.dm.num_dofs): + diagonal[I] += A_diagonal[I] + + if isinstance(Anear, SSS_LinearOperator): + Anear = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + Anear = CSR_LinearOperator(indices, indptr, data) + else: + self.comm.send(Anear, dest=0) + self.comm.Barrier() + + if self.comm.rank != 0: + Anear = None + # Anear = self.comm.bcast(Anear, root=0) + return Anear + + def getDiagonal(self): + cdef: + diagonalOperator D + INDEX_t I + INDEX_t start, end + D = 
diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) + if self.comm: + start = np.ceil(self.dm.num_dofs*self.comm.rank/self.comm.size) + end = np.ceil(self.dm.num_dofs*(self.comm.rank+1)/self.comm.size) + else: + start = 0 + end = self.dm.num_dofs + if self.kernel.variable: + for I in range(start, end): + D.setEntry(I, I, self.getEntryCluster(I, I)) + else: + for I in range(start, end): + D.setEntry(I, I, self.getEntry(I, I)) + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, D.data) + return D + + def getDiagonalCluster(self): + cdef: + diagonalOperator D + tree_node n + nearFieldClusterPair c + INDEX_t I + list clusters = [] + REAL_t[:, :, ::1] fake_boxes = uninitialized((0, 0, 0), dtype=REAL) + list d2c = self.d2c + D = diagonalOperator(np.zeros((self.dm.num_dofs), dtype=REAL)) + for I in range(self.dm.num_dofs): + n = tree_node(None, set([I]), fake_boxes) + n._cells = d2c[I] + c = nearFieldClusterPair(n, n) + clusters.append(c) + D = self.assembleClusters(clusters, Anear=D) + if self.comm: + self.comm.Allreduce(MPI.IN_PLACE, D.data) + return D + + def getKernelBlocksAndJumps(self): + cdef: + meshBase mesh = self.mesh + DoFMap DoFMap = self.dm + fractionalOrderBase s = self.kernel.s + REAL_t[::1] orders = None + REAL_t[::1] dofOrders + REAL_t cellOrder + dict blocks + INDEX_t[::1] cellPair = uninitialized((2), dtype=INDEX) + INDEX_t[::1] edge = uninitialized((2), dtype=INDEX) + INDEX_t cellNo, dofNo, dof, cellNo1, cellNo2, vertexNo1, vertexNo2, vertex1, vertex2, i + ENCODE_t hv + orders = getFractionalOrdersDiagonal(s, mesh) + dofOrders = -np.inf*np.ones((DoFMap.num_dofs), dtype=REAL) + for cellNo in range(mesh.num_cells): + cellOrder = orders[cellNo] + for dofNo in range(DoFMap.dofs_per_element): + dof = DoFMap.cell2dof(cellNo, dofNo) + if dof >= 0: + if dofOrders[dof] == -np.inf: + dofOrders[dof] = cellOrder + elif dofOrders[dof] < np.inf: + if dofOrders[dof] != cellOrder: + dofOrders[dof] = np.inf + blocks = {} + for dof in range(DoFMap.num_dofs): + try: + blocks[dofOrders[dof]].add(dof) + except KeyError: + blocks[dofOrders[dof]] = set([dof]) + LOGGER.debug('Block sizes: '+str({key: len(blocks[key]) for key in blocks})) + + jumps = {} + cellConnectivity = mesh.getCellConnectivity(mesh.dim) + for cellNo1 in range(mesh.num_cells): + for cellNo2 in cellConnectivity[cellNo1]: + if orders[cellNo1] != orders[cellNo2]: + sortEdge(cellNo1, cellNo2, cellPair) + hv = encode_edge(cellPair) + if mesh.dim == 1: + for vertexNo1 in range(mesh.dim+1): + vertex1 = mesh.cells[cellNo1, vertexNo1] + for vertexNo2 in range(mesh.dim+1): + vertex2 = mesh.cells[cellNo2, vertexNo2] + if vertex1 == vertex2: + jumps[hv] = vertex1 + break + else: + i = 0 + for vertexNo1 in range(mesh.dim+1): + vertex1 = mesh.cells[cellNo1, vertexNo1] + for vertexNo2 in range(mesh.dim+1): + vertex2 = mesh.cells[cellNo2, vertexNo2] + if vertex1 == vertex2: + edge[i] = vertex1 + i += 1 + break + hv2 = encode_edge(edge) + jumps[hv] = hv2 + return blocks, jumps + + def getH2(self, BOOL_t returnNearField=False, returnTree=False): + cdef: + meshBase mesh = self.mesh + DoFMap DoFMap = self.dm + tree_node root = None, n, myRoot=None + fractionalOrderBase s = self.kernel.s + REAL_t[:, :, ::1] boxes = None + list cells = [] + REAL_t[:, ::1] centers = None + dict Pfar = {} + list Pnear = [] + INDEX_t i, j, dof, num_cluster_dofs + REAL_t[:, ::1] box + LinearOperator h2 = None, Anear = None + dict blocks = {}, jumps = {} + indexSet dofs, clusterDofs, subDofs, blockDofs, myDofs = None + indexSetIterator it + REAL_t key + INDEX_t 
interpolation_order, maxLevels, minClusterSize, minMixedClusterSize, minFarFieldBlockSize + BOOL_t forceUnsymmetric, doDistributedAssembly = False + LinearOperator auxGraph + INDEX_t[::1] part = None + set myCells + assert isinstance(self.kernel, FractionalKernel), 'H2 is only implemented for fractional kernels' + + target_order = self.local_matrix.target_order + eta = self.params.get('eta', 3.) + + iO = self.params.get('interpolation_order', None) + if iO is None: + loggamma = abs(np.log(0.25)) + interpolation_order = max(np.ceil((2*target_order+max(mesh.dim+2*s.max, 2))*abs(np.log(mesh.hmin/mesh.diam))/loggamma/3.), 2) + else: + interpolation_order = iO + mL = self.params.get('maxLevels', 10) + if mL is None: + maxLevels = max(int(np.around(np.log2(DoFMap.num_dofs)/mesh.dim-np.log2(interpolation_order))), 0) + else: + maxLevels = mL + minClusterSize = interpolation_order**mesh.dim + minMixedClusterSize = minClusterSize + if self.kernel.finiteHorizon: + minMixedClusterSize = max(min(self.kernel.horizon.value//(2*mesh.h)-1, minClusterSize), 1) + minFarFieldBlockSize = interpolation_order**(2*mesh.dim) + forceUnsymmetric = self.params.get('forceUnsymmetric', False) + doDistributedAssembly = self.comm is not None and self.comm.size > 1 and DoFMap.num_dofs > self.comm.size + LOGGER.info('interpolation_order: {}, maxLevels: {}, minClusterSize: {}, minMixedClusterSize: {}, minFarFieldBlockSize: {}, distributedAssembly: {}'.format(interpolation_order, maxLevels, minClusterSize, minMixedClusterSize, minFarFieldBlockSize, doDistributedAssembly)) + + with self.PLogger.Timer('prepare tree'): + boxes, cells = getDoFBoxesAndCells(mesh, DoFMap, self.comm) + centers = uninitialized((DoFMap.num_dofs, mesh.dim), dtype=REAL) + for i in range(DoFMap.num_dofs): + for j in range(mesh.dim): + centers[i, j] = 0.5*(boxes[i, j, 0]+boxes[i, j, 1]) + + dofs = arrayIndexSet(np.arange(DoFMap.num_dofs, dtype=INDEX), sorted=True) + root = tree_node(None, dofs, boxes) + + if doDistributedAssembly: + from PyNucleus_fem.meshPartitioning import metisDofPartitioner, PartitionerException + # from PyNucleus_fem.meshPartitioning import regularVertexPartitioner, PartitionerException + + # coords = DoFMap.getDoFCoordinates() + # rVP = regularVertexPartitioner(coords) + dP = metisDofPartitioner(dm=DoFMap) + try: + # part, _ = rVP.partitionVertices(self.comm.size) + part, _ = dP.partitionDofs(self.comm.size) + except PartitionerException: + doDistributedAssembly = False + LOGGER.warning('Falling back to serial assembly') + del dP + + # auxGraph = DoFMap.buildSparsityPattern(mesh.cells) + # dP = metisDofPartitioner(auxGraph) + # part, numPart = dP.partitionDofs(self.comm.size) + # doDistributedAssembly = numPart == self.comm.size + if doDistributedAssembly: + num_dofs = 0 + for p in range(self.comm.size): + subDofs = arrayIndexSet(np.where(np.array(part, copy=False) == p)[0].astype(INDEX), sorted=True) + num_dofs += subDofs.getNumEntries() + root.children.append(tree_node(root, subDofs, boxes, canBeAssembled=not self.kernel.variable)) + assert DoFMap.num_dofs == num_dofs + root._dofs = None + + myRoot = root.children[self.comm.rank] + myDofs = myRoot.get_dofs() + else: + myRoot = root + + if self.kernel.variable: + blocks, jumps = self.getKernelBlocksAndJumps() + for n in root.leaves(): + clusterDofs = n.get_dofs() + num_cluster_dofs = clusterDofs.getNumEntries() + num_dofs = 0 + for key in sorted(blocks): + blockDofs = arrayIndexSet() + blockDofs.fromSet(blocks[key]) + subDofs = blockDofs.inter(clusterDofs) + if 
subDofs.getNumEntries() > 0: + num_dofs += subDofs.getNumEntries() + n.children.append(tree_node(n, subDofs, boxes, mixed_node=key == np.inf)) + assert num_dofs == num_cluster_dofs, (num_dofs, num_cluster_dofs) + n._dofs = None + LOGGER.info('Jumps: {}, Block sizes: {}, Leaf nodes: {}'.format(len(jumps), str({key: len(blocks[key]) for key in blocks}), len(list(root.leaves())))) + + for n in root.leaves(): + # if not n.mixed_node: + n.refine(boxes, centers, maxLevels=maxLevels, minSize=minClusterSize, minMixedSize=minMixedClusterSize) + + # recursively set tree node ids + root.set_id() + + # enter cells in leaf nodes + it = arrayIndexSetIterator() + for n in root.leaves(): + myCells = set() + it.setIndexSet(n.dofs) + while it.step(): + dof = it.i + myCells |= cells[dof] + n._cells = arrayIndexSet() + n._cells.fromSet(myCells) + del cells + + # update boxes (if we stopped at maxLevels, before each DoF has + # only its support as box) + for n in root.leaves(): + box = n.box + it.setIndexSet(n.dofs) + while it.step(): + dof = it.i + for i in range(mesh.dim): + for j in range(2): + boxes[dof, i, j] = box[i, j] + + if maxLevels <= 0: + maxLevels = root.numLevels+maxLevels + + with self.PLogger.Timer('admissible clusters'): + if doDistributedAssembly: + for n in root.children: + getAdmissibleClusters(self.local_matrix.kernel, myRoot, n, + eta=eta, farFieldInteractionSize=minFarFieldBlockSize, + Pfar=Pfar, Pnear=Pnear) + symmetrizeNearFieldClusters(Pnear) + + # collect far field on rank 0 + farField = [] + for lvl in Pfar: + for cP in Pfar[lvl]: + # "lvl+1", since the ranks are children of the global root + farField.append((lvl+1, cP.n1.id, cP.n2.id)) + farField = self.comm.reduce(farField, root=0) + if self.comm.rank == 0: + farField = set(farField) + Pfar = {} + for lvl, id1, id2 in farField: + cP = farFieldClusterPair(root.get_node(id1), + root.get_node(id2)) + try: + Pfar[lvl].append(cP) + except KeyError: + Pfar[lvl] = [cP] + else: + getAdmissibleClusters(self.local_matrix.kernel, root, root, + eta=eta, farFieldInteractionSize=minFarFieldBlockSize, + Pfar=Pfar, Pnear=Pnear) + # trimTree(root, Pnear, Pfar) + + if len(Pfar) > 0: + # get near field matrix + with self.PLogger.Timer('near field'): + Anear = self.assembleClusters(Pnear, jumps=jumps, myDofs=myDofs, forceUnsymmetric=forceUnsymmetric) + if doDistributedAssembly: + with self.PLogger.Timer('reduceNearOp'): + Anear = self.reduceNearOp(Anear, myDofs) + + with self.PLogger.Timer('leaf values'): + # get leaf values + if s.max < 1.: + root.enterLeaveValues(mesh, DoFMap, interpolation_order, boxes, self.comm) + elif s.min > 1: + root.enterLeaveValuesGrad(mesh, DoFMap, interpolation_order, boxes, self.comm) + else: + raise NotImplementedError() + + if self.comm is None or self.comm.rank == 0: + with self.PLogger.Timer('far field'): + # get kernel interpolations + assembleFarFieldInteractions(self.local_matrix.kernel, Pfar, interpolation_order, DoFMap) + + with self.PLogger.Timer('transfer matrices'): + # get transfer matrices + root.prepareTransferOperators(interpolation_order) + + h2 = H2Matrix(root, Pfar, Anear) + else: + h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) + + # A = h2.toarray() + # from scipy.io import mmwrite + # if self.comm is None or self.comm.rank == 0: + # mmwrite('test.{}.{}'.format(A.shape[0], self.comm.size if self.comm is not None else 1), A) + + elif len(Pnear) == 0: + h2 = nullOperator(self.dm.num_dofs, self.dm.num_dofs) + else: + with self.PLogger.Timer('dense operator'): + h2 = self.getDense() + 
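+ # Illustrative usage sketch, as comments only; `mesh`, `dm` and `kernel`
+ # stand for objects set up as elsewhere in this file, and the final call
+ # assumes the multiplication interface exposed by LinearOperator:
+ #
+ #   builder = nonlocalBuilder(mesh, dm, kernel)
+ #   A = builder.getH2()
+ #   x = np.ones((dm.num_dofs), dtype=REAL)
+ #   y = A*x  # hierarchical matvec, combining near-field and far-field blocks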
LOGGER.info('{}'.format(h2)) + if returnNearField: + if returnTree: + return h2, Pnear, root + else: + return h2, Pnear + else: + if returnTree: + return h2, root + else: + return h2 + + def getH2FiniteHorizon(self, LinearOperator Ainf=None): + A = horizonCorrected(self.mesh, self.dm, self.kernel, self.comm, Ainf, logging=isinstance(self.PLogger, (PLogger, LoggingPLogger))) + return A + + +cdef class horizonSurfaceIntegral(function): + cdef: + nonlocalLaplacian local_matrix + REAL_t horizon + REAL_t[:, ::1] quadNodes + REAL_t[::1] quadWeights + REAL_t inc + + def __init__(self, nonlocalLaplacian local_matrix, REAL_t horizon): + cdef: + INDEX_t k, numQuadNodes + self.local_matrix = local_matrix + self.horizon = horizon + if self.local_matrix.dim == 1: + self.quadNodes = uninitialized((2, 1), dtype=REAL) + self.quadWeights = uninitialized((2), dtype=REAL) + self.quadNodes[0, 0] = self.horizon + self.quadNodes[1, 0] = -self.horizon + self.quadWeights[0] = 1. + self.quadWeights[1] = 1. + elif self.local_matrix.dim == 2: + numQuadNodes = 10 + self.quadNodes = uninitialized((numQuadNodes, 2), dtype=REAL) + self.quadWeights = uninitialized((numQuadNodes), dtype=REAL) + inc = 2*pi/numQuadNodes + for k in range(numQuadNodes): + self.quadNodes[k, 0] = self.horizon*cos(inc*k) + self.quadNodes[k, 1] = self.horizon*sin(inc*k) + self.quadWeights[k] = inc*self.horizon + else: + raise NotImplementedError() + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef inline REAL_t eval(self, REAL_t[::1] x): + cdef: + REAL_t fac = 0. + INDEX_t k, j + REAL_t s + INDEX_t dim = self.local_matrix.dim + for j in range(self.local_matrix.dim): + self.local_matrix.center1[j] = x[j] + + for k in range(self.quadNodes.shape[0]): + for j in range(self.local_matrix.dim): + self.local_matrix.center2[j] = x[j]+self.quadNodes[k, j] + self.local_matrix.kernel.evalParams(self.local_matrix.center1, + self.local_matrix.center2) + s = self.local_matrix.kernel.sValue + fac -= self.local_matrix.kernel.scalingValue*pow(self.horizon, 1-dim-2*s)/s * self.quadWeights[k] + return fac + + + +cdef class horizonCorrected(TimeStepperLinearOperator): + cdef: + meshBase mesh + DoFMap dm + MPI.Comm comm + public LinearOperator Ainf + public LinearOperator mass + public Kernel kernel + BOOL_t logging + BOOL_t initialized + + def __init__(self, meshBase mesh, DoFMap dm, FractionalKernel kernel, MPI.Comm comm=None, LinearOperator Ainf=None, BOOL_t logging=False): + self.mesh = mesh + self.dm = dm + self.kernel = kernel + self.comm = comm + self.logging = logging + assert isinstance(kernel.horizon, constant) + + if Ainf is None: + scaling = constantTwoPoint(0.5) + infiniteKernel = kernel.getModifiedKernel(horizon=constant(np.inf), scaling=scaling) + infBuilder = nonlocalBuilder(self.mesh, self.dm, infiniteKernel, zeroExterior=True, comm=self.comm, logging=self.logging) + self.Ainf = infBuilder.getH2() + else: + self.Ainf = Ainf + self.mass = self.dm.assembleMass(sss_format=True) + TimeStepperLinearOperator.__init__(self, self.Ainf, self.mass, 1.0) + self.initialized = False + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + def setKernel(self, Kernel kernel): + cdef: + REAL_t s, horizon, C, vol + dict jumps + ENCODE_t hv, hv2 + INDEX_t[::1] cellPair, edge + REAL_t evalShift, fac + INDEX_t vertexNo, cellNo3 + panelType panel + INDEX_t[:, ::1] fake_cells + + assert isinstance(kernel.horizon, constant) + assert not isinstance(kernel.scaling, variableFractionalLaplacianScaling) + horizon = 
kernel.horizon.value + assert horizon < np.inf + C = kernel.scaling.value + + if isinstance(kernel.s, constFractionalOrder): + assert kernel.s.value == self.kernel.s.value + + if (self.initialized and + (self.kernel.s.value == kernel.s.value) and + (self.kernel.horizonValue == horizon) and + (self.kernel.scaling.value == C)): + return + + self.kernel = kernel + + complementKernel = kernel.getComplementKernel() + builder = nonlocalBuilder(self.mesh, self.dm, complementKernel, zeroExterior=True, comm=self.comm, logging=self.logging) + correction = builder.getH2() + + self.S = self.Ainf + self.facS = 2.*C + + if isinstance(self.kernel.s, constFractionalOrder): + if self.mesh.dim == 1: + vol = 2 + elif self.mesh.dim == 2: + vol = 2*np.pi * horizon + else: + raise NotImplementedError() + s = self.kernel.sValue + self.M = -correction + (-vol*C*pow(horizon, 1-self.mesh.dim-2*s)/s) * self.mass + else: + self.mass.setZero() + + builder.local_matrix_zeroExterior.center2 = uninitialized((self.mesh.dim), dtype=REAL) + coeff = horizonSurfaceIntegral(builder.local_matrix_zeroExterior, horizon) + qr = simplexXiaoGimbutas(2, self.mesh.dim) + if self.mesh.dim == 1: + if isinstance(self.dm, P1_DoFMap): + mass = mass_1d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + elif self.mesh.dim == 2: + if isinstance(self.dm, P1_DoFMap): + mass = mass_2d_sym_scalar_anisotropic(coeff, self.dm, qr) + else: + raise NotImplementedError() + else: + raise NotImplementedError() + + assembleMatrix(self.mesh, self.dm, mass, A=self.mass) + + _, jumps = builder.getKernelBlocksAndJumps() + iM = IndexManager(self.dm, self.mass) + fake_cells = np.empty((1, self.mesh.dim), dtype=INDEX) + cellPair = np.empty((2), dtype=INDEX) + edge = np.empty((2), dtype=INDEX) + evalShift = 1e-9 + contribZeroExterior = uninitialized((self.dm.dofs_per_element*(self.dm.dofs_per_element+1)//2), dtype=REAL) + builder.local_matrix_surface.setMesh1(self.mesh) + for hv in jumps: + decode_edge(hv, cellPair) + + if self.mesh.dim == 1: + fake_cells[0, 0] = jumps[hv] + else: + hv2 = jumps[hv] + decode_edge(hv2, edge) + for vertexNo in range(self.mesh.dim): + fake_cells[0, vertexNo] = edge[vertexNo] + builder.local_matrix_surface.setVerticesCells2(self.mesh.vertices, fake_cells) + builder.local_matrix_surface.setCell2(0) + if self.mesh.dim == 1: + builder.local_matrix_surface.center2[0] += evalShift + elif self.mesh.dim == 2: + builder.local_matrix_surface.center2[0] += evalShift*(builder.local_matrix_surface.simplex2[1, 1]-builder.local_matrix_surface.simplex2[0, 1]) + builder.local_matrix_surface.center2[1] += evalShift*(builder.local_matrix_surface.simplex2[0, 0]-builder.local_matrix_surface.simplex2[1, 0]) + + for cellNo3 in range(self.mesh.num_cells): + builder.local_matrix_surface.setCell1(cellNo3) + panel = builder.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if builder.local_matrix_surface.center1[0] < builder.local_matrix_surface.center2[0]: + fac = 1. + else: + fac = -1. + else: + fac = 1. 
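+ # fac selects the sign of the surface contribution according to which side
+ # of the jump interface the cell lies on (judged against the center2 that
+ # was shifted by +evalShift above); the second pass below shifts by
+ # -2*evalShift to the other side and reverses the sign convention.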
+ builder.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + if builder.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -2*C*fac) + else: + raise NotImplementedError() + + if self.mesh.dim == 1: + builder.local_matrix_surface.center2[0] -= 2.*evalShift + elif self.mesh.dim == 2: + builder.local_matrix_surface.center2[0] -= 2.*evalShift*(builder.local_matrix_surface.simplex2[1, 1]-builder.local_matrix_surface.simplex2[0, 1]) + builder.local_matrix_surface.center2[1] -= 2.*evalShift*(builder.local_matrix_surface.simplex2[0, 0]-builder.local_matrix_surface.simplex2[1, 0]) + + for cellNo3 in range(self.mesh.num_cells): + builder.local_matrix_surface.setCell1(cellNo3) + panel = builder.local_matrix_surface.getPanelType() + if panel != IGNORED: + if self.mesh.dim == 1: + if builder.local_matrix_surface.center1[0] < builder.local_matrix_surface.center2[0]: + fac = -1. + else: + fac = 1. + else: + fac = -1. + builder.local_matrix_surface.eval(contribZeroExterior, panel) + iM.getDoFsElem(cellNo3) + if builder.local_matrix_surface.symmetricLocalMatrix: + iM.addToMatrixElemSym(contribZeroExterior, -2*C*fac) + else: + raise NotImplementedError() + + self.M = -correction + self.mass + self.facM = 1. + self.initialized = True + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + assert self.initialized + return TimeStepperLinearOperator.matvec(self, x, y) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef INDEX_t matvec_no_overwrite(self, + REAL_t[::1] x, + REAL_t[::1] y) except -1: + assert self.initialized + return TimeStepperLinearOperator.matvec_no_overwrite(self, x, y) + + +def assembleFractionalLaplacian(meshBase mesh, + DoFMap DoFMap, + fractionalOrderBase s, + function horizon=constant(np.inf), + target_order=None, + bint zeroExterior=True, + bint genKernel=False, + MPI.Comm comm=None, + bint forceNonSym=False, + **kwargs): + warnings.warn('"assembleFractionalLaplacian" deprecated, use "assembleNonlocalOperator"', DeprecationWarning) + params = {'target_order': target_order, + 'genKernel': genKernel, + 'forceNonSym': forceNonSym} + return assembleNonlocalOperator(mesh, DoFMap, s, horizon, params, zeroExterior, comm, **kwargs) + + +def assembleNonlocalOperator(meshBase mesh, + DoFMap DoFMap, + fractionalOrderBase s, + function horizon=constant(np.inf), + dict params={}, + bint zeroExterior=True, + MPI.Comm comm=None, + **kwargs): + kernel = getFractionalKernel(mesh.dim, s, horizon) + builder = nonlocalBuilder(mesh, DoFMap, kernel, params, zeroExterior, comm, **kwargs) + return builder.getDense() + + + +def assembleFractionalLaplacianDiagonal(meshBase mesh, + DoFMap DoFMap, + fractionalOrderBase s, + function horizon=constant(np.inf), + dict params={}, + bint zeroExterior=True, + comm=None, + **kwargs): + kernel = getFractionalKernel(mesh.dim, s, horizon) + builder = nonlocalBuilder(mesh, DoFMap, kernel, params, zeroExterior, comm, **kwargs) + return builder.getDiagonal() + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef LinearOperator getSparseNearField(DoFMap DoFMap, list Pnear, bint symmetric=False): + cdef: + sparsityPattern sP + INDEX_t I = -1, J = -1 + nearFieldClusterPair clusterPair + indexSet dofs1, dofs2 + indexSetIterator it1 = arrayIndexSetIterator(), it2 = arrayIndexSetIterator() + sP = 
sparsityPattern(DoFMap.num_dofs) + if symmetric: + for clusterPair in Pnear: + dofs1 = clusterPair.n1.get_dofs() + dofs2 = clusterPair.n2.get_dofs() + it1.setIndexSet(dofs1) + it2.setIndexSet(dofs2) + while it1.step(): + I = it1.i + it2.reset() + while it2.step(): + J = it2.i + if I > J: + sP.add(I, J) + else: + for clusterPair in Pnear: + dofs1 = clusterPair.n1.get_dofs() + dofs2 = clusterPair.n2.get_dofs() + it1.setIndexSet(dofs1) + it2.setIndexSet(dofs2) + while it1.step(): + I = it1.i + it2.reset() + while it2.step(): + J = it2.i + sP.add(I, J) + indptr, indices = sP.freeze() + data = np.zeros((indices.shape[0]), dtype=REAL) + if symmetric: + diagonal = np.zeros((DoFMap.num_dofs), dtype=REAL) + A = SSS_LinearOperator(indices, indptr, data, diagonal) + else: + A = CSR_LinearOperator(indices, indptr, data) + return A + + +cdef class nearFieldClusterPair: + def __init__(self, tree_node n1, tree_node n2): + cdef: + indexSet cells1, cells2 + self.n1 = n1 + self.n2 = n2 + cells1 = self.n1.get_cells() + cells2 = self.n2.get_cells() + self.cellsUnion = cells1.union(cells2) + self.cellsInter = cells1.inter(cells2) + assert len(cells1)+len(cells2) == len(self.cellsUnion)+len(self.cellsInter), (cells1.toSet(), + cells2.toSet(), + self.cellsInter.toSet(), + self.cellsUnion.toSet()) + + def plot(self, color='red'): + import matplotlib.pyplot as plt + import matplotlib.patches as patches + dim = self.n1.box.shape[0] + if dim == 1: + box1 = self.n1.box + box2 = self.n2.box + plt.gca().add_patch(patches.Rectangle((box1[0, 0], box2[0, 0]), box1[0, 1]-box1[0, 0], box2[0, 1]-box2[0, 0], fill=True, alpha=0.5, facecolor=color)) + else: + for dof1 in self.n1.dofs: + for dof2 in self.n2.dofs: + plt.gca().add_patch(patches.Rectangle((dof1-0.5, dof2-0.5), 1., 1., fill=True, alpha=0.5, facecolor=color)) + + def HDF5write(self, node): + node.attrs['n1'] = self.n1.id + node.attrs['n2'] = self.n2.id + + @staticmethod + def HDF5read(node, nodes): + cP = nearFieldClusterPair(nodes[int(node.attrs['n1'])], + nodes[int(node.attrs['n2'])]) + return cP + + def __repr__(self): + return 'nearFieldClusterPair<{}, {}>'.format(self.n1, self.n2) + + +cdef inline int balanceCluster(INDEX_t[::1] csums, INDEX_t cost): + cdef: + INDEX_t k, i, j + INDEX_t csize = csums.shape[0] + # k = argmin(csums) + k = 0 + for i in range(1, csize): + if csums[i] < csums[k]: + k = i + # add cost estimate + csums[k] += cost + # prevent overflow + if csums[k] > 1e7: + j = csums[k] + for i in range(csize): + csums[i] -= j + return k + + +cdef class SubMatrixAssemblyOperator(LinearOperator): + cdef: + LinearOperator A + dict lookupI, lookupJ + + def __init__(self, LinearOperator A, INDEX_t[::1] I, INDEX_t[::1] J): + LinearOperator.__init__(self, + A.num_rows, + A.num_columns) + self.A = A + self.lookupI = {} + self.lookupJ = {} + for i in range(I.shape[0]): + self.lookupI[I[i]] = i + for i in range(J.shape[0]): + self.lookupJ[J[i]] = i + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void addToEntry(self, INDEX_t I, INDEX_t J, REAL_t val): + cdef: + INDEX_t i, j + i = self.lookupI.get(I, -1) + j = self.lookupJ.get(J, -1) + if i >= 0 and j >= 0: + self.A.addToEntry(i, j, val) + + +cdef class FilteredAssemblyOperator(LinearOperator): + cdef: + LinearOperator A + indexSet dofs1, dofs2 + + def __init__(self, LinearOperator A): + self.A = A + + cdef void setFilter(self, indexSet dofs1, indexSet dofs2): + self.dofs1 = dofs1 + self.dofs2 = dofs2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) 
+ @cython.wraparound(False) + cdef inline void addToEntry(self, INDEX_t I, INDEX_t J, REAL_t val): + if self.dofs1.inSet(I) and self.dofs2.inSet(J): + self.A.addToEntry(I, J, val) + + +cdef class LeftFilteredAssemblyOperator(LinearOperator): + cdef: + LinearOperator A + indexSet dofs1 + + def __init__(self, LinearOperator A): + self.A = A + + cdef void setFilter(self, indexSet dofs1): + self.dofs1 = dofs1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef inline void addToEntry(self, INDEX_t I, INDEX_t J, REAL_t val): + if self.dofs1.inSet(I): + self.A.addToEntry(I, J, val) + + +def assembleNearField(list Pnear, + meshBase mesh, + DoFMap DoFMap, + fractionalOrderBase s, + function horizon=constant(np.inf), + dict params={}, + bint zeroExterior=True, + comm=None, + **kwargs): + kernel = getFractionalKernel(mesh.dim, s, horizon) + builder = nonlocalBuilder(mesh, DoFMap, kernel, params, zeroExterior, comm, logging=True, **kwargs) + A = builder.assembleClusters(Pnear) + return A + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef INDEX_t[:, ::1] boundaryVertices(INDEX_t[:, ::1] cells, indexSet cellIds): + cdef: + INDEX_t c0, c1, i, k + np.ndarray[INDEX_t, ndim=2] bvertices_mem + INDEX_t[:, ::1] bvertices_mv + set bvertices = set() + indexSetIterator it = cellIds.getIter() + + while it.step(): + i = it.i + c0, c1 = cells[i, 0], cells[i, 1] + try: + bvertices.remove(c0) + except KeyError: + bvertices.add(c0) + try: + bvertices.remove(c1) + except KeyError: + bvertices.add(c1) + bvertices_mem = uninitialized((len(bvertices), 1), dtype=INDEX) + bvertices_mv = bvertices_mem + i = 0 + for k in bvertices: + bvertices_mv[i, 0] = k + i += 1 + return bvertices_mem + + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +cdef INDEX_t[:, ::1] boundaryEdges(INDEX_t[:, ::1] cells, indexSet cellIds): + cdef: + INDEX_t c0, c1, c2, i, k + ENCODE_t hv + INDEX_t[:, ::1] temp = uninitialized((3, 2), dtype=INDEX) + INDEX_t[::1] e0 = temp[0, :] + INDEX_t[::1] e1 = temp[1, :] + INDEX_t[::1] e2 = temp[2, :] + np.ndarray[INDEX_t, ndim=2] bedges_mem + INDEX_t[:, ::1] bedges_mv + dict bedges = dict() + bint orientation + indexSetIterator it = cellIds.getIter() + + while it.step(): + i = it.i + c0, c1, c2 = cells[i, 0], cells[i, 1], cells[i, 2] + sortEdge(c0, c1, e0) + sortEdge(c1, c2, e1) + sortEdge(c2, c0, e2) + for k in range(3): + hv = encode_edge(temp[k, :]) + try: + del bedges[hv] + except KeyError: + bedges[hv] = (cells[i, k] == temp[k, 0]) + bedges_mem = uninitialized((len(bedges), 2), dtype=INDEX) + bedges_mv = bedges_mem + + i = 0 + for hv in bedges: + orientation = bedges[hv] + decode_edge(hv, e0) + if orientation: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[0], e0[1] + else: + bedges_mv[i, 0], bedges_mv[i, 1] = e0[1], e0[0] + i += 1 + return bedges_mem diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd b/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd new file mode 100644 index 0000000..bb05517 --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase.pxd @@ -0,0 +1,127 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +cimport numpy as np +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, quadratureRule, + doubleSimplexQuadratureRule, GaussJacobi, + simplexDuffyTransformation, simplexXiaoGimbutas, + transformQuadratureRule) +from PyNucleus_fem.DoFMaps cimport DoFMap, shapeFunction +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.functions cimport function, constant +from PyNucleus_fem.femCy cimport volume_t +from PyNucleus_fem.meshCy cimport (volume0Dsimplex, + volume1Dsimplex, + volume2Dsimplex, + volume1Din2Dsimplex) +from . twoPointFunctions cimport (twoPointFunction, + constantTwoPoint) +from . interactionDomains cimport REMOTE +from . fractionalOrders cimport (fractionalOrderBase, + constFractionalOrder, + variableFractionalOrder) +from . kernels2 cimport (Kernel, + FractionalKernel) +from . clusterMethodCy cimport tree_node +ctypedef INDEX_t panelType + +ctypedef np.uint64_t MASK_t + + +cdef class double_local_matrix_t: + cdef: + REAL_t[:, ::1] vertices1, vertices2 + INDEX_t[:, ::1] cells1, cells2 + public dict distantQuadRules + public DoFMap DoFMap + INDEX_t dim + public bint symmetricLocalMatrix + public bint symmetricCells + public INDEX_t cellNo1, cellNo2 + REAL_t[:, :, ::1] precomputedSimplices + REAL_t[:, ::1] precomputedCenters + REAL_t[::1] precomputedVolumes + REAL_t[::1] precomputedH + REAL_t[:, ::1] simplex1, simplex2 + REAL_t[::1] center1, center2 + volume_t volume1, volume2 + public REAL_t vol1, vol2 + panelType IDENTICAL + REAL_t dmin2, dmax2, dcenter2 + REAL_t h1MaxInv, h2MaxInv, dMaxInv + cdef void precomputeSimplices(self) + cdef INDEX_t getCellPairIdentifierSize(self) + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm) + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm) + cdef void setMesh1(self, meshBase mesh1) + cdef void setMesh2(self, meshBase mesh2) + cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1) + cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2) + cdef void setCell1(self, INDEX_t cellNo1) + cdef void setCell2(self, INDEX_t cellNo2) + cdef void setSimplex1(self, REAL_t[:, ::1] simplex1) + cdef void setSimplex2(self, REAL_t[:, ::1] simplex2) + cdef void swapCells(self) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=*) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d) + cdef panelType getProtoPanelType(self) + cdef void computeCenterDistance(self) + cdef void computeExtremeDistances(self) + cpdef panelType getPanelType(self) + cdef void addQuadRule(self, panelType panel) + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex) + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex) + cdef void getSimplexCenter(self, + const REAL_t[:, ::1] simplex, + REAL_t[::1] center) + + +cdef class nonlocalLaplacian(double_local_matrix_t): + cdef: + public REAL_t H0, hmin, num_dofs + void* localShapeFunctions + dict specialQuadRules + public Kernel kernel + REAL_t[:, ::1] x, y + void** distantQuadRulesPtr + cdef void getNearQuadRule(self, panelType panel) + cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof) + + +cdef class specialQuadRule: + cdef: + public quadratureRule qr + public REAL_t[:, ::1] PSI + public REAL_t[:, :, ::1] PSI3 + public REAL_t[:, ::1] PHI + public 
REAL_t[:, :, ::1] PHI3 + public transformQuadratureRule qrTransformed0 + public transformQuadratureRule qrTransformed1 + + +cdef class nonlocalLaplacian1D(nonlocalLaplacian): + cdef: + public REAL_t target_order, quad_order_diagonal + REAL_t[::1] temp, temp2 + dict distantPSI + INDEX_t[::1] idx + + +cdef class nonlocalLaplacian2D(nonlocalLaplacian): + cdef: + public REAL_t target_order, quad_order_diagonal, quad_order_diagonalV + dict distantPSI + INDEX_t[::1] idx1, idx2, idx3, idx4 + public REAL_t[::1] temp diff --git a/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx b/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx new file mode 100644 index 0000000..4a7770a --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianBase.pyx @@ -0,0 +1,763 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +cimport numpy as np +cimport cython +from libc.math cimport (sin, cos, sinh, cosh, tanh, sqrt, atan2, + log, ceil, + fabs as abs, M_PI as pi, pow, + tgamma as gamma) +from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL +from PyNucleus_base import uninitialized +from libc.stdlib cimport malloc + +# With 64 bits, we can handle at most 5 DoFs per element. +MASK = np.uint64 +ALL = MASK(-1) +include "panelTypes.pxi" + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max +cdef REAL_t inf = np.inf + +@cython.initializedcheck(False) +@cython.boundscheck(False) +@cython.wraparound(False) +@cython.cdivision(True) +cdef inline void getSimplexAndCenter(const INDEX_t[:, ::1] cells, + const REAL_t[:, ::1] vertices, + const INDEX_t cellIdx, + REAL_t[:, ::1] simplex, + REAL_t[::1] center): + cdef: + INDEX_t dim = vertices.shape[1] + INDEX_t manifold_dim = cells.shape[1]-1 + INDEX_t m, k, l + REAL_t v, fac = 1./(manifold_dim+1) + center[:] = 0. 
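+ # gather the simplex vertices and accumulate their coordinates; the center
+ # is the arithmetic mean of the manifold_dim+1 vertices, computed in a
+ # single pass using fac = 1/(manifold_dim+1)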
+ for m in range(manifold_dim+1): + k = cells[cellIdx, m] + for l in range(dim): + v = vertices[k, l] + simplex[m, l] = v + center[l] += v + for l in range(dim): + center[l] *= fac + + +cdef class double_local_matrix_t: + def __init__(self, INDEX_t dim, INDEX_t manifold_dim1, INDEX_t manifold_dim2): + self.distantQuadRules = {} + self.dim = dim + self.symmetricLocalMatrix = True + self.symmetricCells = True + self.cellNo1 = -1 + self.cellNo2 = -1 + self.vol1 = np.nan + self.vol2 = np.nan + + if dim == 1: + self.volume1 = volume1Dsimplex + elif dim == 2: + self.volume1 = volume2Dsimplex + else: + raise NotImplementedError() + + if dim == 1 and manifold_dim2 == 1: + self.volume2 = volume1Dsimplex + elif dim == 1 and manifold_dim2 == 0: + self.volume2 = volume0Dsimplex + elif dim == 2 and manifold_dim2 == 2: + self.volume2 = volume2Dsimplex + elif dim == 2 and manifold_dim2 == 1: + self.volume2 = volume1Din2Dsimplex + else: + raise NotImplementedError() + + if self.dim == 1: + self.IDENTICAL = COMMON_EDGE + elif self.dim == 2: + self.IDENTICAL = COMMON_FACE + else: + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void precomputeSimplices(self): + # mesh1 and mesh 2 will be the same + cdef: + INDEX_t cellNo1 + INDEX_t m, k, l + REAL_t fac = 1./self.cells1.shape[1] + self.precomputedSimplices = uninitialized((self.cells1.shape[0], self.cells1.shape[1], self.dim), dtype=REAL) + self.precomputedCenters = np.zeros((self.cells1.shape[0], self.dim), dtype=REAL) + for cellNo1 in range(self.cells1.shape[0]): + for m in range(self.cells1.shape[1]): + k = self.cells1[cellNo1, m] + for l in range(self.vertices1.shape[1]): + self.precomputedSimplices[cellNo1, m, l] = self.vertices1[k, l] + self.precomputedCenters[cellNo1, l] += self.vertices1[k, l] + for l in range(self.vertices1.shape[1]): + self.precomputedCenters[cellNo1, l] *= fac + + cdef INDEX_t getCellPairIdentifierSize(self): + return -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t *perm): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): + self.computeCellPairIdentifierBase(ID, perm) + + def computeCellPairIdentifier_py(self): + cdef: + INDEX_t perm = 0 + ID = uninitialized((self.getCellPairIdentifierSize()), dtype=INDEX) + self.computeCellPairIdentifier(ID, &perm) + return ID, perm + + cdef void setMesh1(self, meshBase mesh1): + self.setVerticesCells1(mesh1.vertices, mesh1.cells) + self.precomputedVolumes = mesh1.volVector + self.precomputedH = mesh1.hVector + h1 = 2.*mesh1.h + d = 2.*mesh1.diam + self.h1MaxInv = 1./h1 + self.dMaxInv = 1./d + + cdef void setVerticesCells1(self, REAL_t[:, ::1] vertices1, INDEX_t[:, ::1] cells1): + self.vertices1 = vertices1 + self.cells1 = cells1 + self.simplex1 = uninitialized((self.cells1.shape[1], self.dim), dtype=REAL) + self.center1 = uninitialized((self.dim), dtype=REAL) + self.cellNo1 = -1 + self.cellNo2 = -1 + if self.symmetricCells: + # mesh1 and mesh 2 will be the same + self.precomputeSimplices() + + cdef void setMesh2(self, meshBase mesh2): + self.setVerticesCells2(mesh2.vertices, mesh2.cells) + if mesh2.manifold_dim > 0: + h2 = 2.*mesh2.h + self.h2MaxInv = 1./h2 + else: + self.h2MaxInv = 1. 
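+ # A minimal sketch of the intended call sequence from Python, as comments
+ # only; the *_py wrappers below expose the cdef methods, and `contrib`
+ # stands for a preallocated REAL_t array of local contributions:
+ #
+ #   local_matrix.setMesh1_py(mesh)
+ #   local_matrix.setMesh2_py(mesh)
+ #   local_matrix.setCell1_py(cellNo1)
+ #   local_matrix.setCell2_py(cellNo2)
+ #   panel = local_matrix.getPanelType()
+ #   if panel != IGNORED:
+ #       local_matrix.eval_py(contrib, panel)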
+ + cdef void setVerticesCells2(self, REAL_t[:, ::1] vertices2, INDEX_t[:, ::1] cells2): + self.vertices2 = vertices2 + self.cells2 = cells2 + self.simplex2 = uninitialized((self.cells2.shape[1], self.dim), dtype=REAL) + self.center2 = uninitialized((self.dim), dtype=REAL) + self.cellNo1 = -1 + self.cellNo2 = -1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setCell1(self, INDEX_t cellNo1): + if self.cellNo1 == cellNo1: + return + self.cellNo1 = cellNo1 + if not self.symmetricCells: + getSimplexAndCenter(self.cells1, self.vertices1, self.cellNo1, self.simplex1, self.center1) + self.vol1 = self.volume1(self.simplex1) + else: + self.simplex1 = self.precomputedSimplices[cellNo1, :, :] + self.center1 = self.precomputedCenters[cellNo1, :] + self.vol1 = self.precomputedVolumes[cellNo1] + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setCell2(self, INDEX_t cellNo2): + if self.cellNo2 == cellNo2: + return + self.cellNo2 = cellNo2 + if not self.symmetricCells: + getSimplexAndCenter(self.cells2, self.vertices2, self.cellNo2, self.simplex2, self.center2) + self.vol2 = self.volume2(self.simplex2) + else: + self.simplex2 = self.precomputedSimplices[cellNo2, :, :] + self.center2 = self.precomputedCenters[cellNo2, :] + self.vol2 = self.precomputedVolumes[cellNo2] + + def setMesh1_py(self, meshBase mesh1): + self.setMesh1(mesh1) + + def setMesh2_py(self, meshBase mesh2): + self.setMesh2(mesh2) + + def setCell1_py(self, INDEX_t cellNo1): + self.setCell1(cellNo1) + + def setCell2_py(self, INDEX_t cellNo2): + self.setCell2(cellNo2) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void swapCells(self): + self.cellNo1, self.cellNo2 = self.cellNo2, self.cellNo1 + self.simplex1, self.simplex2 = self.simplex2, self.simplex1 + self.center1, self.center2 = self.center2, self.center1 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setSimplex1(self, REAL_t[:, ::1] simplex1): + self.simplex1 = simplex1 + self.getSimplexCenter(self.simplex1, self.center1) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void setSimplex2(self, REAL_t[:, ::1] simplex2): + self.simplex2 = simplex2 + self.getSimplexCenter(self.simplex2, self.center2) + + def __call__(self, + REAL_t[::1] contrib, + panelType panel): + return self.eval(contrib, panel) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + raise NotImplementedError() + + def eval_py(self, + REAL_t[::1] contrib, + panel): + self.eval(contrib, panel) + + @cython.initializedcheck(False) + @cython.wraparound(False) + @cython.boundscheck(False) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void getSimplexCenter(self, + 
const REAL_t[:, ::1] simplex, + REAL_t[::1] center): + cdef: + INDEX_t i, j + REAL_t fac + center[:] = 0. + for i in range(simplex.shape[0]): + for j in range(simplex.shape[1]): + center[j] += simplex[i, j] + fac = 1./simplex.shape[0] + for j in range(simplex.shape[1]): + center[j] *= fac + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef panelType getProtoPanelType(self): + # Given two cells, determines their relationship: + # - COMMON_FACE + # - COMMON_EDGE + # - COMMON_VERTEX + # - DISTANT + cdef: + INDEX_t k, i, j + panelType panel = 0 + if self.symmetricCells: + if self.cellNo1 > self.cellNo2: + return IGNORED + if (self.cells1.shape[1] == self.cells2.shape[1]) and (self.cellNo1 == self.cellNo2): + return self.IDENTICAL + for k in range(self.cells2.shape[1]): + i = self.cells2[self.cellNo2, k] + for j in range(self.cells1.shape[1]): + if i == self.cells1[self.cellNo1, j]: + panel -= 1 + break + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeCenterDistance(self): + cdef: + INDEX_t j + REAL_t d2 = 0. + for j in range(self.dim): + d2 += (self.center1[j]-self.center2[j])**2 + self.dcenter2 = d2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeExtremeDistances(self): + cdef: + INDEX_t i, k, j + INDEX_t noSimplex1 = self.simplex1.shape[0] + INDEX_t noSimplex2 = self.simplex2.shape[0] + REAL_t d2 + REAL_t dmin2 = inf + REAL_t dmax2 = 0. + for i in range(noSimplex1): + for k in range(noSimplex2): + d2 = 0. + for j in range(self.dim): + d2 += (self.simplex1[i, j] - self.simplex2[k, j])**2 + dmin2 = min(dmin2, d2) + dmax2 = max(dmax2, d2) + self.dmin2 = dmin2 + self.dmax2 = dmax2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cpdef panelType getPanelType(self): + raise NotImplementedError() + + cdef void addQuadRule(self, panelType panel): + raise NotImplementedError() + + def addQuadRule_py(self, panelType panel): + self.addQuadRule(panel) + + def __repr__(self): + return '{}\n'.format(self.__class__.__name__) + + +cdef class specialQuadRule: + def __init__(self, + quadratureRule qr, + REAL_t[:, ::1] PSI=None, + REAL_t[:, :, ::1] PSI3=None, + REAL_t[:, ::1] PHI=None, + REAL_t[:, :, ::1] PHI3=None): + self.qr = qr + if PSI is not None: + self.PSI = PSI + if PSI3 is not None: + self.PSI3 = PSI3 + if PHI is not None: + self.PHI = PHI + if PHI3 is not None: + self.PHI3 = PHI3 + + +cdef class nonlocalLaplacian(double_local_matrix_t): + def __init__(self, + Kernel kernel, + meshBase mesh, DoFMap dm, + num_dofs=None, INDEX_t manifold_dim2=-1): + cdef: + shapeFunction sf + INDEX_t i + if manifold_dim2 < 0: + manifold_dim2 = mesh.manifold_dim + double_local_matrix_t.__init__(self, mesh.dim, mesh.manifold_dim, manifold_dim2) + if num_dofs is None: + self.num_dofs = dm.num_dofs + else: + self.num_dofs = num_dofs + self.hmin = mesh.hmin + self.H0 = mesh.diam/sqrt(8) + self.DoFMap = dm + self.localShapeFunctions = malloc(self.DoFMap.dofs_per_element*sizeof(void*)) + for i in range(self.DoFMap.dofs_per_element): + sf = dm.localShapeFunctions[i] + ((self.localShapeFunctions+i*sizeof(void*)))[0] = sf + self.specialQuadRules = {} + self.distantQuadRulesPtr = malloc(100*sizeof(void*)) + for i in range(100): + self.distantQuadRulesPtr[i] = NULL + + self.kernel = kernel + + if self.kernel.variable: + self.symmetricCells = self.kernel.symmetric + 
self.symmetricLocalMatrix = self.kernel.symmetric + else: + self.symmetricCells = True + self.symmetricLocalMatrix = True + + if self.kernel.variableHorizon: + self.symmetricCells = False + + cdef void getNearQuadRule(self, panelType panel): + raise NotImplementedError() + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeCellPairIdentifier(self, INDEX_t[::1] ID, INDEX_t *perm): + assert not self.kernel.variable + if self.kernel.finiteHorizon: + self.computeExtremeDistances() + if self.dmax2 <= self.kernel.getHorizonValue2(): + # entirely within horizon + self.computeCellPairIdentifierBase(ID, perm) + elif self.dmin2 >= self.kernel.getHorizonValue2(): + # entirely outside of horizon + ID[0] = IGNORED + else: + # on horizon + ID[0] = ON_HORIZON + else: + self.computeCellPairIdentifierBase(ID, perm) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cpdef panelType getPanelType(self): + # Given two cells, determines their relationship: + # - COMMON_FACE + # - COMMON_EDGE + # - COMMON_VERTEX + # - DISTANT + # - IGNORED + cdef: + panelType panel + REAL_t d, h1, h2 + panel = self.getProtoPanelType() + + if panel == IGNORED: + return IGNORED + + if self.kernel.variable: + self.kernel.evalParams(self.center1, self.center2) + + if panel == DISTANT: + if self.kernel.interaction.getRelativePosition(self.simplex1, self.simplex2) == REMOTE: + return IGNORED + + self.computeCenterDistance() + d = sqrt(self.dcenter2) + + if self.symmetricCells: + h1 = self.precomputedH[self.cellNo1] + else: + h1 = self.get_h_simplex(self.simplex1) + if self.cells1.shape[1] == self.cells2.shape[1]: + if self.symmetricCells: + h2 = self.precomputedH[self.cellNo2] + else: + h2 = self.get_h_simplex(self.simplex2) + else: + h2 = self.get_h_surface_simplex(self.simplex2) + panel = self.getQuadOrder(h1, h2, d) + elif self.kernel.variable: + self.getNearQuadRule(panel) + return panel + + def __repr__(self): + return (super(nonlocalLaplacian, self).__repr__() + + 'kernel: {}\n'.format(self.kernel)) + + cdef inline shapeFunction getLocalShapeFunction(self, INDEX_t local_dof): + return ((((self.localShapeFunctions+local_dof*sizeof(void*)))[0])) + + +cdef class nonlocalLaplacian1D(nonlocalLaplacian): + def __init__(self, Kernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, manifold_dim2=-1, **kwargs): + super(nonlocalLaplacian1D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): + cdef: + REAL_t h2 + h2 = abs(simplex[1, 0]-simplex[0, 0]) + return h2 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex): + return 1. 
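+ # The identifiers computed below quantize the pair geometry (h1, h2 and the
+ # center distance d) to integer keys so that congruent cell pairs can reuse
+ # one local matrix evaluation. A minimal sketch of the quantization, with
+ # illustrative names only (`value` is bounded by 1/inv_max):
+ #
+ #   def quantize(value, inv_max):
+ #       return int(np.iinfo(INDEX).max*value*inv_max)
+ #
+ # Orientation information (the signs of h1, h2 and d) goes into the perm
+ # bits instead of the keys.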
+ + cdef INDEX_t getCellPairIdentifierSize(self): + return 3 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t * perm): + # use h1, h2 and midpoint distance as identifier + cdef: + REAL_t h1, h2, d + + h1 = self.simplex1[1, 0]-self.simplex1[0, 0] + h2 = self.simplex2[1, 0]-self.simplex2[0, 0] + d = self.center2[0]-self.center1[0] + + perm[0] = (d < 0) + perm[0] += (h1 < 0) << 1 + perm[0] += (h2 < 0) << 3 + + h1 = abs(h1) + h2 = abs(h2) + d = abs(d) + + ID[0] = (MAX_INT*d*self.dMaxInv) + ID[1] = (MAX_INT*h1*self.h1MaxInv) + ID[2] = (MAX_INT*h2*self.h2MaxInv) + + +cdef class nonlocalLaplacian2D(nonlocalLaplacian): + def __init__(self, Kernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, manifold_dim2=-1, **kwargs): + super(nonlocalLaplacian2D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_simplex(self, const REAL_t[:, ::1] simplex): + cdef: + INDEX_t i, j + REAL_t hmax = 0., h2 + for i in range(2): + for j in range(i+1, 3): + h2 = (simplex[j, 0]-simplex[i, 0])*(simplex[j, 0]-simplex[i, 0]) + (simplex[j, 1]-simplex[i, 1])*(simplex[j, 1]-simplex[i, 1]) + hmax = max(hmax, h2) + return sqrt(hmax) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef REAL_t get_h_surface_simplex(self, const REAL_t[:, ::1] simplex): + cdef: + INDEX_t k + REAL_t h2 + h2 = 0. + for k in range(2): + h2 += (simplex[1, k]-simplex[0, k])**2 + return sqrt(h2) + + cdef INDEX_t getCellPairIdentifierSize(self): + return 9 + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void computeCellPairIdentifierBase(self, INDEX_t[::1] ID, INDEX_t * perm): + cdef: + REAL_t d, d1, d2 + REAL_t v00, v01, v10, v11, v20, v21 + REAL_t c00, c01, c10, c11, c20, c21 + INDEX_t rot + BOOL_t permCells + + d1 = self.center2[0]-self.center1[0] + d2 = self.center2[1]-self.center1[1] + + d = sqrt(d1*d1 + d2*d2) + ID[0] = (MAX_INT*d*self.dMaxInv) + + if d < 1e-9: + d1 = 1. + d2 = 0. 
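+ # the centers (nearly) coincide: fall back to the fixed direction (1, 0)
+ # and skip the cell permutation, since no preferred orientation exists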
+ permCells = False + perm[0] = False + else: + permCells = (d1 < 0) + perm[0] = permCells + d = 1.0/d + d1 *= d + d2 *= d + if permCells: + d1 = -d1 + d2 = -d2 + + # 1st simplex + + if not permCells: + v00 = self.simplex1[0, 0]-self.center1[0] + v01 = self.simplex1[0, 1]-self.center1[1] + v10 = self.simplex1[1, 0]-self.center1[0] + v11 = self.simplex1[1, 1]-self.center1[1] + v20 = self.simplex1[2, 0]-self.center1[0] + v21 = self.simplex1[2, 1]-self.center1[1] + else: + v00 = self.simplex2[0, 0]-self.center2[0] + v01 = self.simplex2[0, 1]-self.center2[1] + v10 = self.simplex2[1, 0]-self.center2[0] + v11 = self.simplex2[1, 1]-self.center2[1] + v20 = self.simplex2[2, 0]-self.center2[0] + v21 = self.simplex2[2, 1]-self.center2[1] + + c00 = v00*d1 + v01*d2 + c10 = v10*d1 + v11*d2 + c20 = v20*d1 + v21*d2 + + d1, d2 = -d2, d1 + + c01 = v00*d1 + v01*d2 + c11 = v10*d1 + v11*d2 + c21 = v20*d1 + v21*d2 + + d1, d2 = d2, -d1 + + if c00 != c10: + if c00 > c10: + if c00 != c20: + if c00 > c20: + rot = 0 + else: + rot = 2 + elif c01 > c21: + rot = 0 + else: + rot = 2 + elif c10 != c20: + if c10 > c20: + rot = 1 + else: + rot = 2 + elif c11 > c21: + rot = 1 + else: + rot = 2 + elif c01 > c11: + if c00 > c20: + rot = 0 + else: + rot = 2 + elif c10 > c20: + rot = 1 + else: + rot = 2 + + if not permCells: + perm[0] += (rot << 1) + else: + perm[0] += (rot << 3) + + if rot == 0: + pass + elif rot == 1: + c00, c10 = c10, c20 + c01, c11 = c11, c21 + else: + c00, c10 = c20, c00 + c01, c11 = c21, c01 + + ID[1] = (MAX_INT*c00*self.h1MaxInv) + ID[2] = (MAX_INT*c10*self.h1MaxInv) + + ID[3] = (MAX_INT*c01*self.h1MaxInv) + ID[4] = (MAX_INT*c11*self.h1MaxInv) + + # 2nd simplex + + if not permCells: + v00 = self.simplex2[0, 0]-self.center2[0] + v01 = self.simplex2[0, 1]-self.center2[1] + v10 = self.simplex2[1, 0]-self.center2[0] + v11 = self.simplex2[1, 1]-self.center2[1] + v20 = self.simplex2[2, 0]-self.center2[0] + v21 = self.simplex2[2, 1]-self.center2[1] + else: + v00 = self.simplex1[0, 0]-self.center1[0] + v01 = self.simplex1[0, 1]-self.center1[1] + v10 = self.simplex1[1, 0]-self.center1[0] + v11 = self.simplex1[1, 1]-self.center1[1] + v20 = self.simplex1[2, 0]-self.center1[0] + v21 = self.simplex1[2, 1]-self.center1[1] + + + c00 = v00*d1 + v01*d2 + c10 = v10*d1 + v11*d2 + c20 = v20*d1 + v21*d2 + + d1, d2 = -d2, d1 + + c01 = v00*d1 + v01*d2 + c11 = v10*d1 + v11*d2 + c21 = v20*d1 + v21*d2 + + if c00 != c10: + if c00 > c10: + if c00 != c20: + if c00 > c20: + rot = 0 + else: + rot = 2 + elif c01 > c21: + rot = 0 + else: + rot = 2 + elif c10 != c20: + if c10 > c20: + rot = 1 + else: + rot = 2 + elif c11 > c21: + rot = 1 + else: + rot = 2 + elif c01 > c11: + if c00 > c20: + rot = 0 + else: + rot = 2 + elif c10 > c20: + rot = 1 + else: + rot = 2 + + if not permCells: + perm[0] += (rot << 3) + else: + perm[0] += (rot << 1) + + if rot == 0: + pass + elif rot == 1: + c00, c10 = c10, c20 + c01, c11 = c11, c21 + else: + c00, c10 = c20, c00 + c01, c11 = c21, c01 + + ID[5] = (MAX_INT*c00*self.h2MaxInv) + ID[6] = (MAX_INT*c10*self.h2MaxInv) + + ID[7] = (MAX_INT*c01*self.h2MaxInv) + ID[8] = (MAX_INT*c11*self.h2MaxInv) diff --git a/nl/PyNucleus_nl/nonlocalLaplacianND.pxd b/nl/PyNucleus_nl/nonlocalLaplacianND.pxd new file mode 100644 index 0000000..35d6dfb --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianND.pxd @@ -0,0 +1,54 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). 
Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, ENCODE_t, BOOL_t +from PyNucleus_fem.quadrature cimport (simplexQuadratureRule, + transformQuadratureRule, + quadratureRule, + quadQuadratureRule, + doubleSimplexQuadratureRule, GaussJacobi, + simplexDuffyTransformation, simplexXiaoGimbutas) + +from PyNucleus_fem.DoFMaps cimport DoFMap +from PyNucleus_fem.femCy cimport volume_t +from PyNucleus_base.ip_norm cimport mydot +from PyNucleus_fem.meshCy cimport (vectorProduct, + volume0D, + volume1D, volume1Dnew, + volume1D_in_2D, + volume2Dnew, + volume3D, volume3Dnew) +from . nonlocalLaplacianBase cimport (double_local_matrix_t, + nonlocalLaplacian1D, + nonlocalLaplacian2D, + panelType, MASK_t, + specialQuadRule) +from . interactionDomains cimport CUT +from . fractionalOrders cimport fractionalOrderBase +from . kernels2 cimport Kernel + +include "config.pxi" + + + +cdef class integrable1D(nonlocalLaplacian1D): + cdef: + public quadQuadratureRule qrId, qrVertex0, qrVertex1 + REAL_t[:, ::1] PSI_id, PSI_vertex0, PSI_vertex1 + + cdef void getNearQuadRule(self, panelType panel) + + +cdef class integrable2D(nonlocalLaplacian2D): + cdef: + REAL_t[::1] temp2 + INDEX_t[::1] idx + public quadQuadratureRule qrEdge0, qrEdge1, qrVertex, qrId + REAL_t[:, :, ::1] PSI_edge, PSI_id, PSI_vertex + + cdef void getNearQuadRule(self, panelType panel) diff --git a/nl/PyNucleus_nl/nonlocalLaplacianND.pyx b/nl/PyNucleus_nl/nonlocalLaplacianND.pyx new file mode 100644 index 0000000..f20177f --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalLaplacianND.pyx @@ -0,0 +1,1196 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from PyNucleus_base.myTypes import INDEX, REAL +from PyNucleus_base import uninitialized, uninitialized_like +from PyNucleus_fem.meshCy cimport meshBase +from PyNucleus_fem.DoFMaps cimport DoFMap, P1_DoFMap, P0_DoFMap, shapeFunction + +import numpy as np +cimport numpy as np +from libc.math cimport pow, sqrt, fabs as abs, log, ceil +cimport cython +from . 
nonlocalLaplacianBase import ALL + +include "config.pxi" +include "panelTypes.pxi" + +cdef INDEX_t MAX_INT = np.iinfo(INDEX).max + + + +cdef class integrable1D(nonlocalLaplacian1D): + def __init__(self, Kernel kernel, meshBase mesh, DoFMap DoFMap, num_dofs=None, manifold_dim2=-1, target_order=None, **kwargs): + super(integrable1D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) + + assert isinstance(DoFMap, P1_DoFMap) + + if target_order is None: + self.target_order = 3.0 + else: + self.target_order = target_order + quad_order_diagonal = None + if quad_order_diagonal is None: + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (-2.-self.kernel.max_singularity)*abs(log(self.hmin/self.H0)))/0.8), 2) + self.quad_order_diagonal = quad_order_diagonal + + self.x = uninitialized((0, self.dim)) + self.y = uninitialized((0, self.dim)) + self.temp = uninitialized((0), dtype=REAL) + self.temp2 = uninitialized((0), dtype=REAL) + + self.idx = uninitialized((3), dtype=INDEX) + + if not self.kernel.variable: + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t alpha = self.kernel.getSingularityValue() + panel = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (-alpha-2.)*abs(log(h2/self.H0)) + (alpha+1.)*logdh2) / + (max(logdh1, 0) + 0.8)), + 2) + panel2 = max(ceil(((self.target_order+2.)*log(self.num_dofs*self.H0) + (-alpha-2.)*abs(log(h1/self.H0)) + (alpha+1)*logdh1) / + (max(logdh2, 0) + 0.8)), + 2) + panel = max(panel, panel2) + try: + self.distantQuadRules[panel] + except KeyError: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + shapeFunction sf + qr = simplexXiaoGimbutas(panel, self.dim) + qr2 = doubleSimplexQuadratureRule(qr, qr) + PSI = uninitialized((2*self.DoFMap.dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) for i = 0,1 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + PSI[I, k] = sf(qr2.rule1.nodes[:, i]) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) for i = 2,3 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + PSI[I+self.DoFMap.dofs_per_element, k] = -sf(qr2.rule2.nodes[:, j]) + k += 1 + sQR = specialQuadRule(qr2, PSI) + self.distantQuadRules[panel] = sQR + self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + + if qr2.rule1.num_nodes > self.x.shape[0]: + self.x = uninitialized((qr2.rule1.num_nodes, self.dim), dtype=REAL) + if qr2.rule2.num_nodes > self.y.shape[0]: + self.y = uninitialized((qr2.rule2.num_nodes, self.dim), dtype=REAL) + if qr2.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qr2.num_nodes), dtype=REAL) + + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t alpha = 
self.kernel.getSingularityValue() + REAL_t eta0, eta1 + specialQuadRule sQR0, sQR1 + + if panel == COMMON_EDGE: + try: + sQR0 = self.specialQuadRules[(alpha, panel, 0)] + except KeyError: + qrId = GaussJacobi(((1, 1, 2+alpha), + (1, 0, 0))) + + PSI_id = uninitialized((self.DoFMap.dofs_per_element, qrId.num_nodes), dtype=REAL) + # identical panels (COMMON_EDGE in 1D) + for i in range(qrId.num_nodes): + eta0 = qrId.nodes[0, i] + eta1 = qrId.nodes[1, i] + + # P0 + + # phi_1(x) = 1 + # phi_1(y) = 1 + # psi_1(x, y) = (phi_1(x)-phi_1(y))/(x-y) = 0 + + # P1 + + # phi_1(x) = 1-x + # phi_2(x) = x + # phi_1(y) = 1-y + # phi_2(y) = y + # psi_1(x, y) = (phi_1(x)-phi_1(y))/(x-y) = -1 + # psi_2(x, y) = (phi_2(x)-phi_2(y))/(x-y) = 1 + + # x = 1-eta0+eta0*eta1 + # PHI_id[0, 0, i] = 1.-x + # PHI_id[0, 1, i] = x + + # y = eta0*eta1 + # PHI_id[2, i] = 1.-y + # PHI_id[3, i] = y + + PSI_id[0, i] = -1 # ((1-x)-(1-y))/(1-eta0) + PSI_id[1, i] = 1 # (x-y)/(1-eta0) + sQR0 = specialQuadRule(qrId, PSI_id) + self.specialQuadRules[(alpha, panel, 0)] = sQR0 + if qrId.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrId.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.qrId = sQR0.qr + self.PSI_id = sQR0.PSI + elif panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(alpha, panel, 0)] + sQR1 = self.specialQuadRules[(alpha, panel, 1)] + except KeyError: + + qrVertex0 = GaussJacobi(((1, 3+alpha, 0), + (self.quad_order_diagonal, 0, 0))) + qrVertex1 = GaussJacobi(((self.quad_order_diagonal, 1, 0), + (self.quad_order_diagonal, 0, 0))) + + PSI_vertex0 = uninitialized((2*self.DoFMap.dofs_per_element - self.DoFMap.dofs_per_vertex, qrVertex0.num_nodes), dtype=REAL) + PSI_vertex1 = uninitialized((2*self.DoFMap.dofs_per_element - self.DoFMap.dofs_per_vertex, qrVertex1.num_nodes), dtype=REAL) + + # panels with common vertex + # first integral + for i in range(qrVertex0.num_nodes): + eta0 = qrVertex0.nodes[0, i] + eta1 = qrVertex0.nodes[1, i] + + # x = eta0*eta1 + # y = eta0*(1.-eta1) + + # P0 + + # phi_1(x) = 1 + # phi_2(x) = 0 + # phi_1(y) = 0 + # phi_2(y) = 1 + # psi_1(x, y) = (phi_1(x)-phi_1(y))/(x-y) = 1/(x+y) = 1/eta0 + # psi_2(x, y) = (phi_2(x)-phi_2(y))/(x-y) = -1/(x+y) = -1/eta0 + + # => P0 should not be used for singular kernels, since phi_i(x)-phi_i(y) does not cancel any singular behavior + + # P1 + + # x y + # <- | -> + # [2 1 3] + + # phi_1(x) = x + # phi_2(x) = 1-x + # phi_3(x) = 0 + # phi_1(y) = 0 + # phi_2(y) = 1-y + # phi_3(y) = y + # psi_1(x, y) = (phi_1(x)-phi_1(y))/(x-y) = (x)/(x+y) = eta1 + # psi_2(x, y) = (phi_2(x)-phi_2(y))/(x-y) = ((1-x)-(1-y))/(x+y) = 1-2*eta1 + # psi_3(x, y) = (phi_3(x)-phi_3(y))/(x-y) = (-y)/(x+y) = eta1-1 + + # x = eta0*eta1 + # PHI_vertex0[0, i] = x + # PHI_vertex0[1, i] = 1.-x + + # y = eta0*(1.-eta1) + # PHI_vertex0[2, i] = 1.-y + # PHI_vertex0[3, i] = y + + PSI_vertex0[0, i] = eta1 # (x)/eta0 + PSI_vertex0[1, i] = 1.-2.*eta1 # ((1-x)-(1-y))/eta0 + PSI_vertex0[2, i] = eta1-1. # (-y)/eta0 + # second integral + for i in range(qrVertex1.num_nodes): + eta0 = qrVertex1.nodes[0, i] + eta1 = qrVertex1.nodes[1, i] + # x = 1-eta0+eta0*eta1 + # PHI_vertex1[0, i] = x + # PHI_vertex1[1, i] = 1.-x + + # y = 1.-eta0*eta1 + # PHI_vertex1[2, i] = 1.-y + # PHI_vertex1[3, i] = y + + PSI_vertex1[0, i] = 1.-eta0+eta0*eta1 # x + PSI_vertex1[1, i] = eta0*(1.-2.*eta1) # (1-x)-(1-y) + PSI_vertex1[2, i] = eta0*eta1-1. 
# -y + + sQR0 = specialQuadRule(qrVertex0, PSI_vertex0) + sQR1 = specialQuadRule(qrVertex1, PSI_vertex1) + self.specialQuadRules[(alpha, panel, 0)] = sQR0 + self.specialQuadRules[(alpha, panel, 1)] = sQR1 + if qrVertex0.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex0.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + if qrVertex1.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex1.num_nodes), dtype=REAL) + self.temp2 = uninitialized_like(self.temp) + self.qrVertex0 = sQR0.qr + self.PSI_vertex0 = sQR0.PSI + self.qrVertex1 = sQR1.qr + self.PSI_vertex1 = sQR1.PSI + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(integrable1D, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order)) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, t + REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 + REAL_t alpha = self.kernel.getSingularityValue() + INDEX_t[::1] idx = self.idx + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + transformQuadratureRule qr0trans, qr1trans + INDEX_t dofs_per_element, numQuadNodes0, numQuadNodes1 + REAL_t c1, c2, PSI_I, PSI_J + REAL_t a_b1[2] + REAL_t a_b2[2] + REAL_t a_A1[2][2] + REAL_t a_A2[2][2] + REAL_t[::1] b1, b2 + REAL_t[:, ::1] A1, A2 + BOOL_t cutElements = False + + if self.kernel.finiteHorizon and panel >= 1 : + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + if panel == COMMON_EDGE: + # # exact value: + # val = scaling * vol1**(1.-2.*s)/(1.-s)/(3.-2.*s) + # contrib[0] = 0. + # contrib[1] = 0. + # contrib[2] = 0. + # contrib[3] = 0. + # contrib[4] = 0. + # contrib[5] = 0. + # contrib[6] = 0. + # contrib[7] = val + # contrib[8] = -val + # contrib[9] = val + + # factor 2 comes from symmetric contributions + vol = self.kernel.getScalingValue() * 2.0*vol1**2 + + contrib[:] = 0. + # distance between x and y quadrature nodes + for i in range(self.qrId.num_nodes): + self.temp[i] = (simplex1[0, 0]*self.PSI_id[0, i] + + simplex1[1, 0]*self.PSI_id[1, i])**2 + self.temp[i] = self.qrId.weights[i]*pow(self.temp[i], 0.5*alpha) + for I in range(2): + for J in range(I, 2): + k = 4*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + val = 0. + for i in range(self.qrId.num_nodes): + val += (self.temp[i] * + self.PSI_id[I, i] * + self.PSI_id[J, i]) + contrib[k] += val*vol + elif panel == COMMON_VERTEX: + vol = self.kernel.getScalingValue() * vol1*vol2 + + contrib[:] = 0. 
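+ # the search below determines which local vertex of cell 1 coincides with
+ # which local vertex of cell 2, enumerating the combinations (i, j) in the
+ # order (0,0), (0,1), (1,0), (1,1); idx then maps the local DoFs onto the
+ # reference configuration of the vertex quadrature rules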
+ + i = 0 + j = 0 + for k in range(4): + if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]: + break + elif j == 1: + j = 0 + i += 1 + else: + j += 1 + if i == 1 and j == 0: + idx[0], idx[1], idx[2] = 0, 1, 2 + t = 2 + elif i == 0 and j == 1: + idx[0], idx[1], idx[2] = 1, 0, 2 + t = 3 + else: + raise IndexError('COMMON_VERTEX') + + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + i = 3*(I//t)+(I%t) + j = 3*(J//t)+(J%t) + if j < i: + i, j = j, i + k = 4*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + val = 0. + for i in range(self.qrVertex0.num_nodes): + val += (self.qrVertex0.weights[i] * + self.PSI_vertex0[idx[I], i] * + self.PSI_vertex0[idx[J], i] * + pow(vol1*self.PSI_vertex0[0, i]-vol2*self.PSI_vertex0[2, i], alpha)) + for i in range(self.qrVertex1.num_nodes): + val += (self.qrVertex1.weights[i] * + self.PSI_vertex1[idx[I], i] * + self.PSI_vertex1[idx[J], i] * + pow(vol1*self.PSI_vertex1[0, i]-vol2*self.PSI_vertex1[2, i], alpha)) + contrib[k] += val*vol + elif panel >= 1 and not cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PSI = sQR.PSI + qr2.rule1.nodesInGlobalCoords(simplex1, self.x) + qr2.rule2.nodesInGlobalCoords(simplex2, self.y) + k = 0 + for i in range(qr2.rule1.num_nodes): + for j in range(qr2.rule2.num_nodes): + self.temp[k] = qr2.weights[k]*self.kernel.evalPtr(1, + &self.x[i, 0], + &self.y[j, 0]) + k += 1 + + vol = vol1*vol2 + k = 0 + for I in range(2*self.DoFMap.dofs_per_element): + for J in range(I, 2*self.DoFMap.dofs_per_element): + if mask & (1 << k): + val = 0. + for i in range(qr2.num_nodes): + val += self.temp[i]*PSI[I, i]*PSI[J, i] + contrib[k] = val*vol + k += 1 + elif panel >= 1 and cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + if sQR.qrTransformed0 is not None: + qr0trans = sQR.qrTransformed0 + else: + qr0 = qr2.rule1 + qr0trans = transformQuadratureRule(qr0) + sQR.qrTransformed0 = qr0trans + if sQR.qrTransformed1 is not None: + qr1trans = sQR.qrTransformed1 + else: + qr1 = qr2.rule2 + qr1trans = transformQuadratureRule(qr1) + sQR.qrTransformed1 = qr1trans + numQuadNodes0 = qr0trans.num_nodes + numQuadNodes1 = qr1trans.num_nodes + + contrib[:] = 0. 
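+            # Sketch of the cut-element strategy below (as read from the loop
+            # structure; the interaction API is taken as given): when the
+            # finite horizon cuts the element pair, kernel.interaction
+            # enumerates affine sub-simplices via barycentric maps (A, b)
+            # with volume fraction c, the quadrature nodes are pushed through
+            # those maps, and each contribution picks up the factor c1*c2.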
+ + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + b1 = a_b1 + A2 = a_A2 + b2 = a_b2 + + self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): + qr0trans.setBaryTransform(A1, b1) + qr0trans.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0trans.num_nodes): + self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) + while self.kernel.interaction.nextSubSimplex_Node(A2, b2, &c2): + qr1trans.setBaryTransform(A2, b2) + qr1trans.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1trans.num_nodes): + val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(1, &self.x[i, 0], &self.y[j, 0]) + val *= c1 * c2 * vol + k = 0 + for I in range(2*dofs_per_element): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) + for J in range(I, 2*dofs_per_element): + if mask & (1 << k): + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + +cdef class integrable2D(nonlocalLaplacian2D): + def __init__(self, Kernel kernel, meshBase mesh, DoFMap DoFMap, quad_order_diagonal=None, num_dofs=None, manifold_dim2=-1, target_order=None, **kwargs): + super(integrable2D, self).__init__(kernel, mesh, DoFMap, num_dofs, manifold_dim2, **kwargs) + + assert isinstance(DoFMap, P1_DoFMap) + + if target_order is None: + self.target_order = 3.0 + else: + self.target_order = target_order + + if quad_order_diagonal is None: + alpha = self.kernel.singularityValue + # measured log(2 rho_2) = 0.43 + quad_order_diagonal = max(np.ceil((self.target_order-0.5*alpha)/(0.43)*abs(np.log(self.hmin/self.H0))), 4) + # measured log(2 rho_2) = 0.7 + quad_order_diagonalV = max(np.ceil((self.target_order-0.5*alpha)/(0.7)*abs(np.log(self.hmin/self.H0))), 4) + else: + quad_order_diagonalV = quad_order_diagonal + self.quad_order_diagonal = quad_order_diagonal + self.quad_order_diagonalV = quad_order_diagonalV + + self.x = uninitialized((0, self.dim)) + self.y = uninitialized((0, self.dim)) + self.temp = uninitialized((0), dtype=REAL) + + self.idx = uninitialized((3), dtype=INDEX) + + self.idx1 = uninitialized((self.dim+1), dtype=INDEX) + self.idx2 = uninitialized((self.dim+1), dtype=INDEX) + self.idx3 = uninitialized((2*(self.dim+1)), dtype=INDEX) + self.idx4 = uninitialized(((2*self.DoFMap.dofs_per_element)*(2*self.DoFMap.dofs_per_element+1)//2), dtype=INDEX) + + if not self.kernel.variable: + self.getNearQuadRule(COMMON_FACE) + self.getNearQuadRule(COMMON_EDGE) + self.getNearQuadRule(COMMON_VERTEX) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef panelType getQuadOrder(self, + const REAL_t h1, + const REAL_t h2, + REAL_t d): + cdef: + panelType panel, panel2 + REAL_t logdh1 = log(d/h1), logdh2 = log(d/h2) + REAL_t c = (0.5*self.target_order+0.5)*log(self.num_dofs*self.H0**2) #-4. 
+ REAL_t logh1H0 = abs(log(h1/self.H0)), logh2H0 = abs(log(h2/self.H0)) + REAL_t loghminH0 = max(logh1H0, logh2H0) + REAL_t alpha = self.kernel.getSingularityValue() + panel = max(ceil((c + 0.5*alpha*logh2H0 + loghminH0 - (1.-0.5*alpha)*logdh2) / + (max(logdh1, 0) + 0.4)), + 2) + panel2 = max(ceil((c + 0.5*alpha*logh1H0 + loghminH0 - (1.-0.5*alpha)*logdh1) / + (max(logdh2, 0) + 0.4)), + 2) + panel = max(panel, panel2) + if self.distantQuadRulesPtr[panel] == NULL: + self.addQuadRule(panel) + return panel + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + @cython.cdivision(True) + cdef void addQuadRule(self, panelType panel): + cdef: + simplexQuadratureRule qr0, qr1 + doubleSimplexQuadratureRule qr2 + specialQuadRule sQR + REAL_t[:, ::1] PSI + INDEX_t I, k, i, j + INDEX_t numQuadNodes0, numQuadNodes1, dofs_per_element + shapeFunction sf + qr0 = simplexXiaoGimbutas(panel, self.dim) + qr1 = qr0 + qr2 = doubleSimplexQuadratureRule(qr0, qr1) + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + dofs_per_element = self.DoFMap.dofs_per_element + PSI = uninitialized((2*dofs_per_element, + qr2.num_nodes), dtype=REAL) + # phi_i(x) - phi_i(y) = phi_i(x) for i = 0,1,2 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I, k] = sf(qr0.nodes[:, i]) + k += 1 + # phi_i(x) - phi_i(y) = -phi_i(y) for i = 3,4,5 + for I in range(self.DoFMap.dofs_per_element): + sf = self.getLocalShapeFunction(I) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + PSI[I+dofs_per_element, k] = -sf(qr1.nodes[:, j]) + k += 1 + sQR = specialQuadRule(qr2, PSI) + self.distantQuadRules[panel] = sQR + self.distantQuadRulesPtr[panel] = (self.distantQuadRules[panel]) + + if numQuadNodes0 > self.x.shape[0]: + self.x = uninitialized((numQuadNodes0, self.dim), dtype=REAL) + if numQuadNodes1 > self.y.shape[0]: + self.y = uninitialized((numQuadNodes1, self.dim), dtype=REAL) + if numQuadNodes0*numQuadNodes1 > self.temp.shape[0]: + self.temp = uninitialized((numQuadNodes0*numQuadNodes1), dtype=REAL) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.wraparound(False) + cdef void getNearQuadRule(self, panelType panel): + cdef: + INDEX_t i + REAL_t alpha = self.kernel.getSingularityValue() + REAL_t eta0, eta1, eta2, eta3 + specialQuadRule sQR0, sQR1 + quadQuadratureRule qrId, qrEdge0, qrEdge1, qrVertex + REAL_t[:, :, ::1] PSI_id, PSI_edge, PSI_vertex + if panel == COMMON_FACE: + try: + sQR0 = self.specialQuadRules[(alpha, panel, 0)] + except KeyError: + # COMMON_FACE panels have 3 integral contributions. + # Each integral is over a 1D domain. + qrId = GaussJacobi(((1, 5+alpha, 0), + (1, 4+alpha, 0), + (1, 3+alpha, 0), + (self.quad_order_diagonal, 0, 0))) + PSI_id = uninitialized((3, + self.DoFMap.dofs_per_element, + qrId.num_nodes), + dtype=REAL) + for i in range(qrId.num_nodes): + eta0 = qrId.nodes[0, i] + eta1 = qrId.nodes[1, i] + eta2 = qrId.nodes[2, i] + eta3 = qrId.nodes[3, i] + + PSI_id[0, 0, i] = -eta3 + PSI_id[0, 1, i] = eta3-1. + PSI_id[0, 2, i] = 1. + + PSI_id[1, 0, i] = -1. + PSI_id[1, 1, i] = 1.-eta3 + PSI_id[1, 2, i] = eta3 + + PSI_id[2, 0, i] = eta3 + PSI_id[2, 1, i] = -1. 
+ PSI_id[2, 2, i] = 1.-eta3 + sQR0 = specialQuadRule(qrId, PSI3=PSI_id) + self.specialQuadRules[(alpha, panel, 0)] = sQR0 + if qrId.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrId.num_nodes), dtype=REAL) + self.qrId = sQR0.qr + self.PSI_id = sQR0.PSI3 + elif panel == COMMON_EDGE: + try: + sQR0 = self.specialQuadRules[(alpha, panel, 0)] + sQR1 = self.specialQuadRules[(alpha, panel, 1)] + except KeyError: + qrEdge0 = GaussJacobi(((1, 5+alpha, 0), + (1, 4+alpha, 0), + (self.quad_order_diagonal, 0, 0), + (self.quad_order_diagonal, 0, 0))) + qrEdge1 = GaussJacobi(((1, 5+alpha, 0), + (1, 4+alpha, 0), + (self.quad_order_diagonal, 1, 0), + (self.quad_order_diagonal, 0, 0))) + PSI_edge = uninitialized((5, + 2*self.DoFMap.dofs_per_element-2*self.DoFMap.dofs_per_vertex-self.DoFMap.dofs_per_edge, + qrEdge0.num_nodes), + dtype=REAL) + for i in range(qrEdge0.num_nodes): + eta0 = qrEdge0.nodes[0, i] + eta1 = qrEdge0.nodes[1, i] + eta2 = qrEdge0.nodes[2, i] + eta3 = qrEdge0.nodes[3, i] + + PSI_edge[0, 0, i] = -eta2 + PSI_edge[0, 1, i] = 1.-eta3 + PSI_edge[0, 2, i] = eta3 + PSI_edge[0, 3, i] = eta2-1. + + eta0 = qrEdge1.nodes[0, i] + eta1 = qrEdge1.nodes[1, i] + eta2 = qrEdge1.nodes[2, i] + eta3 = qrEdge1.nodes[3, i] + + PSI_edge[1, 0, i] = -eta2*eta3 + PSI_edge[1, 1, i] = eta2-1. + PSI_edge[1, 2, i] = 1. + PSI_edge[1, 3, i] = eta2*(eta3-1.) + + PSI_edge[2, 0, i] = eta2 + PSI_edge[2, 1, i] = eta2*eta3-1. + PSI_edge[2, 2, i] = 1.-eta2 + PSI_edge[2, 3, i] = -eta2*eta3 + + PSI_edge[3, 0, i] = eta2*eta3 + PSI_edge[3, 1, i] = 1.-eta2 + PSI_edge[3, 2, i] = eta2*(1.-eta3) + PSI_edge[3, 3, i] = -1. + + PSI_edge[4, 0, i] = eta2*eta3 + PSI_edge[4, 1, i] = eta2-1. + PSI_edge[4, 2, i] = 1.-eta2*eta3 + PSI_edge[4, 3, i] = -eta2 + + sQR0 = specialQuadRule(qrEdge0, PSI3=PSI_edge) + sQR1 = specialQuadRule(qrEdge1, PSI3=PSI_edge) + self.specialQuadRules[(alpha, panel, 0)] = sQR0 + self.specialQuadRules[(alpha, panel, 1)] = sQR1 + if qrEdge0.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrEdge0.num_nodes), dtype=REAL) + if qrEdge1.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrEdge1.num_nodes), dtype=REAL) + self.qrEdge0 = sQR0.qr + self.qrEdge1 = sQR1.qr + self.PSI_edge = sQR0.PSI3 + elif panel == COMMON_VERTEX: + try: + sQR0 = self.specialQuadRules[(alpha, panel, 0)] + except KeyError: + qrVertex = GaussJacobi(((1, 5+alpha, 0), + (self.quad_order_diagonalV, 0, 0), + (self.quad_order_diagonalV, 1, 0), + (self.quad_order_diagonalV, 0, 0))) + PSI_vertex = uninitialized((2, + 2*self.DoFMap.dofs_per_element-self.DoFMap.dofs_per_vertex, + qrVertex.num_nodes), + dtype=REAL) + for i in range(qrVertex.num_nodes): + eta0 = qrVertex.nodes[0, i] + eta1 = qrVertex.nodes[1, i] + eta2 = qrVertex.nodes[2, i] + eta3 = qrVertex.nodes[3, i] + + PSI_vertex[0, 0, i] = eta2-1. + PSI_vertex[0, 1, i] = 1.-eta1 + PSI_vertex[0, 2, i] = eta1 + PSI_vertex[0, 3, i] = eta2*(eta3-1.) + PSI_vertex[0, 4, i] = -eta2*eta3 + + PSI_vertex[1, 0, i] = 1.-eta2 + PSI_vertex[1, 1, i] = eta2*(1.-eta3) + PSI_vertex[1, 2, i] = eta2*eta3 + PSI_vertex[1, 3, i] = eta1-1. 
+ PSI_vertex[1, 4, i] = -eta1 + + sQR0 = specialQuadRule(qrVertex, PSI3=PSI_vertex) + self.specialQuadRules[(alpha, panel, 0)] = sQR0 + if qrVertex.num_nodes > self.temp.shape[0]: + self.temp = uninitialized((qrVertex.num_nodes), dtype=REAL) + self.qrVertex = sQR0.qr + self.PSI_vertex = sQR0.PSI3 + else: + raise NotImplementedError('Unknown panel type: {}'.format(panel)) + + def __repr__(self): + return (super(integrable2D, self).__repr__() + + 'hmin: {:.3}\n'.format(self.hmin) + + 'H0: {:.3}\n'.format(self.H0) + + 'target order: {}\n'.format(self.target_order)) + + @cython.initializedcheck(False) + @cython.boundscheck(False) + @cython.cdivision(True) + @cython.wraparound(False) + cdef void eval(self, + REAL_t[::1] contrib, + panelType panel, + MASK_t mask=ALL): + cdef: + INDEX_t k, i, j, I, J, l, m + REAL_t vol, val, vol1 = self.vol1, vol2 = self.vol2 + specialQuadRule sQR + doubleSimplexQuadratureRule qr2 + REAL_t[:, ::1] PSI + REAL_t[:, ::1] simplex1 = self.simplex1 + REAL_t[:, ::1] simplex2 = self.simplex2 + INDEX_t numQuadNodes, numQuadNodes0, numQuadNodes1, dofs_per_element + REAL_t alpha = self.kernel.getSingularityValue() + REAL_t scaling = self.kernel.getScalingValue() + INDEX_t[::1] idx1, idx2, idx3, idx4 + BOOL_t cutElements = False + REAL_t horizon2 + simplexQuadratureRule qr0, qr1 + transformQuadratureRule qr0trans, qr1trans + INDEX_t numInside + INDEX_t outside, inside1, inside2 + INDEX_t inside, outside1, outside2 + REAL_t vol3 = np.nan, vol4 = np.nan, d1, d2, c1, c2 + REAL_t PSI_I, PSI_J + REAL_t a_b1[3] + REAL_t a_b2[3] + REAL_t a_A1[3][3] + REAL_t a_A2[3][3] + REAL_t[:, ::1] A1, A2 + REAL_t[::1] b1, b2 + BOOL_t a_ind[3] + BOOL_t[::1] ind + REAL_t temp + + if self.kernel.finiteHorizon and panel >= 1 : + # check if the horizon might cut the elements + if self.kernel.interaction.relPos == CUT: + cutElements = True + if self.kernel.complement: + cutElements = False + # TODO: cutElements should be set to True, but + # need to figure out the element + # transformation. + + contrib[:] = 0. + + if panel >= 1 and not cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + PSI = sQR.PSI + qr0 = qr2.rule1 + qr1 = qr2.rule2 + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + qr0.nodesInGlobalCoords(simplex1, self.x) + qr1.nodesInGlobalCoords(simplex2, self.y) + k = 0 + for i in range(numQuadNodes0): + for j in range(numQuadNodes1): + self.temp[k] = (qr0.weights[i] * + qr1.weights[j] * + self.kernel.evalPtr(2, + &self.x[i, 0], + &self.y[j, 0])) + k += 1 + vol = vol1 * vol2 + # loop over all local DoFs + k = 0 + for I in range(2*self.DoFMap.dofs_per_element): + for J in range(I, 2*self.DoFMap.dofs_per_element): + if mask & (1 << k): + val = 0. + for l in range(numQuadNodes0*numQuadNodes1): + val += self.temp[l]*PSI[I, l]*PSI[J, l] + contrib[k] = val*vol + k += 1 + elif panel >= 1 and cutElements: + sQR = (self.distantQuadRulesPtr[panel]) + qr2 = (sQR.qr) + qr0 = qr2.rule1 + qr1 = qr2.rule2 + if sQR.qrTransformed1 is not None: + qr1trans = sQR.qrTransformed1 + else: + qr1trans = transformQuadratureRule(qr1) + sQR.qrTransformed1 = qr1trans + numQuadNodes0 = qr0.num_nodes + numQuadNodes1 = qr1.num_nodes + + horizon2 = self.kernel.getHorizonValue2() + vol = vol1*vol2 + dofs_per_element = self.DoFMap.dofs_per_element + + A1 = a_A1 + A2 = a_A2 + b1 = a_b1 + b2 = a_b2 + + # ind = a_ind + # qr0.nodesInGlobalCoords(simplex1, self.x) + # for i in range(qr0.num_nodes): + # numInside = 0 + # for j in range(3): + # d2 = 0. 
+ # for k in range(2): + # d2 += (simplex2[j, k]-self.x[i, k])**2 + # ind[j] = (d2 <= horizon2) + # numInside += ind[j] + # if numInside == 0: + # continue + # elif numInside == 1: + # inside = 0 + # while not ind[inside]: + # inside += 1 + # outside1 = (inside+1)%3 + # outside2 = (inside+2)%3 + # c1 = findIntersection(self.x[i, :], simplex2[inside, :], simplex2[outside1, :], horizon2) + # c2 = findIntersection(self.x[i, :], simplex2[inside, :], simplex2[outside2, :], horizon2) + # A1[:, :] = 0. + # b1[:] = 0. + # A1[inside,inside] = c1+c2 + # A1[inside,outside1] = c2 + # A1[inside,outside2] = c1 + # A1[outside1,outside1] = c1 + # A1[outside2,outside2] = c2 + # b1[inside] = 1-c1-c2 + # vol3 = c1*c2 + # qr1trans.setBaryTransform(A1, b1) + # qr1 = qr1trans + # elif numInside == 2: + # # outside = np.where(ind == False)[0][0] + # outside = 0 + # while ind[outside]: + # outside += 1 + # inside1 = (outside+1)%3 + # inside2 = (outside+2)%3 + # c1 = findIntersection(self.x[i,: ], simplex2[outside, :], simplex2[inside1, :], horizon2) + # c2 = findIntersection(self.x[i,: ], simplex2[outside, :], simplex2[inside2, :], horizon2) + # d1 = 0. + # d2 = 0. + # for k in range(2): + # d1 += (simplex2[outside, k] + # + c1*(simplex2[inside1, k]-simplex2[outside, k]) + # - simplex2[inside2, k])**2 + # d2 += (simplex2[outside, k] + # + c2*(simplex2[inside2, k]-simplex2[outside, k]) + # - simplex2[inside1, k]) + # A1[:, :] = 0. + # b1[:] = 0. + # A2[:, :] = 0. + # b2[:] = 0. + + # if d1 < d2: + # A1[outside,outside] = 1-c1 + # A1[inside1,inside1] = 1-c1 + # A1[inside1,inside2] = -c1 + # A1[inside2,inside2] = 1. + # b1[inside1] = c1 + # vol3 = 1-c1 + + # A2[outside,outside] = 1-c2 + # A2[inside2,inside2] = 1 + # A2[inside2,outside] = c2 + # A2[outside,inside1] = 1-c1 + # A2[inside1,inside1] = c1 + # vol4 = c1*(1-c2) + # else: + # A1[outside,outside] = 1-c2 + # A1[inside2,inside2] = 1-c2 + # A1[inside2,inside1] = -c2 + # A1[inside1,inside1] = 1. + # b1[inside2] = c2 + # vol3 = 1-c2 + + # A2[outside,outside] = 1-c1 + # A2[inside1,inside1] = 1 + # A2[inside1,outside] = c1 + # A2[outside,inside2] = 1-c2 + # A2[inside2,inside2] = c2 + # vol4 = c2*(1-c1) + + # qr1trans.setBaryTransform(A1, b1) + # qr1 = qr1trans + # else: + # qr1 = qr2.rule2 + # vol3 = 1. 
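+            # (Note on the commented-out block above and below: it implements
+            # an exact ball/triangle intersection for cut elements and appears
+            # to be kept for reference; the active code further down iterates
+            # over sub-simplices supplied by kernel.interaction instead.
+            # Also, unlike d1, the d2 accumulation above is missing the final
+            # square.)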
+ + # qr1.nodesInGlobalCoords(simplex2, self.y) + # for j in range(qr1.num_nodes): + # val = qr0.weights[i]*qr1.weights[j]*self.kernel.evalPtr(2, &self.x[i, 0], &self.y[j, 0]) + # val *= vol*vol3 + + # k = 0 + # for I in range(2*dofs_per_element): + # if I < dofs_per_element: + # PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + # else: + # PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + # for J in range(I, 2*dofs_per_element): + # if mask & (1 << k): + # if J < dofs_per_element: + # PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + # else: + # PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + # contrib[k] += val * PSI_I*PSI_J + # k += 1 + # if numInside == 2: + # qr1trans.setBaryTransform(A2, b2) + # qr1.nodesInGlobalCoords(simplex2, self.y) + # for j in range(qr1.num_nodes): + # val = qr0.weights[i]*qr1.weights[j]*self.kernel.evalPtr(2, &self.x[i, 0], &self.y[j, 0]) + # val *= vol*vol4 + + # k = 0 + # for I in range(2*dofs_per_element): + # if I < dofs_per_element: + # PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + # else: + # PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + # for J in range(I, 2*dofs_per_element): + # if mask & (1 << k): + # if J < dofs_per_element: + # PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0.nodes[0, i], numQuadNodes0) + # else: + # PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1.nodes[0, j], numQuadNodes1) + # contrib[k] += val * PSI_I*PSI_J + # k += 1 + + # contrib2 = np.zeros((contrib.shape[0]), dtype=REAL) + # qr0trans = transformQuadratureRule(qr2.rule1, self.tempNodes1) + if sQR.qrTransformed0 is not None: + qr0trans = sQR.qrTransformed0 + else: + qr0trans = transformQuadratureRule(qr0) + sQR.qrTransformed0 = qr0trans + # qr1trans = transformQuadratureRule(qr2.rule2) + + self.kernel.interaction.startLoopSubSimplices_Simplex(simplex1, simplex2) + while self.kernel.interaction.nextSubSimplex_Simplex(A1, b1, &c1): + qr0trans.setBaryTransform(A1, b1) + qr0trans.nodesInGlobalCoords(simplex1, self.x) + for i in range(qr0trans.num_nodes): + self.kernel.interaction.startLoopSubSimplices_Node(self.x[i, :], simplex2) + while self.kernel.interaction.nextSubSimplex_Node(A2, b2, &c2): + qr1trans.setBaryTransform(A2, b2) + qr1trans.nodesInGlobalCoords(simplex2, self.y) + for j in range(qr1trans.num_nodes): + val = qr0trans.weights[i]*qr1trans.weights[j]*self.kernel.evalPtr(2, &self.x[i, 0], &self.y[j, 0]) + val *= c1 * c2 * vol + k = 0 + for I in range(2*dofs_per_element): + if I < dofs_per_element: + PSI_I = self.getLocalShapeFunction(I).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_I = -self.getLocalShapeFunction(I-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) + for J in range(I, 2*dofs_per_element): + if mask & (1 << k): + if J < dofs_per_element: + PSI_J = self.getLocalShapeFunction(J).evalStrided(&qr0trans.nodes[0, i], numQuadNodes0) + else: + PSI_J = -self.getLocalShapeFunction(J-dofs_per_element).evalStrided(&qr1trans.nodes[0, j], numQuadNodes1) + contrib[k] += val * PSI_I*PSI_J + k += 1 + + elif panel == COMMON_FACE: + # factor 2 comes from symmetric contributions + vol = scaling*4.0*2.0*vol1**2 + + # three different integrals + numQuadNodes = self.qrId.num_nodes + for l in range(3): + # distance between x and y quadrature 
nodes + for i in range(numQuadNodes): + temp = 0. + for j in range(2): + temp += (simplex1[0, j]*self.PSI_id[l, 0, i] + + simplex1[1, j]*self.PSI_id[l, 1, i] + + simplex1[2, j]*self.PSI_id[l, 2, i])**2 + self.temp[i] = self.qrId.weights[i]*pow(temp, 0.5*alpha) + # loop over all local DoFs + for I in range(3): + for J in range(I, 3): + k = 6*I-(I*(I+1) >> 1) + J + if mask & (1 << k): + val = 0. + for i in range(numQuadNodes): + val += (self.temp[i] * + self.PSI_id[l, I, i] * + self.PSI_id[l, J, i]) + contrib[k] += val*vol + elif panel == COMMON_EDGE: + # order so that common edge matches up and first triangle + # is ordered in usual sense and second triangle in counter + # sense + + idx1 = self.idx1 + idx2 = self.idx2 + idx3 = self.idx3 + idx4 = self.idx4 + + k = 0 + for i in range(3): + for j in range(3): + if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]: + idx3[k] = i + idx4[k] = j + k += 1 + break + + if idx3[0] > idx3[1]: + idx3[1], idx3[0] = idx3[0], idx3[1] + + if idx3[0] == 0: + if idx3[1] == 1: + idx1[0], idx1[1], idx1[2] = 0, 1, 2 + elif idx3[1] == 2: + idx1[0], idx1[1], idx1[2] = 2, 0, 1 + else: + raise NotImplementedError("Something went wrong for COMMON_EDGE 1") + elif idx3[0] == 1 and idx3[1] == 2: + idx1[0], idx1[1], idx1[2] = 1, 2, 0 + else: + raise NotImplementedError("Something went wrong for COMMON_EDGE 1") + + if idx4[0] > idx4[1]: + idx4[1], idx4[0] = idx4[0], idx4[1] + + if idx4[0] == 0: + if idx4[1] == 1: + idx2[0], idx2[1], idx2[2] = 1, 0, 2 + elif idx4[1] == 2: + idx2[0], idx2[1], idx2[2] = 0, 2, 1 + else: + raise NotImplementedError("Something went wrong for COMMON_EDGE 2") + elif idx4[0] == 1 and idx4[1] == 2: + idx2[0], idx2[1], idx2[2] = 2, 1, 0 + else: + raise NotImplementedError("Something went wrong for COMMON_EDGE 2") + + idx3[0], idx3[1], idx3[2], idx3[3] = idx1[0], idx1[1], idx1[2], 3+idx2[2] + + vol = scaling*4.0*vol1*vol2 + + # loop over all local DoFs + m = 0 + for I in range(4): + for J in range(I, 4): + i = idx3[I] + j = idx3[J] + if j < i: + i, j = j, i + idx4[m] = 6*i-(i*(i+1) >> 1) + j + m += 1 + + # five different integrals + for l in range(5): + if l == 0: + qrEdge = self.qrEdge0 + else: + qrEdge = self.qrEdge1 + numQuadNodes = qrEdge.num_nodes + # distance between x and y quadrature nodes + for i in range(numQuadNodes): + temp = 0. + for j in range(2): + temp += (simplex1[idx1[0], j]*self.PSI_edge[l, 0, i] + + simplex1[idx1[1], j]*self.PSI_edge[l, 1, i] + + simplex1[idx1[2], j]*self.PSI_edge[l, 2, i] + + simplex2[idx2[2], j]*self.PSI_edge[l, 3, i])**2 + self.temp[i] = qrEdge.weights[i]*pow(temp, 0.5*alpha) + + # loop over all local DoFs + m = 0 + for I in range(4): + for J in range(I, 4): + k = idx4[m] + m += 1 + if mask & (1 << k): + val = 0. 
+ for i in range(numQuadNodes): + val += (self.temp[i] * + self.PSI_edge[l, I, i] * + self.PSI_edge[l, J, i]) + contrib[k] += val*vol + elif panel == COMMON_VERTEX: + # Find vertex that matches + i = 0 + j = 0 + while True: + if self.cells1[self.cellNo1, i] == self.cells2[self.cellNo2, j]: + break + if j == 2: + i += 1 + j = 0 + else: + j += 1 + + idx1 = self.idx1 + idx2 = self.idx2 + idx3 = self.idx3 + + if i == 0: + idx1[0], idx1[1], idx1[2] = 0, 1, 2 + elif i == 1: + idx1[0], idx1[1], idx1[2] = 1, 2, 0 + else: + idx1[0], idx1[1], idx1[2] = 2, 0, 1 + if j == 0: + idx2[0], idx2[1], idx2[2] = 0, 1, 2 + elif j == 1: + idx2[0], idx2[1], idx2[2] = 1, 2, 0 + else: + idx2[0], idx2[1], idx2[2] = 2, 0, 1 + idx3[0], idx3[1], idx3[2], idx3[3], idx3[4] = idx1[0], idx1[1], idx1[2], 3+idx2[1], 3+idx2[2] + + # factor 4. comes from the inverse square of the volume of the standard simplex + vol = scaling*4.0*vol1*vol2 + + # two different integrals + numQuadNodes = self.qrVertex.num_nodes + for l in range(2): + # distance between x and y quadrature nodes + for i in range(numQuadNodes): + temp = 0. + for j in range(2): + temp += (simplex1[idx1[0], j]*self.PSI_vertex[l, 0, i] + + simplex1[idx1[1], j]*self.PSI_vertex[l, 1, i] + + simplex1[idx1[2], j]*self.PSI_vertex[l, 2, i] + + simplex2[idx2[1], j]*self.PSI_vertex[l, 3, i] + + simplex2[idx2[2], j]*self.PSI_vertex[l, 4, i])**2 + self.temp[i] = self.qrVertex.weights[i]*pow(temp, 0.5*alpha) + + # loop over all local DoFs + for I in range(5): + for J in range(I, 5): + i = idx3[I] + j = idx3[J] + if j < i: + i, j = j, i + k = 6*i-(i*(i+1) >> 1) + j + if mask & (1 << k): + val = 0. + for i in range(numQuadNodes): + val += (self.temp[i] * + self.PSI_vertex[l, I, i] * + self.PSI_vertex[l, J, i]) + contrib[k] += val*vol + else: + print(np.array(simplex1), np.array(simplex2)) + raise NotImplementedError('Unknown panel type: {}'.format(panel)) diff --git a/nl/PyNucleus_nl/nonlocalProblems.py b/nl/PyNucleus_nl/nonlocalProblems.py new file mode 100644 index 0000000..eb762b4 --- /dev/null +++ b/nl/PyNucleus_nl/nonlocalProblems.py @@ -0,0 +1,833 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus_base import REAL +from PyNucleus_base.factory import factory +from PyNucleus_base.utilsFem import problem +from PyNucleus_fem import (simpleInterval, intervalWithInteraction, + uniformSquare, squareWithInteractions, + discWithInteraction, + double_graded_interval, + double_graded_interval_with_interaction, + discWithIslands, + meshFactoryClass, PHYSICAL, NO_BOUNDARY, + Lambda, constant, + indicatorFunctor, squareIndicator, radialIndicator, + P1_DoFMap, + str2DoFMapOrder, + solFractional1D, rhsFractional1D, + solFractional, rhsFractional2D, + rhsFractional2D_nonPeriodic, + functionFactory, + + meshFactory, + DIRICHLET, HOMOGENEOUS_DIRICHLET, + NEUMANN, HOMOGENEOUS_NEUMANN, + NORM) +from scipy.special import gamma as Gamma, binom +from . twoPointFunctions import (constantTwoPoint, + temperedTwoPoint, + leftRightTwoPoint, + smoothedLeftRightTwoPoint,) +from .
interactionDomains import (ball1, + ball2, + ballInf, + ellipse) +from . fractionalOrders import (constFractionalOrder, + variableConstFractionalOrder, + leftRightFractionalOrder, + smoothedLeftRightFractionalOrder, + innerOuterFractionalOrder, + islandsFractionalOrder, + layersFractionalOrder, + variableFractionalLaplacianScaling) +from . kernels2 import (getKernelEnum, + FRACTIONAL, INDICATOR, PERIDYNAMIC) +from . kernels import (getFractionalKernel, + getIntegrableKernel, + getKernel) + + +fractionalOrderFactory = factory() +fractionalOrderFactory.register('constant', constFractionalOrder, aliases=['const']) +fractionalOrderFactory.register('varConst', variableConstFractionalOrder, aliases=['constVar']) +fractionalOrderFactory.register('leftRight', leftRightFractionalOrder, aliases=['twoDomain']) +fractionalOrderFactory.register('smoothedLeftRight', smoothedLeftRightFractionalOrder, params={'r': 0.1, 'slope': 200.}, aliases=['twoDomainNonSym']) +fractionalOrderFactory.register('innerOuter', innerOuterFractionalOrder) +fractionalOrderFactory.register('islands', islandsFractionalOrder, params={'r': 0.1, 'r2': 0.6}) +fractionalOrderFactory.register('layers', layersFractionalOrder) + +interactionFactory = factory() +interactionFactory.register('ball2', ball2, aliases=['2', 2]) +interactionFactory.register('ball1', ball1, aliases=['1', 1]) +interactionFactory.register('ballInf', ballInf, aliases=['inf', np.inf]) + +kernelFactory = factory() +kernelFactory.register('fractional', getFractionalKernel) +kernelFactory.register('indicator', getIntegrableKernel, params={'kernel': INDICATOR}, aliases=['constant']) +kernelFactory.register('peridynamic', getIntegrableKernel, params={'kernel': PERIDYNAMIC}) + + +class nonlocalMeshFactoryClass(factory): + def __init__(self): + super(nonlocalMeshFactoryClass, self).__init__() + self.nonOverlappingMeshFactory = meshFactoryClass() + self.overlappingMeshFactory = meshFactoryClass() + + def register(self, name, classTypeNoOverlap, classTypeOverlap, dim, indicators, paramsNoOverlap={}, paramsOverlap={}, aliases=[]): + if classTypeNoOverlap is not None: + self.nonOverlappingMeshFactory.register(name, classTypeNoOverlap, dim, paramsNoOverlap, aliases) + if classTypeOverlap is not None: + self.overlappingMeshFactory.register(name, classTypeOverlap, dim, paramsOverlap, aliases) + super(nonlocalMeshFactoryClass, self).register(name, indicators) + + def build(self, name, kernel, boundaryCondition, noRef=0, useMulti=False, **kwargs): + domainIndicator, boundaryIndicator, interactionIndicator = super(nonlocalMeshFactoryClass, self).build(name, **kwargs) + + if boundaryCondition == HOMOGENEOUS_DIRICHLET: + if kernel.horizon.value == np.inf: + if kernel.s.max < 0.5: + tag = NO_BOUNDARY + else: + tag = PHYSICAL + zeroExterior = True + else: + tag = domainIndicator + zeroExterior = False + hasInteractionDomain = kernel.horizon.value < np.inf + elif boundaryCondition == HOMOGENEOUS_NEUMANN: + tag = NO_BOUNDARY + zeroExterior = False + hasInteractionDomain = False + elif boundaryCondition == DIRICHLET: + if kernel.horizon.value == np.inf: + if kernel.s.max < 0.5: + tag = NO_BOUNDARY + else: + tag = PHYSICAL + raise NotImplementedError() + else: + tag = NO_BOUNDARY + zeroExterior = False + hasInteractionDomain = True + elif boundaryCondition == NEUMANN: + if kernel.horizon.value == np.inf: + assert False + else: + tag = NO_BOUNDARY + zeroExterior = False + hasInteractionDomain = True + elif boundaryCondition == NORM: + tag = PHYSICAL + zeroExterior = kernel.s.max >= 0.5 + 
hasInteractionDomain = False + else: + raise NotImplementedError('Unknown boundary condition {}'.format(boundaryCondition)) + + if hasInteractionDomain: + assert 0 < kernel.horizon.value < np.inf + kwargs['horizon'] = kernel.horizon.value + mesh = self.overlappingMeshFactory.build(name, noRef, **kwargs) + else: + mesh = self.nonOverlappingMeshFactory.build(name, noRef, **kwargs) + + dmTest = P1_DoFMap(mesh, tag) + while dmTest.num_dofs == 0: + mesh = mesh.refine() + dmTest = P1_DoFMap(mesh, tag) + + + nonlocalInfo = {'domain': domainIndicator, + 'boundary': boundaryIndicator, + 'interaction': interactionIndicator, + 'tag': tag, + 'zeroExterior': zeroExterior} + return mesh, nonlocalInfo + + def getDim(self, name): + return self.nonOverlappingMeshFactory.getDim(name) + + +def intervalIndicators(a=-1, b=1, **kwargs): + eps = 1e-9 + domainIndicator = squareIndicator(np.array([a+eps], dtype=REAL), + np.array([b-eps], dtype=REAL)) + interactionIndicator = Lambda(lambda x: 1. if ((x[0] < a-eps) or (b+eps < x[0])) else 0.) + boundaryIndicator = Lambda(lambda x: 1. if ((a-eps < x[0] < a+eps) or (b-eps < x[0] < b+eps)) else 0.) + return domainIndicator, boundaryIndicator, interactionIndicator + + +def squareIndicators(ax=-1., ay=-1., bx=1., by=1., **kwargs): + domainIndicator = squareIndicator(np.array([ax+1e-9, ay+1e-9], dtype=REAL), + np.array([bx-1e-9, by-1e-9], dtype=REAL)) + interactionIndicator = constant(1.)-squareIndicator(np.array([ax-1e-9, ay-1e-9], dtype=REAL), + np.array([bx+1e-9, by+1e-9], dtype=REAL)) + boundaryIndicator = constant(1.)-domainIndicator-interactionIndicator + return domainIndicator, boundaryIndicator, interactionIndicator + + +def radialIndicators(*args, **kwargs): + domainIndicator = radialIndicator(1.-1e-9) + interactionIndicator = constant(1.)-radialIndicator(1.+1e-9) + boundaryIndicator = radialIndicator(1.+1e-9)-radialIndicator(1.-1e-9) + return domainIndicator, boundaryIndicator, interactionIndicator + + +nonlocalMeshFactory = nonlocalMeshFactoryClass() +nonlocalMeshFactory.register('interval', simpleInterval, intervalWithInteraction, 1, intervalIndicators, {'a': -1, 'b': 1}, {'a': -1, 'b': 1}) +nonlocalMeshFactory.register('gradedInterval', double_graded_interval, double_graded_interval_with_interaction, 1, intervalIndicators, {'a': -1, 'b': 1, 'mu_ll': 2., 'mu_rr': 2.}, {'a': -1, 'b': 1, 'mu_ll': 2., 'mu_rr': 2.}) +nonlocalMeshFactory.register('square', uniformSquare, squareWithInteractions, 2, squareIndicators, {'N': 2, 'M': 2, 'ax': -1, 'ay': -1, 'bx': 1, 'by': 1}, {'ax': -1, 'ay': -1, 'bx': 1, 'by': 1}) +nonlocalMeshFactory.register('disc', discWithInteraction, discWithInteraction, 2, radialIndicators, {'horizon': 0., 'radius': 1.}, {'radius': 1.}) +nonlocalMeshFactory.register('discWithIslands', discWithIslands, discWithIslands, 2, radialIndicators, {'horizon': 0., 'radius': 1., 'islandOffCenter': 0.35, 'islandDiam': 0.5}, {'radius': 1., 'islandOffCenter': 0.35, 'islandDiam': 0.5}) + + +class fractionalLaplacianProblem(problem): + def setDriverArgs(self, driver): + p = driver.addGroup('problem') + p.add('domain', acceptedValues=['interval', 'disc', 'Lshape', 'square', 'cutoutCircle', 'disconnectedInterval', 'disconnectedDomain']) + p.add('problem', acceptedValues=['constant', 'notPeriodic', 'plateau', 'sin', 'cos', 3, 'source']) + self.addParametrizedArg('const', [float]) + self.addParametrizedArg('leftRight', [float, float]) + self.addParametrizedArg('genLeftRight', [float, float, float, float]) + self.addParametrizedArg('islands', [float, float, float]) + 
self.addParametrizedArg('layers', [float, float, int]) + p.add('s', 'const(0.75)', argInterpreter=self.argInterpreter(['const', 'leftRight', 'genLeftRight', 'islands', 'layers'])) + p.add('element', acceptedValues=[1, 2]) + p.add('adaptive', acceptedValues=['residualMelenk', 'residualNochetto', 'residual', 'hierarchical', 'knownSolution', None], argInterpreter=lambda v: None if v == 'None' else v) + p.add('noRef', -1) + + def processImpl(self, params): + element = params['element'] + self.dim = nonlocalMeshFactory.getDim(params['domain']) + for sName in ['const', 'leftRight', 'genLeftRight', 'islands']: + if self.parametrizedArg(sName).match(params['s']): + s = fractionalOrderFactory.build(sName, + *self.parametrizedArg(sName).interpret(params['s'])) + break + else: + if self.parametrizedArg('layers').match(params['s']): + t = np.linspace(*self.parametrizedArg('layers').interpret(params['s']), dtype=REAL) + s = np.empty((t.shape[0], t.shape[0]), dtype=REAL) + for i in range(t.shape[0]): + for j in range(t.shape[0]): + s[i, j] = 0.5*(t[i]+t[j]) + s = layersFractionalOrder(self.dim, np.linspace(-1., 1., s.shape[0]+1, dtype=REAL), s) + else: + raise NotImplementedError(params['s']) + horizon = constant(np.inf) + normalized = params.get('normalized', True) + self.kernel = getFractionalKernel(self.dim, s, horizon=horizon, normalized=normalized) + adaptive = params['adaptive'] + problem = params['problem'] + self.sol_ex = None + self.Hs_ex = None + self.L2_ex = None + # Picking a bigger value, say eta = 7, potentially speeds up assembly. + # The impact on the error is not clear. + self.eta = 3. + + if self.dim == 1: + self.target_order = (1+element-s.min)/self.dim + else: + self.target_order = 1/self.dim + if element == 2: + raise NotImplementedError() + + if params['domain'] == 'interval': + self.meshParams = {'a': -1., 'b': 1.} + radius = 1. + if self.noRef <= 0: + if adaptive is None: + if element == 1: + self.noRef = 6 + elif element == 2: + self.noRef = 5 + else: + raise NotImplementedError(element) + else: + if element == 1: + self.noRef = 22 + elif element == 2: + self.noRef = 21 + else: + raise NotImplementedError(element) + self.eta = 1 + + if problem == 'constant': + self.rhs = constant(1.) + if isinstance(s, constFractionalOrder): + C = 2.**(-2.*s.value)*Gamma(self.dim/2.)/Gamma((self.dim+2.*s.value)/2.)/Gamma(1.+s.value) + self.Hs_ex = C * np.sqrt(np.pi)*Gamma(s.value+1)/Gamma(s.value+3/2) + self.L2_ex = np.sqrt(C**2 * np.sqrt(np.pi) * Gamma(1+2*s.value)/Gamma(3/2+2*s.value) * radius**2) + self.sol_ex = solFractional(s.value, self.dim, radius) + elif problem == 'sin': + self.rhs = Lambda(lambda x: np.sin(np.pi*x[0])) + elif problem == 'cos': + self.rhs = Lambda(lambda x: np.cos(np.pi*x[0]/2.)) + elif problem == 'plateau': + self.rhs = Lambda(np.sign) + + # def e(n): + # return (2*n+s+3/2)/2**(2*s)/np.pi / binom(n+s+1, n-1/2)**2/Gamma(s+5/2)**2 + + # k = 10 + # Hs_ex = sum([e(n) for n in range(1000000)]) + self.Hs_ex = 2**(1-2*s) / (2*s+1) / Gamma(s+1)**2 + elif isinstance(problem, int): + self.rhs = rhsFractional1D(s, problem) + self.Hs_ex = 2**(2*s)/(2*problem+s+0.5) * Gamma(1+s)**2 * binom(s+problem, problem)**2 + self.sol_ex = solFractional1D(s, problem) + else: + raise NotImplementedError(params['problem']) + elif params['domain'] == 'disconnectedInterval': + if self.noRef <= 0: + self.noRef = 40 + self.meshParams = {'sep': 0.1} + + if problem == 'constant': + self.rhs = Lambda(lambda x: 1. if x[0] > 0.5 else 0.)
+ else: + raise NotImplementedError() + elif params['domain'] == 'disc': + if self.noRef <= 0: + self.noRef = 15 + if adaptive is None: + self.noRef = 5 + else: + self.noRef = 7 + radius = 1. + self.meshParams = {'h': 0.78, 'radius': radius} + + if problem == 'constant': + self.rhs = constant(1.) + if isinstance(s, constFractionalOrder): + C = 2.**(-2.*s.value)*Gamma(self.dim/2.)/Gamma((self.dim+2.*s.value)/2.)/Gamma(1.+s.value) + self.Hs_ex = C * np.pi*radius**(2-2*s.value)/(s.value+1) + self.L2_ex = np.sqrt(C**2 * np.pi/(1+2*s.value)*radius**2) + self.sol_ex = solFractional(s.value, self.dim, radius) + elif problem == 'notPeriodic': + n = 2 + l = 2 + self.Hs_ex = 2**(2*s-1)/(2*n+s+l+1) * Gamma(1+s+n)**2/Gamma(1+n)**2 * (np.pi+np.sin(4*np.pi*l)/(4*l)) + + n = 1 + l = 5 + self.Hs_ex += 2**(2*s-1)/(2*n+s+l+1) * Gamma(1+s+n)**2/Gamma(1+n)**2 * (np.pi+np.sin(4*np.pi*l)/(4*l)) + self.rhs = rhsFractional2D_nonPeriodic(s) + elif problem == 'plateau': + self.rhs = Lambda(lambda x: x[0] > 0) + try: + from mpmath import meijerg + self.Hs_ex = np.pi/4*2**(-2*s) / (s+1) / Gamma(1+s)**2 + self.Hs_ex -= 2**(-2*s)/np.pi * meijerg([[1., 1.+s/2], [5/2+s, 5/2+s]], + [[2., 1/2, 1/2], [2.+s/2]], + -1., series=2) + self.Hs_ex = float(self.Hs_ex) + except ImportError: + self.Hs_ex = np.pi/4*2**(-2*s) / (s+1) / Gamma(1+s)**2 + for k in range(100000): + self.Hs_ex += 2**(-2*s) / Gamma(s+3)**2 / (2*np.pi) * (2*k+s+2) * (k+1) / binom(k+s+1.5, s+2)**2 + elif isinstance(problem, tuple): + n, l = problem + self.Hs_ex = 2**(2*s-1)/(2*n+s+l+1) * Gamma(1+s+n)**2/Gamma(1+n)**2 * (np.pi+np.sin(4*np.pi*l)/(4*l)) + + self.rhs = rhsFractional2D(s, n=n, l=l) + elif problem == 'sin': + self.rhs = Lambda(lambda x: np.sin(np.pi*(x[0]**2+x[1]**2))) + else: + raise NotImplementedError() + elif params['domain'] == 'square': + if self.noRef <= 0: + self.noRef = 20 + self.meshParams = {'N': 3, 'ax': -1, 'ay': -1, 'bx': 1, 'by': 1} + + if problem == 'constant': + self.rhs = constant(1.) + elif problem == 'sin': + self.rhs = Lambda(lambda x: np.sin(np.pi*x[0])*np.sin(np.pi*x[1])) + elif problem == 'source': + self.rhs = (functionFactory.build('radialIndicator', radius=0.3, center=np.array([0.2, 0.6], dtype=REAL)) - + functionFactory.build('radialIndicator', radius=0.3, center=np.array([-0.2, -0.6], dtype=REAL))) + else: + raise NotImplementedError() + elif params['domain'] == 'Lshape': + if self.noRef <= 0: + self.noRef = 20 + self.meshParams = {} + + if problem == 'constant': + self.rhs = constant(1.) + elif problem == 'sin': + self.rhs = Lambda(lambda x: np.sin(np.pi*x[0])*np.sin(np.pi*x[1])) + else: + raise NotImplementedError() + elif params['domain'] == 'cutoutCircle': + if self.noRef <= 0: + self.noRef = 30 + self.meshParams = {'radius': 1., 'cutoutAngle': np.pi/2.} + + if problem == 'constant': + self.rhs = constant(1.) 
+ elif problem == 'sin': + self.rhs = Lambda(lambda x: np.sin(np.pi*(x[0]**2+x[1]**2))) + else: + raise NotImplementedError() + else: + raise NotImplementedError(params['domain']) + + self.mesh, nI = nonlocalMeshFactory.build(params['domain'], self.kernel, HOMOGENEOUS_DIRICHLET, useMulti=True, **self.meshParams) + self.tag = nI['tag'] + + def getIdentifier(self, params): + keys = ['domain', 'problem', 's', 'noRef', 'element', 'adaptive'] + d = [] + for k in keys: + try: + d.append((k, str(self.__getattr__(k)))) + except KeyError: + d.append((k, str(params[k]))) + return '-'.join(['fracLaplAdaptive'] + [key + '=' + v for key, v in d]) + + +class nonlocalProblem(problem): + def setDriverArgs(self, driver): + driver.add('kernel', acceptedValues=['fractional', 'indicator', 'peridynamic']) + driver.add('domain', 'interval', acceptedValues=['gradedInterval', 'square', 'disc', 'discWithIslands']) + self.addParametrizedArg('indicator', [float, float]) + driver.add('problem', 'poly-Dirichlet', + argInterpreter=self.argInterpreter(['indicator'], acceptedValues=['poly-Dirichlet', 'poly-Dirichlet2', 'poly-Dirichlet3', 'poly-Neumann', 'zeroFlux', 'source', 'constant', 'exact-sin-Dirichlet', 'exact-sin-Neumann'])) + driver.add('noRef', argInterpreter=int) + self.addParametrizedArg('const', [float]) + self.addParametrizedArg('varconst', [float]) + self.addParametrizedArg('leftRight', [float, float, float, float]) + self.addParametrizedArg('twoDomain', [float, float, float, float]) + self.addParametrizedArg('twoDomainNonSym', [float, float]) + self.addParametrizedArg('layers', [float, float, int]) + self.addParametrizedArg('islands', [float, float]) + self.addParametrizedArg('islands4', [float, float, float, float]) + self.addParametrizedArg('tempered', [float]) + driver.add('s', 'const(0.4)', argInterpreter=self.argInterpreter(['const', 'varconst', 'twoDomain', 'twoDomainNonSym', 'layers', 'islands', 'islands4'])) + driver.add('horizon', 0.2) + self.addParametrizedArg('ellipse', [float, float]) + driver.add('interaction', 'ball2', argInterpreter=self.argInterpreter(['ellipse'], acceptedValues=['ball2', 'ellipse'])) + driver.add('phi', 'const(1.)', argInterpreter=self.argInterpreter(['const', 'twoDomain', 'twoDomainNonSym', 'tempered'])) + driver.add('normalized', True) + driver.add('element', acceptedValues=['P1', 'P0']) + driver.add('target_order', -1.) 
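+    # A minimal usage sketch (hypothetical invocation; only the option names
+    # registered above are real, the rest mirrors processImpl below):
+    #
+    #   kernel=fractional  s='const(0.4)'  horizon=0.2  interaction=ball2
+    #
+    # is interpreted roughly as
+    #
+    #   s = fractionalOrderFactory.build('const', 0.4)
+    #   kernel = getKernel(dim=dim, kernel=FRACTIONAL, s=s,
+    #                      horizon=constant(0.2), normalized=True,
+    #                      phi=None, interaction=ball2())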
+ + def processImpl(self, params): + + self.dim = nonlocalMeshFactory.getDim(params['domain']) + + self.kType = getKernelEnum(params['kernel']) + if self.kType == FRACTIONAL: + for sName in ['const', 'varconst', 'leftRight', 'twoDomain', 'twoDomainNonSym', 'islands']: + if self.parametrizedArg(sName).match(params['s']): + s = fractionalOrderFactory.build(sName, + *self.parametrizedArg(sName).interpret(params['s'])) + break + else: + if self.parametrizedArg('layers').match(params['s']): + t = np.linspace(*self.parametrizedArg('layers').interpret(params['s']), dtype=REAL) + s = np.empty((t.shape[0], t.shape[0]), dtype=REAL) + for i in range(t.shape[0]): + for j in range(t.shape[0]): + s[i, j] = 0.5*(t[i]+t[j]) + s = layersFractionalOrder(self.dim, np.linspace(-1., 1., s.shape[0]+1, dtype=REAL), s) + elif self.parametrizedArg('islands4').match(params['s']): + sii, soo, sio, soi = self.parametrizedArg('islands4').interpret(params['s']) + s = fractionalOrderFactory.build('islands', sii=sii, soo=soo, sio=sio, soi=soi) + else: + raise NotImplementedError(params['s']) + self.s = s + else: + self.s = None + + self.horizon = constant(params['horizon']) + + element = str2DoFMapOrder(params['element']) + if self.dim == 1: + self.eta = 1. + if self.target_order < 0.: + if self.s is not None: + self.target_order = (1+element-self.s.min)/self.dim + else: + self.target_order = 2. + else: + self.eta = 3. + if self.target_order < 0.: + self.target_order = 1/self.dim + if element == 2: + raise NotImplementedError() + + if self.parametrizedArg('const').match(params['phi']): + c, = self.parametrizedArg('const').interpret(params['phi']) + if c == 1.: + self.phi = None + else: + self.phi = constantTwoPoint(c) + elif self.parametrizedArg('twoDomain').match(params['phi']): + phill, phirr, philr, phirl = self.parametrizedArg('twoDomain').interpret(params['phi']) + self.phi = leftRightTwoPoint(phill, phirr, philr, phirl) + elif self.parametrizedArg('twoDomainNonSym').match(params['phi']): + phil, phir = self.parametrizedArg('twoDomainNonSym').interpret(params['phi']) + self.phi = smoothedLeftRightTwoPoint(phil, phir, r=0.1, slope=200.) 
+ elif self.parametrizedArg('tempered').match(params['phi']): + lambdaCoeff, = self.parametrizedArg('tempered').interpret(params['phi']) + self.phi = temperedTwoPoint(lambdaCoeff, self.dim) + else: + raise NotImplementedError(params['phi']) + + if params['interaction'] == 'ball2': + interaction = ball2() + elif self.parametrizedArg('ellipse').match(params['interaction']): + aFac, bFac = self.parametrizedArg('ellipse').interpret(params['interaction']) + interaction = ellipse(aFac, bFac) + else: + raise NotImplementedError(params['interaction']) + + normalized = params['normalized'] + self.kernel = getKernel(dim=self.dim, kernel=self.kType, s=self.s, horizon=self.horizon, normalized=normalized, phi=self.phi, interaction=interaction) + self.scaling = self.kernel.scaling + + self.analyticSolution = None + + if params['problem'] in ('poly-Neumann', 'exact-sin-Neumann', 'zeroFlux'): + self.boundaryCondition = NEUMANN + elif self.parametrizedArg('indicator').match(params['problem']): + self.boundaryCondition = HOMOGENEOUS_DIRICHLET + elif params['problem'] in ('source', 'constant'): + self.boundaryCondition = HOMOGENEOUS_DIRICHLET + else: + self.boundaryCondition = DIRICHLET + + if params['domain'] in ('interval', 'gradedInterval'): + if params['noRef'] is None: + self.noRef = 8 + self.mesh, nI = nonlocalMeshFactory.build(params['domain'], self.kernel, self.boundaryCondition) + self.tag = nI['tag'] + self.zeroExterior = nI['zeroExterior'] + self.domainInteriorIndicator = domainIndicator = nI['domain'] + self.boundaryIndicator = boundaryIndicator = nI['boundary'] + self.interactionInteriorIndicator = interactionIndicator = nI['interaction'] + if params['problem'] == 'poly-Dirichlet': + self.domainIndicator = domainIndicator + self.fluxIndicator = constant(0) + self.interactionIndicator = interactionIndicator+boundaryIndicator + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: 1-x[0]**2) + if ((self.kType == FRACTIONAL and isinstance(self.s, constFractionalOrder)) or self.kType in (INDICATOR, PERIDYNAMIC)) and self.phi is None and normalized: + self.analyticSolution = Lambda(lambda x: 1-x[0]**2) + elif params['problem'] == 'exact-sin-Dirichlet': + assert ((self.kType == INDICATOR) or (self.kType == FRACTIONAL)) and self.phi is None and normalized + + self.domainIndicator = domainIndicator + self.fluxIndicator = constant(0) + self.interactionIndicator = interactionIndicator+boundaryIndicator + horizonValue = self.kernel.horizonValue + scalingValue = self.kernel.scalingValue + + sin = functionFactory('sin1d') + if self.kType == INDICATOR: + self.rhsData = -2.*scalingValue * 2*(np.sin(np.pi*horizonValue)/np.pi-horizonValue) * sin + elif self.kType == FRACTIONAL: + from scipy.integrate import quad + assert isinstance(self.s, constFractionalOrder) + sBase = self.s.value + from scipy.special import gamma + + def Phi(delta): + if delta > 0: + fac = delta**(-2*sBase) + integral = 0. + for k in range(1, 100): + integral += fac * (-1)**(k+1) * (np.pi*delta)**(2*k) / (2*k-2*sBase) / gamma(2*k+1) + return integral + else: + return 0. 
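+                    # Phi(delta) sums the series for
+                    #     int_0^delta (1 - cos(pi*z)) / z^(1+2*s) dz,
+                    # obtained by integrating the Taylor expansion of
+                    # 1 - cos(pi*z) term by term; truncating at k = 100 is
+                    # ample in double precision. The truncated operator
+                    # applied to sin(pi*x) then gives the right-hand side
+                    # 4*scaling*Phi(horizon)*sin(pi*x) used below.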
+ + Phi_delta = Phi(horizonValue) + self.rhsData = 4 * scalingValue * Phi_delta * sin + self.fluxData = constant(0) + self.dirichletData = sin + self.analyticSolution = sin + elif params['problem'] == 'exact-sin-Neumann': + assert (self.kType == FRACTIONAL) and self.phi is None and normalized + + self.domainIndicator = domainIndicator + self.fluxIndicator = boundaryIndicator+interactionIndicator + self.interactionIndicator = constant(0.) + horizonValue = self.kernel.horizonValue + scalingValue = self.kernel.scalingValue + + sin = functionFactory('sin1d') + cos = functionFactory('cos1d') + if self.kType == FRACTIONAL: + from scipy.integrate import quad + assert isinstance(self.s, constFractionalOrder) + sBase = self.s.value + from scipy.special import gamma + + def Phi(delta): + if delta > 0: + fac = delta**(-2*sBase) + integral = 0. + for k in range(1, 100): + integral += fac * (-1)**(k+1) * (np.pi*delta)**(2*k) / (2*k-2*sBase) / gamma(2*k+1) + return integral + else: + return 0. + + Psi = lambda delta_min, delta_max: quad(lambda y: np.sin(np.pi*y)/y**(1+2*sBase), delta_min, delta_max)[0] + Phi_delta = Phi(horizonValue) + self.rhsData = 4 * scalingValue * Phi_delta * sin + + def fluxFun(x): + dist = 1+horizonValue-abs(x[0]) + assert dist >= 0 + if x[0] > 0: + return 2 * scalingValue * ((Phi_delta + Phi(dist)) * sin(x) + (Psi(dist, horizonValue)) * cos(x)) + else: + return 2 * scalingValue * ((Phi_delta + Phi(dist)) * sin(x) - (Psi(dist, horizonValue)) * cos(x)) + + self.fluxData = Lambda(fluxFun) + self.dirichletData = sin + self.analyticSolution = sin + elif params['problem'] == 'poly-Neumann': + if self.kType == FRACTIONAL: + assert self.s.max <= 0.5, "RHS is singular, need a special quadrature rule" + self.domainIndicator = domainIndicator + self.fluxIndicator = boundaryIndicator+interactionIndicator + self.interactionIndicator = constant(0.) + horizonBase = self.horizon.value + + if self.kType == FRACTIONAL: + sBase = self.s.value + + def fluxFun(x): + # dist = 1+horizonBase-abs(x[0]) + # assert dist >= 0 + # return (1+(dist/horizonBase)**(2-2*sBase) - 2*abs(x[0]) * (2-2*sBase)/(1-2*sBase)/horizonBase * (1-(dist/horizonBase)**(1-2*sBase))) + dist = 1+horizonBase-abs(x[0]) + assert dist >= 0 + return 2*self.kernel.scalingValue * ((2*abs(x[0])/(1-2*sBase)) * (dist**(1-2*sBase)-horizonBase**(1-2*sBase)) + 1/(2-2*sBase) * (dist**(2-2*sBase)+horizonBase**(2-2*sBase))) + elif self.kType == PERIDYNAMIC: + def fluxFun(x): + dist = 1+horizonBase-abs(x[0]) + assert dist >= 0 + return 2*self.kernel.scalingValue * (2*abs(x[0]) * (1-abs(x[0])) + 0.5 * (dist**2+horizonBase**2)) + elif self.kType == INDICATOR: + def fluxFun(x): + dist = 1+horizonBase-abs(x[0]) + assert dist >= 0 + return 2*self.kernel.scalingValue * (abs(x[0]) * (dist**2-horizonBase**2) + 1./3. * (dist**3+horizonBase**3)) + + self.rhsData = constant(2) + self.fluxData = Lambda(fluxFun) + self.dirichletData = Lambda(lambda x: 1-x[0]**2) + if ((self.kType == FRACTIONAL and isinstance(self.s, constFractionalOrder)) or self.kType != FRACTIONAL) and normalized: + self.analyticSolution = Lambda(lambda x: 1-x[0]**2) + elif params['problem'] == 'zeroFlux': + self.domainIndicator = domainIndicator + self.fluxIndicator = Lambda(lambda x: 1. if (x[0] > 1) else 0.) 
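+                # 'zeroFlux' prescribes a homogeneous flux on the right
+                # interaction collar (x > 1) only; the remaining exterior
+                # keeps the Dirichlet data 1-x**2 set below.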
+ self.interactionIndicator = interactionIndicator+boundaryIndicator + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: 1-x[0]**2) + elif self.parametrizedArg('indicator').match(params['problem']): + self.domainIndicator = domainIndicator + self.fluxIndicator = constant(0) + self.interactionIndicator = interactionIndicator+boundaryIndicator + # self.fluxIndicator = squareIndicator(np.array([1.], dtype=REAL), + # np.array([1.+params['horizon']-1e-9], dtype=REAL)) + center, width = self.parametrizedArg('indicator').interpret(params['problem']) + self.rhsData = squareIndicator(np.array([center-width/2], dtype=REAL), + np.array([center+width/2], dtype=REAL)) + self.fluxData = constant(0) + self.dirichletData = constant(0.) + elif params['problem'] == 'constant': + self.domainIndicator = domainIndicator + self.fluxIndicator = constant(0) + self.interactionIndicator = interactionIndicator+boundaryIndicator + self.rhsData = constant(1.) + self.fluxData = constant(0) + self.dirichletData = constant(0.) + if (self.kType == FRACTIONAL) and isinstance(self.kernel.s, constFractionalOrder): + self.analyticSolution = functionFactory('solFractional', dim=1, s=self.kernel.s.value) + else: + raise NotImplementedError(params['problem']) + elif params['domain'] == 'square': + if params['noRef'] is None: + self.noRef = 2 + meshParams = {} + # meshParams['uniform'] = True + if isinstance(self.s, layersFractionalOrder): + t = np.array(self.s.layerBoundaries)[1:-1] + meshParams['preserveLinesHorizontal'] = t.tolist() + elif isinstance(self.s, leftRightFractionalOrder) or isinstance(self.phi, leftRightTwoPoint): + meshParams['preserveLinesVertical'] = [0.] + self.mesh, nI = nonlocalMeshFactory.build('square', self.kernel, self.boundaryCondition, **meshParams) + self.tag = nI['tag'] + self.zeroExterior = nI['zeroExterior'] + self.domainInteriorIndicator = domainIndicator = nI['domain'] + self.boundaryIndicator = boundaryIndicator = nI['boundary'] + self.interactionInteriorIndicator = interactionIndicator = nI['interaction'] + self.domainIndicator = domainIndicator + self.interactionIndicator = interactionIndicator+boundaryIndicator + if params['problem'] == 'poly-Dirichlet' and isinstance(interaction, ball2): + self.fluxIndicator = constant(0) + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: 1-x[0]**2) + if (((self.kType == FRACTIONAL and isinstance(self.s, constFractionalOrder)) or + self.kType in (INDICATOR, PERIDYNAMIC)) and + self.phi is None and + normalized): + self.analyticSolution = Lambda(lambda x: 1-x[0]**2) + elif params['problem'] == 'poly-Dirichlet' and isinstance(interaction, ellipse): + aFac = np.sqrt(self.kernel.interaction.aFac2) + bFac = np.sqrt(self.kernel.interaction.bFac2) + self.fluxIndicator = constant(0) + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: (1-x[0]**2) * 2/(np.pi*self.kernel.horizon.value**4/4 * aFac**3 * bFac)) + if (self.kType == INDICATOR and + self.phi is None and + not normalized): + self.analyticSolution = self.dirichletData + elif params['problem'] == 'poly-Dirichlet2' and isinstance(interaction, ellipse): + aFac = np.sqrt(self.kernel.interaction.aFac2) + bFac = np.sqrt(self.kernel.interaction.bFac2) + self.fluxIndicator = constant(0) + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: (1-x[1]**2) * 2/(np.pi*self.kernel.horizon.value**4/4 * aFac * bFac**3)) + if (self.kType == 
INDICATOR and + self.phi is None and + not normalized): + self.analyticSolution = self.dirichletData + elif params['problem'] == 'poly-Dirichlet3' and isinstance(interaction, ellipse): + aFac = np.sqrt(self.kernel.interaction.aFac2) + bFac = np.sqrt(self.kernel.interaction.bFac2) + self.fluxIndicator = constant(0) + self.rhsData = constant(4) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: (1-x[0]**2) * 2/(np.pi*self.kernel.horizon.value**4/4 * aFac**3 * bFac) + (1-x[1]**2) * 2/(np.pi*self.kernel.horizon.value**4/4 * aFac * bFac**3)) + if (self.kType == INDICATOR and + self.phi is None and + not normalized): + self.analyticSolution = self.dirichletData + elif params['problem'] == 'poly-Neumann': + self.fluxIndicator = Lambda(lambda x: 1. if (x[0] > 1) else 0.) + raise NotImplementedError(params['problem']) + elif params['problem'] == 'source': + self.fluxIndicator = constant(0) + self.rhsData = (functionFactory.build('radialIndicator', radius=0.3, center=np.array([0.2, 0.6], dtype=REAL)) - + functionFactory.build('radialIndicator', radius=0.3, center=np.array([-0.2, -0.6], dtype=REAL))) + self.fluxData = constant(0) + self.dirichletData = constant(0) + elif params['problem'] == 'constant': + self.fluxIndicator = constant(0) + self.rhsData = constant(1.) + self.fluxData = constant(0) + self.dirichletData = constant(0) + else: + raise NotImplementedError(params['problem']) + elif params['domain'] == 'disc': + if params['noRef'] is None: + self.noRef = 4 + meshParams = {} + self.mesh, nI = nonlocalMeshFactory.build('disc', self.kernel, self.boundaryCondition, **meshParams) + self.tag = nI['tag'] + self.zeroExterior = nI['zeroExterior'] + self.domainInteriorIndicator = domainIndicator = nI['domain'] + self.boundaryIndicator = boundaryIndicator = nI['boundary'] + self.interactionInteriorIndicator = interactionIndicator = nI['interaction'] + self.domainIndicator = domainIndicator+boundaryIndicator + self.interactionIndicator = interactionIndicator + if params['problem'] == 'poly-Dirichlet': + self.fluxIndicator = constant(0) + self.rhsData = constant(2) + self.fluxData = constant(0) + self.dirichletData = Lambda(lambda x: 1-x[0]**2) + if isinstance(self.s, constFractionalOrder) and isinstance(self.phi, constantTwoPoint) and normalized: + self.analyticSolution = Lambda(lambda x: 1-x[0]**2) + elif params['problem'] == 'poly-Neumann': + self.fluxIndicator = Lambda(lambda x: 1. if (x[0] > 1) else 0.) + raise NotImplementedError(params['problem']) + elif params['problem'] == 'source': + self.fluxIndicator = constant(0) + self.rhsData = (functionFactory.build('radialIndicator', radius=0.3, center=np.array([0.2, 0.6], dtype=REAL)) - + functionFactory.build('radialIndicator', radius=0.3, center=np.array([-0.2, -0.6], dtype=REAL))) + self.fluxData = constant(0) + self.dirichletData = constant(0) + elif params['problem'] == 'constant': + self.fluxIndicator = constant(0) + self.rhsData = constant(1.) 
+                self.fluxData = constant(0)
+                self.dirichletData = constant(0)
+                if (self.kType == FRACTIONAL) and isinstance(self.kernel.s, constFractionalOrder):
+                    self.analyticSolution = functionFactory('solFractional', dim=2, s=self.kernel.s.value)
+            else:
+                raise NotImplementedError(params['problem'])
+        elif params['domain'] == 'discWithIslands':
+            if params['noRef'] is None:
+                self.noRef = 4
+            meshParams = {}
+            self.mesh, nI = nonlocalMeshFactory.build('discWithIslands', self.kernel, self.boundaryCondition, **meshParams)
+            self.tag = nI['tag']
+            self.zeroExterior = nI['zeroExterior']
+            self.domainInteriorIndicator = domainIndicator = nI['domain']
+            self.boundaryIndicator = boundaryIndicator = nI['boundary']
+            self.interactionInteriorIndicator = interactionIndicator = nI['interaction']
+            self.domainIndicator = domainIndicator+boundaryIndicator
+            self.interactionIndicator = interactionIndicator
+            if params['problem'] == 'poly-Dirichlet':
+                self.fluxIndicator = constant(0)
+                self.rhsData = constant(2)
+                self.fluxData = constant(0)
+                self.dirichletData = Lambda(lambda x: 1-x[0]**2)
+                if isinstance(self.s, constFractionalOrder) and isinstance(self.phi, constantTwoPoint) and normalized:
+                    self.analyticSolution = Lambda(lambda x: 1-x[0]**2)
+            elif params['problem'] == 'poly-Neumann':
+                self.fluxIndicator = Lambda(lambda x: 1. if (x[0] > 1) else 0.)
+                raise NotImplementedError(params['problem'])
+            elif params['problem'] == 'source':
+                self.fluxIndicator = constant(0)
+                self.rhsData = (functionFactory.build('radialIndicator', radius=0.3, center=np.array([0.2, 0.6], dtype=REAL)) -
+                                functionFactory.build('radialIndicator', radius=0.3, center=np.array([-0.2, -0.6], dtype=REAL)))
+                self.fluxData = constant(0)
+                self.dirichletData = constant(0)
+            elif params['problem'] == 'constant':
+                self.fluxIndicator = constant(0)
+                self.rhsData = constant(1.)
+                self.fluxData = constant(0)
+                self.dirichletData = constant(0)
+            else:
+                raise NotImplementedError(params['problem'])
+        else:
+            raise NotImplementedError(params['domain'])
+
+        # should be equal to the forcing term within the domain and equal to
+        # the flux term in the interaction region
+        self.rhs = (indicatorFunctor(self.rhsData, self.domainIndicator) +
+                    indicatorFunctor(self.fluxData, self.fluxIndicator))
+
+    def getIdentifier(self, params):
+        keys = ['domain', 'problem', 's', 'horizon', 'phi', 'noRef']
+        d = []
+        for k in keys:
+            try:
+                d.append((k, str(self.__getattr__(k))))
+            except KeyError:
+                d.append((k, str(params[k])))
+        return '-'.join(['nonlocal'] + [key + '=' + v for key, v in d])
+
+
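The `rhs` assembled in the last lines above is a pointwise masking: the forcing term is active wherever the domain indicator is nonzero, and the prescribed flux wherever the flux indicator is. A minimal plain-Python sketch of that composition (stand-in lambdas, not the actual PyNucleus `indicatorFunctor`/`function` classes):

    import numpy as np

    def indicator_functor(data, indicator):
        # zero out `data` wherever `indicator` vanishes
        return lambda x: data(x) if indicator(x) != 0 else 0.

    rhs_data = lambda x: 2.                             # forcing inside the domain
    domain_ind = lambda x: 1. if abs(x[0]) < 1 else 0.  # hypothetical domain indicator
    flux_data = lambda x: 0.                            # no prescribed flux here
    flux_ind = lambda x: 0.

    rhs = lambda x: (indicator_functor(rhs_data, domain_ind)(x) +
                     indicator_functor(flux_data, flux_ind)(x))

    print(rhs(np.array([0.5])))   # 2.0, inside the domain
    print(rhs(np.array([1.5])))   # 0.0, in the interaction region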
diff --git a/nl/PyNucleus_nl/panelTypes.pxi b/nl/PyNucleus_nl/panelTypes.pxi
new file mode 100644
index 0000000..7a86ff5
--- /dev/null
+++ b/nl/PyNucleus_nl/panelTypes.pxi
@@ -0,0 +1,15 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+DEF DISTANT = 0
+DEF COMMON_VERTEX = -1
+DEF COMMON_EDGE = -2
+DEF COMMON_FACE = -3
+DEF SEPARATED = -4
+DEF IGNORED = -5
+DEF ON_HORIZON = -6
diff --git a/nl/PyNucleus_nl/twoPointFunctions.pxd b/nl/PyNucleus_nl/twoPointFunctions.pxd
new file mode 100644
index 0000000..79f9209
--- /dev/null
+++ b/nl/PyNucleus_nl/twoPointFunctions.pxd
@@ -0,0 +1,63 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+cimport numpy as np
+from PyNucleus_base.myTypes cimport INDEX_t, REAL_t, BOOL_t
+from PyNucleus_fem.functions cimport function
+from . kernelsCy cimport kernelCy
+
+
+cdef class twoPointFunction:
+    cdef:
+        public BOOL_t symmetric
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y)
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y)
+
+
+cdef class productTwoPoint(twoPointFunction):
+    cdef:
+        twoPointFunction f1, f2
+
+
+cdef class constantTwoPoint(twoPointFunction):
+    cdef:
+        public REAL_t value
+
+
+cdef class leftRightTwoPoint(twoPointFunction):
+    cdef:
+        public REAL_t ll, lr, rl, rr, interface
+
+
+cdef class matrixTwoPoint(twoPointFunction):
+    cdef:
+        public REAL_t[:, ::1] mat
+        REAL_t[::1] n
+
+
+cdef class temperedTwoPoint(twoPointFunction):
+    cdef:
+        public REAL_t lambdaCoeff
+        public INDEX_t dim
+
+
+cdef class smoothedLeftRightTwoPoint(twoPointFunction):
+    cdef:
+        public REAL_t vl, vr, r, slope, fac
+
+
+cdef class parametrizedTwoPointFunction(twoPointFunction):
+    cdef:
+        void *params
+    cdef void setParams(self, void *params)
+    cdef void* getParams(self)
+
+
+cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction):
+    cdef:
+        twoPointFunction f1, f2
diff --git a/nl/PyNucleus_nl/twoPointFunctions.pyx b/nl/PyNucleus_nl/twoPointFunctions.pyx
new file mode 100644
index 0000000..c6cd5da
--- /dev/null
+++ b/nl/PyNucleus_nl/twoPointFunctions.pyx
@@ -0,0 +1,407 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,           #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the           #
+# U.S. Government retains certain rights in this software.                        #
+# If you want to use this code, please refer to the README.rst and LICENSE files. #
+###################################################################################
+
+
+import numpy as np
+cimport numpy as np
+cimport cython
+from libc.math cimport sqrt, exp, atan
+from PyNucleus_base.myTypes import INDEX, REAL, ENCODE, BOOL
+
+
+cdef class twoPointFunction:
+    def __init__(self, BOOL_t symmetric):
+        self.symmetric = symmetric
+
+    def __call__(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.eval(x, y)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        pass
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        pass
+
+    def __getstate__(self):
+        return self.symmetric
+
+    def __setstate__(self, state):
+        twoPointFunction.__init__(self, state)
+
+    def plot(self, mesh, **kwargs):
+        cdef:
+            INDEX_t i, j
+            REAL_t[:, ::1] S
+            REAL_t[::1] S2
+            REAL_t[::1] x, y
+        import matplotlib.pyplot as plt
+        c = np.array(mesh.getCellCenters())
+        if mesh.dim == 1:
+            X, Y = np.meshgrid(c[:, 0], c[:, 0])
+            x = np.empty((mesh.dim), dtype=REAL)
+            y = np.empty((mesh.dim), dtype=REAL)
+            S = np.zeros((mesh.num_cells, mesh.num_cells))
+            for i in range(mesh.num_cells):
+                for j in range(mesh.num_cells):
+                    x[0] = X[i, j]
+                    y[0] = Y[i, j]
+                    S[i, j] = self.eval(x, y)
+            plt.pcolormesh(X, Y, S, **kwargs)
+            plt.colorbar()
+            plt.xlabel(r'$x$')
+            plt.ylabel(r'$y$')
+        elif mesh.dim == 2:
+            # use a one-dimensional buffer here; the 2d memoryview S above
+            # does not match this shape
+            S2 = np.zeros(mesh.num_cells)
+            for i in range(mesh.num_cells):
+                S2[i] = self(c[i, :], c[i, :])
+            mesh.plotFunction(np.array(S2), flat=True)
+        else:
+            raise NotImplementedError()
+
+    def __mul__(self, twoPointFunction other):
+        if isinstance(self, constantTwoPoint) and isinstance(other, constantTwoPoint):
+            return constantTwoPoint(self.value*other.value)
+        elif isinstance(self, parametrizedTwoPointFunction) or isinstance(other, parametrizedTwoPointFunction):
+            return productParametrizedTwoPoint(self, other)
+        else:
+            return productTwoPoint(self, other)
+
+
+cdef class lambdaTwoPoint(twoPointFunction):
+    cdef:
+        object fun
+
+    def __init__(self, fun, BOOL_t symmetric):
+        super(lambdaTwoPoint, self).__init__(symmetric)
+        self.fun = fun
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.fun(x, y)
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            # wrap the raw pointers in memoryviews; a bare assignment from
+            # REAL_t* does not compile
+            REAL_t[::1] xA = <REAL_t[:dim]> x
+            REAL_t[::1] yA = <REAL_t[:dim]> y
+        return self.fun(xA, yA)
+
+    def __repr__(self):
+        return 'Lambda({})'.format(self.fun)
+
+    def __getstate__(self):
+        return (self.fun, self.symmetric)
+
+    def __setstate__(self, state):
+        lambdaTwoPoint.__init__(self, state[0], state[1])
+
+
+cdef class productTwoPoint(twoPointFunction):
+    def __init__(self, twoPointFunction f1, twoPointFunction f2):
+        super(productTwoPoint, self).__init__(f1.symmetric and f2.symmetric)
+        self.f1 = f1
+        self.f2 = f2
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.f1.eval(x, y)*self.f2.eval(x, y)
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        return self.f1.evalPtr(dim, x, y)*self.f2.evalPtr(dim, x, y)
+
+    def __repr__(self):
+        return '{}*{}'.format(self.f1, self.f2)
+
+    def __getstate__(self):
+        return self.f1, self.f2
+
+    def __setstate__(self, state):
+        productTwoPoint.__init__(self, state[0], state[1])
+
+
+cdef class constantTwoPoint(twoPointFunction):
+    def __init__(self, REAL_t value):
+        super(constantTwoPoint, self).__init__(True)
+        self.value = value
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.value
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        return self.value
+
+    def __repr__(self):
+        return '{}'.format(self.value)
+
+    def __getstate__(self):
+        return self.value
+
+    def __setstate__(self, state):
+        constantTwoPoint.__init__(self, state)
+
+
+cdef class matrixTwoPoint(twoPointFunction):
+    def __init__(self, REAL_t[:, ::1] mat):
+        self.mat = mat
+        assert mat.shape[0] == mat.shape[1]
+        symmetric = True
+        for i in range(mat.shape[0]):
+            for j in range(i, mat.shape[0]):
+                if abs(mat[i, j]-mat[j, i]) > 1e-12:
+                    symmetric = False
+        super(matrixTwoPoint, self).__init__(symmetric)
+        self.n = np.zeros((mat.shape[0]), dtype=REAL)
+
+    def __getstate__(self):
+        return (self.mat)
+
+    def __setstate__(self, state):
+        matrixTwoPoint.__init__(self, state)
+
+    def __repr__(self):
+        return '{}({},sym={})'.format(self.__class__.__name__, np.array(self.mat), self.symmetric)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        cdef:
+            INDEX_t dim = x.shape[0]
+            INDEX_t i, j
+            REAL_t d = 0.
+        for i in range(dim):
+            self.n[i] = x[i] - y[i]
+            d += self.n[i]**2
+        # guard against x == y, mirroring evalPtr below
+        if d > 0:
+            d = sqrt(d)
+            for i in range(dim):
+                self.n[i] /= d
+            d = 0.
+            for i in range(dim):
+                for j in range(dim):
+                    d += self.n[i]*self.mat[i, j]*self.n[j]
+            return d
+        return 1.
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            INDEX_t i, j
+            REAL_t d = 0.
+        for i in range(dim):
+            self.n[i] = x[i] - y[i]
+            d += self.n[i]**2
+        if d > 0:
+            d = sqrt(d)
+            for i in range(dim):
+                self.n[i] /= d
+            d = 0.
+            for i in range(dim):
+                for j in range(dim):
+                    d += self.n[i]*self.mat[i, j]*self.n[j]
+            return d
+        return 1.
+
+
+cdef class tensorTwoPoint(twoPointFunction):
+    cdef:
+        function f1, f2
+
+    def __init__(self, function f1, function f2=None):
+        self.f1 = f1
+        if f2 is not None:
+            self.f2 = f2
+            super(tensorTwoPoint, self).__init__(False)
+        else:
+            self.f2 = f1
+            super(tensorTwoPoint, self).__init__(True)
+
+    def __getstate__(self):
+        if self.symmetric:
+            return self.f1
+        else:
+            return (self.f1, self.f2)
+
+    def __setstate__(self, state):
+        if isinstance(state, tuple):
+            tensorTwoPoint.__init__(self, state[0], state[1])
+        else:
+            tensorTwoPoint.__init__(self, state)
+
+    def __repr__(self):
+        return '{}({},{},sym={})'.format(self.__class__.__name__, self.f1, self.f2, self.symmetric)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        return self.f1(x)*self.f2(y)
+
+    cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y):
+        cdef:
+            # wrap the raw pointers in memoryviews; a bare assignment from
+            # REAL_t* does not compile
+            REAL_t[::1] xv = <REAL_t[:dim]> x
+            REAL_t[::1] yv = <REAL_t[:dim]> y
+        return self.f1(xv)*self.f2(yv)
+
+
+cdef class leftRightTwoPoint(twoPointFunction):
+    def __init__(self, REAL_t ll, REAL_t rr, REAL_t lr=np.nan, REAL_t rl=np.nan, REAL_t interface=0.):
+        if not np.isfinite(lr):
+            lr = 0.5*(ll+rr)
+        if not np.isfinite(rl):
+            rl = 0.5*(ll+rr)
+        super(leftRightTwoPoint, self).__init__(rl == lr)
+        self.ll = ll
+        self.lr = lr
+        self.rl = rl
+        self.rr = rr
+        self.interface = interface
+
+    def __getstate__(self):
+        # include the interface, otherwise it is lost on unpickling
+        return (self.ll, self.rr, self.lr, self.rl, self.interface)
+
+    def __setstate__(self, state):
+        leftRightTwoPoint.__init__(self, state[0], state[1], state[2], state[3], state[4])
+
+    def __repr__(self):
+        return '{}(ll={},rr={},lr={},rl={},interface={},sym={})'.format(self.__class__.__name__, self.ll, self.rr, self.lr, self.rl, self.interface, self.symmetric)
+
+    @cython.wraparound(False)
+    @cython.boundscheck(False)
+    cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y):
+        if
x[0] < self.interface: + if y[0] < self.interface: + return self.ll + else: + return self.lr + else: + if y[0] < self.interface: + return self.rl + else: + return self.rr + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + if x[0] < self.interface: + if y[0] < self.interface: + return self.ll + else: + return self.lr + else: + if y[0] < self.interface: + return self.rl + else: + return self.rr + + +cdef class temperedTwoPoint(twoPointFunction): + def __init__(self, REAL_t lambdaCoeff, INDEX_t dim): + super(temperedTwoPoint, self).__init__(True) + self.lambdaCoeff = lambdaCoeff + self.dim = dim + + def __getstate__(self): + return (self.lambdaCoeff, self.dim) + + def __setstate__(self, state): + temperedTwoPoint.__init__(self, state[0], state[1]) + + def __repr__(self): + return '{}(lambda={})'.format(self.__class__.__name__, self.lambdaCoeff) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + cdef: + INDEX_t i + REAL_t r = 0. + for i in range(self.dim): + r += (x[i]-y[i])*(x[i]-y[i]) + return exp(-self.lambdaCoeff*sqrt(r)) + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + cdef: + INDEX_t i + REAL_t r = 0. + for i in range(dim): + r += (x[i]-y[i])*(x[i]-y[i]) + return exp(-self.lambdaCoeff*sqrt(r)) + + +cdef class smoothedLeftRightTwoPoint(twoPointFunction): + def __init__(self, REAL_t vl, REAL_t vr, REAL_t r=0.1, REAL_t slope=200.): + super(smoothedLeftRightTwoPoint, self).__init__(False) + self.vl = vl + self.vr = vr + self.r = r + self.slope = slope + self.fac = 1./atan(r*slope) + + def __getstate__(self): + return (self.vl, self.vr, self.r, self.slope) + + def __setstate__(self, state): + smoothedLeftRightTwoPoint.__init__(self, state[0], state[1], state[2], state[3]) + + def __repr__(self): + return '{}(vl={},vr={},r={},slope={})'.format(self.__class__.__name__, self.vl, self.vr, self.r, self.slope) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + if x[0] < -self.r: + return self.vl + elif x[0] > self.r: + return self.vr + return 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac + + cdef REAL_t evalPtr(self, INDEX_t dim, REAL_t* x, REAL_t* y): + if x[0] < -self.r: + return self.vl + elif x[0] > self.r: + return self.vr + return 0.5*(self.vl+self.vr)+0.5*(self.vr-self.vl)*atan(x[0]*self.slope) * self.fac + + +cdef class parametrizedTwoPointFunction(twoPointFunction): + def __init__(self, BOOL_t symmetric): + super(parametrizedTwoPointFunction, self).__init__(symmetric) + + cdef void setParams(self, void *params): + self.params = params + + cdef void* getParams(self): + return self.params + + +cdef class productParametrizedTwoPoint(parametrizedTwoPointFunction): + def __init__(self, twoPointFunction f1, twoPointFunction f2): + super(productParametrizedTwoPoint, self).__init__(f1.symmetric and f2.symmetric) + self.f1 = f1 + self.f2 = f2 + + cdef void setParams(self, void *params): + cdef: + parametrizedTwoPointFunction f + if isinstance(self.f1, parametrizedTwoPointFunction): + f = self.f1 + f.setParams(params) + if isinstance(self.f2, parametrizedTwoPointFunction): + f = self.f2 + f.setParams(params) + + @cython.wraparound(False) + @cython.boundscheck(False) + cdef REAL_t eval(self, REAL_t[::1] x, REAL_t[::1] y): + return self.f1.eval(x, y)*self.f2.eval(x, y) + + def __repr__(self): + return '{}*{}'.format(self.f1, self.f2) + + def __getstate__(self): + return self.f1, self.f2 + + def 
__setstate__(self, state): + productParametrizedTwoPoint.__init__(self, state[0], state[1]) diff --git a/nl/setup.cfg b/nl/setup.cfg new file mode 100644 index 0000000..4510365 --- /dev/null +++ b/nl/setup.cfg @@ -0,0 +1,7 @@ + +[versioneer] +VCS = git +style = pep440 +versionfile_source = PyNucleus_nl/_version.py +tag_prefix = +parentdir_prefix = \ No newline at end of file diff --git a/nl/setup.py b/nl/setup.py new file mode 100644 index 0000000..3d167dc --- /dev/null +++ b/nl/setup.py @@ -0,0 +1,49 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +try: + from PyNucleus_base.setupUtils import package +except ImportError as e: + raise ImportError('\'PyNucleus_base\' needs to be installed first.') from e + +p = package('PyNucleus_nl') + +p.loadConfig(extra_config={'annotate': False}) + + +p.addExtension("nonlocalLaplacianBase", + sources=[p.folder+"nonlocalLaplacianBase.pyx"]) +p.addExtension("nonlocalLaplacian", + sources=[p.folder+"nonlocalLaplacian.pyx"]) +p.addExtension("fractionalLaplacian1D", + sources=[p.folder+"fractionalLaplacian1D.pyx"]) +p.addExtension("fractionalLaplacian2D", + sources=[p.folder+"fractionalLaplacian2D.pyx"]) +p.addExtension("twoPointFunctions", + sources=[p.folder+"twoPointFunctions.pyx"]) +p.addExtension("interactionDomains", + sources=[p.folder+"interactionDomains.pyx"]) +p.addExtension("kernels2", + sources=[p.folder+"kernels2.pyx"]) +p.addExtension("fractionalOrders", + sources=[p.folder+"fractionalOrders.pyx"]) +p.addExtension("kernelsCy", + sources=[p.folder+"kernelsCy.pyx", + p.folder+"kernels.cpp"], + depends=[p.folder+"kernels.hpp"], + language="c++") +p.addExtension("clusterMethodCy", + sources=[p.folder+"clusterMethodCy.pyx"]) + +p.addExtension("nonlocalLaplacianND", + sources=[p.folder+"nonlocalLaplacianND.pyx"]) + +p.setup(description="Nonlocal operator assembly", + install_requires=['cython', 'numpy', 'scipy', + 'mpi4py>=2.0.0', + 'PyNucleus_base', 'PyNucleus_fem', 'PyNucleus_multilevelSolver']) diff --git a/nl/versioneer.py b/nl/versioneer.py new file mode 100644 index 0000000..d9c300b --- /dev/null +++ b/nl/versioneer.py @@ -0,0 +1,2116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +# Version: 0.21 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. 
The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. 
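+
+As a concrete sketch of the quick-install result (assuming `versioneer
+install` has been run and `setup.cfg` carries a `[versioneer]` section like
+the one shown earlier in this patch), the project's `setup.py` then wires
+Versioneer in like this:
+
+    import versioneer
+    from setuptools import setup
+
+    setup(
+        name="myproject",  # placeholder project name
+        version=versioneer.get_version(),
+        cmdclass=versioneer.get_cmdclass(),
+    )
+
+`get_cmdclass()` returns wrapped build/sdist commands, which is how `setup.py
+build` and `setup.py sdist` learn to swap in the small static `_version.py`
+described above.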
+
+## Version-String Flavors
+
+Code which uses Versioneer can learn about its version string at runtime by
+importing `_version` from your main `__init__.py` file and running the
+`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can
+import the top-level `versioneer.py` and run `get_versions()`.
+
+Both functions return a dictionary with different flavors of version
+information:
+
+* `['version']`: A condensed version string, rendered using the selected
+  style. This is the most commonly used value for the project's version
+  string. The default "pep440" style yields strings like `0.11`,
+  `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section
+  below for alternative styles.
+
+* `['full-revisionid']`: detailed revision identifier. For Git, this is the
+  full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac".
+
+* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the
+  commit date in ISO 8601 format. This will be None if the date is not
+  available.
+
+* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that
+  this is only accurate if run in a VCS checkout, otherwise it is likely to
+  be False or None.
+
+* `['error']`: if the version string could not be computed, this will be set
+  to a string describing the problem, otherwise it will be None. It may be
+  useful to throw an exception in setup.py if this is set, to avoid e.g.
+  creating tarballs with a version string of "unknown".
+
+Some variants are more useful than others. Including `full-revisionid` in a
+bug report should allow developers to reconstruct the exact code being tested
+(or indicate the presence of local changes that should be shared with the
+developers). `version` is suitable for display in an "about" box or a CLI
+`--version` output: it can be easily compared against release notes and lists
+of bugs fixed in various releases.
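+
+For example, code that needs more than the plain version string can inspect
+the whole dictionary (a sketch; `myproject` is a placeholder package name):
+
+    from myproject._version import get_versions
+
+    info = get_versions()
+    print(info['version'])        # e.g. "0.11+2.g1076c97"
+    if info['error'] is not None:
+        # fail loudly rather than shipping a tarball versioned "unknown"
+        raise RuntimeError(info['error'])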
+
+The installer adds the following text to your `__init__.py` to place a basic
+version in `YOURPROJECT.__version__`:
+
+    from ._version import get_versions
+    __version__ = get_versions()['version']
+    del get_versions
+
+## Styles
+
+The setup.cfg `style=` configuration controls how the VCS information is
+rendered into a version string.
+
+The default style, "pep440", produces a PEP440-compliant string, equal to the
+un-prefixed tag name for actual releases, and containing an additional "local
+version" section with more detail for in-between builds. For Git, this is
+TAG[+DISTANCE.gHEX[.dirty]], using information from `git describe --tags
+--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
+tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
+that this commit is two revisions ("+2") beyond the "0.11" tag. For released
+software (exactly equal to a known tag), the identifier will only contain the
+stripped tag, e.g. "0.11".
+
+Other styles are available. See [details.md](details.md) in the Versioneer
+source tree for descriptions.
+
+## Debugging
+
+Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
+to return a version of "0+unknown". To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).
+
+## Known Limitations
+
+Some situations are known to cause problems for Versioneer. This section
+details the most significant ones. More can be found on the GitHub
+[issues page](https://github.com/python-versioneer/python-versioneer/issues).
+
+### Subprojects
+
+Versioneer has limited support for source trees in which `setup.py` is not in
+the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to be easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ . The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
+  dependency
+* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
+  versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
+  plugin
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to find the project root directory. "
" + "Versioneer requires setup.py to be executed from " + "its immediate directory (like 'python setup.py COMMAND'), " + "or in a way that lets it use sys.argv[0] to find the root " + "(like 'python path/to/setup.py COMMAND').") + raise VersioneerBadRootError(err) + try: + # Certain runtime workflows (setup.py install/develop in a setuptools + # tree) execute all dependencies in a single python process, so + # "versioneer" may be imported multiple times, and python's shared + # module-import table will cache the first one. So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, 
process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. 
+ + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. 
+ The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
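+            # (By contrast, a parsable example with purely hypothetical
+            # values would be "v1.2-4-g1234abc-dirty": tag "v1.2", 4
+            # commits since the tag, short id "1234abc", dirty work tree.)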
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
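+# A hypothetical example of the payload written below (illustrative
+# values only):
+#   {"date": "2021-10-12T11:08:00-0600", "dirty": false, "error": null,
+#    "full-revisionid": "<40-character hex id>", "version": "1.0"}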
+
+import json
+
+version_json = '''
+%s
+'''  # END VERSION_JSON
+
+
+def get_versions():
+    return json.loads(version_json)
+"""
+
+
+def versions_from_file(filename):
+    """Try to determine the version from _version.py if present."""
+    try:
+        with open(filename) as f:
+            contents = f.read()
+    except OSError:
+        raise NotThisMethod("unable to read _version.py")
+    mo = re.search(r"version_json = '''\n(.*)'''  # END VERSION_JSON",
+                   contents, re.M | re.S)
+    if not mo:
+        mo = re.search(r"version_json = '''\r\n(.*)'''  # END VERSION_JSON",
+                       contents, re.M | re.S)
+    if not mo:
+        raise NotThisMethod("no version_json in _version.py")
+    return json.loads(mo.group(1))
+
+
+def write_to_version_file(filename, versions):
+    """Write the given version number to the given _version.py file."""
+    os.unlink(filename)
+    contents = json.dumps(versions, sort_keys=True,
+                          indent=1, separators=(",", ": "))
+    with open(filename, "w") as f:
+        f.write(SHORT_VERSION_PY % contents)
+
+    print("set %s to '%s'" % (filename, versions["version"]))
+
+
+def plus_or_dot(pieces):
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_branch(pieces):
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).
+
+    Exceptions:
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver):
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
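+    #
+    # The fallbacks below are tried in order, each raising NotThisMethod
+    # when it does not apply:
+    #   1. expanded git-archive keywords  (git_versions_from_keywords)
+    #   2. an already-rewritten _version.py  (versions_from_file)
+    #   3. 'git describe' on a source checkout  (pieces_from_vcs, render)
+    #   4. the parent directory name  (versions_from_parentdir)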
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
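+        # Mirrors the cx_Freeze handling above: a static _version.py is
+        # written while py2exe runs, and the keyword-substitution template
+        # is restored afterwards.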
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
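+    # With versionfile_source set to e.g. "src/myproject/_version.py"
+    # (the hypothetical value from CONFIG_ERROR above), this ensures that
+    # .gitattributes contains the line:
+    #   src/myproject/_version.py export-subst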
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/packageTools/.gitattributes b/packageTools/.gitattributes new file mode 100644 index 0000000..2a2de01 --- /dev/null +++ b/packageTools/.gitattributes @@ -0,0 +1,2 @@ + +PyNucleus_packageTools/_version.py export-subst diff --git a/packageTools/MANIFEST.in b/packageTools/MANIFEST.in new file mode 100644 index 0000000..007d8b8 --- /dev/null +++ b/packageTools/MANIFEST.in @@ -0,0 +1,3 @@ + +include versioneer.py +include PyNucleus_packageTools/_version.py diff --git a/packageTools/PyNucleus_packageTools/__init__.py b/packageTools/PyNucleus_packageTools/__init__.py new file mode 100644 index 0000000..63c7d9c --- /dev/null +++ b/packageTools/PyNucleus_packageTools/__init__.py @@ -0,0 +1,335 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import os +import multiprocessing +import re +from copy import copy +from pathlib import Path + + +############################################################################### +# from +# https://stackoverflow.com/questions/11013851/speeding-up-build-process-with-distutils + +from distutils.ccompiler import CCompiler +from distutils.command.build_ext import build_ext +try: + from concurrent.futures import ThreadPoolExecutor as Pool +except ImportError: + from multiprocessing.pool import ThreadPool as LegacyPool + + # To ensure the with statement works. 
Required for some older 2.7.x releases + class Pool(LegacyPool): + def __enter__(self): + return self + + def __exit__(self, *args): + self.close() + self.join() + + +def build_extensions(self): + """Function to monkey-patch + distutils.command.build_ext.build_ext.build_extensions + + """ + self.check_extensions_list(self.extensions) + + try: + num_jobs = os.cpu_count() + except AttributeError: + num_jobs = multiprocessing.cpu_count() + + with Pool(num_jobs) as pool: + pool.map(self.build_extension, self.extensions) + + +def compile( + self, sources, output_dir=None, macros=None, include_dirs=None, + debug=0, extra_preargs=None, extra_postargs=None, depends=None): + """Function to monkey-patch distutils.ccompiler.CCompiler""" + macros, objects, extra_postargs, pp_opts, build = self._setup_compile( + output_dir, macros, include_dirs, sources, depends, extra_postargs + ) + cc_args = self._get_cc_args(pp_opts, debug, extra_preargs) + + for obj in objects: + try: + src, ext = build[obj] + except KeyError: + continue + self._compile(obj, src, ext, cc_args, extra_postargs, pp_opts) + + # Return *all* object filenames, not just the ones we just built. + return objects + + +build_ext.build_extensions = build_extensions +CCompiler.compile = compile + +############################################################################### + + +class package: + def __init__(self, name, namespace=''): + import multiprocessing + + self.name = name + self.namespace = namespace + if self.namespace != '': + self.full_name = self.namespace+'.'+self.name + self.folder = self.namespace+'/'+self.name + '/' + else: + self.full_name = self.name + self.folder = self.name + '/' + self.configLoaded = False + self.extensions = [] + self.options = [] + self.defaults = {'compileArgs': ['-O3', '-pipe', '-Wno-cpp'], # '-fdiagnostics-color=always' + 'linkArgs': ['-O3', '-pipe'], + 'includeDirs': [], + 'macros': [], + 'use_ccache': True, + 'setupProfiling': False, + 'cythonDirectives': {'language_level': '2', + 'embedsignature': True, + 'binding': True}, + 'annotate': False, + 'arch': 'detect', + 'compiler_c': 'detect', + 'compiler_c++': 'detect', + 'threads': multiprocessing.cpu_count()} + self.addOption('USE_OPENMP', 'useOpenMP', False) + self.addOption(None, 'gitSHA', self.getGitSHA()) + + def addOption(self, optionCy, optionPy, default, pkgDependencies=[]): + if isinstance(pkgDependencies, str): + pkgDependencies = [pkgDependencies] + self.options.append((optionCy, optionPy, pkgDependencies)) + self.defaults[optionPy] = default + + def loadConfig(self, filename='config.yaml', extra_config={}): + defaults = self.defaults + if Path(filename).exists(): + import yaml + config = yaml.load(open(filename, 'r'), Loader=yaml.FullLoader) + defaults.update(config) + self.config = defaults + else: + self.config = defaults + self.config.update(extra_config) + self.configLoaded = True + self.setCompiler() + self.setInclude() + self.setProfiling() + self.setupOpenMP() + self.setOptions() + + def setCompiler(self): + assert self.configLoaded + # set compiler + if self.config['compiler_c'] == 'detect': + if 'MPICC' in os.environ: + self.config['compiler_c'] = os.environ['MPICC'] + else: + try: + import mpi4py + self.config['compiler_c'] = str(mpi4py.get_config()['mpicc']) + except: + self.config['compiler_c'] = 'mpicc' + os.environ['CC'] = self.config['compiler_c'] + if self.config['compiler_c++'] == 'detect': + if 'MPICXX' in os.environ: + self.config['compiler_c++'] = os.environ['MPICXX'] + else: + try: + import mpi4py + 
self.config['compiler_c++'] = str(mpi4py.get_config()['mpicxx']) + except: + self.config['compiler_c++'] = 'mpicxx' + os.environ['CXX'] = self.config['compiler_c++'] + from shutil import which + if self.config['use_ccache'] and which('ccache') is not None: + os.environ['OMPI_CC'] = 'ccache gcc' + os.environ['OMPI_CXX'] = 'ccache g++' + + def setInclude(self): + assert self.configLoaded + try: + import numpy + self.config['includeDirs'] += [numpy.get_include()] + except ImportError: + pass + try: + import mpi4py + self.config['includeDirs'] += [mpi4py.get_include()] + except ImportError: + pass + + def setupOpenMP(self): + assert self.configLoaded + if self.config['useOpenMP']: + self.config['compileArgs'] += ['-fopenmp'] + self.config['linkArgs'] += ['-fopenmp'] + self.config['macros'] += [('USE_OPENMP', 1)] + else: + self.config['macros'] += [('USE_OPENMP', 0)] + + def setProfiling(self): + assert self.configLoaded + # set up profiling + if self.config['setupProfiling']: + print('Building with profiling') + self.config['cythonDirectives']['linetrace'] = True + self.config['cythonDirectives']['binding'] = True + self.config['macros'] += [('CYTHON_TRACE', '1')] + + def updateFile(self, filename, content): + try: + with open(filename, 'r') as f: + contentOld = f.read(-1) + except: + contentOld = '' + if content != contentOld: + with open(filename, 'w') as f: + f.write(content) + + def setOptions(self): + assert self.configLoaded + cy = '' + py = '' + for optionCy, optionPy, _ in self.options: + if isinstance(self.config[optionPy], str): + value = '\"{}\"'.format(self.config[optionPy]) + else: + value = self.config[optionPy] + if optionCy is not None: + cy += 'DEF {} = {}\n'.format(optionCy, value) + if optionPy is not None: + py += '{} = {}\n'.format(optionPy, value) + self.updateFile(self.folder+'/config.pxi', cy) + self.updateFile(self.folder+'/config.py', py) + + def addExtension(self, ext_name, **kwargs): + assert self.configLoaded + from setuptools import Extension + if 'extra_compile_args' in kwargs: + kwargs['extra_compile_args'] += self.config['compileArgs'] + else: + kwargs['extra_compile_args'] = self.config['compileArgs'] + kwargs['extra_link_args'] = self.config['linkArgs'] + kwargs['define_macros'] = self.config['macros'] + kwargs['include_dirs'] = self.config['includeDirs'] + self.extensions.append(Extension(self.full_name+'.'+ext_name, **kwargs)) + + def setup(self, **kwargs): + assert self.configLoaded + from setuptools import setup + + if 'install_requires' not in kwargs: + kwargs['install_requires'] = [] + for _, optionPy, pkgDependencies in self.options: + if self.config[optionPy]: + kwargs['install_requires'] += pkgDependencies + for includeDir in self.config['includeDirs']: + if not Path(includeDir).exists(): + import warnings + warnings.warn('The include path \'{}\' does not exist.'.format(includeDir)) + + if len(self.extensions) > 0: + from Cython.Build import cythonize + kwargs['ext_modules'] = cythonize(self.extensions, + include_path=self.config['includeDirs'], + compiler_directives=self.config['cythonDirectives'], + annotate=self.config['annotate'], + nthreads=self.config['threads']) + kwargs['name'] = self.name + import versioneer + kwargs['version'] = versioneer.get_version() + # kwargs['cmdclass'] = versioneer.get_cmdclass() + # kwargs['version'] = self.getGitDate() + if self.namespace != '': + kwargs['namespace_packages'] = [self.namespace] + if self.namespace != '': + from setuptools import find_namespace_packages + kwargs['packages'] = 
find_namespace_packages(include=[self.namespace+'.*']) + else: + kwargs['packages'] = [self.full_name] + kwargs['package_data'] = {self.name: ['*.pxd', '*_decl_*.pxi', '*config.pxi', '*.h']} + kwargs['zip_safe'] = False + if 'author' not in kwargs: + kwargs['author'] = 'Christian Glusa' + if 'author_email' not in kwargs: + kwargs['author_email'] = 'caglusa@sandia.gov' + if 'platforms' not in kwargs: + kwargs['platforms'] = 'any' + if 'license' not in kwargs: + kwargs['license'] = 'MIT' + if 'license_files' not in kwargs: + kwargs['license_files'] = ['../LICENSE'] + setup(**kwargs) + + def getGitDate(self): + # import datetime + # return datetime.datetime.today().strftime('%Y.%-m.%-d') + try: + from subprocess import Popen, PIPE + proc = Popen('git log -1 --format=%cd --date="format:%Y.%-m.%-d"', shell=True, stdout=PIPE) + proc.wait() + sha = proc.stdout.read() + return sha[:-1].decode('utf-8') + except: + return '' + + def getGitSHA(self): + try: + from subprocess import Popen, PIPE + proc = Popen('git describe --always --dirty --abbrev=40', shell=True, stdout=PIPE) + proc.wait() + sha = proc.stdout.read() + return sha[:-1].decode('utf-8') + except: + return '' + + def hash_file(self, filename): + import hashlib + hasher = hashlib.md5() + try: + with open(filename, 'rb') as afile: + buf = afile.read() + hasher.update(buf) + file_hash = hasher.hexdigest() + return file_hash + except: + return + + +def fillTemplate(basedir, templates, replacements): + for tmp in templates: + with open(str(basedir/tmp), 'r') as f: + lines = ''.join(f.readlines()) + for i in range(len(replacements)): + newLines = copy(lines) + newFileName = tmp + for key, value in replacements[i]: + r = re.compile(key) + newLines = r.subn(value, newLines)[0] + newFileName = r.sub(value, newFileName) + if (basedir/newFileName).exists(): + with open(str(basedir/newFileName), 'r') as f: + oldLines = ''.join(f.readlines()) + if oldLines == newLines: + print('Skipping {}'.format(newFileName)) + continue + print('Generating {}'.format(newFileName)) + with open(str(basedir/newFileName), 'w') as f: + f.write(newLines) + +from . import _version +__version__ = _version.get_versions()['version'] diff --git a/packageTools/PyNucleus_packageTools/_version.py b/packageTools/PyNucleus_packageTools/_version.py new file mode 100644 index 0000000..14c12ce --- /dev/null +++ b/packageTools/PyNucleus_packageTools/_version.py @@ -0,0 +1,652 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + + +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "$Format:%d$" + git_full = "$Format:%H$" + git_date = "$Format:%ci$" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "pep440" + cfg.tag_prefix = "" + cfg.parentdir_prefix = "" + cfg.versionfile_source = "PyNucleus_packageTools/_version.py" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+
+    git_describe = describe_out
+
+    # look for -dirty suffix
+    dirty = git_describe.endswith("-dirty")
+    pieces["dirty"] = dirty
+    if dirty:
+        git_describe = git_describe[:git_describe.rindex("-dirty")]
+
+    # now we have TAG-NUM-gHEX or HEX
+
+    if "-" in git_describe:
+        # TAG-NUM-gHEX
+        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
+        if not mo:
+            # unparsable. Maybe git-describe is misbehaving?
+            pieces["error"] = ("unable to parse git-describe output: '%s'"
+                               % describe_out)
+            return pieces
+
+        # tag
+        full_tag = mo.group(1)
+        if not full_tag.startswith(tag_prefix):
+            if verbose:
+                fmt = "tag '%s' doesn't start with prefix '%s'"
+                print(fmt % (full_tag, tag_prefix))
+            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
+                               % (full_tag, tag_prefix))
+            return pieces
+        pieces["closest-tag"] = full_tag[len(tag_prefix):]
+
+        # distance: number of commits since tag
+        pieces["distance"] = int(mo.group(2))
+
+        # commit: short hex revision ID
+        pieces["short"] = mo.group(3)
+
+    else:
+        # HEX: no tags
+        pieces["closest-tag"] = None
+        count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root)
+        pieces["distance"] = int(count_out)  # total number of commits
+
+    # commit date: see ISO-8601 comment in git_versions_from_keywords()
+    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
+    # Use only the last line. Previous lines may contain GPG signature
+    # information.
+    date = date.splitlines()[-1]
+    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
+
+    return pieces
+
+
+def plus_or_dot(pieces):
+    """Return a + if we don't already have one, else return a ."""
+    if "+" in pieces.get("closest-tag", ""):
+        return "."
+    return "+"
+
+
+def render_pep440(pieces):
+    """Build up version string, with post-release "local version identifier".
+
+    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
+    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
+
+    Exceptions:
+    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_branch(pieces):
+    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
+
+    The ".dev0" means not master branch. Note that .dev0 sorts backwards
+    (a feature branch will appear "older" than the master branch).
+
+    Exceptions:
+    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0"
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+untagged.%d.g%s" % (pieces["distance"],
+                                          pieces["short"])
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def pep440_split_post(ver):
+    """Split pep440 version string at the post-release segment.
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%d" % (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%d" % pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyway.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%s" % pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%s" % pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%d" % pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%d" % pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+    The distance/hash is unconditional.
+
+    Exceptions:
+    1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} diff --git a/packageTools/PyNucleus_packageTools/sphinxTools.py b/packageTools/PyNucleus_packageTools/sphinxTools.py new file mode 100644 index 0000000..e20f863 --- /dev/null +++ b/packageTools/PyNucleus_packageTools/sphinxTools.py @@ -0,0 +1,85 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
#
+###################################################################################
+
+
+class codeRegion:
+    """Context manager that captures stdout within the region and optionally
+    appends the region's source lines to an exported script."""
+
+    def __init__(self, mgr, label, isFinalTarget, codeTarget=''):
+        self.mgr = mgr
+        self.label = label
+        self.isTarget = isFinalTarget
+        self.codeTarget = codeTarget
+
+    def __enter__(self):
+        from inspect import getframeinfo, stack
+        import sys
+        from io import StringIO
+        # remember where the 'with' block starts and redirect stdout
+        caller = getframeinfo(stack()[1][0])
+        self.startLine = caller.lineno
+        self._stdout = sys.stdout
+        sys.stdout = self._stringio = StringIO()
+
+        return self
+
+    def __exit__(self, type, value, traceback):
+        from inspect import getframeinfo, stack
+        import sys
+
+        sys.stdout = self._stdout
+
+        caller = getframeinfo(stack()[1][0])
+
+        self.endLine = caller.lineno
+        if self.codeTarget != '':
+            # append the dedented source of the 'with' block to the export file
+            with open(caller.filename, 'r') as f:
+                lines = f.readlines()
+            from textwrap import dedent
+            code = dedent(''.join(lines[self.startLine:self.endLine]))
+            code += '\n'
+            with open(self.codeTarget, 'a') as f:
+                f.writelines(code)
+
+        if self.isTarget:
+            # replay the output that was captured while the region ran
+            print(self._stringio.getvalue())
+
+
+class codeRegionManager:
+    """Creates codeRegion context managers based on command line options."""
+
+    def __init__(self):
+        from argparse import ArgumentParser
+
+        parser = ArgumentParser()
+        parser.add_argument('--export', help='filename for code export')
+        parser.add_argument('--finalTarget', default='', help='code up to this code region should be executed')
+        args = parser.parse_args()
+
+        if args.export is not None:
+            self.codeTarget = args.export
+            from pathlib import Path
+            # start from a clean export file
+            try:
+                Path(self.codeTarget).unlink()
+            except FileNotFoundError:
+                pass
+        else:
+            self.codeTarget = ''
+        self.finalTarget = args.finalTarget
+        self.finalTargetHit = False
+
+        if self.finalTarget == '' and self.codeTarget != '':
+            with open(self.codeTarget, 'w') as f:
+                f.write('#!/usr/bin/env python3\n')
+
+    def add(self, label, onlyIfFinal=False):
+        import sys
+        if self.finalTarget == label:
+            self.finalTargetHit = True
+        else:
+            if self.finalTargetHit:
+                # everything up to the final target has run, so stop here
+                sys.exit(0)
+        return codeRegion(self,
+                          label,
+                          isFinalTarget=(self.finalTarget == label) or (self.finalTarget == ''),
+                          codeTarget=self.codeTarget if (not onlyIfFinal or self.finalTargetHit or self.finalTarget == '') else '')
diff --git a/packageTools/setup.cfg b/packageTools/setup.cfg
new file mode 100644
index 0000000..179d2bc
--- /dev/null
+++ b/packageTools/setup.cfg
@@ -0,0 +1,7 @@
+
+[versioneer]
+VCS = git
+style = pep440
+versionfile_source = PyNucleus_packageTools/_version.py
+tag_prefix = 
+parentdir_prefix = 
\ No newline at end of file
diff --git a/packageTools/setup.py b/packageTools/setup.py
new file mode 100644
index 0000000..3233fdb
--- /dev/null
+++ b/packageTools/setup.py
@@ -0,0 +1,20 @@
+###################################################################################
+# Copyright 2021 National Technology & Engineering Solutions of Sandia,          #
+# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the          #
+# U.S. Government retains certain rights in this software.                       #
+# If you want to use this code, please refer to the README.rst and LICENSE files.
# +################################################################################### + + +import sys +import os + +sys.path.insert(0, os.path.join(os.path.dirname(__file__), '../packageTools')) + +from PyNucleus_packageTools import package + +p = package('PyNucleus_packageTools') +p.loadConfig() + +p.setup(description='tools for setting up Python packages', + install_requires=['numpy', 'scipy', 'matplotlib', 'Cython', 'mpi4py>=2.0.0', 'tabulate', 'PyYAML', 'H5py', 'modepy', 'meshpy']) diff --git a/packageTools/versioneer.py b/packageTools/versioneer.py new file mode 100644 index 0000000..d9c300b --- /dev/null +++ b/packageTools/versioneer.py @@ -0,0 +1,2116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +# Version: 0.21 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. + + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). 
Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. + +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. 
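+
+As a concrete illustration (assumed values, not taken from a real checkout),
+a tree two commits past the "0.11" tag with local modifications might report:
+
+    get_versions()
+    # -> {'version': '0.11+2.g1076c97.dirty',
+    #     'full-revisionid': '1076c978a8d3cfc70f408fe5974aa6c092c949ac',
+    #     'dirty': True, 'error': None,
+    #     'date': '2021-10-12T11:08:00-0600'}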
+
+The installer adds the following text to your `__init__.py` to place a basic
+version in `YOURPROJECT.__version__`:
+
+    from ._version import get_versions
+    __version__ = get_versions()['version']
+    del get_versions
+
+## Styles
+
+The setup.cfg `style=` configuration controls how the VCS information is
+rendered into a version string.
+
+The default style, "pep440", produces a PEP440-compliant string, equal to the
+un-prefixed tag name for actual releases, and containing an additional "local
+version" section with more detail for in-between builds. For Git, this is
+TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags
+--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the
+tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and
+that this commit is two revisions ("+2") beyond the "0.11" tag. For released
+software (exactly equal to a known tag), the identifier will only contain the
+stripped tag, e.g. "0.11".
+
+Other styles are available. See [details.md](details.md) in the Versioneer
+source tree for descriptions.
+
+## Debugging
+
+Versioneer tries to avoid fatal errors: if something goes wrong, it will tend
+to return a version of "0+unknown". To investigate the problem, run `setup.py
+version`, which will run the version-lookup code in a verbose mode, and will
+display the full contents of `get_versions()` (including the `error` string,
+which may help identify what went wrong).
+
+## Known Limitations
+
+Some situations are known to cause problems for Versioneer. This details the
+most significant ones. More can be found on the Github
+[issues page](https://github.com/python-versioneer/python-versioneer/issues).
+
+### Subprojects
+
+Versioneer has limited support for source trees in which `setup.py` is not in
+the root directory (e.g. `setup.py` and `.git/` are *not* siblings). There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+ +Versioneer-0.16 and earlier only looked for a `.git` directory next to the +`setup.cfg`, so subprojects were completely unsupported with those releases. + +### Editable installs with setuptools <= 18.5 + +`setup.py develop` and `pip install --editable .` allow you to install a +project into a virtualenv once, then continue editing the source code (and +test) without re-installing after every change. + +"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a +convenient way to specify executable scripts that should be installed along +with the python package. + +These both work as expected when using modern setuptools. When using +setuptools-18.5 or earlier, however, certain operations will cause +`pkg_resources.DistributionNotFound` errors when running the entrypoint +script, which must be resolved by re-installing the package. This happens +when the install happens with one version, then the egg_info data is +regenerated while a different version is checked out. Many setup.py commands +cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into +a different virtualenv), so this can be surprising. + +[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes +this one, but upgrading to a newer version of setuptools should probably +resolve it. + + +## Updating Versioneer + +To upgrade your project to a new release of Versioneer, do the following: + +* install the new Versioneer (`pip install -U versioneer` or equivalent) +* edit `setup.cfg`, if necessary, to include any new configuration settings + indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details. +* re-run `versioneer install` in your source tree, to replace + `SRC/_version.py` +* commit any changed files + +## Future Directions + +This tool is designed to make it easily extended to other version-control +systems: all VCS-specific components are in separate directories like +src/git/ . The top-level `versioneer.py` script is assembled from these +components by running make-versioneer.py . In the future, make-versioneer.py +will take a VCS name as an argument, and will construct a version of +`versioneer.py` that is specific to the given VCS. It might also take the +configuration arguments that are currently provided manually during +installation by editing setup.py . Alternatively, it might go the other +direction and include code from all supported VCS systems, reducing the +number of intermediate scripts. + +## Similar projects + +* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time + dependency +* [minver](https://github.com/jbweston/miniver) - a lightweight reimplementation of + versioneer +* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools + plugin + +## License + +To make Versioneer easier to embed, all its code is dedicated to the public +domain. The `_version.py` that it creates is also in the public domain. +Specifically, both are released under the Creative Commons "Public Domain +Dedication" license (CC0-1.0), as described in +https://creativecommons.org/publicdomain/zero/1.0/ . 
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to find the project root directory. "
+               "Versioneer requires setup.py to be executed from "
+               "its immediate directory (like 'python setup.py COMMAND'), "
+               "or in a way that lets it use sys.argv[0] to find the root "
+               "(like 'python path/to/setup.py COMMAND').")
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one. So we can't use
+        # os.path.dirname(__file__), as that will find whichever
+        # versioneer.py was first imported, even in later projects.
+        my_path = os.path.realpath(os.path.abspath(__file__))
+        me_dir = os.path.normcase(os.path.splitext(my_path)[0])
+        vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0])
+        if me_dir != vsr_dir:
+            print("Warning: build in %s is using versioneer.py from %s"
+                  % (os.path.dirname(my_path), versioneer_py))
+    except NameError:
+        pass
+    return root
+
+
+def get_config_from_root(root):
+    """Read the project setup.cfg file to determine Versioneer config."""
+    # This might raise OSError (if setup.cfg is missing), or
+    # configparser.NoSectionError (if it lacks a [versioneer] section), or
+    # configparser.NoOptionError (if it lacks "VCS="). See the docstring at
+    # the top of versioneer.py for instructions on writing your setup.cfg .
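+    # A minimal working configuration, mirroring packageTools/setup.cfg from
+    # this patch, looks like:
+    #
+    #     [versioneer]
+    #     VCS = git
+    #     style = pep440
+    #     versionfile_source = PyNucleus_packageTools/_version.py
+    #     tag_prefix =
+    #     parentdir_prefix =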
+ setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). 
+ git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
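+    # Example of what gets parsed here (illustrative values): an expanded
+    # _version.py inside a git-archive tarball contains lines like
+    #     git_refnames = " (tag: 1.0)"
+    #     git_full = "1076c978a8d3cfc70f408fe5974aa6c092c949ac"
+    # while a plain checkout still carries the unexpanded Format keywords.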
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%%d" %% (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%%d" %% pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%%s" %% pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%%d" %% pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always --long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags.
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
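+    # A sketch of that fallback order, for a hypothetical project
+    # "myproject" tagged "1.0":
+    #   1. expanded 'git archive' keywords inside _version.py,
+    #   2. a version_json block already written into _version.py (sdists),
+    #   3. 'git describe' against a live checkout,
+    #   4. the parent directory name, "myproject-1.0" -> version "1.0".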
+ + get_keywords_f = handlers.get("get_keywords") + from_keywords_f = handlers.get("keywords") + if get_keywords_f and from_keywords_f: + try: + keywords = get_keywords_f(versionfile_abs) + ver = from_keywords_f(keywords, cfg.tag_prefix, verbose) + if verbose: + print("got version from expanded keyword %s" % ver) + return ver + except NotThisMethod: + pass + + try: + ver = versions_from_file(versionfile_abs) + if verbose: + print("got version from file %s %s" % (versionfile_abs, ver)) + return ver + except NotThisMethod: + pass + + from_vcs_f = handlers.get("pieces_from_vcs") + if from_vcs_f: + try: + pieces = from_vcs_f(cfg.tag_prefix, root, verbose) + ver = render(pieces, cfg.style) + if verbose: + print("got version from VCS %s" % ver) + return ver + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + if verbose: + print("got version from parentdir %s" % ver) + return ver + except NotThisMethod: + pass + + if verbose: + print("unable to compute version") + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, "error": "unable to compute version", + "date": None} + + +def get_version(): + """Get the short version string for this project.""" + return get_versions()["version"] + + +def get_cmdclass(cmdclass=None): + """Get the custom setuptools/distutils subclasses used by Versioneer. + + If the package uses a different cmdclass (e.g. one from numpy), it + should be provided as an argument. + """ + if "versioneer" in sys.modules: + del sys.modules["versioneer"] + # this fixes the "python setup.py develop" case (also 'install' and + # 'easy_install .'), in which subdependencies of the main project are + # built (using setup.py bdist_egg) in the same python process. Assume + # a main project A and a dependency B, which use different versions + # of Versioneer. A's setup.py imports A's Versioneer, leaving it in + # sys.modules by the time B's setup.py is executed, causing B to run + # with the wrong versioneer. Setuptools wraps the sub-dep builds in a + # sandbox that restores sys.modules to its pre-build state, so the + # parent is protected against the child's "import versioneer". By + # removing ourselves from sys.modules here, before the child build + # happens, we protect the child from the parent's versioneer too. + # Also see https://github.com/python-versioneer/python-versioneer/issues/52 + + cmds = {} if cmdclass is None else cmdclass.copy() + + # we add "version" to both distutils and setuptools + from distutils.core import Command + + class cmd_version(Command): + description = "report generated version string" + user_options = [] + boolean_options = [] + + def initialize_options(self): + pass + + def finalize_options(self): + pass + + def run(self): + vers = get_versions(verbose=True) + print("Version: %s" % vers["version"]) + print(" full-revisionid: %s" % vers.get("full-revisionid")) + print(" dirty: %s" % vers.get("dirty")) + print(" date: %s" % vers.get("date")) + if vers["error"]: + print(" error: %s" % vers["error"]) + cmds["version"] = cmd_version + + # we override "build_py" in both distutils and setuptools + # + # most invocation pathways end up running build_py: + # distutils/build -> build_py + # distutils/install -> distutils/build ->.. + # setuptools/bdist_wheel -> distutils/install ->.. + # setuptools/bdist_egg -> distutils/install_lib -> build_py + # setuptools/install -> bdist_egg ->.. + # setuptools/develop -> ?
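+    #  (usage sketch, hypothetical override: calling
+    #   versioneer.get_cmdclass({'build_py': my_build_py}) makes the
+    #   wrappers below extend my_build_py instead of the stock command)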
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
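+        # (same pattern as cmd_build_exe above: temporarily write a static
+        # _version.py, run the frozen build, then restore the template)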
+ from py2exe.distutils_buildexe import py2exe as _py2exe + + class cmd_py2exe(_py2exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _py2exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["py2exe"] = cmd_py2exe + + # we override different "sdist" commands for both environments + if 'sdist' in cmds: + _sdist = cmds['sdist'] + elif "setuptools" in sys.modules: + from setuptools.command.sdist import sdist as _sdist + else: + from distutils.command.sdist import sdist as _sdist + + class cmd_sdist(_sdist): + def run(self): + versions = get_versions() + self._versioneer_generated_versions = versions + # unless we update this, the command will keep using the old + # version + self.distribution.metadata.version = versions["version"] + return _sdist.run(self) + + def make_release_tree(self, base_dir, files): + root = get_root() + cfg = get_config_from_root(root) + _sdist.make_release_tree(self, base_dir, files) + # now locate _version.py in the new base_dir directory + # (remembering that it may be a hardlink) and replace it with an + # updated value + target_versionfile = os.path.join(base_dir, cfg.versionfile_source) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, + self._versioneer_generated_versions) + cmds["sdist"] = cmd_sdist + + return cmds + + +CONFIG_ERROR = """ +setup.cfg is missing the necessary Versioneer configuration. You need +a section like: + + [versioneer] + VCS = git + style = pep440 + versionfile_source = src/myproject/_version.py + versionfile_build = myproject/_version.py + tag_prefix = + parentdir_prefix = myproject- + +You will also need to edit your setup.py to use the results: + + import versioneer + setup(version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), ...) + +Please read the docstring in ./versioneer.py for configuration instructions, +edit setup.cfg, and re-run the installer or 'python versioneer.py setup'. +""" + +SAMPLE_CONFIG = """ +# See the docstring in versioneer.py for instructions. Note that you must +# re-run 'versioneer.py setup' after changing this section, and commit the +# resulting files. + +[versioneer] +#VCS = git +#style = pep440 +#versionfile_source = +#versionfile_build = +#tag_prefix = +#parentdir_prefix = + +""" + +OLD_SNIPPET = """ +from ._version import get_versions +__version__ = get_versions()['version'] +del get_versions +""" + +INIT_PY_SNIPPET = """ +from . 
import {0} +__version__ = {0}.get_versions()['version'] +""" + + +def do_setup(): + """Do main VCS-independent setup function for installing Versioneer.""" + root = get_root() + try: + cfg = get_config_from_root(root) + except (OSError, configparser.NoSectionError, + configparser.NoOptionError) as e: + if isinstance(e, (OSError, configparser.NoSectionError)): + print("Adding sample versioneer config to setup.cfg", + file=sys.stderr) + with open(os.path.join(root, "setup.cfg"), "a") as f: + f.write(SAMPLE_CONFIG) + print(CONFIG_ERROR, file=sys.stderr) + return 1 + + print(" creating %s" % cfg.versionfile_source) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + + ipy = os.path.join(os.path.dirname(cfg.versionfile_source), + "__init__.py") + if os.path.exists(ipy): + try: + with open(ipy, "r") as f: + old = f.read() + except OSError: + old = "" + module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0] + snippet = INIT_PY_SNIPPET.format(module) + if OLD_SNIPPET in old: + print(" replacing boilerplate in %s" % ipy) + with open(ipy, "w") as f: + f.write(old.replace(OLD_SNIPPET, snippet)) + elif snippet not in old: + print(" appending to %s" % ipy) + with open(ipy, "a") as f: + f.write(snippet) + else: + print(" %s unmodified" % ipy) + else: + print(" %s doesn't exist, ok" % ipy) + ipy = None + + # Make sure both the top-level "versioneer.py" and versionfile_source + # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so + # they'll be copied into source distributions. Pip won't be able to + # install the package without this. + manifest_in = os.path.join(root, "MANIFEST.in") + simple_includes = set() + try: + with open(manifest_in, "r") as f: + for line in f: + if line.startswith("include "): + for include in line.split()[1:]: + simple_includes.add(include) + except OSError: + pass + # That doesn't cover everything MANIFEST.in can do + # (http://docs.python.org/2/distutils/sourcedist.html#commands), so + # it might give some false negatives. Appending redundant 'include' + # lines is safe, though. + if "versioneer.py" not in simple_includes: + print(" appending 'versioneer.py' to MANIFEST.in") + with open(manifest_in, "a") as f: + f.write("include versioneer.py\n") + else: + print(" 'versioneer.py' already in MANIFEST.in") + if cfg.versionfile_source not in simple_includes: + print(" appending versionfile_source ('%s') to MANIFEST.in" % + cfg.versionfile_source) + with open(manifest_in, "a") as f: + f.write("include %s\n" % cfg.versionfile_source) + else: + print(" versionfile_source already in MANIFEST.in") + + # Make VCS-specific changes. For git, this means creating/changing + # .gitattributes to mark _version.py for export-subst keyword + # substitution. 
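+    # For this repository, setup.cfg (below) sets
+    # versionfile_source = PyNucleus/_version.py, so the line added to
+    # .gitattributes is "PyNucleus/_version.py export-subst".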
+ do_vcs_install(manifest_in, cfg.versionfile_source, ipy) + return 0 + + +def scan_setup_py(): + """Validate the contents of setup.py against Versioneer's expectations.""" + found = set() + setters = False + errors = 0 + with open("setup.py", "r") as f: + for line in f.readlines(): + if "import versioneer" in line: + found.add("import") + if "versioneer.get_cmdclass()" in line: + found.add("cmdclass") + if "versioneer.get_version()" in line: + found.add("get_version") + if "versioneer.VCS" in line: + setters = True + if "versioneer.versionfile_source" in line: + setters = True + if len(found) != 3: + print("") + print("Your setup.py appears to be missing some important items") + print("(but I might be wrong). Please make sure it has something") + print("roughly like the following:") + print("") + print(" import versioneer") + print(" setup( version=versioneer.get_version(),") + print(" cmdclass=versioneer.get_cmdclass(), ...)") + print("") + errors += 1 + if setters: + print("You should remove lines like 'versioneer.VCS = ' and") + print("'versioneer.versionfile_source = ' . This configuration") + print("now lives in setup.cfg, and should be removed from setup.py") + print("") + errors += 1 + return errors + + +if __name__ == "__main__": + cmd = sys.argv[1] + if cmd == "setup": + errors = do_setup() + errors += scan_setup_py() + if errors: + sys.exit(1) diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 0000000..294b9b4 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,7 @@ + +[versioneer] +VCS = git +style = pep440 +versionfile_source = PyNucleus/_version.py +tag_prefix = +parentdir_prefix = \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..efbca2d --- /dev/null +++ b/setup.py @@ -0,0 +1,50 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import os +import importlib +from setuptools import setup +import versioneer + +requirements = ['numpy', 'cython'] + +# We do this dance, so that we can install everything in editable mode +# as well. Otherwise installing PyNucleus in editable mode replaces +# the subpackages with their non-editable installs. +lclDir = os.getcwd().replace('\\', '/') +for pkg, srcLocation in [ + # These are just in the same git repo. 
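+    # e.g. if PyNucleus_fem is not importable yet, the loop below adds a
+    # requirement like 'PyNucleus_fem @ file://localhost/<cwd>/fem',
+    # where <cwd> stands in for the actual checkout path.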
+ ('packageTools', 'file://localhost/{lclDir}/packageTools'.format(lclDir=lclDir)), + ('base', 'file://localhost/{lclDir}/base'.format(lclDir=lclDir)), + ('metisCy', 'file://localhost/{lclDir}/metisCy'.format(lclDir=lclDir)), + ('fem', 'file://localhost/{lclDir}/fem'.format(lclDir=lclDir)), + ('multilevelSolver', 'file://localhost/{lclDir}/multilevelSolver'.format(lclDir=lclDir)), + ('nl', 'file://localhost/{lclDir}/nl'.format(lclDir=lclDir)), +]: + fullPkgName = 'PyNucleus_'+pkg + try: + importlib.import_module(fullPkgName) + requirements += [fullPkgName] + except ImportError: + requirements += ['{} @ {}'.format(fullPkgName, srcLocation)] + + +setup(name='PyNucleus', + packages=['PyNucleus'], + version=versioneer.get_version(), + cmdclass=versioneer.get_cmdclass(), + description='A finite element code that specifically targets nonlocal operators.', + long_description=''.join(open('README.rst').readlines()), + long_description_content_type='text/x-rst', + author="Christian Glusa", + author_email='caglusa@sandia.gov', + platforms='any', + license='MIT', + license_files=['LICENSE'], + install_requires=requirements, + zip_safe=False) diff --git a/tests/bitArray.py b/tests/bitArray.py new file mode 100644 index 0000000..be1faef --- /dev/null +++ b/tests/bitArray.py @@ -0,0 +1,116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +import numpy as np +from PyNucleus.base import INDEX +from PyNucleus.base.tupleDict import arrayIndexSet, bitArray + + +def test_arrayIndexSet(): + I = np.empty((3), dtype=INDEX) + I[0] = 0 + I[1] = 3 + I[2] = 65 + aIS = arrayIndexSet(I, sorted=True) + print(aIS.toSet()) + assert aIS.inSet_py(0) + assert not aIS.inSet_py(1) + assert aIS.inSet_py(3) + assert not aIS.inSet_py(64) + assert aIS.inSet_py(65) + assert len(aIS) == 3 + for i in aIS: + print(i) + + I[0] = 3 + I[1] = 0 + I[2] = 65 + aIS = arrayIndexSet(I, sorted=False) + print(aIS.toSet()) + assert aIS.inSet_py(0) + assert not aIS.inSet_py(1) + assert aIS.inSet_py(3) + assert not aIS.inSet_py(64) + assert aIS.inSet_py(65) + assert len(aIS) == 3 + for i in aIS: + print(i) + + + aIS.fromSet({759, 760, 761, 762, 763, 764, 765, 766, 767}) + aIS2 = arrayIndexSet() + aIS2.fromSet({751, 752, 753, 754, 755, 756, 757, 758, 759}) + aIS3 = aIS.union(aIS2) + assert len(aIS3) == 17 + aIS3 = aIS.inter(aIS2) + assert len(aIS3) == 1 + + aIS = arrayIndexSet() + aIS.fromSet({759, 760, 761}) + aIS2 = arrayIndexSet() + aIS2.fromSet({760}) + aIS3 = aIS.setminus(aIS2) + assert len(aIS3) == 2 + assert aIS3.inSet_py(759) + assert not aIS3.inSet_py(760) + assert aIS3.inSet_py(761) + + +def test_bitArray(): + bA = bitArray() + + print(bA.toSet()) + bA.set_py(65) + print(bA.toSet()) + bA.set_py(0) + print(bA.toSet()) + bA.set_py(3) + print(bA.toSet()) + + + assert bA.inSet_py(0) + assert bA.inSet_py(3) + assert bA.inSet_py(65) + assert not bA.inSet_py(1) + assert not bA.inSet_py(4) + assert not bA.inSet_py(66) + assert not bA.inSet_py(129) + + assert len(bA) == 3 + print(bA.length) + + bA.empty() + + assert len(bA) == 0 + + bA.fromSet(set([0, 128])) + + assert bA.inSet_py(0) + assert bA.inSet_py(128) + assert len(bA) == 2 
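+    # (indices 0 and 128 presumably land in different machine words --
+    # compare the 64/65/66 boundary checks above -- so this also covers
+    # storage spanning more than one word)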
+ + bA.empty() + for k in range(64): + bA.set_py(k) + print(bA.toSet()) + assert len(bA) == 64 + + + bA2 = bitArray() + print(bA2.toSet()) + bA2.set_py(32) + print(bA2.toSet()) + for k in range(32, 96): + bA2.set_py(k) + print(bA2.toSet()) + assert len(bA2) == 64 + + print(bA.union(bA2).toSet()) + assert len(bA.union(bA2)) == 96 + print(bA.inter(bA2).toSet()) + assert len(bA.inter(bA2)) == 32 diff --git a/tests/cache_interfaceProblem.py--domaindoubleInterval--noRef101 b/tests/cache_interfaceProblem.py--domaindoubleInterval--noRef101 new file mode 100644 index 0000000..451228b --- /dev/null +++ b/tests/cache_interfaceProblem.py--domaindoubleInterval--noRef101 @@ -0,0 +1,2 @@ +Timers: {} +results: {} diff --git a/tests/cache_interfaceProblem.py--domaindoubleSquare--noRef51 b/tests/cache_interfaceProblem.py--domaindoubleSquare--noRef51 new file mode 100644 index 0000000..451228b --- /dev/null +++ b/tests/cache_interfaceProblem.py--domaindoubleSquare--noRef51 @@ -0,0 +1,2 @@ +Timers: {} +results: {} diff --git a/tests/cache_runHelmholtz.py--domaincube1 b/tests/cache_runHelmholtz.py--domaincube1 new file mode 100644 index 0000000..2627a52 --- /dev/null +++ b/tests/cache_runHelmholtz.py--domaincube1 @@ -0,0 +1,6 @@ +results: + L2 error: 0.0001366285632192869 + Tolerance: 1.0e-05 + numIter: 34 + res: 7.193238615914433e-06 + solution L2 norm: 1.0000009584728715 diff --git a/tests/cache_runHelmholtz.py--domaincube4 b/tests/cache_runHelmholtz.py--domaincube4 new file mode 100644 index 0000000..ec11f92 --- /dev/null +++ b/tests/cache_runHelmholtz.py--domaincube4 @@ -0,0 +1,6 @@ +results: + L2 error: 0.00013930834576278384 + Tolerance: 1.0e-05 + numIter: 34 + res: 7.1932386159155384e-06 + solution L2 norm: 1.0312683908932019 diff --git a/tests/cache_runHelmholtz.py--domaininterval1 b/tests/cache_runHelmholtz.py--domaininterval1 new file mode 100644 index 0000000..de7978c --- /dev/null +++ b/tests/cache_runHelmholtz.py--domaininterval1 @@ -0,0 +1,6 @@ +results: + L2 error: 3.8723087664206776e-07 + Tolerance: 1.0e-05 + numIter: 24 + res: 8.643140978579644e-06 + solution L2 norm: 0.999999993291608 diff --git a/tests/cache_runHelmholtz.py--domaininterval4 b/tests/cache_runHelmholtz.py--domaininterval4 new file mode 100644 index 0000000..4e582ab --- /dev/null +++ b/tests/cache_runHelmholtz.py--domaininterval4 @@ -0,0 +1,6 @@ +results: + L2 error: 3.9052829703134414e-07 + Tolerance: 1.0e-05 + numIter: 24 + res: 8.643140978577775e-06 + solution L2 norm: 1.0058423027857 diff --git a/tests/cache_runHelmholtz.py--domainsquare1 b/tests/cache_runHelmholtz.py--domainsquare1 new file mode 100644 index 0000000..4968588 --- /dev/null +++ b/tests/cache_runHelmholtz.py--domainsquare1 @@ -0,0 +1,6 @@ +results: + L2 error: 8.912742605830698e-06 + Tolerance: 1.0e-05 + numIter: 26 + res: 9.049271861945027e-06 + solution L2 norm: 0.9999999668131542 diff --git a/tests/cache_runHelmholtz.py--domainsquare4 b/tests/cache_runHelmholtz.py--domainsquare4 new file mode 100644 index 0000000..2cc2d10 --- /dev/null +++ b/tests/cache_runHelmholtz.py--domainsquare4 @@ -0,0 +1,6 @@ +results: + L2 error: 8.981508021620977e-06 + Tolerance: 1.0e-05 + numIter: 26 + res: 9.049271861949018e-06 + solution L2 norm: 1.0039062106255343 diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Dirichlet--dense new file mode 100644 index 0000000..b72e81e --- /dev/null +++ 
b/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 1.0510553259810248e-12 + L2 error including Dirichlet domain: 1.0510553259810248e-12 + L2 error natural: 1.0510553259810248e-12 + Linf error natural: 1.0527134719495734e-12 + rel L2 error domain: 1.0176800727714922e-12 + rel L2 error natural: 1.0176800727714922e-12 + rel Linf error natural: 1.0527134719495734e-12 + residual norm: 6.712385203729085e-14 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Neumann--dense b/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Neumann--dense new file mode 100644 index 0000000..880b259 --- /dev/null +++ b/tests/cache_runNonlocal.py--domaininterval--kernelfractional--problempoly-Neumann--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 3.648520934495423e-05 + L2 error including Dirichlet domain: 5.3367258640215425e-05 + L2 error natural: 5.3367258640215425e-05 + Linf error natural: 0.00022522431826144107 + rel L2 error domain: 3.5326656535991405e-05 + rel L2 error natural: 5.10854585660302e-05 + rel Linf error natural: 0.00022522431826144107 + residual norm: 2.0977675938900675e-13 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Dirichlet--dense new file mode 100644 index 0000000..983d9eb --- /dev/null +++ b/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 1.2304576543841336e-13 + L2 error including Dirichlet domain: 1.2304576543841336e-13 + L2 error natural: 1.2304576543841336e-13 + Linf error natural: 2.3945255500645857e-13 + rel L2 error domain: 1.1913856524033175e-13 + rel L2 error natural: 1.1913856524033175e-13 + rel Linf error natural: 2.3945255500645857e-13 + residual norm: 2.7843928866140115e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Neumann--dense b/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Neumann--dense new file mode 100644 index 0000000..a393e2f --- /dev/null +++ b/tests/cache_runNonlocal.py--domaininterval--kernelindicator--problempoly-Neumann--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 1.0724652863905812e-13 + L2 error including Dirichlet domain: 8.71411257678327e-13 + L2 error natural: 8.71411257678327e-13 + Linf error natural: 3.8416381187289517e-11 + rel L2 error domain: 1.0384101804347547e-13 + rel L2 error natural: 8.341527152109071e-13 + rel Linf error natural: 3.8416381187289517e-11 + residual norm: 5.673833983852065e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Dirichlet--dense new file mode 100644 index 0000000..eccf1ea --- /dev/null +++ b/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 4.294009198544468e-14 + L2 error including Dirichlet domain: 4.294009198544468e-14 + L2 error natural: 4.294009198544468e-14 + Linf error natural: 7.494005416219807e-14 + rel L2 error domain: 4.157657057279479e-14 + rel 
L2 error natural: 4.157657057279479e-14 + rel Linf error natural: 7.494005416219807e-14 + residual norm: 5.615587685395032e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Neumann--dense b/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Neumann--dense new file mode 100644 index 0000000..492afaa --- /dev/null +++ b/tests/cache_runNonlocal.py--domaininterval--kernelperidynamic--problempoly-Neumann--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 1.8033629060799584e-12 + L2 error including Dirichlet domain: 8.16581643519921e-11 + L2 error natural: 8.16581643519921e-11 + Linf error natural: 3.7000383867891173e-09 + rel L2 error domain: 1.7460988476319222e-12 + rel L2 error natural: 7.816674264093205e-11 + rel Linf error natural: 3.7000383867891173e-09 + residual norm: 8.679195370303276e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelfractional--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domainsquare--kernelfractional--problempoly-Dirichlet--dense new file mode 100644 index 0000000..581f97d --- /dev/null +++ b/tests/cache_runNonlocal.py--domainsquare--kernelfractional--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 0.0033396256165033763 + L2 error including Dirichlet domain: 0.0033396256165033763 + L2 error natural: 0.0033396256165033763 + Linf error natural: 0.002894714947692556 + rel L2 error domain: 0.0023375547097963267 + rel L2 error natural: 0.0023375547097963267 + rel Linf error natural: 0.002894714947692556 + residual norm: 9.8482135268577e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelindicator--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domainsquare--kernelindicator--problempoly-Dirichlet--dense new file mode 100644 index 0000000..f02bafb --- /dev/null +++ b/tests/cache_runNonlocal.py--domainsquare--kernelindicator--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 0.012030842456908025 + L2 error including Dirichlet domain: 0.012030842456908025 + L2 error natural: 0.012030842456908025 + Linf error natural: 0.010314991612424773 + rel L2 error domain: 0.008420929672173191 + rel L2 error natural: 0.008420929672173191 + rel Linf error natural: 0.010314991612424773 + residual norm: 3.945806946714776e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runNonlocal.py--domainsquare--kernelperidynamic--problempoly-Dirichlet--dense b/tests/cache_runNonlocal.py--domainsquare--kernelperidynamic--problempoly-Dirichlet--dense new file mode 100644 index 0000000..5e847a7 --- /dev/null +++ b/tests/cache_runNonlocal.py--domainsquare--kernelperidynamic--problempoly-Dirichlet--dense @@ -0,0 +1,13 @@ +Timers: {} +errors: + L2 error domain: 0.008859149573137827 + L2 error including Dirichlet domain: 0.008859149573137827 + L2 error natural: 0.008859149573137827 + Linf error natural: 0.007585187112034042 + rel L2 error domain: 0.006200918662002816 + rel L2 error natural: 0.006200918662002816 + rel Linf error natural: 0.007585187112034042 + residual norm: 4.220320938553188e-15 +meshes: {} +results: {} +vectors: {} diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric1 b/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric1 new file mode 100644 index 0000000..2a49313 --- /dev/null +++ 
b/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric1 @@ -0,0 +1,62 @@ +Timers: {} +errors: + H^1_0 error: 0.06722386002127882 + L^2 error: 0.0004603761803004921 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 3 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 3 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 2 + Number of iterations PGMRES: 2 +rates: + Rate of convergence FMG: 0.2252873723667741 + Rate of convergence FMG-PCG: 0.08108867575038328 + Rate of convergence FMG-PGMRES: 0.06558830380188427 + Rate of convergence MG: 0.22514191757917507 + Rate of convergence PBICGSTAB: 0.0008831658129958784 + Rate of convergence PCG: 0.06769284307815154 + Rate of convergence PGMRES: 0.06551130424132004 +resHist: + FMG: + - 0.0068635123967055395 + - 0.0010407307951164654 + - 0.00023495248975154417 + FMG-PCG: + - 0.027575572218868968 + - 0.0023546750460153804 + - 0.0003741323450658072 + - 4.686683520994082e-05 + FMG-PGMRES: + - 0.0068635123967055395 + - 0.0005345014536809393 + - 8.839390124002943e-05 + MG: + - 0.020547997420041774 + - 0.006867305480928894 + - 0.001040276468463747 + - 0.00023449769855591587 + PBICGSTAB: + - 1.9192917396411564 + - 0.0005329009543748951 + - 1.8147288846917475e-05 + PCG: + - 1.9192917396411564 + - 0.02692576880845774 + - 0.002281553741768728 + - 0.0003545681474422287 + PGMRES: + - 0.020547997420041774 + - 0.006525981269017738 + - 0.0005328051170087426 + - 8.818647717438636e-05 +residuals: + Residual norm FMG: 0.00023495248975154417 + Residual norm FMG-PCG: 1.0955952147637204e-05 + Residual norm FMG-PGMRES: 8.839390124002833e-05 + Residual norm MG: 0.00023449769855591587 + Residual norm PBICGSTAB: 1.8147288846908405e-05 + Residual norm PCG: 9.4157520168062e-05 + Residual norm PGMRES: 8.818647717439182e-05 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric4 b/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric4 new file mode 100644 index 0000000..05f8e03 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP1--symmetric4 @@ -0,0 +1,62 @@ +Timers: {} +errors: + H^1_0 error: 0.06722386002088905 + L^2 error: 0.00046037618048135883 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 3 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 3 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 2 + Number of iterations PGMRES: 2 +rates: + Rate of convergence FMG: 0.22528737236677898 + Rate of convergence FMG-PCG: 0.08108867575039691 + Rate of convergence FMG-PGMRES: 0.06558830380188539 + Rate of convergence MG: 0.22514191757917912 + Rate of convergence PBICGSTAB: 0.0008831658129961386 + Rate of convergence PCG: 0.06769284307815357 + Rate of convergence PGMRES: 0.06551130424132194 +resHist: + FMG: + - 0.006863512396705505 + - 0.0010407307951165446 + - 0.00023495248975155934 + FMG-PCG: + - 0.027575572218875275 + - 0.002354675046016357 + - 0.0003741323450660903 + - 4.686683520996179e-05 + FMG-PGMRES: + - 0.006863512396705505 + - 0.0005345014536809492 + - 8.839390124003211e-05 + MG: + - 0.02054799742004177 + - 0.0068673054809288555 + - 0.0010402764684638427 + - 0.00023449769855592853 + PBICGSTAB: + - 1.9192917396411568 + - 0.000532900954374904 + - 1.814728884691958e-05 + PCG: + - 1.9192917396411568 + - 0.026925768808458095 + - 0.0022815537417687497 + - 0.0003545681474422324 + PGMRES: + - 0.02054799742004177 + - 0.006525981269017781 + - 0.0005328051170087496 + - 8.818647717438861e-05 +residuals: + 
Residual norm FMG: 0.00023495248975155934 + Residual norm FMG-PCG: 1.0955952147642723e-05 + Residual norm FMG-PGMRES: 8.839390124003134e-05 + Residual norm MG: 0.00023449769855592853 + Residual norm PBICGSTAB: 1.8147288846913748e-05 + Residual norm PCG: 9.415752016806761e-05 + Residual norm PGMRES: 8.818647717439692e-05 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP11 b/tests/cache_runParallelGMG.py--domaincube--elementP11 new file mode 100644 index 0000000..53310ee --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP11 @@ -0,0 +1,62 @@ +Timers: {} +errors: + H^1_0 error: 0.0672238600212689 + L^2 error: 0.00046037618033063657 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 3 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 3 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 2 + Number of iterations PGMRES: 2 +rates: + Rate of convergence FMG: 0.22528737236677684 + Rate of convergence FMG-PCG: 0.08108867575040135 + Rate of convergence FMG-PGMRES: 0.06558830380188278 + Rate of convergence MG: 0.22514191757917637 + Rate of convergence PBICGSTAB: 0.0008831658129963169 + Rate of convergence PCG: 0.06769284307815518 + Rate of convergence PGMRES: 0.06551130424132003 +resHist: + FMG: + - 0.006863512396705642 + - 0.0010407307951164216 + - 0.0002349524897515527 + FMG-PCG: + - 0.02757557221886239 + - 0.0023546750460142337 + - 0.00037413234506547495 + - 4.686683520991657e-05 + FMG-PGMRES: + - 0.006863512396705644 + - 0.000534501453680939 + - 8.839390124003087e-05 + MG: + - 0.020547997420041774 + - 0.0068673054809290065 + - 0.001040276468463699 + - 0.00023449769855592 + PBICGSTAB: + - 1.9192917396411724 + - 0.0005329009543748969 + - 1.8147288846919715e-05 + PCG: + - 1.9192917396411724 + - 0.02692576880845821 + - 0.0022815537417687397 + - 0.00035456814744223364 + PGMRES: + - 0.020547997420041774 + - 0.006525981269017738 + - 0.0005328051170087445 + - 8.818647717438796e-05 +residuals: + Residual norm FMG: 0.0002349524897515527 + Residual norm FMG-PCG: 1.0955952147644526e-05 + Residual norm FMG-PGMRES: 8.839390124002432e-05 + Residual norm MG: 0.00023449769855592 + Residual norm PBICGSTAB: 1.8147288846917414e-05 + Residual norm PCG: 9.415752016807212e-05 + Residual norm PGMRES: 8.818647717439181e-05 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP14 b/tests/cache_runParallelGMG.py--domaincube--elementP14 new file mode 100644 index 0000000..add3325 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP14 @@ -0,0 +1,62 @@ +Timers: {} +errors: + H^1_0 error: 0.06722386002087254 + L^2 error: 0.0004603761804361422 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 3 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 3 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 2 + Number of iterations PGMRES: 2 +rates: + Rate of convergence FMG: 0.22528737236678248 + Rate of convergence FMG-PCG: 0.08108867575040592 + Rate of convergence FMG-PGMRES: 0.06558830380188525 + Rate of convergence MG: 0.2251419175791806 + Rate of convergence PBICGSTAB: 0.0008831658129966024 + Rate of convergence PCG: 0.06769284307815539 + Rate of convergence PGMRES: 0.06551130424131849 +resHist: + FMG: + - 0.006863512396705606 + - 0.0010407307951165084 + - 0.0002349524897515703 + FMG-PCG: + - 0.02757557221886869 + - 0.002354675046015212 + - 0.00037413234506575744 + - 4.6866835209937596e-05 + FMG-PGMRES: + - 0.006863512396705605 + - 
0.0005345014536809498 + - 8.839390124003385e-05 + MG: + - 0.02054799742004177 + - 0.006867305480928965 + - 0.0010402764684637968 + - 0.00023449769855593314 + PBICGSTAB: + - 1.9192917396411735 + - 0.000532900954374905 + - 1.8147288846919467e-05 + PCG: + - 1.9192917396411735 + - 0.026925768808458626 + - 0.002281553741768759 + - 0.00035456814744223614 + PGMRES: + - 0.02054799742004177 + - 0.006525981269017777 + - 0.0005328051170087526 + - 8.818647717439037e-05 +residuals: + Residual norm FMG: 0.0002349524897515703 + Residual norm FMG-PCG: 1.0955952147646376e-05 + Residual norm FMG-PGMRES: 8.839390124003099e-05 + Residual norm MG: 0.00023449769855593314 + Residual norm PBICGSTAB: 1.814728884692328e-05 + Residual norm PCG: 9.415752016807268e-05 + Residual norm PGMRES: 8.818647717438765e-05 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric1 b/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric1 new file mode 100644 index 0000000..29de655 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric1 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 0.0034364707887007693 + L^2 error: 0.00026328613493083295 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 5 +rates: + Rate of convergence FMG: 0.2821713081313369 + Rate of convergence FMG-PCG: 0.0842250014715604 + Rate of convergence FMG-PGMRES: 0.07419224377301713 + Rate of convergence MG: 0.2855248720092231 + Rate of convergence PBICGSTAB: 0.0077183814944111395 + Rate of convergence PCG: 0.09606559847442676 + Rate of convergence PGMRES: 0.083443365108011 +resHist: + FMG: + - 0.001018433827038052 + - 0.0002914444195416124 + - 9.672322513121798e-05 + - 3.341464970914152e-05 + - 1.1852283755538439e-05 + - 4.30265423743863e-06 + - 1.5969370121726547e-06 + - 6.055977944798353e-07 + - 2.344652275118861e-07 + - 9.25658947054471e-08 + FMG-PCG: + - 0.004025093290827199 + - 0.000564465721219633 + - 6.790500643790896e-05 + - 1.120218717729608e-05 + - 1.6934156382384694e-06 + - 2.645715236668483e-07 + - 3.935543075265658e-08 + FMG-PGMRES: + - 0.001018433827038052 + - 0.00015154917035819452 + - 1.6922600877223434e-05 + - 2.7806533398819893e-06 + - 4.117921453195066e-07 + - 6.503018887948747e-08 + MG: + - 0.0289282717021879 + - 0.0029448878783492963 + - 0.0005921575902874117 + - 0.0001576271732829458 + - 4.631953238320521e-05 + - 1.476660343390697e-05 + - 5.022727314466841e-06 + - 1.7987532570809977e-06 + - 6.723326238229966e-07 + - 2.6062268271997324e-07 + - 1.0417462435382873e-07 + PBICGSTAB: + - 1.9234530819877178 + - 0.00036703983618087307 + - 1.1013545404277682e-05 + - 3.368045656009088e-07 + - 1.3301518101501765e-08 + PCG: + - 1.9234530819877178 + - 0.010881105656516192 + - 0.0011508997558606798 + - 0.0001629396047133334 + - 1.8373988173482042e-05 + - 3.6765799534428327e-06 + - 4.7282855881077437e-07 + - 7.848470104494705e-08 + PGMRES: + - 0.0289282717021879 + - 0.002518956033074909 + - 0.0003594481136995974 + - 4.0789022332035125e-05 + - 4.6624699090179036e-06 + - 9.115989592445337e-07 + - 1.1702577820025768e-07 +residuals: + Residual norm FMG: 9.25658947054471e-08 + Residual norm FMG-PCG: 1.0326868605190739e-08 + Residual norm FMG-PGMRES: 6.50301888760785e-08 + Residual norm MG: 1.0417462435382873e-07 + Residual norm PBICGSTAB: 1.3301518089277527e-08 + Residual norm PCG: 2.273682629969968e-08 
+ Residual norm PGMRES: 1.1702577823759342e-07 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric4 b/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric4 new file mode 100644 index 0000000..6db4d41 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP2--symmetric4 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 0.0034364707890238403 + L^2 error: 0.00026328613493083295 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 5 +rates: + Rate of convergence FMG: 0.28217130812927865 + Rate of convergence FMG-PCG: 0.0842250014814308 + Rate of convergence FMG-PGMRES: 0.07419224377436033 + Rate of convergence MG: 0.28552487200957516 + Rate of convergence PBICGSTAB: 0.007718381498823819 + Rate of convergence PCG: 0.09606559846789234 + Rate of convergence PGMRES: 0.08344336510368186 +resHist: + FMG: + - 0.0010184338270380494 + - 0.000291444419541618 + - 9.672322513122257e-05 + - 3.341464970914932e-05 + - 1.1852283755538303e-05 + - 4.302654237450988e-06 + - 1.5969370121853646e-06 + - 6.055977944924056e-07 + - 2.344652275087378e-07 + - 9.256589469869494e-08 + FMG-PCG: + - 0.004025093290827204 + - 0.0005644657212196425 + - 6.790500643790984e-05 + - 1.1202187177296314e-05 + - 1.6934156382384806e-06 + - 2.6457152366684893e-07 + - 3.935543075268886e-08 + FMG-PGMRES: + - 0.0010184338270380496 + - 0.0001515491703581958 + - 1.6922600877223403e-05 + - 2.7806533398819597e-06 + - 4.1179214531949496e-07 + - 6.503018887948647e-08 + MG: + - 0.0289282717021879 + - 0.002944887878349293 + - 0.0005921575902874215 + - 0.0001576271732829427 + - 4.631953238321122e-05 + - 1.4766603433904347e-05 + - 5.022727314479646e-06 + - 1.798753257066016e-06 + - 6.72332623826393e-07 + - 2.6062268272244155e-07 + - 1.0417462435511327e-07 + PBICGSTAB: + - 1.9234530819877174 + - 0.0003670398361808755 + - 1.1013545404278345e-05 + - 3.3680456560100195e-07 + - 1.3301518101522857e-08 + PCG: + - 1.9234530819877174 + - 0.01088110565651618 + - 0.0011508997558606908 + - 0.0001629396047133345 + - 1.8373988173482178e-05 + - 3.67657995344284e-06 + - 4.7282855881076346e-07 + - 7.848470104495022e-08 + PGMRES: + - 0.0289282717021879 + - 0.0025189560330749266 + - 0.0003594481136996053 + - 4.0789022332035044e-05 + - 4.662469909017804e-06 + - 9.115989592445184e-07 + - 1.1702577820026049e-07 +residuals: + Residual norm FMG: 9.256589469869494e-08 + Residual norm FMG-PCG: 1.0326868612452036e-08 + Residual norm FMG-PGMRES: 6.50301888819652e-08 + Residual norm MG: 1.0417462435511327e-07 + Residual norm PBICGSTAB: 1.3301518112091378e-08 + Residual norm PCG: 2.273682629042025e-08 + Residual norm PGMRES: 1.1702577820723625e-07 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP21 b/tests/cache_runParallelGMG.py--domaincube--elementP21 new file mode 100644 index 0000000..63fe69d --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP21 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 0.0034364707887007693 + L^2 error: 0.00026328613498354284 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 5 +rates: + Rate of convergence FMG: 0.28217130814030655 + Rate of convergence FMG-PCG: 
0.0842250014795382 + Rate of convergence FMG-PGMRES: 0.0741922437720265 + Rate of convergence MG: 0.2855248720154953 + Rate of convergence PBICGSTAB: 0.00771838149683128 + Rate of convergence PCG: 0.09606559846576827 + Rate of convergence PGMRES: 0.08344336510281686 +resHist: + FMG: + - 0.001018433827038056 + - 0.00029144441954163984 + - 9.672322513125088e-05 + - 3.341464970917935e-05 + - 1.1852283755577889e-05 + - 4.302654237461366e-06 + - 1.5969370121976381e-06 + - 6.055977945280353e-07 + - 2.3446522753931205e-07 + - 9.256589473487195e-08 + FMG-PCG: + - 0.0040250932908271145 + - 0.0005644657212195839 + - 6.790500643789912e-05 + - 1.120218717729338e-05 + - 1.6934156382380814e-06 + - 2.6457152366678186e-07 + - 3.935543075264851e-08 + FMG-PGMRES: + - 0.0010184338270380548 + - 0.0001515491703581935 + - 1.6922600877223346e-05 + - 2.7806533398818733e-06 + - 4.117921453194874e-07 + - 6.503018887948045e-08 + MG: + - 0.0289282717021879 + - 0.0029448878783493175 + - 0.0005921575902874295 + - 0.00015762717328298892 + - 4.631953238322139e-05 + - 1.4766603433969086e-05 + - 5.022727314490318e-06 + - 1.7987532570980203e-06 + - 6.723326238446149e-07 + - 2.6062268275264796e-07 + - 1.0417462437671308e-07 + PBICGSTAB: + - 1.9234530819877171 + - 0.00036703983618087524 + - 1.1013545404269937e-05 + - 3.3680456559944303e-07 + - 1.330151810142679e-08 + PCG: + - 1.9234530819877171 + - 0.010881105656516394 + - 0.001150899755860677 + - 0.0001629396047133328 + - 1.8373988173480914e-05 + - 3.6765799534425096e-06 + - 4.7282855881072153e-07 + - 7.848470104494004e-08 + PGMRES: + - 0.0289282717021879 + - 0.0025189560330749284 + - 0.00035944811369959564 + - 4.078902233203373e-05 + - 4.662469909017325e-06 + - 9.115989592444046e-07 + - 1.1702577820024651e-07 +residuals: + Residual norm FMG: 9.256589473487195e-08 + Residual norm FMG-PCG: 1.0326868611059707e-08 + Residual norm FMG-PGMRES: 6.50301888717371e-08 + Residual norm MG: 1.0417462437671308e-07 + Residual norm PBICGSTAB: 1.3301518101789818e-08 + Residual norm PCG: 2.2736826287403905e-08 + Residual norm PGMRES: 1.170257782011706e-07 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP24 b/tests/cache_runParallelGMG.py--domaincube--elementP24 new file mode 100644 index 0000000..9ffd181 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP24 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 0.0034364707890238403 + L^2 error: 0.00026328613493083295 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 5 +rates: + Rate of convergence FMG: 0.28217130814100067 + Rate of convergence FMG-PCG: 0.08422500146932672 + Rate of convergence FMG-PGMRES: 0.07419224376906615 + Rate of convergence MG: 0.28552487201736304 + Rate of convergence PBICGSTAB: 0.0077183814955158045 + Rate of convergence PCG: 0.09606559847582517 + Rate of convergence PGMRES: 0.08344336510399375 +resHist: + FMG: + - 0.0010184338270380598 + - 0.00029144441954163686 + - 9.672322513124272e-05 + - 3.341464970918268e-05 + - 1.1852283755579271e-05 + - 4.3026542374669255e-06 + - 1.5969370122072718e-06 + - 6.055977945174261e-07 + - 2.344652275389709e-07 + - 9.256589473714896e-08 + FMG-PCG: + - 0.00402509329082714 + - 0.0005644657212196004 + - 6.790500643790141e-05 + - 1.1202187177293974e-05 + - 1.6934156382381553e-06 + - 2.6457152366679457e-07 + - 3.935543075264863e-08 + FMG-PGMRES: + - 
0.0010184338270380555 + - 0.00015154917035819466 + - 1.6922600877223322e-05 + - 2.7806533398818944e-06 + - 4.1179214531948877e-07 + - 6.50301888794817e-08 + MG: + - 0.0289282717021879 + - 0.0029448878783493353 + - 0.0005921575902874682 + - 0.00015762717328298044 + - 4.6319532383209256e-05 + - 1.4766603433941423e-05 + - 5.022727314495054e-06 + - 1.7987532571198708e-06 + - 6.723326238522265e-07 + - 2.606226827657007e-07 + - 1.0417462438352758e-07 + PBICGSTAB: + - 1.9234530819877171 + - 0.0003670398361808818 + - 1.1013545404279736e-05 + - 3.3680456560151514e-07 + - 1.3301518101528076e-08 + PCG: + - 1.9234530819877171 + - 0.010881105656516501 + - 0.0011508997558606967 + - 0.00016293960471333452 + - 1.8373988173480877e-05 + - 3.676579953442492e-06 + - 4.728285588107216e-07 + - 7.848470104492759e-08 + PGMRES: + - 0.0289282717021879 + - 0.002518956033074942 + - 0.00035944811369960036 + - 4.078902233203419e-05 + - 4.662469909017352e-06 + - 9.115989592443904e-07 + - 1.1702577820024582e-07 +residuals: + Residual norm FMG: 9.256589473714896e-08 + Residual norm FMG-PCG: 1.0326868603547513e-08 + Residual norm FMG-PGMRES: 6.503018885876319e-08 + Residual norm MG: 1.0417462438352758e-07 + Residual norm PBICGSTAB: 1.330151809498872e-08 + Residual norm PCG: 2.2736826301685532e-08 + Residual norm PGMRES: 1.1702577820942332e-07 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric1 b/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric1 new file mode 100644 index 0000000..054f7c6 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric1 @@ -0,0 +1,107 @@ +Timers: {} +errors: + H^1_0 error: 0.00036557097080050635 + L^2 error: 2.0291892547314525e-06 +info: {} +iterations: + Number of iterations FMG: 14 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 16 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 8 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.40202013020505156 + Rate of convergence FMG-PCG: 0.10260101371053007 + Rate of convergence FMG-PGMRES: 0.10108478628531259 + Rate of convergence MG: 0.44746803631306087 + Rate of convergence PBICGSTAB: 0.024993105365017983 + Rate of convergence PCG: 0.15864519366505134 + Rate of convergence PGMRES: 0.14748338159991478 +resHist: + FMG: + - 0.00037136780613590233 + - 0.00016131561780285985 + - 8.287738964042894e-05 + - 4.456508400002616e-05 + - 2.4399548823380132e-05 + - 1.3478389562991759e-05 + - 7.484943108364157e-06 + - 4.171674292172183e-06 + - 2.3314687696041866e-06 + - 1.30596443888251e-06 + - 7.329605183404098e-07 + - 4.120838019172653e-07 + - 2.3204908881952202e-07 + - 1.3086186799793644e-07 + FMG-PCG: + - 0.0011675285636010696 + - 0.00034319267285754657 + - 6.30983302886482e-05 + - 1.2213506690150928e-05 + - 2.7592327748464424e-06 + - 6.406330172085261e-07 + - 1.3422003448370665e-07 + FMG-PGMRES: + - 0.00037136780613590233 + - 0.00010634206406337451 + - 2.3336771717949264e-05 + - 4.330910314110764e-06 + - 9.544145552869674e-07 + - 2.268234463551736e-07 + - 4.846840190072653e-08 + MG: + - 0.04543012343821621 + - 0.004773188614284027 + - 0.0009009604370335024 + - 0.00032944238592714814 + - 0.00015671398398547974 + - 8.070105994771094e-05 + - 4.283123190789004e-05 + - 2.3096966438731536e-05 + - 1.258724442959323e-05 + - 6.913541486060363e-06 + - 3.820389513102133e-06 + - 2.121348220852876e-06 + - 1.1825296038482673e-06 + - 6.6131082371426e-07 + - 3.708215333323545e-07 + - 2.0840986201619352e-07 + - 
1.1736435227990679e-07 + PBICGSTAB: + - 1.9235018421471402 + - 0.000609221216376344 + - 4.334253095520578e-05 + - 3.3459499569466897e-06 + - 1.830502165908982e-07 + - 1.7726573605964227e-08 + PCG: + - 1.9235018421471402 + - 0.009463716795265498 + - 0.001535555387144084 + - 0.0004343974329528032 + - 7.609671508104502e-05 + - 1.6150007093563756e-05 + - 3.485755946665239e-06 + - 7.489776301946445e-07 + - 1.8329269735858868e-07 + - 3.984743075172334e-08 + PGMRES: + - 0.04543012343821621 + - 0.003694464180245307 + - 0.0006089156194596655 + - 0.00015330859612274804 + - 3.151476970172227e-05 + - 7.0297861544937656e-06 + - 1.4196249438093687e-06 + - 2.8781560050934925e-07 + - 6.895178677713202e-08 +residuals: + Residual norm FMG: 1.3086186799793644e-07 + Residual norm FMG-PCG: 5.2997310044766764e-08 + Residual norm FMG-PGMRES: 4.846840188454748e-08 + Residual norm MG: 1.1736435227990679e-07 + Residual norm PBICGSTAB: 1.7726573557543324e-08 + Residual norm PCG: 1.8228849963557462e-08 + Residual norm PGMRES: 6.895178678781603e-08 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric4 b/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric4 new file mode 100644 index 0000000..6997271 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP3--symmetric4 @@ -0,0 +1,107 @@ +Timers: {} +errors: + H^1_0 error: 0.000365570975052245 + L^2 error: 2.0291174431199454e-06 +info: {} +iterations: + Number of iterations FMG: 14 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 16 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 8 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.4020201302034388 + Rate of convergence FMG-PCG: 0.10260101371699175 + Rate of convergence FMG-PGMRES: 0.10108478628324472 + Rate of convergence MG: 0.44746803630567006 + Rate of convergence PBICGSTAB: 0.0249931053857106 + Rate of convergence PCG: 0.15864519362678606 + Rate of convergence PGMRES: 0.14748338158328195 +resHist: + FMG: + - 0.00037136780613588753 + - 0.0001613156178029001 + - 8.287738964038407e-05 + - 4.4565084000044026e-05 + - 2.4399548823393183e-05 + - 1.3478389562984858e-05 + - 7.484943108406862e-06 + - 4.1716742921618465e-06 + - 2.3314687696100176e-06 + - 1.3059644388836464e-06 + - 7.329605183486752e-07 + - 4.1208380192885643e-07 + - 2.3204908879245077e-07 + - 1.3086186799058687e-07 + FMG-PCG: + - 0.0011675285636011071 + - 0.00034319267285755323 + - 6.309833028864864e-05 + - 1.221350669015113e-05 + - 2.759232774846371e-06 + - 6.40633017208466e-07 + - 1.3422003448370768e-07 + FMG-PGMRES: + - 0.00037136780613588753 + - 0.00010634206406338019 + - 2.333677171794816e-05 + - 4.330910314110921e-06 + - 9.54414555286867e-07 + - 2.2682344635517184e-07 + - 4.846840190072555e-08 + MG: + - 0.04543012343821621 + - 0.004773188614283926 + - 0.0009009604370334367 + - 0.000329442385926999 + - 0.00015671398398547044 + - 8.070105994765609e-05 + - 4.283123190792185e-05 + - 2.3096966438685156e-05 + - 1.2587244429693621e-05 + - 6.913541486125519e-06 + - 3.820389513109153e-06 + - 2.1213482209184102e-06 + - 1.1825296038969063e-06 + - 6.613108236964699e-07 + - 3.7082153334685267e-07 + - 2.0840986204179464e-07 + - 1.1736435224889062e-07 + PBICGSTAB: + - 1.9235018421471395 + - 0.0006092212163763156 + - 4.334253095523531e-05 + - 3.345949956948491e-06 + - 1.8305021659086374e-07 + - 1.7726573605982554e-08 + PCG: + - 1.9235018421471395 + - 0.009463716795265528 + - 0.001535555387144036 + - 0.00043439743295279763 + - 
7.60967150810453e-05 + - 1.6150007093563532e-05 + - 3.4857559466652434e-06 + - 7.489776301946253e-07 + - 1.832926973585869e-07 + - 3.98474307517225e-08 + PGMRES: + - 0.04543012343821621 + - 0.0036944641802452096 + - 0.0006089156194596726 + - 0.00015330859612274817 + - 3.151476970172261e-05 + - 7.029786154494041e-06 + - 1.419624943809379e-06 + - 2.8781560050935947e-07 + - 6.895178677713503e-08 +residuals: + Residual norm FMG: 1.3086186799058687e-07 + Residual norm FMG-PCG: 5.299731006479295e-08 + Residual norm FMG-PGMRES: 4.8468401878598424e-08 + Residual norm MG: 1.1736435224889062e-07 + Residual norm PBICGSTAB: 1.7726573616248984e-08 + Residual norm PCG: 1.8228849928383023e-08 + Residual norm PGMRES: 6.895178673338249e-08 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP31 b/tests/cache_runParallelGMG.py--domaincube--elementP31 new file mode 100644 index 0000000..1c9e22e --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP31 @@ -0,0 +1,107 @@ +Timers: {} +errors: + H^1_0 error: 0.00036557097019311515 + L^2 error: 2.0291995133256395e-06 +info: {} +iterations: + Number of iterations FMG: 14 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 16 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 8 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.4020201301996829 + Rate of convergence FMG-PCG: 0.10260101371810763 + Rate of convergence FMG-PGMRES: 0.10108478628145803 + Rate of convergence MG: 0.44746803630371484 + Rate of convergence PBICGSTAB: 0.024993105373684193 + Rate of convergence PCG: 0.15864519376311612 + Rate of convergence PGMRES: 0.1474833815971702 +resHist: + FMG: + - 0.00037136780613598186 + - 0.00016131561780289034 + - 8.287738964039101e-05 + - 4.4565084000047055e-05 + - 2.4399548823350147e-05 + - 1.3478389562970478e-05 + - 7.484943108393056e-06 + - 4.171674292172287e-06 + - 2.331468769581525e-06 + - 1.3059644388761798e-06 + - 7.32960518331267e-07 + - 4.120838019254666e-07 + - 2.3204908881156357e-07 + - 1.308618679734708e-07 + FMG-PCG: + - 0.00116752856360134 + - 0.00034319267285763027 + - 6.309833028865299e-05 + - 1.2213506690150865e-05 + - 2.7592327748459452e-06 + - 6.406330172083072e-07 + - 1.3422003448365905e-07 + FMG-PGMRES: + - 0.0003713678061359871 + - 0.00010634206406340502 + - 2.3336771717954583e-05 + - 4.330910314113025e-06 + - 9.544145552876175e-07 + - 2.2682344635534556e-07 + - 4.846840190075255e-08 + MG: + - 0.04543012343821621 + - 0.004773188614283922 + - 0.0009009604370335458 + - 0.00032944238592730167 + - 0.00015671398398552016 + - 8.070105994777236e-05 + - 4.283123190797948e-05 + - 2.309696643878207e-05 + - 1.2587244429715698e-05 + - 6.913541486073558e-06 + - 3.820389513097034e-06 + - 2.1213482208607794e-06 + - 1.182529603853713e-06 + - 6.613108237508891e-07 + - 3.7082153335593456e-07 + - 2.084098620419563e-07 + - 1.1736435224068543e-07 + PBICGSTAB: + - 1.92350184214715 + - 0.0006092212163763232 + - 4.334253095520332e-05 + - 3.3459499569469417e-06 + - 1.830502165908748e-07 + - 1.7726573605952743e-08 + PCG: + - 1.92350184214715 + - 0.009463716795265606 + - 0.0015355553871440203 + - 0.00043439743295280083 + - 7.609671508104655e-05 + - 1.615000709356401e-05 + - 3.485755946665339e-06 + - 7.489776301946678e-07 + - 1.832926973585979e-07 + - 3.984743075172549e-08 + PGMRES: + - 0.04543012343821621 + - 0.0036944641802451293 + - 0.0006089156194596568 + - 0.00015330859612275053 + - 3.1514769701722345e-05 + - 7.029786154493437e-06 + - 1.419624943809264e-06 + - 
2.878156005093619e-07 + - 6.895178677713526e-08 +residuals: + Residual norm FMG: 1.308618679734708e-07 + Residual norm FMG-PCG: 5.299731006825134e-08 + Residual norm FMG-PGMRES: 4.8468401873458343e-08 + Residual norm MG: 1.1736435224068543e-07 + Residual norm PBICGSTAB: 1.7726573582129656e-08 + Residual norm PCG: 1.8228850053701167e-08 + Residual norm PGMRES: 6.895178677883392e-08 diff --git a/tests/cache_runParallelGMG.py--domaincube--elementP34 b/tests/cache_runParallelGMG.py--domaincube--elementP34 new file mode 100644 index 0000000..5bb5488 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaincube--elementP34 @@ -0,0 +1,107 @@ +Timers: {} +errors: + H^1_0 error: 0.00036557097019311515 + L^2 error: 2.0291106037863013e-06 +info: {} +iterations: + Number of iterations FMG: 14 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 16 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 8 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.40202013019753924 + Rate of convergence FMG-PCG: 0.10260101371058639 + Rate of convergence FMG-PGMRES: 0.10108478628679374 + Rate of convergence MG: 0.4474680362934436 + Rate of convergence PBICGSTAB: 0.024993105372553538 + Rate of convergence PCG: 0.15864519370648109 + Rate of convergence PGMRES: 0.14748338160850785 +resHist: + FMG: + - 0.00037136780613597226 + - 0.00016131561780288067 + - 8.287738964040085e-05 + - 4.456508400003643e-05 + - 2.4399548823341978e-05 + - 1.3478389562966467e-05 + - 7.48494310839177e-06 + - 4.171674292169565e-06 + - 2.331468769585226e-06 + - 1.3059644388791883e-06 + - 7.329605183332747e-07 + - 4.1208380191504794e-07 + - 2.3204908879634826e-07 + - 1.3086186796370188e-07 + FMG-PCG: + - 0.001167528563601298 + - 0.00034319267285762485 + - 6.30983302886521e-05 + - 1.2213506690150217e-05 + - 2.759232774845793e-06 + - 6.406330172082341e-07 + - 1.342200344836458e-07 + FMG-PGMRES: + - 0.0003713678061359657 + - 0.00010634206406340303 + - 2.3336771717954268e-05 + - 4.330910314112776e-06 + - 9.54414555287628e-07 + - 2.2682344635532246e-07 + - 4.8468401900748686e-08 + MG: + - 0.04543012343821621 + - 0.004773188614283925 + - 0.0009009604370334685 + - 0.00032944238592725765 + - 0.0001567139839855675 + - 8.070105994777568e-05 + - 4.2831231907918084e-05 + - 2.3096966438792655e-05 + - 1.2587244429694174e-05 + - 6.913541486080754e-06 + - 3.820389513088503e-06 + - 2.1213482208547836e-06 + - 1.1825296038234327e-06 + - 6.613108236719465e-07 + - 3.708215333022713e-07 + - 2.0840986200666017e-07 + - 1.1736435219758156e-07 + PBICGSTAB: + - 1.923501842147149 + - 0.0006092212163763141 + - 4.334253095522984e-05 + - 3.345949956947823e-06 + - 1.8305021659081353e-07 + - 1.7726573605979146e-08 + PCG: + - 1.923501842147149 + - 0.009463716795265737 + - 0.0015355553871440133 + - 0.000434397432952799 + - 7.609671508104714e-05 + - 1.6150007093563834e-05 + - 3.4857559466652984e-06 + - 7.489776301946336e-07 + - 1.8329269735858612e-07 + - 3.9847430751722534e-08 + PGMRES: + - 0.04543012343821621 + - 0.003694464180245164 + - 0.0006089156194596632 + - 0.00015330859612275164 + - 3.1514769701722975e-05 + - 7.029786154493402e-06 + - 1.419624943809257e-06 + - 2.878156005093679e-07 + - 6.895178677713466e-08 +residuals: + Residual norm FMG: 1.3086186796370188e-07 + Residual norm FMG-PCG: 5.29973100449413e-08 + Residual norm FMG-PGMRES: 4.846840188880862e-08 + Residual norm MG: 1.1736435219758156e-07 + Residual norm PBICGSTAB: 1.772657357892195e-08 + Residual norm PCG: 1.8228850001640765e-08 + Residual 
norm PGMRES: 6.895178681593818e-08 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric1 b/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric1 new file mode 100644 index 0000000..35cb2d3 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric1 @@ -0,0 +1,69 @@ +Timers: {} +errors: + H^1_0 error: 6.156780412641647e-05 + L^2 error: 3.650024149988857e-08 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.026081506615969623 + Rate of convergence FMG-PCG: 0.00012376599452264343 + Rate of convergence FMG-PGMRES: 0.00010747254988819476 + Rate of convergence MG: 0.049069827679446974 + Rate of convergence PBICGSTAB: 0.00014227907727580312 + Rate of convergence PCG: 0.0027295996398194492 + Rate of convergence PGMRES: 0.0025379608481405024 +resHist: + FMG: + - 0.004284335225783948 + - 5.2884980867035296e-05 + - 6.529090516036085e-07 + - 8.060690974218545e-09 + - 4.652893962391342e-10 + FMG-PCG: + - 1.176247861907071e-05 + - 3.6055623876816177e-09 + - 1.432884797575218e-12 + FMG-PGMRES: + - 0.004284335225783948 + - 9.23201113423575e-07 + - 2.074162489219182e-10 + MG: + - 0.03855314216222429 + - 0.004358119023720919 + - 5.9453603465070336e-05 + - 1.8007882175662495e-06 + - 1.0399903008512551e-07 + - 6.4205072861672465e-09 + - 5.382040690198124e-10 + PBICGSTAB: + - 2.221441414633028 + - 2.3679893365700804e-05 + - 8.22177986469837e-09 + - 1.581555777308309e-13 + PCG: + - 2.221441414633028 + - 1.2428851623584517e-05 + - 1.5289966782957018e-07 + - 2.0137047167654766e-09 + - 9.687828785622898e-14 + PGMRES: + - 0.03855314216222429 + - 0.004330538241422398 + - 2.3679888884356076e-05 + - 4.98482589174118e-07 + - 1.70128879912631e-11 +residuals: + Residual norm FMG: 4.652893962391342e-10 + Residual norm FMG-PCG: 5.905578566850951e-10 + Residual norm FMG-PGMRES: 4.453022462288288e-10 + Residual norm MG: 5.382040690198124e-10 + Residual norm PBICGSTAB: 7.8044420410914e-10 + Residual norm PCG: 7.840732479706526e-10 + Residual norm PGMRES: 6.302524378704918e-10 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric4 b/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric4 new file mode 100644 index 0000000..47ebeff --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP1--symmetric4 @@ -0,0 +1,69 @@ +Timers: {} +errors: + H^1_0 error: 6.14509723418694e-05 + L^2 error: 2.6863450374185958e-08 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.025909224040945256 + Rate of convergence FMG-PCG: 0.0001245606561005667 + Rate of convergence FMG-PGMRES: 0.00010733236407510319 + Rate of convergence MG: 0.04906291132340355 + Rate of convergence PBICGSTAB: 0.00014365279752217512 + Rate of convergence PCG: 0.0027238763510346524 + Rate of convergence PGMRES: 0.002544389158289813 +resHist: + FMG: + - 0.0042843352247981204 + - 5.288498094984081e-05 + - 6.529060554399709e-07 + - 8.06813748514606e-09 + - 4.501236293263212e-10 + FMG-PCG: + - 1.1762478620242404e-05 + - 3.605562845995621e-09 + - 1.4328971930656372e-12 + FMG-PGMRES: + 
- 0.0042843352247981204 + - 9.232010646400076e-07 + - 2.0741808732602528e-10 + MG: + - 0.03855314216222431 + - 0.004358119023720915 + - 5.945360562148326e-05 + - 1.8007865046446327e-06 + - 1.0399897093767901e-07 + - 6.419835045226427e-09 + - 5.377490725737139e-10 + PBICGSTAB: + - 2.221441414633029 + - 2.3679893365693276e-05 + - 8.23729358299546e-09 + - 7.2751821514657274e-12 + PCG: + - 2.221441414633029 + - 1.2428851623584183e-05 + - 1.528996678295575e-07 + - 2.0137047169896525e-09 + - 9.687852170612277e-14 + PGMRES: + - 0.03855314216222431 + - 0.004330538240280393 + - 2.367988884673836e-05 + - 4.984825890140161e-07 + - 1.7012924438989613e-11 +residuals: + Residual norm FMG: 4.501236293263212e-10 + Residual norm FMG-PCG: 5.98165765977064e-10 + Residual norm FMG-PGMRES: 4.441413108215537e-10 + Residual norm MG: 5.377490725737139e-10 + Residual norm PBICGSTAB: 7.955875084520732e-10 + Residual norm PCG: 7.791515645660558e-10 + Residual norm PGMRES: 6.350536095707483e-10 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP11 b/tests/cache_runParallelGMG.py--domaininterval--elementP11 new file mode 100644 index 0000000..574e9f8 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP11 @@ -0,0 +1,69 @@ +Timers: {} +errors: + H^1_0 error: 6.148227053936231e-05 + L^2 error: 6.143906154658885e-08 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.027004771868884645 + Rate of convergence FMG-PCG: 0.00012404139252216711 + Rate of convergence FMG-PGMRES: 0.00011126394656575969 + Rate of convergence MG: 0.049099444405778306 + Rate of convergence PBICGSTAB: 0.0001432400662388326 + Rate of convergence PCG: 0.0027529270683187223 + Rate of convergence PGMRES: 0.0025735968654730735 +resHist: + FMG: + - 0.0042843352160314195 + - 5.288498835440756e-05 + - 6.529041115474649e-07 + - 8.086366631750398e-09 + - 5.536844717052294e-10 + FMG-PCG: + - 1.1762478603302836e-05 + - 3.6055641420330593e-09 + - 1.4329781202120944e-12 + FMG-PGMRES: + - 0.004284335218590382 + - 9.232011055806659e-07 + - 2.0741902233697245e-10 + MG: + - 0.03855314216222429 + - 0.004358119025646094 + - 5.945361620248371e-05 + - 1.8007880920454912e-06 + - 1.0399976813235093e-07 + - 6.4180984745831054e-09 + - 5.401560522834693e-10 + PBICGSTAB: + - 2.2214414146330204 + - 2.3679893400223028e-05 + - 8.22178506335117e-09 + - 1.5815578412093597e-13 + PCG: + - 2.2214414146330204 + - 1.2428851635459414e-05 + - 1.5289966806710395e-07 + - 2.013704716502987e-09 + - 9.687829001479399e-14 + PGMRES: + - 0.03855314216222429 + - 0.004330538236376545 + - 2.36798888924357e-05 + - 4.984825883120822e-07 + - 1.7012907489194853e-11 +residuals: + Residual norm FMG: 5.536844717052294e-10 + Residual norm FMG-PCG: 5.931889412653429e-10 + Residual norm FMG-PGMRES: 4.772750157159591e-10 + Residual norm MG: 5.401560522834693e-10 + Residual norm PBICGSTAB: 7.910224440028076e-10 + Residual norm PCG: 8.043478362159691e-10 + Residual norm PGMRES: 6.57175458136213e-10 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP14 b/tests/cache_runParallelGMG.py--domaininterval--elementP14 new file mode 100644 index 0000000..69d4905 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP14 @@ -0,0 +1,69 @@ +Timers: {} +errors: + H^1_0 error: 6.150550990626721e-05 + L^2 error: 
4.0808510594454864e-08 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.026399786727403226 + Rate of convergence FMG-PCG: 0.00012460406111142996 + Rate of convergence FMG-PGMRES: 0.00011117797764133533 + Rate of convergence MG: 0.04837194865537302 + Rate of convergence PBICGSTAB: 0.00014366063822790053 + Rate of convergence PCG: 0.002761106200862833 + Rate of convergence PGMRES: 0.002566002223887755 +resHist: + FMG: + - 0.004284335226213176 + - 5.2884984853094744e-05 + - 6.528981462137355e-07 + - 8.095824940480544e-09 + - 4.943811139784389e-10 + FMG-PCG: + - 1.1762478630289335e-05 + - 3.605564043718106e-09 + - 1.4329982815980012e-12 + FMG-PGMRES: + - 0.00428433522868912 + - 9.232011158054151e-07 + - 2.0742201149871088e-10 + MG: + - 0.03855314216222431 + - 0.0043581190257400895 + - 5.9453611310025686e-05 + - 1.8007864980355901e-06 + - 1.0400112294350939e-07 + - 6.419328268451751e-09 + - 4.938798175624634e-10 + PBICGSTAB: + - 2.221441414633029 + - 2.3679893397170436e-05 + - 8.242476338101039e-09 + - 1.1265807165550953e-12 + PCG: + - 2.221441414633029 + - 1.2428851635591157e-05 + - 1.5289966804778374e-07 + - 2.013704716431806e-09 + - 9.687595916027896e-14 + PGMRES: + - 0.03855314216222431 + - 0.004330538240409614 + - 2.36798889904379e-05 + - 4.984825897071952e-07 + - 1.701290546769573e-11 +residuals: + Residual norm FMG: 4.943811139784389e-10 + Residual norm FMG-PCG: 5.985827181038099e-10 + Residual norm FMG-PGMRES: 4.765377603940682e-10 + Residual norm MG: 4.938798175624634e-10 + Residual norm PBICGSTAB: 7.95674358661429e-10 + Residual norm PCG: 8.115384733289921e-10 + Residual norm PGMRES: 6.513746683263674e-10 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric1 b/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric1 new file mode 100644 index 0000000..91e054d --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric1 @@ -0,0 +1,66 @@ +Timers: {} +errors: + H^1_0 error: 0.0001143370788385747 + L^2 error: 2.1073424255447017e-08 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 0.0029880507057366192 + Rate of convergence FMG-PCG: 1.1897969015434094e-07 + Rate of convergence FMG-PGMRES: 3.350791200233084e-08 + Rate of convergence MG: 0.05561594678757832 + Rate of convergence PBICGSTAB: 0.00019185797664134834 + Rate of convergence PCG: 0.005186396852356259 + Rate of convergence PGMRES: 0.014320441640756318 +resHist: + FMG: + - 3.9138831199186147e-07 + - 5.067231982931464e-09 + - 1.0841825527497335e-09 + FMG-PCG: + - 6.182513205945546e-09 + - 1.7496550679916333e-11 + FMG-PGMRES: + - 3.9138831199186147e-07 + - 6.466834002670937e-10 + MG: + - 0.04063858007915261 + - 0.0014600319690468214 + - 4.644758613567613e-05 + - 1.931576666153327e-06 + - 9.965388556058843e-08 + - 6.179685080727576e-09 + - 1.2026369267244852e-09 + PBICGSTAB: + - 2.221441467622873 + - 1.5417885259573093e-05 + - 1.4549511365126951e-08 + - 7.56323135821663e-12 + PCG: + - 2.221441467622873 + - 5.245688972220659e-06 + - 9.443250985941121e-08 + - 2.053514889319376e-09 + - 
1.9398169940762306e-11 + PGMRES: + - 0.04063858007915261 + - 0.00137339756524588 + - 1.538662461026725e-05 + - 4.790290192367146e-07 + - 4.597099822681318e-09 + - 2.081702722376904e-11 +residuals: + Residual norm FMG: 1.0841825527497335e-09 + Residual norm FMG-PCG: 4.83516566612995e-09 + Residual norm FMG-PGMRES: 1.3617139651919206e-09 + Residual norm MG: 1.2026369267244852e-09 + Residual norm PBICGSTAB: 1.4958851307324917e-09 + Residual norm PCG: 5.6693825188357385e-09 + Residual norm PGMRES: 1.7090870059218915e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric4 b/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric4 new file mode 100644 index 0000000..308ff76 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP2--symmetric4 @@ -0,0 +1,66 @@ +Timers: {} +errors: + H^1_0 error: 0.00011527447382086147 + L^2 error: 4.59284633619262e-08 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 0.003003162502029918 + Rate of convergence FMG-PCG: 1.192182779547094e-07 + Rate of convergence FMG-PGMRES: 3.3627115351385855e-08 + Rate of convergence MG: 0.055484756382788644 + Rate of convergence PBICGSTAB: 0.00019644902141447256 + Rate of convergence PCG: 0.005186905063678099 + Rate of convergence PGMRES: 0.014338952633808898 +resHist: + FMG: + - 3.913884370065788e-07 + - 5.074618081179386e-09 + - 1.1007153503975528e-09 + FMG-PCG: + - 6.368909372887228e-09 + - 1.7521009937974708e-11 + FMG-PGMRES: + - 3.913884370065788e-07 + - 6.466128938487342e-10 + MG: + - 0.04063858007915264 + - 0.001460031969188883 + - 4.6447564980441626e-05 + - 1.9315784777345796e-06 + - 9.96483251757989e-08 + - 6.195297999406657e-09 + - 1.1857158526703088e-09 + PBICGSTAB: + - 2.221441467622876 + - 1.5417885235510466e-05 + - 1.4693459890298428e-08 + - 3.780520375855798e-10 + PCG: + - 2.221441467622876 + - 5.245688969403451e-06 + - 9.443250986157198e-08 + - 2.053514890885079e-09 + - 1.9398170167569194e-11 + PGMRES: + - 0.04063858007915264 + - 0.0013733975950929204 + - 1.5386624825277226e-05 + - 4.790290252381823e-07 + - 4.597099810724188e-09 + - 2.081698216563796e-11 +residuals: + Residual norm FMG: 1.1007153503975528e-09 + Residual norm FMG-PCG: 4.844861535561136e-09 + Residual norm FMG-PGMRES: 1.3665582200381971e-09 + Residual norm MG: 1.1857158526703088e-09 + Residual norm PBICGSTAB: 1.5683329422226616e-09 + Residual norm PCG: 5.671049298378215e-09 + Residual norm PGMRES: 1.7179410045458785e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP21 b/tests/cache_runParallelGMG.py--domaininterval--elementP21 new file mode 100644 index 0000000..9781c2c --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP21 @@ -0,0 +1,66 @@ +Timers: {} +errors: + H^1_0 error: 0.00011556479104448955 + L^2 error: 7.223605897152915e-08 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 0.0032725513748729307 + Rate of convergence FMG-PCG: 1.2576613127475604e-07 + Rate of convergence FMG-PGMRES: 3.747431246917222e-08 + Rate of convergence MG: 0.05720031845289547 + Rate of convergence 
PBICGSTAB: 0.00020150540214995458 + Rate of convergence PCG: 0.005193112957491545 + Rate of convergence PGMRES: 0.014840622073954795 +resHist: + FMG: + - 3.912638756838978e-07 + - 5.001475003050143e-09 + - 1.4242884242799316e-09 + FMG-PCG: + - 5.996538197251695e-08 + - 1.7903904319539505e-11 + FMG-PGMRES: + - 3.9126557161284995e-07 + - 6.489546240520482e-10 + MG: + - 0.04063858007915261 + - 0.0014600320730876973 + - 4.644767617146461e-05 + - 1.9316557428390122e-06 + - 9.964493572785122e-08 + - 6.218463948189244e-09 + - 1.4234073536200093e-09 + PBICGSTAB: + - 2.221441467470918 + - 1.5417885022755102e-05 + - 1.4550340060592982e-08 + - 7.564949198062194e-12 + PCG: + - 2.221441467470918 + - 5.2456892128237496e-06 + - 9.443251259198918e-08 + - 2.053514972869962e-09 + - 1.9398170969439376e-11 + PGMRES: + - 0.04063858007915261 + - 0.0013733979284180514 + - 1.5386627313856498e-05 + - 4.790290957876818e-07 + - 4.597100566515721e-09 + - 2.0817032778575255e-11 +residuals: + Residual norm FMG: 1.4242884242799316e-09 + Residual norm FMG-PCG: 5.110956997054392e-09 + Residual norm FMG-PGMRES: 1.5229028481896423e-09 + Residual norm MG: 1.4234073536200093e-09 + Residual norm PBICGSTAB: 1.6501062620932607e-09 + Residual norm PCG: 5.691435688700658e-09 + Residual norm PGMRES: 1.971273790699204e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP24 b/tests/cache_runParallelGMG.py--domaininterval--elementP24 new file mode 100644 index 0000000..ff1477e --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP24 @@ -0,0 +1,66 @@ +Timers: {} +errors: + H^1_0 error: 0.00011356339481145647 + L^2 error: 3.650024149988857e-08 +info: {} +iterations: + Number of iterations FMG: 3 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 6 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 3 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 0.003255973572632267 + Rate of convergence FMG-PCG: 1.258350273194184e-07 + Rate of convergence FMG-PGMRES: 3.740277535430832e-08 + Rate of convergence MG: 0.057380682929402314 + Rate of convergence PBICGSTAB: 0.00020150508254402732 + Rate of convergence PCG: 0.005191037354086219 + Rate of convergence PGMRES: 0.014871759856076461 +resHist: + FMG: + - 3.912714236790516e-07 + - 5.004310616628718e-09 + - 1.4027527827968731e-09 + FMG-PCG: + - 6.079385908589285e-08 + - 1.7904372822174022e-11 + FMG-PGMRES: + - 3.9127133897466146e-07 + - 6.489126335498151e-10 + MG: + - 0.04063858007915264 + - 0.0014600320727792084 + - 4.644765993614666e-05 + - 1.9316643094465677e-06 + - 9.965418613209667e-08 + - 6.254304837278804e-09 + - 1.450550329277394e-09 + PBICGSTAB: + - 2.2214414674709086 + - 1.5417884992119906e-05 + - 1.449023565453694e-08 + - 2.7487910294933343e-11 + PCG: + - 2.2214414674709086 + - 5.2456892034439e-06 + - 9.443251240880421e-08 + - 2.053514969353792e-09 + - 1.9398170898776748e-11 + PGMRES: + - 0.04063858007915264 + - 0.0013733979559459703 + - 1.538662773229625e-05 + - 4.790291127055159e-07 + - 4.5971006444452005e-09 + - 2.0816990120275848e-11 +residuals: + Residual norm FMG: 1.4027527827968731e-09 + Residual norm FMG-PCG: 5.1137568344825456e-09 + Residual norm FMG-PGMRES: 1.5199956814186157e-09 + Residual norm MG: 1.450550329277394e-09 + Residual norm PBICGSTAB: 1.650101027659663e-09 + Residual norm PCG: 5.684614091043313e-09 + Residual norm PGMRES: 1.987870007091989e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric1 
b/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric1 new file mode 100644 index 0000000..1754d17 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric1 @@ -0,0 +1,63 @@ +Timers: {} +errors: + H^1_0 error: 0.0003502889389238787 + L^2 error: 8.024520884654677e-08 +info: {} +iterations: + Number of iterations FMG: 1 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 7 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 2 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 2.8653489408665315e-08 + Rate of convergence FMG-PCG: 2.4153314892959718e-08 + Rate of convergence FMG-PGMRES: 2.8653489408665315e-08 + Rate of convergence MG: 0.08351882017564857 + Rate of convergence PBICGSTAB: 0.00017438890916839656 + Rate of convergence PCG: 0.0031661709481842783 + Rate of convergence PGMRES: 0.014197061654746941 +resHist: + FMG: + - 1.295354527905917e-09 + FMG-PCG: + - 3.1770133696527017e-08 + - 4.431720150138175e-13 + FMG-PGMRES: + - 1.295354527905917e-09 + MG: + - 0.04520756650023153 + - 9.259383716919433e-05 + - 8.770618507420638e-06 + - 1.202565100317264e-06 + - 1.8062429961111195e-07 + - 2.716710951559466e-08 + - 4.237406231797889e-09 + - 1.2814497611089664e-09 + PBICGSTAB: + - 2.221441469226571 + - 6.43901015917548e-06 + - 2.8430629012267137e-08 + - 1.5883226679898297e-10 + PCG: + - 2.221441469226571 + - 1.3747523123521355e-06 + - 2.400709398430598e-08 + - 1.2075509282823624e-09 + PGMRES: + - 0.04520756650023153 + - 9.256430249526018e-05 + - 6.4390084678260934e-06 + - 3.751107082041558e-07 + - 1.4685717549828658e-08 + - 5.923687435595278e-10 +residuals: + Residual norm FMG: 1.295354527905917e-09 + Residual norm FMG-PCG: 1.091912589224509e-09 + Residual norm FMG-PGMRES: 1.295354527905917e-09 + Residual norm MG: 1.2814497611089664e-09 + Residual norm PBICGSTAB: 1.374829530729178e-09 + Residual norm PCG: 4.531895104146291e-07 + Residual norm PGMRES: 1.8365595015921957e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric4 b/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric4 new file mode 100644 index 0000000..a2fa9ef --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP3--symmetric4 @@ -0,0 +1,65 @@ +Timers: {} +errors: + H^1_0 error: 0.00034995467638075585 + L^2 error: 3.650024149988857e-08 +info: {} +iterations: + Number of iterations FMG: 1 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 7 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 2 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 2.8785741084731856e-08 + Rate of convergence FMG-PCG: 2.4148263417343278e-08 + Rate of convergence FMG-PGMRES: 2.8785741084731856e-08 + Rate of convergence MG: 0.08391631634000546 + Rate of convergence PBICGSTAB: 0.013492522622348837 + Rate of convergence PCG: 0.003166149065936649 + Rate of convergence PGMRES: 0.014160691190954795 +resHist: + FMG: + - 1.3013333043464634e-09 + FMG-PCG: + - 3.225379508317341e-08 + - 4.237476807821102e-13 + FMG-PGMRES: + - 1.3013333043464634e-09 + MG: + - 0.04520756650023157 + - 9.259383804344901e-05 + - 8.770620244812483e-06 + - 1.2025434023192948e-06 + - 1.8061593475582307e-07 + - 2.713090484129895e-08 + - 4.26143788540614e-09 + - 1.3247563502679655e-09 + PBICGSTAB: + - 2.2214414692265616 + - 6.439010291685699e-06 + - 2.9117391982473344e-08 + - 2.3017244339159286e-08 + - 2.8069182777569975e-09 + - 
1.416179701726717e-10 + PCG: + - 2.2214414692265616 + - 1.3747523213526707e-06 + - 2.4007092468233822e-08 + - 1.2075508467543543e-09 + PGMRES: + - 0.04520756650023157 + - 9.256430614394948e-05 + - 6.439008721193175e-06 + - 3.7511071361638754e-07 + - 1.4685716120459646e-08 + - 5.923686511842439e-10 +residuals: + Residual norm FMG: 1.3013333043464634e-09 + Residual norm FMG-PCG: 1.0916842243046554e-09 + Residual norm FMG-PGMRES: 1.3013333043464634e-09 + Residual norm MG: 1.3247563502679655e-09 + Residual norm PBICGSTAB: 1.498248147619639e-09 + Residual norm PCG: 4.531832462103377e-07 + Residual norm PGMRES: 1.817811882249012e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP31 b/tests/cache_runParallelGMG.py--domaininterval--elementP31 new file mode 100644 index 0000000..9afc766 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP31 @@ -0,0 +1,63 @@ +Timers: {} +errors: + H^1_0 error: 0.0002602430274770129 + L^2 error: 2.7877519926234643e-08 +info: {} +iterations: + Number of iterations FMG: 1 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 7 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 2 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 2.189131876177542e-08 + Rate of convergence FMG-PCG: 2.441764997729535e-08 + Rate of convergence FMG-PGMRES: 2.189131876177542e-08 + Rate of convergence MG: 0.08092065666434285 + Rate of convergence PBICGSTAB: 0.0001749292727636012 + Rate of convergence PCG: 0.0031661568242965267 + Rate of convergence PGMRES: 0.014080054563793824 +resHist: + FMG: + - 9.896532487007284e-10 + FMG-PCG: + - 8.173482130989587e-09 + - 4.0698865718508547e-13 + FMG-PGMRES: + - 1.0268131853514758e-09 + MG: + - 0.04520756650023153 + - 9.259392211329623e-05 + - 8.770646161947532e-06 + - 1.2025935107106188e-06 + - 1.8066648424731225e-07 + - 2.7342453361978977e-08 + - 3.963245386596298e-09 + - 1.0271335578729815e-09 + PBICGSTAB: + - 2.2214414692431315 + - 6.439020221225e-06 + - 2.8433749653107515e-08 + - 1.5885395487155843e-10 + PCG: + - 2.2214414692431315 + - 1.3747525861371148e-06 + - 2.4007111284709347e-08 + - 1.2075526505390896e-09 + PGMRES: + - 0.04520756650023153 + - 9.256438595218645e-05 + - 6.439019157844418e-06 + - 3.751110245101866e-07 + - 1.4685724851322305e-08 + - 5.923687565792903e-10 +residuals: + Residual norm FMG: 9.896532487007284e-10 + Residual norm FMG-PCG: 1.1038625351279563e-09 + Residual norm FMG-PGMRES: 9.896532487007284e-10 + Residual norm MG: 1.0271335578729815e-09 + Residual norm PBICGSTAB: 1.383362858028291e-09 + Residual norm PCG: 4.53185467181586e-07 + Residual norm PGMRES: 1.776758824346899e-09 diff --git a/tests/cache_runParallelGMG.py--domaininterval--elementP34 b/tests/cache_runParallelGMG.py--domaininterval--elementP34 new file mode 100644 index 0000000..9f896f3 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domaininterval--elementP34 @@ -0,0 +1,65 @@ +Timers: {} +errors: + H^1_0 error: 0.0002599102790071526 + L^2 error: 4.0808510594454864e-08 +info: {} +iterations: + Number of iterations FMG: 1 + Number of iterations FMG-PCG: 1 + Number of iterations FMG-PGMRES: 1 + Number of iterations MG: 7 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 2 + Number of iterations PGMRES: 4 +rates: + Rate of convergence FMG: 2.1658631773565284e-08 + Rate of convergence FMG-PCG: 2.4075844227252812e-08 + Rate of convergence FMG-PGMRES: 2.1658631773565284e-08 + Rate of convergence MG: 0.08117540694259703 + Rate of 
convergence PBICGSTAB: 0.014208646432746004 + Rate of convergence PCG: 0.0031661085021361954 + Rate of convergence PGMRES: 0.014072505641897916 +resHist: + FMG: + - 9.79134036207481e-10 + FMG-PCG: + - 7.747080087244763e-09 + - 3.9386281785654953e-13 + FMG-PGMRES: + - 1.015043655340214e-09 + MG: + - 0.04520756650023157 + - 9.2593922873132e-05 + - 8.77063388393201e-06 + - 1.2026065895289318e-06 + - 1.806782400480934e-07 + - 2.7345398754481828e-08 + - 3.9295861763219516e-09 + - 1.049983443827921e-09 + PBICGSTAB: + - 2.2214414692431377 + - 6.439020265708483e-06 + - 2.8841403047454004e-08 + - 7.342482434186327e-08 + - 9.07634211168258e-09 + - 1.0995000958856777e-09 + PCG: + - 2.2214414692431377 + - 1.3747526067123975e-06 + - 2.4007112333176516e-08 + - 1.2075527071189154e-09 + PGMRES: + - 0.04520756650023157 + - 9.256438839683519e-05 + - 6.439019068347986e-06 + - 3.751110142727066e-07 + - 1.4685724896003358e-08 + - 5.923687797697047e-10 +residuals: + Residual norm FMG: 9.79134036207481e-10 + Residual norm FMG-PCG: 1.0884103289527478e-09 + Residual norm FMG-PGMRES: 9.79134036207481e-10 + Residual norm MG: 1.049983443827921e-09 + Residual norm PBICGSTAB: 1.8425613604920808e-09 + Residual norm PCG: 4.5317163417525813e-07 + Residual norm PGMRES: 1.7729515007971585e-09 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric1 b/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric1 new file mode 100644 index 0000000..fb735f2 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric1 @@ -0,0 +1,67 @@ +Timers: {} +errors: + H^1_0 error: 0.0068201431938235075 + L^2 error: 5.623584520254638e-06 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 5 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.1343185645728798 + Rate of convergence FMG-PCG: 0.009048046164858098 + Rate of convergence FMG-PGMRES: 0.008813942711345142 + Rate of convergence MG: 0.16373611952936082 + Rate of convergence PBICGSTAB: 7.520741410357475e-05 + Rate of convergence PCG: 0.03673901049732461 + Rate of convergence PGMRES: 0.03661789757675665 +resHist: + FMG: + - 0.003031495803260806 + - 9.143909109302652e-05 + - 1.798422896930025e-05 + - 3.889088188105089e-06 + - 8.427665097200043e-07 + FMG-PCG: + - 0.0013999684692628002 + - 9.516582814719064e-05 + - 2.5457834726165388e-06 + FMG-PGMRES: + - 0.003031495803260806 + - 7.901421767115153e-05 + - 1.4975023272221057e-06 + MG: + - 0.01927645013716171 + - 0.0035812078256876345 + - 0.00026492798800953007 + - 4.973043767406161e-05 + - 1.0387304099038098e-05 + - 2.26855611909886e-06 + PBICGSTAB: + - 2.2207730520755145 + - 0.00024958173176578747 + - 1.449731968206434e-06 + PCG: + - 2.2207730520755145 + - 0.003472370787481858 + - 0.0003032809285944668 + - 2.00383654166649e-05 + - 1.1602192725827527e-06 + PGMRES: + - 0.01927645013716171 + - 0.003522960244508099 + - 0.00024956147871830776 + - 1.760068277199418e-05 + - 9.46470922424711e-07 +residuals: + Residual norm FMG: 8.427665097200043e-07 + Residual norm FMG-PCG: 1.578107830543218e-06 + Residual norm FMG-PGMRES: 1.4975023271966594e-06 + Residual norm MG: 2.26855611909886e-06 + Residual norm PBICGSTAB: 1.4497319679124312e-06 + Residual norm PCG: 9.558933156250525e-07 + Residual norm PGMRES: 9.464709222823141e-07 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric4 
b/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric4 new file mode 100644 index 0000000..e46bed6 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP1--symmetric4 @@ -0,0 +1,67 @@ +Timers: {} +errors: + H^1_0 error: 0.0068202290747514304 + L^2 error: 5.6227231979315615e-06 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 5 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.13870206369430452 + Rate of convergence FMG-PCG: 0.011018354368750061 + Rate of convergence FMG-PGMRES: 0.010256296196658044 + Rate of convergence MG: 0.16373611953032838 + Rate of convergence PBICGSTAB: 7.520741411405986e-05 + Rate of convergence PCG: 0.036739010497144425 + Rate of convergence PGMRES: 0.03661789757643771 +resHist: + FMG: + - 0.00303640326557394 + - 0.00010203516640526426 + - 2.070885923676543e-05 + - 4.524826297178669e-06 + - 9.895589551232252e-07 + FMG-PCG: + - 0.0014585061644152668 + - 0.00011588721962505267 + - 3.70975390926001e-06 + FMG-PGMRES: + - 0.00303640326557394 + - 8.990487833443987e-05 + - 2.027720857279504e-06 + MG: + - 0.01927645013716171 + - 0.0035812078256876184 + - 0.0002649279880095774 + - 4.973043767406586e-05 + - 1.0387304099047536e-05 + - 2.2685561191658892e-06 + PBICGSTAB: + - 2.2207730520755145 + - 0.00024958173176578834 + - 1.4497319682066036e-06 + PCG: + - 2.2207730520755145 + - 0.0034723707874815815 + - 0.00030328092859444684 + - 2.0038365416664216e-05 + - 1.1602192725827042e-06 + PGMRES: + - 0.01927645013716171 + - 0.00352296024450812 + - 0.00024956147871831096 + - 1.7600682771994485e-05 + - 9.464709224247393e-07 +residuals: + Residual norm FMG: 9.895589551232252e-07 + Residual norm FMG-PCG: 2.3402407161302812e-06 + Residual norm FMG-PGMRES: 2.027720857273486e-06 + Residual norm MG: 2.2685561191658892e-06 + Residual norm PBICGSTAB: 1.4497319681145466e-06 + Residual norm PCG: 9.55893315610988e-07 + Residual norm PGMRES: 9.464709222575829e-07 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP11 b/tests/cache_runParallelGMG.py--domainsquare--elementP11 new file mode 100644 index 0000000..b801c35 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP11 @@ -0,0 +1,67 @@ +Timers: {} +errors: + H^1_0 error: 0.006820143195516481 + L^2 error: 5.623589455819016e-06 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 5 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.13431856457220065 + Rate of convergence FMG-PCG: 0.009048046164873959 + Rate of convergence FMG-PGMRES: 0.008813942711644268 + Rate of convergence MG: 0.1637361195298525 + Rate of convergence PBICGSTAB: 7.520741411798745e-05 + Rate of convergence PCG: 0.03673901049833652 + Rate of convergence PGMRES: 0.036617897580698174 +resHist: + FMG: + - 0.0030314958032605535 + - 9.143909109296775e-05 + - 1.798422896930062e-05 + - 3.889088188126265e-06 + - 8.427665096986982e-07 + FMG-PCG: + - 0.0013999684692645046 + - 9.51658281472163e-05 + - 2.5457834726197385e-06 + FMG-PGMRES: + - 0.003031495803260346 + - 7.9014217671148e-05 + - 1.4975023272218866e-06 + MG: + - 0.01927645013716171 + - 0.003581207825687766 + - 0.0002649279880095815 + - 4.973043767409455e-05 + - 1.0387304099054295e-05 + - 
2.2685561191329223e-06 + PBICGSTAB: + - 2.2207730520755278 + - 0.0002495817317657697 + - 1.4497319682096758e-06 + PCG: + - 2.2207730520755278 + - 0.003472370787479733 + - 0.00030328092859431305 + - 2.0038365416658504e-05 + - 1.1602192725824651e-06 + PGMRES: + - 0.01927645013716171 + - 0.003522960244507536 + - 0.0002495614787183097 + - 1.7600682771994715e-05 + - 9.464709224247775e-07 +residuals: + Residual norm FMG: 8.427665096986982e-07 + Residual norm FMG-PCG: 1.5781078305487511e-06 + Residual norm FMG-PGMRES: 1.4975023272983031e-06 + Residual norm MG: 2.2685561191329223e-06 + Residual norm PBICGSTAB: 1.4497319681902565e-06 + Residual norm PCG: 9.558933157040375e-07 + Residual norm PGMRES: 9.464709225879463e-07 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP14 b/tests/cache_runParallelGMG.py--domainsquare--elementP14 new file mode 100644 index 0000000..93bf7d0 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP14 @@ -0,0 +1,67 @@ +Timers: {} +errors: + H^1_0 error: 0.006820229064723946 + L^2 error: 5.622718261606791e-06 +info: {} +iterations: + Number of iterations FMG: 5 + Number of iterations FMG-PCG: 2 + Number of iterations FMG-PGMRES: 2 + Number of iterations MG: 5 + Number of iterations PBICGSTAB: 1 + Number of iterations PCG: 3 + Number of iterations PGMRES: 3 +rates: + Rate of convergence FMG: 0.13870206369423507 + Rate of convergence FMG-PCG: 0.01101835436895991 + Rate of convergence FMG-PGMRES: 0.010256296196670423 + Rate of convergence MG: 0.1637361195297622 + Rate of convergence PBICGSTAB: 7.520741410613595e-05 + Rate of convergence PCG: 0.03673901050038704 + Rate of convergence PGMRES: 0.03661789757861959 +resHist: + FMG: + - 0.0030364032655739396 + - 0.00010203516640541146 + - 2.070885923675952e-05 + - 4.52482629717484e-06 + - 9.895589551207487e-07 + FMG-PCG: + - 0.0014585061644175597 + - 0.00011588721962510175 + - 3.7097539092639634e-06 + FMG-PGMRES: + - 0.0030364032655737965 + - 8.990487833444894e-05 + - 2.0277208572773705e-06 + MG: + - 0.01927645013716171 + - 0.003581207825687672 + - 0.00026492798800967573 + - 4.973043767413741e-05 + - 1.0387304099098044e-05 + - 2.2685561191266652e-06 + PBICGSTAB: + - 2.2207730520755278 + - 0.00024958173176576676 + - 1.4497319682068605e-06 + PCG: + - 2.2207730520755278 + - 0.0034723707874798715 + - 0.00030328092859432254 + - 2.0038365416658324e-05 + - 1.160219272582455e-06 + PGMRES: + - 0.01927645013716171 + - 0.0035229602445075244 + - 0.00024956147871830955 + - 1.76006827719947e-05 + - 9.464709224247677e-07 +residuals: + Residual norm FMG: 9.895589551207487e-07 + Residual norm FMG-PCG: 2.3402407162194234e-06 + Residual norm FMG-PGMRES: 2.027720857278381e-06 + Residual norm MG: 2.2685561191266652e-06 + Residual norm PBICGSTAB: 1.4497319679618018e-06 + Residual norm PCG: 9.558933158640916e-07 + Residual norm PGMRES: 9.4647092242677e-07 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric1 b/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric1 new file mode 100644 index 0000000..508d47e --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric1 @@ -0,0 +1,89 @@ +Timers: {} +errors: + H^1_0 error: 3.296466928566293e-05 + L^2 error: 3.026440145312235e-08 +info: {} +iterations: + Number of iterations FMG: 8 + Number of iterations FMG-PCG: 5 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 6 +rates: + Rate of convergence 
FMG: 0.12440048391167217 + Rate of convergence FMG-PCG: 0.029322556593586686 + Rate of convergence FMG-PGMRES: 0.028007664434766853 + Rate of convergence MG: 0.18328240662394737 + Rate of convergence PBICGSTAB: 0.0024888844634467205 + Rate of convergence PCG: 0.05747447213919756 + Rate of convergence PGMRES: 0.05641642357503427 +resHist: + FMG: + - 9.401346279407981e-05 + - 1.7374219873867513e-05 + - 3.3402759443312543e-06 + - 6.490563906684497e-07 + - 1.27883441674746e-07 + - 2.5829018541867403e-08 + - 5.486169468266695e-09 + - 1.2766506690260712e-09 + FMG-PCG: + - 8.540942620008105e-05 + - 3.3023071488638603e-06 + - 2.7067440951957385e-07 + - 2.4232258932034303e-08 + - 2.601855374236242e-09 + - 2.556325821800341e-10 + FMG-PGMRES: + - 9.401346279407981e-05 + - 6.222997793394136e-06 + - 3.954735109534006e-07 + - 2.9551616715221438e-08 + - 3.905266914667231e-09 + - 3.836012174612182e-10 + MG: + - 0.022258480797952816 + - 0.0016722138438819395 + - 0.00019272171821044088 + - 3.0072032124742058e-05 + - 4.8342599280363444e-06 + - 8.201135261752476e-07 + - 1.5580743104348388e-07 + - 3.5506686872279413e-08 + - 9.71783300615783e-09 + - 2.969758275681764e-09 + - 9.52145473827205e-10 + PBICGSTAB: + - 2.2214041271328604 + - 0.00016308702641605697 + - 9.895480091687508e-07 + - 1.3642039610072443e-08 + - 3.431705720248863e-10 + PCG: + - 2.2214041271328604 + - 0.0021515495279407636 + - 0.00011065382596584535 + - 4.864728934301408e-06 + - 5.268381526904258e-07 + - 6.493528655362632e-08 + - 5.163660001996808e-09 + - 5.55900529451197e-10 + PGMRES: + - 0.022258480797952816 + - 0.00157360572301889 + - 0.0001630024893625918 + - 7.346860591249024e-06 + - 6.505469989144067e-07 + - 7.348157949806536e-08 + - 7.651570316516861e-09 + - 7.176765300397225e-10 +residuals: + Residual norm FMG: 1.2766506690260712e-09 + Residual norm FMG-PCG: 4.82508185180641e-10 + Residual norm FMG-PGMRES: 3.836012302570839e-10 + Residual norm MG: 9.52145473827205e-10 + Residual norm PBICGSTAB: 3.431703275811618e-10 + Residual norm PCG: 8.023160255396919e-10 + Residual norm PGMRES: 7.176766676532586e-10 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric4 b/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric4 new file mode 100644 index 0000000..c847e05 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP2--symmetric4 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 3.2708860058163156e-05 + L^2 error: 1.971238338250362e-08 +info: {} +iterations: + Number of iterations FMG: 9 + Number of iterations FMG-PCG: 5 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 6 + Number of iterations PGMRES: 6 +rates: + Rate of convergence FMG: 0.1528160066103212 + Rate of convergence FMG-PCG: 0.0367975568585096 + Rate of convergence FMG-PGMRES: 0.033202102856482396 + Rate of convergence MG: 0.18328241474547446 + Rate of convergence PBICGSTAB: 0.016995421512200362 + Rate of convergence PCG: 0.0597746379186267 + Rate of convergence PGMRES: 0.05641642572948654 +resHist: + FMG: + - 0.00016041543835294098 + - 2.7535604876294912e-05 + - 4.94183644230768e-06 + - 9.198072883819506e-07 + - 1.857151284257301e-07 + - 4.236695286968904e-08 + - 1.1113541419424837e-08 + - 3.252220252391814e-09 + - 1.0116147145385498e-09 + FMG-PCG: + - 0.00028169089653016366 + - 9.456373995928972e-06 + - 8.28232479937577e-07 + - 8.889487034426779e-08 + - 8.999758240310753e-09 + - 1.182111435247212e-09 + FMG-PGMRES: + - 0.00016041543835294098 
+ - 1.5534134928241873e-05 + - 1.0112366123403156e-06 + - 9.45865848098773e-08 + - 1.4351017243351685e-08 + - 8.980976840031717e-10 + MG: + - 0.022258480797952812 + - 0.0016722138438835382 + - 0.0001927217182104739 + - 3.0072032125672168e-05 + - 4.834259927397155e-06 + - 8.201135263529523e-07 + - 1.5580743055612847e-07 + - 3.550668681268607e-08 + - 9.717833434267017e-09 + - 2.9697582459849676e-09 + - 9.521458957376847e-10 + PBICGSTAB: + - 2.221404127132866 + - 0.000163087026416113 + - 9.89548009212589e-07 + - 1.364203961116647e-08 + - 9.356050401210532e-09 + - 1.8570484274105663e-09 + PCG: + - 2.221404127132866 + - 0.002151549527935724 + - 0.00011065382596573965 + - 4.8647289342894594e-06 + - 5.268381526885194e-07 + - 6.493528655351745e-08 + - 5.035212714715517e-09 + - 7.833039191568634e-10 + PGMRES: + - 0.022258480797952812 + - 0.0015736057230206564 + - 0.00016300248936264818 + - 7.3468605912462306e-06 + - 6.505469989097072e-07 + - 7.348157949761034e-08 + - 7.651570316441904e-09 + - 7.176765300263208e-10 +residuals: + Residual norm FMG: 1.0116147145385498e-09 + Residual norm FMG-PCG: 1.5017251162632245e-09 + Residual norm FMG-PGMRES: 8.980974688373558e-10 + Residual norm MG: 9.521458957376847e-10 + Residual norm PBICGSTAB: 1.8570486447436375e-09 + Residual norm PCG: 1.0153066804134352e-09 + Residual norm PGMRES: 7.176768320947644e-10 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP21 b/tests/cache_runParallelGMG.py--domainsquare--elementP21 new file mode 100644 index 0000000..17bfc08 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP21 @@ -0,0 +1,89 @@ +Timers: {} +errors: + H^1_0 error: 3.2962971811295275e-05 + L^2 error: 3.332000937312528e-08 +info: {} +iterations: + Number of iterations FMG: 8 + Number of iterations FMG-PCG: 5 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 3 + Number of iterations PCG: 6 + Number of iterations PGMRES: 6 +rates: + Rate of convergence FMG: 0.12440034639527292 + Rate of convergence FMG-PCG: 0.029322571399033447 + Rate of convergence FMG-PGMRES: 0.028007661172460254 + Rate of convergence MG: 0.1832822717847498 + Rate of convergence PBICGSTAB: 0.0024888848467852598 + Rate of convergence PCG: 0.057474480206702296 + Rate of convergence PGMRES: 0.056416435647206685 +resHist: + FMG: + - 9.401346278764398e-05 + - 1.737421986180617e-05 + - 3.3402759305090966e-06 + - 6.490563761355201e-07 + - 1.2788342700121753e-07 + - 2.582900436150482e-08 + - 5.486155842915545e-09 + - 1.276639379055593e-09 + FMG-PCG: + - 8.540942617762358e-05 + - 3.3023071451328246e-06 + - 2.7067440916788705e-07 + - 2.423225891110801e-08 + - 2.6018553679521206e-09 + - 2.5563258115539165e-10 + FMG-PGMRES: + - 9.401346278765392e-05 + - 6.222997792925606e-06 + - 3.954735108004115e-07 + - 2.9551616700782186e-08 + - 3.905266914304765e-09 + - 3.836012178741696e-10 + MG: + - 0.022258480797952816 + - 0.0016722138439047428 + - 0.00019272171820841616 + - 3.0072032115355852e-05 + - 4.8342599156724815e-06 + - 8.201135126813357e-07 + - 1.5580741793562443e-07 + - 3.5506676020782196e-08 + - 9.717824551447451e-09 + - 2.9697502244845106e-09 + - 9.52138469002847e-10 + PBICGSTAB: + - 2.221404127132974 + - 0.00016308702641577627 + - 9.895480091687258e-07 + - 1.3642039609900571e-08 + - 3.4317057204202704e-10 + PCG: + - 2.221404127132974 + - 0.0021515495279368886 + - 0.00011065382596571424 + - 4.864728934276824e-06 + - 5.268381526854678e-07 + - 6.49352865529309e-08 + - 5.163660001909147e-09 + - 5.559005294377545e-10 + 
PGMRES: + - 0.022258480797952816 + - 0.0015736057230459947 + - 0.00016300248936217373 + - 7.346860591227326e-06 + - 6.505469989099595e-07 + - 7.348157949792861e-08 + - 7.65157031643337e-09 + - 7.176765300315857e-10 +residuals: + Residual norm FMG: 1.276639379055593e-09 + Residual norm FMG-PCG: 4.825094033139301e-10 + Residual norm FMG-PGMRES: 3.8360100684957045e-10 + Residual norm MG: 9.52138469002847e-10 + Residual norm PBICGSTAB: 3.4317048614669707e-10 + Residual norm PCG: 8.023167012508762e-10 + Residual norm PGMRES: 7.176775890786759e-10 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP24 b/tests/cache_runParallelGMG.py--domainsquare--elementP24 new file mode 100644 index 0000000..50cf472 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP24 @@ -0,0 +1,91 @@ +Timers: {} +errors: + H^1_0 error: 3.270577792689281e-05 + L^2 error: 1.747316032582309e-08 +info: {} +iterations: + Number of iterations FMG: 9 + Number of iterations FMG-PCG: 5 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 10 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 6 + Number of iterations PGMRES: 6 +rates: + Rate of convergence FMG: 0.1528159018166166 + Rate of convergence FMG-PCG: 0.036797557647117084 + Rate of convergence FMG-PGMRES: 0.03320210725658198 + Rate of convergence MG: 0.1832822730709879 + Rate of convergence PBICGSTAB: 0.016995421231529625 + Rate of convergence PCG: 0.05977462791279813 + Rate of convergence PGMRES: 0.05641643386169551 +resHist: + FMG: + - 0.00016041543835100007 + - 2.7535604867354283e-05 + - 4.941836433171756e-06 + - 9.198072790054012e-07 + - 1.8571511853188632e-07 + - 4.236694292205192e-08 + - 1.1113532845344833e-08 + - 3.2522132314857156e-09 + - 1.0116084711149315e-09 + FMG-PCG: + - 0.0002816908964076938 + - 9.456373967306107e-06 + - 8.282324774100069e-07 + - 8.889486974772887e-08 + - 8.999758246121743e-09 + - 1.1821114156231151e-09 + FMG-PGMRES: + - 0.00016041543835110155 + - 1.5534134928953384e-05 + - 1.0112366123914707e-06 + - 9.458658476212503e-08 + - 1.4351017224371172e-08 + - 8.980976832625233e-10 + MG: + - 0.022258480797952812 + - 0.001672213843906473 + - 0.00019272171820980313 + - 3.0072032114441334e-05 + - 4.834259915123921e-06 + - 8.201135128084537e-07 + - 1.558074183512217e-07 + - 3.55066762317767e-08 + - 9.717824807830143e-09 + - 2.969750346421803e-09 + - 9.521385358220105e-10 + PBICGSTAB: + - 2.2214041271329736 + - 0.00016308702641633987 + - 9.895480092094543e-07 + - 1.3642039611043245e-08 + - 9.35605039852774e-09 + - 1.85704842448179e-09 + PCG: + - 2.2214041271329736 + - 0.0021515495279398767 + - 0.00011065382596592483 + - 4.864728934280249e-06 + - 5.26838152685567e-07 + - 6.493528655292991e-08 + - 5.035212714685657e-09 + - 7.833039191551078e-10 + PGMRES: + - 0.022258480797952812 + - 0.0015736057230478129 + - 0.00016300248936268504 + - 7.346860591233063e-06 + - 6.50546998910692e-07 + - 7.348157949825758e-08 + - 7.651570316443259e-09 + - 7.176765300367146e-10 +residuals: + Residual norm FMG: 1.0116084711149315e-09 + Residual norm FMG-PCG: 1.501725277180392e-09 + Residual norm FMG-PGMRES: 8.980980639381187e-10 + Residual norm MG: 9.521385358220105e-10 + Residual norm PBICGSTAB: 1.857048522070787e-09 + Residual norm PCG: 1.0153056606852674e-09 + Residual norm PGMRES: 7.176774527969798e-10 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric1 b/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric1 new file mode 100644 index 0000000..3424c8d --- /dev/null +++ 
b/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric1 @@ -0,0 +1,97 @@ +Timers: {} +errors: + H^1_0 error: 3.299454938359431e-06 + L^2 error: 3.161013638317052e-08 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 4 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 14 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 7 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.1753522541501894 + Rate of convergence FMG-PCG: 0.01801685575412339 + Rate of convergence FMG-PGMRES: 0.026311474672465383 + Rate of convergence MG: 0.29613969729036754 + Rate of convergence PBICGSTAB: 0.007062680776238781 + Rate of convergence PCG: 0.08689365283413622 + Rate of convergence PGMRES: 0.08593111919595947 +resHist: + FMG: + - 5.428797748539679e-06 + - 1.5795252831107584e-06 + - 5.876053586773099e-07 + - 2.295376729071833e-07 + - 9.046364992735191e-08 + - 3.5569018877790254e-08 + - 1.3909941866730001e-08 + - 5.409417941571505e-09 + - 2.0934719265368974e-09 + - 8.070247958359307e-10 + FMG-PCG: + - 9.346670564514236e-06 + - 1.1522169520195248e-06 + - 1.3521656652647764e-07 + - 1.6044765026240866e-08 + - 1.6891807051507238e-09 + FMG-PGMRES: + - 5.428797748539679e-06 + - 1.198674559990406e-06 + - 2.214177027747189e-07 + - 2.6521113655065013e-08 + - 2.924581654117667e-09 + - 3.7025358611940355e-10 + MG: + - 0.02936116932505517 + - 0.0012753206367137624 + - 0.00018718247755468415 + - 5.2344828448004566e-05 + - 1.810672068774139e-05 + - 6.727386906083044e-06 + - 2.5564930743921323e-06 + - 9.784719589826804e-07 + - 3.751353297385549e-07 + - 1.4377338462578277e-07 + - 5.5045570813740586e-08 + - 2.105096866389252e-08 + - 8.042423214954963e-09 + - 3.070208737074169e-09 + - 1.1714564321291884e-09 + PBICGSTAB: + - 2.2213747063334566 + - 0.0001686500963427375 + - 5.404542211505165e-06 + - 1.3247455955144843e-07 + - 3.2936853326853865e-09 + - 7.305565824042273e-11 + PCG: + - 2.2213747063334566 + - 0.0016851325510161138 + - 0.00016994899222161483 + - 2.5153954675152785e-05 + - 3.047870748109234e-06 + - 3.555759922279796e-07 + - 3.894122041416526e-08 + - 4.713913622874514e-09 + - 6.570883629134857e-10 + PGMRES: + - 0.02936116932505517 + - 0.0012744411994470234 + - 0.0001678701370561237 + - 3.0044496086364993e-05 + - 5.068536257319195e-06 + - 6.476376064952329e-07 + - 6.77278775108971e-08 + - 8.181681770529225e-09 + - 1.0158442501526152e-09 +residuals: + Residual norm FMG: 8.070247958359307e-10 + Residual norm FMG-PCG: 3.0937794737206646e-09 + Residual norm FMG-PGMRES: 3.70253604704655e-10 + Residual norm MG: 1.1714564321291884e-09 + Residual norm PBICGSTAB: 7.305528784580782e-11 + Residual norm PCG: 1.0982209619505513e-09 + Residual norm PGMRES: 1.0158430769711596e-09 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric4 b/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric4 new file mode 100644 index 0000000..b3e99d6 --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP3--symmetric4 @@ -0,0 +1,104 @@ +Timers: {} +errors: + H^1_0 error: 4.849818647176275e-06 + L^2 error: 2.1073424255447017e-08 +info: {} +iterations: + Number of iterations FMG: 13 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 14 + Number of iterations PBICGSTAB: 5 + Number of iterations PCG: 7 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.26397651263698213 + Rate of convergence FMG-PCG: 0.06108608222884553 + Rate of convergence 
FMG-PGMRES: 0.05905528509359946 + Rate of convergence MG: 0.29613969846978877 + Rate of convergence PBICGSTAB: 0.036482440085910554 + Rate of convergence PCG: 0.08788359860179318 + Rate of convergence PGMRES: 0.08593112134215163 +resHist: + FMG: + - 0.00010845881321345241 + - 3.2512582797971134e-05 + - 1.1512489190576765e-05 + - 4.3742191575755635e-06 + - 1.7225957224067011e-06 + - 6.83187572112732e-07 + - 2.69518861069759e-07 + - 1.0545784767463894e-07 + - 4.0950445927292256e-08 + - 1.5803730664052543e-08 + - 6.070461716292494e-09 + - 2.3237731206293624e-09 + - 8.874054274357975e-10 + FMG-PCG: + - 0.00024033524236784256 + - 3.081792302933389e-05 + - 3.6340807013171967e-06 + - 4.3192513381765425e-07 + - 4.629529780452295e-08 + - 4.856474757020408e-09 + - 8.86459472454213e-10 + FMG-PGMRES: + - 0.00010845881321345241 + - 2.4125016016646905e-05 + - 5.029939970249448e-06 + - 7.129294172131338e-07 + - 7.821524197107216e-08 + - 9.172524101059032e-09 + - 1.2454487435139896e-09 + MG: + - 0.02936116932505517 + - 0.0012753206367011533 + - 0.00018718247755357143 + - 5.2344828447655365e-05 + - 1.810672068796241e-05 + - 6.7273869063304605e-06 + - 2.5564930737809552e-06 + - 9.784719593532804e-07 + - 3.751353285031664e-07 + - 1.4377338540443305e-07 + - 5.504557183187684e-08 + - 2.105096705917918e-08 + - 8.042423511795626e-09 + - 3.0702075035021904e-09 + - 1.171456497446229e-09 + PBICGSTAB: + - 2.221374706333457 + - 0.0001686500963411863 + - 5.404542211319132e-06 + - 1.3247455954507384e-07 + - 3.293685332656219e-09 + - 4.387566312218332e-09 + - 1.8975474358880307e-09 + PCG: + - 2.221374706333457 + - 0.0016851325509987553 + - 0.00016994899221845964 + - 2.5153954674785078e-05 + - 3.0478707480467146e-06 + - 3.5557599222096647e-07 + - 3.8941220413599314e-08 + - 4.633379245907278e-09 + - 7.14033256743798e-10 + PGMRES: + - 0.02936116932505517 + - 0.0012744411994331402 + - 0.00016787013705433818 + - 3.0044496085758426e-05 + - 5.0685362571939345e-06 + - 6.476376064827831e-07 + - 6.772787750958701e-08 + - 8.181681770363567e-09 + - 1.0158442501328109e-09 +residuals: + Residual norm FMG: 8.874054274357975e-10 + Residual norm FMG-PCG: 1.5255518828433695e-09 + Residual norm FMG-PGMRES: 1.2454490629971444e-09 + Residual norm MG: 1.171456497446229e-09 + Residual norm PBICGSTAB: 1.897547854248795e-09 + Residual norm PCG: 1.1888530338452328e-09 + Residual norm PGMRES: 1.015843254571106e-09 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP31 b/tests/cache_runParallelGMG.py--domainsquare--elementP31 new file mode 100644 index 0000000..a682e7f --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP31 @@ -0,0 +1,97 @@ +Timers: {} +errors: + H^1_0 error: 3.265904873412391e-06 + L^2 error: 2.9802322387695312e-08 +info: {} +iterations: + Number of iterations FMG: 10 + Number of iterations FMG-PCG: 4 + Number of iterations FMG-PGMRES: 5 + Number of iterations MG: 14 + Number of iterations PBICGSTAB: 4 + Number of iterations PCG: 7 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.1753522249712321 + Rate of convergence FMG-PCG: 0.01801685445498197 + Rate of convergence FMG-PGMRES: 0.026311477394361283 + Rate of convergence MG: 0.2961396842522246 + Rate of convergence PBICGSTAB: 0.0070626680736735935 + Rate of convergence PCG: 0.08689367219554152 + Rate of convergence PGMRES: 0.08593114309930784 +resHist: + FMG: + - 5.42879774357771e-06 + - 1.5795252809545518e-06 + - 5.876053569101503e-07 + - 2.2953767365303716e-07 + - 9.046364997057166e-08 + - 3.55690177781999e-08 + - 
1.3909941072724027e-08 + - 5.409416976605995e-09 + - 2.093473112929616e-09 + - 8.070234529319254e-10 + FMG-PCG: + - 9.346670565754055e-06 + - 1.152216951712638e-06 + - 1.3521656597916556e-07 + - 1.6044764709687514e-08 + - 1.6891806557239142e-09 + FMG-PGMRES: + - 5.428797743446927e-06 + - 1.1986745597161492e-06 + - 2.2141770279100032e-07 + - 2.652111368591167e-08 + - 2.924581657811882e-09 + - 3.702535886359782e-10 + MG: + - 0.02936116932505517 + - 0.0012753206366169605 + - 0.00018718247754423426 + - 5.2344828445643926e-05 + - 1.810672068649881e-05 + - 6.7273869064319825e-06 + - 2.5564930730308737e-06 + - 9.784719599121466e-07 + - 3.751353285562021e-07 + - 1.437733841926997e-07 + - 5.504557046910123e-08 + - 2.105096707050425e-08 + - 8.042422407218083e-09 + - 3.070206358147649e-09 + - 1.1714557100693935e-09 + PBICGSTAB: + - 2.2213747063334934 + - 0.00016865009633396357 + - 5.40454221118835e-06 + - 1.3247455953209187e-07 + - 3.2936853324482727e-09 + - 7.305565824720332e-11 + PCG: + - 2.2213747063334934 + - 0.00168513255093624 + - 0.00016994899221728244 + - 2.5153954674313098e-05 + - 3.0478707480294376e-06 + - 3.5557599220860323e-07 + - 3.894122041252581e-08 + - 4.71391362273201e-09 + - 6.5708836290027e-10 + PGMRES: + - 0.02936116932505517 + - 0.0012744411993508684 + - 0.0001678701370470801 + - 3.0044496085485824e-05 + - 5.068536257098374e-06 + - 6.476376064572719e-07 + - 6.772787750888763e-08 + - 8.181681770224278e-09 + - 1.0158442501292414e-09 +residuals: + Residual norm FMG: 8.070234529319254e-10 + Residual norm FMG-PCG: 3.0937785813881327e-09 + Residual norm FMG-PGMRES: 3.702537962165314e-10 + Residual norm MG: 1.1714557100693935e-09 + Residual norm PBICGSTAB: 7.305476227367337e-11 + Residual norm PCG: 1.0982226748697656e-09 + Residual norm PGMRES: 1.0158450550031006e-09 diff --git a/tests/cache_runParallelGMG.py--domainsquare--elementP34 b/tests/cache_runParallelGMG.py--domainsquare--elementP34 new file mode 100644 index 0000000..78f1d9f --- /dev/null +++ b/tests/cache_runParallelGMG.py--domainsquare--elementP34 @@ -0,0 +1,104 @@ +Timers: {} +errors: + H^1_0 error: 4.84367969176109e-06 + L^2 error: 2.1073424255447017e-08 +info: {} +iterations: + Number of iterations FMG: 13 + Number of iterations FMG-PCG: 6 + Number of iterations FMG-PGMRES: 6 + Number of iterations MG: 14 + Number of iterations PBICGSTAB: 5 + Number of iterations PCG: 7 + Number of iterations PGMRES: 7 +rates: + Rate of convergence FMG: 0.2639765105756853 + Rate of convergence FMG-PCG: 0.06108608330107212 + Rate of convergence FMG-PGMRES: 0.05905528348966206 + Rate of convergence MG: 0.29613969549938 + Rate of convergence PBICGSTAB: 0.03648243635113579 + Rate of convergence PCG: 0.08788362340325867 + Rate of convergence PGMRES: 0.08593113552089134 +resHist: + FMG: + - 0.00010845881323088614 + - 3.2512582798347725e-05 + - 1.1512489192261438e-05 + - 4.3742191572438755e-06 + - 1.7225957208611525e-06 + - 6.831875723046265e-07 + - 2.695188608572854e-07 + - 1.054578476288921e-07 + - 4.0950446621274397e-08 + - 1.5803731112752255e-08 + - 6.0704613635471296e-09 + - 2.3237728357282135e-09 + - 8.874053373532496e-10 + FMG-PCG: + - 0.00024033524239822955 + - 3.08179230378716e-05 + - 3.6340807032943296e-06 + - 4.3192513465709794e-07 + - 4.62952978949957e-08 + - 4.856474753020179e-09 + - 8.864594705927064e-10 + FMG-PGMRES: + - 0.00010845881323113136 + - 2.4125016018416374e-05 + - 5.029939970034463e-06 + - 7.129294170981796e-07 + - 7.821524196566887e-08 + - 9.172524101346285e-09 + - 1.2454487433719927e-09 + MG: + - 0.02936116932505517 + 
- 0.0012753206366358585 + - 0.0001871824775462767 + - 5.234482844560382e-05 + - 1.8106720687326324e-05 + - 6.727386906785701e-06 + - 2.556493073853869e-06 + - 9.784719597663044e-07 + - 3.7513532936119333e-07 + - 1.437733844471093e-07 + - 5.5045571022743994e-08 + - 2.1050967349145423e-08 + - 8.042422107520287e-09 + - 3.070208273088983e-09 + - 1.1714563329432527e-09 + PBICGSTAB: + - 2.2213747063334957 + - 0.00016865009633548972 + - 5.404542211235668e-06 + - 1.3247455953801443e-07 + - 3.2936853324685312e-09 + - 4.3875663126822314e-09 + - 1.8975474358071775e-09 + PCG: + - 2.2213747063334957 + - 0.0016851325509709574 + - 0.00016994899221651672 + - 2.5153954674484446e-05 + - 3.0478707480202274e-06 + - 3.5557599221083373e-07 + - 3.894122041267313e-08 + - 4.633379245863923e-09 + - 7.140332567365341e-10 + PGMRES: + - 0.02936116932505517 + - 0.0012744411993702183 + - 0.0001678701370484586 + - 3.004449608553552e-05 + - 5.068536257107726e-06 + - 6.476376064638038e-07 + - 6.772787750868786e-08 + - 8.1816817702333e-09 + - 1.0158442501305605e-09 +residuals: + Residual norm FMG: 8.874053373532496e-10 + Residual norm FMG-PCG: 1.5255520435088401e-09 + Residual norm FMG-PGMRES: 1.2454488600393032e-09 + Residual norm MG: 1.1714563329432527e-09 + Residual norm PBICGSTAB: 1.897546882971603e-09 + Residual norm PCG: 1.1888553823751115e-09 + Residual norm PGMRES: 1.0158444278793088e-09 diff --git a/tests/cache_runSerialGMG.py b/tests/cache_runSerialGMG.py new file mode 100644 index 0000000..935c025 --- /dev/null +++ b/tests/cache_runSerialGMG.py @@ -0,0 +1,168 @@ +Timers: {} +errors: + H^1_0 error: 0.003537410542403111 + H^1_0 error constant: 2.5613588299509993 + L^2 error: 1.6442082655606228e-06 + L^2 error constant: 0.8620386631342476 +info: {} +iterations: + Number of iterations BICGSTAB: 14 + Number of iterations CG: 50 + Number of iterations FMG: 5 + Number of iterations GMRES: 20 + Number of iterations MG: 9 + Number of iterations PBICGSTAB: 2 + Number of iterations PCG: 5 + Number of iterations PGMRES: 6 +rates: + Rate of convergence BICGSTAB: 0.5158685063070121 + Rate of convergence CG: 0.8348286600972041 + Rate of convergence FMG: 0.128280059367638 + Rate of convergence GMRES: 0.6294452755207325 + Rate of convergence MG: 0.31996358412183235 + Rate of convergence PBICGSTAB: 0.005609024571358598 + Rate of convergence PCG: 0.15436080555582193 + Rate of convergence PGMRES: 0.1792851508849768 +resHist: + BICGSTAB: + - 0.009638270428068281 + - 1.5914404201666966e-05 + - 8.274394319745429e-06 + - 5.378644505878598e-06 + - 3.951203943990726e-06 + - 3.239615099773025e-06 + - 2.657884061018963e-06 + - 2.275572326009377e-06 + - 1.966641071790893e-06 + - 1.7252705107076594e-06 + - 1.5267667361280442e-06 + - 1.360146961657198e-06 + - 1.2211107651055918e-06 + - 1.1009350537197041e-06 + - 9.999253059167261e-07 + - 9.110565156826788e-07 + CG: + - 0.009638270428068281 + - 3.553098572907398e-05 + - 1.7799702558858993e-05 + - 1.2070702370979867e-05 + - 1.0414455347675712e-05 + - 9.512278382065753e-06 + - 7.518913639981027e-06 + - 7.292434670893872e-06 + - 5.990991636120884e-06 + - 5.843434442029443e-06 + - 5.003356799446422e-06 + - 4.86264906985428e-06 + - 4.300875694883896e-06 + - 4.162235117045808e-06 + - 3.772128257731804e-06 + - 3.6388216575009818e-06 + - 3.358654662722082e-06 + - 3.2332806092853035e-06 + - 3.0261314978569842e-06 + - 2.9098888747868487e-06 + - 2.7528297210061116e-06 + - 2.6459439707333594e-06 + - 2.5242295341697578e-06 + - 2.426376553440764e-06 + - 2.3302232057561153e-06 + - 2.2408081825427872e-06 + - 
2.1635432291039103e-06 + - 2.081863837837021e-06 + - 2.018824581192661e-06 + - 1.944160752130178e-06 + - 1.8920183343132155e-06 + - 1.8236787429305398e-06 + - 1.7800107526914325e-06 + - 1.717354682179513e-06 + - 1.6803679366570318e-06 + - 1.6228134887255743e-06 + - 1.5911600028288023e-06 + - 1.5381849762031571e-06 + - 1.5108372151213116e-06 + - 1.4619762453897053e-06 + - 1.4381409507000987e-06 + - 1.3929809307869564e-06 + - 1.372038563324701e-06 + - 1.3302134590580518e-06 + - 1.311674973691543e-06 + - 1.272860631366006e-06 + - 1.256336178729426e-06 + - 1.2202454264546047e-06 + - 1.2054213916361668e-06 + - 1.171799569942159e-06 + - 1.1584215220352562e-06 + FMG: + - 0.0015160609167323284 + - 3.615791676473775e-05 + - 6.783834827034553e-06 + - 1.5018538267380414e-06 + - 3.348072615221243e-07 + GMRES: + - 0.009638270428068283 + - 3.553074430024288e-05 + - 1.5914382509170402e-05 + - 9.617285875996892e-06 + - 7.0654815665432545e-06 + - 5.671995171176492e-06 + - 4.528094177244557e-06 + - 3.846835719501442e-06 + - 3.2369848635831347e-06 + - 2.8315587934152194e-06 + - 2.464295338813626e-06 + - 2.3054650258553426e-06 + - 2.141758558995854e-06 + - 1.970194198173105e-06 + - 1.8042360321120505e-06 + - 1.6348135806660687e-06 + - 1.4804060921936255e-06 + - 1.3334029604632723e-06 + - 1.2035542068506018e-06 + - 1.105912577945519e-06 + - 1.0202709780687723e-06 + - 9.849001002729636e-07 + - 9.55485790551156e-07 + - 9.186681736137378e-07 + MG: + - 0.009638270428068283 + - 0.0030088218311112675 + - 0.0009769434518308055 + - 0.00032001782750482337 + - 0.00010368199905154005 + - 3.329353535793215e-05 + - 1.0626921408909422e-05 + - 3.378314501353458e-06 + - 1.0709252956091801e-06 + - 3.387693291422185e-07 + PBICGSTAB: + - 1.964400319431065 + - 0.0005080881606274181 + - 1.3438003570988883e-05 + - 3.032311356104145e-07 + PCG: + - 1.964400319431065 + - 0.14693329802878827 + - 0.011098488491899042 + - 0.0008440338764420434 + - 6.472349491935834e-05 + - 5.42857339984811e-06 + - 6.080388480013461e-07 + PGMRES: + - 400.60627670588076 + - 13.919017614383232 + - 0.7272312384317737 + - 0.04159154054257073 + - 0.002659216817159224 + - 0.00016012902505904762 + - 9.005611119101654e-06 + - 5.604649885616363e-07 +residuals: + Residual norm BICGSTAB: 9.110565156871871e-07 + Residual norm CG: 1.1584215220040951e-06 + Residual norm FMG: 3.348072615221243e-07 + Residual norm GMRES: 9.186681736182807e-07 + Residual norm MG: 3.387693291422185e-07 + Residual norm PBICGSTAB: 3.0323113569641994e-07 + Residual norm PCG: 8.44664592068035e-07 + Residual norm PGMRES: 3.200847871717492e-07 diff --git a/tests/cache_variableOrder.py b/tests/cache_variableOrder.py new file mode 100644 index 0000000..eb8e10f --- /dev/null +++ b/tests/cache_variableOrder.py @@ -0,0 +1 @@ +Timers: {} diff --git a/tests/drivers_base.py b/tests/drivers_base.py new file mode 100644 index 0000000..e9cba62 --- /dev/null +++ b/tests/drivers_base.py @@ -0,0 +1,93 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from mpi4py import MPI +from PyNucleus.base.utilsFem import runDriver +import os +import inspect +import pytest + + +def getPath(): + return os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + + +################################################################################ +# multigrid + +@pytest.fixture(scope='module', params=[1, 4]) +def ranks(request): + return request.param + + +@pytest.fixture(scope='module', params=['interval', 'square', 'cube']) +def domain(request): + return request.param + + +@pytest.fixture(scope='module', params=['P1', 'P2', 'P3']) +def element(request): + return request.param + + +@pytest.fixture(scope='module', params=[False, True]) +def symmetric(request): + return request.param + + +def testGMG(extra): + base = getPath()+'/../' + py = 'runSerialGMG.py' + path = base+'drivers' + cacheDir = getPath()+'/' + runDriver(path, py, cacheDir=cacheDir, extra=extra) + + +def testParallelGMG(ranks, domain, element, symmetric, extra): + base = getPath()+'/../' + py = ['runParallelGMG.py', + '--domain', domain, + '--element', element] + if symmetric: + py.append('--symmetric') + path = base+'drivers' + cacheDir = getPath()+'/' + runDriver(path, py, ranks=ranks, cacheDir=cacheDir, relTol=3e-2, extra=extra) + + + +################################################################################ +# multigrid for Helmholtz + +def testHelmholtz(ranks, domain, extra): + base = getPath()+'/../' + py = ['runHelmholtz.py', '--domain', domain] + path = base+'drivers' + cacheDir = getPath()+'/' + runDriver(path, py, ranks=ranks, cacheDir=cacheDir, extra=extra) + + + +################################################################################ +# interface problem + +@pytest.fixture(scope='module', params=[('doubleInterval', 10), + ('doubleSquare', 5)]) +def domainNoRef(request): + return request.param + + +def testInterface(domainNoRef, extra): + domain, noRef = domainNoRef + base = getPath()+'/../' + py = ['interfaceProblem.py', + '--domain', domain, + '--noRef', str(noRef)] + path = base+'drivers' + cacheDir = getPath()+'/' + runDriver(path, py, ranks=1, cacheDir=cacheDir, relTol=5e-2, extra=extra) diff --git a/tests/test.py b/tests/test.py new file mode 100644 index 0000000..22e7114 --- /dev/null +++ b/tests/test.py @@ -0,0 +1,12 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from drivers_base import * + +from tupleDict import * +from bitArray import * diff --git a/tests/test_drivers_intFracLapl.py b/tests/test_drivers_intFracLapl.py new file mode 100644 index 0000000..8c71f4d --- /dev/null +++ b/tests/test_drivers_intFracLapl.py @@ -0,0 +1,55 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +from PyNucleus.base.utilsFem import runDriver +import os +import inspect +import pytest + + +def getPath(): + return os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) + + +@pytest.fixture(scope='module', params=['interval', 'square']) +def domain(request): + return request.param + + +@pytest.fixture(scope='module', params=['fractional', 'indicator', 'peridynamic']) +def kernel(request): + return request.param + + +@pytest.fixture(scope='module', params=['poly-Dirichlet', 'poly-Neumann']) +def problem(request): + return request.param + + +def testNonlocal(domain, kernel, problem, extra): + base = getPath()+'/../' + py = ['runNonlocal.py', + '--domain', domain, + '--kernel', kernel, + '--problem', problem] + # if kernel != 'fractional': + py += ['--dense'] + path = base+'drivers' + cacheDir = getPath()+'/' + if problem == 'poly-Neumann' and domain == 'square': + return pytest.skip('not implemented') + runDriver(path, py, cacheDir=cacheDir, extra=extra) + + + +def testVariableOrder(extra): + base = getPath()+'/../' + py = 'variableOrder.py' + path = base+'drivers' + cacheDir = getPath()+'/' + runDriver(path, py, cacheDir=cacheDir, extra=extra) diff --git a/tests/test_fracLapl.py b/tests/test_fracLapl.py new file mode 100644 index 0000000..c77042f --- /dev/null +++ b/tests/test_fracLapl.py @@ -0,0 +1,246 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +from __future__ import division +import numpy as np +import numpy.linalg +from PyNucleus.fem import simpleInterval, circle, P1_DoFMap, P2_DoFMap +from PyNucleus.fem import constant +from PyNucleus.nl.nonlocalLaplacian import (assembleNonlocalOperator, + assembleFractionalLaplacianDiagonal, + + nonlocalBuilder) +from PyNucleus.nl.clusterMethodCy import H2Matrix +from PyNucleus.base.myTypes import REAL +from scipy.special import gamma +from PyNucleus.nl import constantTwoPoint, getFractionalKernel +from PyNucleus.nl.fractionalOrders import (constFractionalOrder, + constantFractionalLaplacianScaling, + variableFractionalLaplacianScaling) +import pytest +import logging +LOGGER = logging.getLogger(__name__) + + +###################################################################### +# Test dense operators by checking Hs-error + +def fracLapl(dim, s, errBnd, refinements, element, genKernel=False, cached=False): + if dim == 1: + mesh = simpleInterval(-1, 1) + elif dim == 2: + mesh = circle(10) + for _ in range(refinements): + mesh = mesh.refine() + if element == 'P1': + dm = P1_DoFMap(mesh, tag=0) + elif element == 'P2': + dm = P2_DoFMap(mesh, tag=0) + if cached: + raise NotImplementedError() + else: + A = assembleNonlocalOperator(mesh, dm, s, genKernel=genKernel).data + fun = constant(1.) 
+ rhs = dm.assembleRHS(fun) + u = np.linalg.solve(A, rhs) + s = s.value + if dim == 1: + err = np.sqrt(abs(np.vdot(rhs, u) - 2**(-2*s)*np.pi/gamma(1/2+s)/gamma(s+3/2))) + else: + err = np.sqrt(abs(np.dot(rhs, u)-2*np.pi * 2**(-2*s)*gamma(1)/gamma(1+s)**2/2/(s+1))) + msg = '' + msg += '\nBound: {}'.format(errBnd) + msg += '\nAll: {}'.format(err) + LOGGER.info(msg) + assert err < errBnd, '{} not smaller than {}'.format(err, errBnd) + + +@pytest.fixture(scope='module', + params=[(1, constFractionalOrder(0.3), 'P1', 0.15), + (1, constFractionalOrder(0.7), 'P1', 0.1), + + (2, constFractionalOrder(0.3), 'P1', 0.5), + (2, constFractionalOrder(0.7), 'P1', 0.35)], + ids=['1-P1-0.3', '1-P1-0.7', + + '2-P1-0.3', '2-P1-0.7']) +def setupExact(request): + return request.param + + +def testFracLapl(setupExact): + dim, s, element, errBnd = setupExact + if dim == 1: + refinements = 6 + else: + refinements = 2 + fracLapl(dim, s, errBnd, refinements, element) + + + +###################################################################### +# Test scaling and diagonal of dense operators + +def scaling(dim, s, horizon, refinements): + if dim == 1: + mesh = simpleInterval(-1, 1) + else: + mesh = circle(10) + for _ in range(refinements): + mesh = mesh.refine() + dm = P1_DoFMap(mesh, tag=0) + + kernel1 = getFractionalKernel(mesh.dim, s, horizon) + scaling = variableFractionalLaplacianScaling(True) + kernel2 = getFractionalKernel(mesh.dim, s, horizon, scaling=scaling) + print(kernel1, kernel2) + zeroExterior = not np.isfinite(horizon.value) + builder1 = nonlocalBuilder(mesh, dm, kernel1, zeroExterior=zeroExterior) + builder2 = nonlocalBuilder(mesh, dm, kernel2, zeroExterior=zeroExterior) + A = builder1.getDense().toarray() + B = builder2.getDense().toarray() + assert np.allclose(A, B) + + if horizon.value == np.inf: + dA = builder1.getDiagonal() + mA = np.absolute(A.diagonal()-dA.diagonal).max() + rmA = np.absolute((A.diagonal()-dA.diagonal)/A.diagonal()).max() + assert np.allclose(A.diagonal(), dA.diagonal, rtol=2e-3), 'Diagonal A does not match; max diff = {}, rel max diff={}'.format(mA, rmA) + dB = builder2.getDiagonal() + mB = np.absolute(B.diagonal()-dB.diagonal).max() + rmB = np.absolute((B.diagonal()-dB.diagonal)/B.diagonal()).max() + assert np.allclose(B.diagonal(), dB.diagonal, rtol=2e-3), 'Diagonal B does not match; max diff = {}, rel max diff={}'.format(mB, rmB) + + +@pytest.fixture(scope='module', + params=[(1, constFractionalOrder(0.25), constant(np.inf)), + (1, constFractionalOrder(0.25), constant(1.)), + (1, constFractionalOrder(0.75), constant(np.inf)), + (1, constFractionalOrder(0.75), constant(1.)), + (2, constFractionalOrder(0.25), constant(np.inf)), + (2, constFractionalOrder(0.25), constant(1.)), + (2, constFractionalOrder(0.75), constant(np.inf)), + (2, constFractionalOrder(0.75), constant(1.))], + ids=['1-0.25-inf', '1-0.25-1', '1-0.75-inf', '1-0.75-1', + '2-0.25-inf', '2-0.25-1', '2-0.75-inf', '2-0.75-1']) +def setupScaling(request): + return request.param + + +def testScaling(setupScaling): + dim, s, horizon = setupScaling + if dim == 1: + refinements = 6 + else: + refinements = 2 + scaling(dim, s, horizon, refinements) + + +###################################################################### +# Test H2 operators by comparing to dense operator + +def h2(dim, s, refinements, element, errBnd, genKernel=False): + if dim == 1: + mesh = simpleInterval(-1, 1) + eta = 1 + maxLevels = None + elif dim == 2: + mesh = circle(10) + eta = 3 + maxLevels = 2 + # mesh = mesh.refine() + for _ in 
range(refinements): + mesh = mesh.refine() + # mesh.sortVertices() + if element == 'P1': + DoFMap_fine = P1_DoFMap(mesh, tag=-1 if s.value < 0.5 else 0) + elif element == 'P2': + DoFMap_fine = P2_DoFMap(mesh, tag=-1 if s.value < 0.5 else 0) + params = {} + params['genKernel'] = genKernel + params['eta'] = eta + params['maxLevels'] = maxLevels + kernel = getFractionalKernel(mesh.dim, s, constant(np.inf)) + builder = nonlocalBuilder(mesh, DoFMap_fine, kernel, params=params, zeroExterior=True) + + A_d = np.array(builder.getDense().data) + A_h2 = builder.getH2() + assert isinstance(A_h2, H2Matrix) + LOGGER.info(str(A_h2)) + + n = A_d.shape[0] + Afar = np.zeros((n, n), dtype=REAL) + for level in A_h2.Pfar: + for c in A_h2.Pfar[level]: + Afar[np.ix_(list(c.n1.dofs.toSet()), list(c.n2.dofs.toSet()))] = A_d[np.ix_(list(c.n1.dofs.toSet()), list(c.n2.dofs.toSet()))] + Anear = A_d-Afar + errNear = np.absolute(Anear-A_h2.Anear.toarray()).max() + + x = np.ones((A_d.shape[0]), dtype=REAL) + + y_d = np.dot(Afar, x) + y_h2 = np.zeros_like(y_d) + if len(A_h2.Pfar) > 0: + A_h2.tree.upwardPass(x) + A_h2.tree.resetCoefficientsDown() + for level in A_h2.Pfar: + for clusterPair in A_h2.Pfar[level]: + n1, n2 = clusterPair.n1, clusterPair.n2 + clusterPair.apply(n2.coefficientsUp, n1.coefficientsDown) + A_h2.tree.downwardPass(y_h2) + errFar = np.absolute(y_d-y_h2).max() + + y_d = np.dot(A_d, x) + y_h2 = A_h2*x + errAll = np.absolute(y_d-y_h2).max() + msg = '' + msg += '\nBound: {}'.format(errBnd) + msg += '\nNear field: {}'.format(errNear) + msg += '\nFar field: {}'.format(errFar) + msg += '\nAll: {}'.format(errAll) + LOGGER.info(msg) + if errNear > errBnd: + try: + import matplotlib + import matplotlib.pyplot as plt + print(Anear-A_h2.Anear.toarray()) + plt.pcolormesh(np.absolute(Anear-A_h2.Anear.toarray()), norm=matplotlib.colors.LogNorm()) + plt.colorbar() + plt.show() + except ImportError: + pass + assert errNear < errBnd + assert errFar < errBnd + assert errAll < errBnd + + +def idfunc(param): + S = [str(p) for p in param] + return '-'.join(S) + + +@pytest.fixture(scope='module', + params=[(1, constFractionalOrder(0.3), 1e-4, 'P1'), + (1, constFractionalOrder(0.7), 1e-2, 'P1'), + + (2, constFractionalOrder(0.3), 1.2e-4, 'P1'), + (2, constFractionalOrder(0.7), 1e-2, 'P1')], + ids=idfunc) +def setupH2(request): + return request.param + + +def testH2(setupH2): + dim, s, errBnd, element = setupH2 + if dim == 1: + refinements = 6 + else: + refinements = 3 + h2(dim, s, refinements, element, errBnd) + + diff --git a/tests/test_h2finiteHorizon.py b/tests/test_h2finiteHorizon.py new file mode 100644 index 0000000..fa0396f --- /dev/null +++ b/tests/test_h2finiteHorizon.py @@ -0,0 +1,284 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import numpy as np +from PyNucleus.fem import intervalWithInteraction, squareWithInteractions, P1_DoFMap, Lambda, circle, constant +from PyNucleus.nl.fractionalOrders import (constFractionalOrder, + leftRightFractionalOrder, + variableConstFractionalOrder) +from PyNucleus.nl.clusterMethodCy import H2Matrix +from PyNucleus.nl.nonlocalLaplacian import nonlocalBuilder +from PyNucleus.nl import getFractionalKernel +from scipy.linalg import solve +import pytest +import matplotlib.pyplot as plt + + +def idfunc(param): + S = [str(p) for p in param] + return '-'.join(S) + + +@pytest.fixture(scope='module', + params=[(1, constFractionalOrder(0.25), 1.0, 0.5, True), + (1, constFractionalOrder(0.75), 1.0, 0.5, True), + (1, constFractionalOrder(0.25), 1.0, 0.5, False), + (1, constFractionalOrder(0.75), 1.0, 0.5, False), + (1, constFractionalOrder(0.25), 1.0, 2.5, False), + (1, constFractionalOrder(0.75), 1.0, 2.5, False), + (1, variableConstFractionalOrder(0.75), 0.5, 1.0, False), + (1, leftRightFractionalOrder(0.25, 0.75), 0.5, 1.0, False), + ], + ids=idfunc) +def kernels(request): + dim, s, horizon1, horizon2, normalized = request.param + kernel1 = getFractionalKernel(dim, s, constant(horizon1), normalized=normalized) + kernel2 = getFractionalKernel(dim, s, constant(horizon2), normalized=normalized) + return dim, kernel1, kernel2 + + +def meshNoOverlap(dim): + if dim == 1: + mesh = intervalWithInteraction(a=-1, b=1, h=2**-8, horizon=0.) + else: + raise NotImplementedError() + return mesh + + +def meshOverlap(dim, horizon): + if dim == 1: + mesh = intervalWithInteraction(a=-1, b=1, h=2**-8, horizon=horizon) + else: + raise NotImplementedError() + return mesh + + +def test_h2_finite(kernels): + dim, kernel1, kernel2 = kernels + + mesh1 = meshOverlap(dim, kernel1.horizon.value) + dm1 = P1_DoFMap(mesh1) + + mesh2 = meshNoOverlap(dim) + dm2 = P1_DoFMap(mesh2) + + ind = dm1.interpolate(Lambda(lambda x: abs(x[0]) < 1-1e-12)) + idx = ind.toarray() > 0 + + builder1 = nonlocalBuilder(mesh1, dm1, kernel1, zeroExterior=False, logging=True) + print('\nDENSE\n') + A1 = builder1.getDense() + + print('\nH2\n') + A1_h2, Pnear = builder1.getH2(returnNearField=True) + # A2.setKernel(kernel1) + + # mass = assembleMass(mesh1, dm1) + # if mesh1.dim == 1: + # vol = 2 + # elif mesh1.dim == 2: + # vol = 2*np.pi * horizon + # else: + # raise NotImplementedError() + # C = kernel1.scalingValue + # s = kernel1.s.value + # M = (-vol*C*pow(kernel1.horizon.value, 1-mesh1.dim-2*s)/s) * mass + # A3 = A2+M + + # err3 = np.log10(np.absolute(A1.toarray()-A3.toarray())) + # print(err3.max()) + + # plt.figure() + # Adnear = A2.Anear.copy() + # for i in range(Adnear.num_rows): + # Adnear.diagonal[i] = A1.data[i, i] + # for jj in range(Adnear.indptr[i], Adnear.indptr[i+1]): + # j = Adnear.indices[jj] + # Adnear.data[jj] = A1.data[i, j] + # err2 = np.log10(np.absolute(A2.Anear.toarray()-Adnear.toarray())) + # plt.pcolormesh(err2) + # plt.colorbar() + # print(err2.max()) + + # plt.figure() + # err2 = np.log10(np.absolute((A2-A2.Anear).toarray()-(A1-Adnear).toarray())) + # plt.pcolormesh(err2, vmin=-6) + # plt.colorbar() + # print(err2.max()) + + print('\nCORRECTED\n') + builder2 = nonlocalBuilder(mesh2, dm2, kernel1, zeroExterior=False) + A2 = builder2.getH2FiniteHorizon() + A2.setKernel(kernel1) + + A1d = A1.toarray()[np.ix_(idx, idx)] + A1_h2d = A1_h2.toarray()[np.ix_(idx, idx)] + A1_h2_neard = A1_h2.Anear.toarray()[np.ix_(idx, idx)] + A1_neard = 
A1d.copy() + A1_neard[np.where(np.absolute(A1_h2_neard) == 0.)] = 0. + A2d = A2.toarray() + + nn = np.absolute(A1d) + nn[nn < 1e-16] = 1. + + errDenseH2 = np.absolute(A1d-A1_h2d) + errDenseH2_rel = errDenseH2/nn + print('errDenseH2', errDenseH2.max(), errDenseH2_rel.max()) + + errDenseH2_near = np.absolute(A1_neard-A1_h2_neard) + errDenseH2_near_rel = errDenseH2_near/nn + print('errDenseH2_near', errDenseH2_near.max(), errDenseH2_near_rel.max()) + + errDenseCor = np.absolute(A1d-A2d) + errDenseCor_rel = errDenseCor/nn + print('errDenseCor', errDenseCor.max(), errDenseCor_rel.max()) + + errH2Cor = np.absolute(A1_h2d-A2d) + errH2Cor_rel = errH2Cor/nn + print('errH2Cor', errH2Cor.max(), errH2Cor_rel.max()) + + # c = dm1.getDoFCoordinates()[idx, 0] + # X, Y = np.meshgrid(c, c) + + # if errDenseH2.max() > -3: + # plt.figure() + # plt.pcolormesh(X, Y, np.log10(np.maximum(errDenseH2, 1e-12)), vmin=-6) + # plt.colorbar() + # plt.title('errDenseH2 absolute') + + # plt.figure() + # plt.pcolormesh(X, Y, np.log10(np.maximum(errDenseH2_rel, 1e-12)), vmin=-6) + # plt.colorbar() + # plt.title('errDenseH2 relative') + + # if errDenseCor.max() > -3: + # plt.figure() + # plt.pcolormesh(X, Y, np.log10(np.maximum(errDenseCor, 1e-12)), vmin=-6) + # plt.colorbar() + # plt.title('errDenseCor absolute') + + # plt.figure() + # plt.pcolormesh(X, Y, np.log10(np.maximum(errDenseCor_rel, 1e-12)), vmin=-6) + # plt.colorbar() + # plt.title('errDenseCor relative') + + # # plt.figure() + # # A1_h2.plot(Pnear) + + # plt.show() + + # assert errDenseH2.max() < 1e-4 + # assert errDenseCor.max() < 1e-4 + # assert errH2Cor.max() < 1e-4 + + rhs = Lambda(lambda x: 1. if abs(x[0]) < 1. else 0.) + b1 = dm1.assembleRHS(rhs).toarray() + + y1 = solve(A1d, b1[idx]) + x1 = np.zeros((A1.shape[0])) + x1[idx] = y1 + + y1_h2 = solve(A1_h2d, b1[idx]) + x1_h2 = np.zeros((A1.shape[0])) + x1_h2[idx] = y1_h2 + + b2 = dm2.assembleRHS(rhs).toarray() + y2 = solve(A2d, b2) + x2 = np.zeros((A1.shape[0])) + x2[idx] = y2 + + # assert np.absolute(A1d[np.ix_(idx, idx)]-A2d).max() < 1e-5 + + M = dm1.assembleMass() + L2_denseH2 = np.sqrt(abs(np.vdot(M*(x1-x1_h2), x1-x1_h2))) + L2_denseCor = np.sqrt(abs(np.vdot(M*(x1-x2), x1-x2))) + L2_H2Cor = np.sqrt(abs(np.vdot(M*(x1_h2-x2), x1_h2-x2))) + L2_dense = np.sqrt(abs(np.vdot(M*x1, x1))) + L2_cor = np.sqrt(abs(np.vdot(M*x2, x2))) + + # if not (L2/L2_1 < mesh2.h**(0.5+min(kernel1.s.min, 0.5))): + print('L2 errDenseH2', L2_denseH2) + print('L2 errDenseCor', L2_denseCor) + print('L2 errH2Cor', L2_H2Cor) + + # mesh1.plotFunction(x1, DoFMap=dm1, label='dense') + # mesh1.plotFunction(x1_h2, DoFMap=dm1, label='h2') + # mesh1.plotFunction(x2, DoFMap=dm1, label='corrected') + # plt.legend() + # plt.show() + + assert L2_denseCor/L2_dense < mesh2.h**(0.5+min(kernel1.s.min, 0.5)), (L2_denseCor, L2_dense, L2_denseCor/L2_dense, mesh2.h**(0.5+min(kernel1.s.min, 0.5))) + + mesh3 = meshOverlap(dim, kernel2.horizon.value) + dm3 = P1_DoFMap(mesh3) + + ind = dm3.interpolate(Lambda(lambda x: abs(x[0]) < 1-1e-12)) + idx = ind.toarray() > 0 + + print('\nCORRECTED\n') + A2.setKernel(kernel2) + + print('\nDENSE\n') + builder3 = nonlocalBuilder(mesh3, dm3, kernel2, zeroExterior=False) + A3 = builder3.getDense() + + A2d = A2.toarray() + A3d = A3.toarray()[np.ix_(idx, idx)] + + nn = np.absolute(A3d) + nn[nn < 1e-16] = 1. 
+ + errDenseCor = np.absolute(A3d-A2d) + errDenseCor_rel = errDenseCor/nn + print('errDenseCor', errDenseCor.max(), errDenseCor_rel.max()) + + y2 = solve(A2d, b2) + x2 = np.zeros((A3.shape[0])) + x2[idx] = y2 + + b3 = dm3.assembleRHS(rhs).toarray() + y3 = solve(A3d, b3[idx]) + x3 = np.zeros((A3.shape[0])) + x3[idx] = y3 + + # assert np.absolute(A3d[np.ix_(idx, idx)]-A2d).max() < 1e-5 + + M = dm3.assembleMass() + + L2_denseCor = np.sqrt(abs(np.vdot(M*(x3-x2), x3-x2))) + L2_dense = np.sqrt(abs(np.vdot(M*x3, x3))) + L2_cor = np.sqrt(abs(np.vdot(M*x2, x2))) + + print('L2 errDenseCor', L2_denseCor) + + # mesh3.plotFunction(x2, DoFMap=dm3, label='corrected') + # mesh3.plotFunction(x3, DoFMap=dm3, label='dense') + # plt.legend() + # plt.show() + + + # if not (L2 < mesh2.h**(0.5+min(kernel2.s.value, 0.5))): + # mesh3.plotFunction(x3, DoFMap=dm3) + # mesh3.plotFunction(x2, DoFMap=dm3) + # plt.figure() + # for lvl in A2.Ainf.Pfar: + # for fCP in A2.Ainf.Pfar[lvl]: + # fCP.plot() + # plt.figure() + # diff = np.absolute((A3d[np.ix_(idx, idx)]-A2d)) + # plt.pcolormesh(np.log10(diff)) + # plt.colorbar() + # plt.figure() + # diffRel = np.absolute((A3d[np.ix_(idx, idx)]-A2d)/A2d) + # diffRel[diff < 1e-12] = 0. + # plt.pcolormesh(np.log10(diffRel)) + # print(diffRel[np.isfinite(diffRel)].max(), diffRel[np.isfinite(diffRel)].mean(), np.median(diffRel[np.isfinite(diffRel)])) + # plt.colorbar() + # plt.show() + + assert L2_denseCor/L2_dense < mesh2.h**(0.5+min(kernel2.s.min, 0.5)), (L2_denseCor, L2_dense, L2_denseCor/L2_dense, mesh2.h**(0.5+min(kernel1.s.min, 0.5))) diff --git a/tests/test_nearField.py b/tests/test_nearField.py new file mode 100644 index 0000000..46b91a1 --- /dev/null +++ b/tests/test_nearField.py @@ -0,0 +1,467 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +# Compare dense matrix and near field assembly with cluster pairs covering all matrix blocks + +from mpi4py import MPI +import numpy as np +from PyNucleus.base.myTypes import REAL, INDEX +from PyNucleus.base import uninitialized +from PyNucleus.base.tupleDict import arrayIndexSet +from PyNucleus.fem import simpleInterval, uniformSquare, P0_DoFMap, P1_DoFMap, constant +from PyNucleus.nl import H2Matrix, nonlocalBuilder, getFractionalKernel +from PyNucleus.nl.nonlocalLaplacian import nearFieldClusterPair +from PyNucleus.nl.clusterMethodCy import (getDoFBoxesAndCells, + tree_node) + +from PyNucleus.nl.fractionalOrders import (constFractionalOrder, + variableConstFractionalOrder, + leftRightFractionalOrder, + layersFractionalOrder, + lambdaFractionalOrder) +from PyNucleus.base import driver +from PyNucleus.nl.nonlocalProblems import nonlocalMeshFactory, HOMOGENEOUS_DIRICHLET +import pytest + +epsRelDense = 3e-2 +epsAbsDense = {(1, np.inf): 1e-5, + (2, np.inf): 5e-3, + (1, 1.0): 7e-3, + (2, 1.0): 5e-3} +epsRelH2 = 1e-1 +epsAbsH2 = {(1, np.inf): 5e-5, + (2, np.inf): 5e-3, + (1, 1.0): 7e-3, + (2, 1.0): 5e-3} + + +class test: + __test__ = False + params = {'target_order': 3} + + @classmethod + def setup_class(self): + + kernel = getFractionalKernel(self.dim, self.s, self.horizon, normalized=self.normalized, phi=self.phi) + + if self.dim == 1: + self.mesh, nI = nonlocalMeshFactory.build('interval', kernel, self.boundaryCondition) + domainIndicator = nI['domain'] + boundaryIndicator = nI['boundary'] + interactionIndicator = nI['interaction'] + self.tag = nI['tag'] + self.zeroExterior = nI['zeroExterior'] + # noRef = 6 + elif self.dim == 2: + self.mesh, nI = nonlocalMeshFactory.build('square', kernel, self.boundaryCondition) + # noRef = 2 + domainIndicator = nI['domain'] + boundaryIndicator = nI['boundary'] + interactionIndicator = nI['interaction'] + self.tag = nI['tag'] + self.zeroExterior = nI['zeroExterior'] + + if self.element == 0: + DoFMap = P0_DoFMap + elif self.element == 1: + DoFMap = P1_DoFMap + else: + raise NotImplementedError() + + self.dm = DoFMap(self.mesh, self.tag) + while self.dm.num_dofs < 230: + self.mesh = self.mesh.refine() + self.dm = DoFMap(self.mesh, self.tag) + self.mesh.sortVertices() + self.dm = DoFMap(self.mesh, self.tag) + print(self.dm) + + self.builder = nonlocalBuilder(self.mesh, self.dm, kernel, params=self.params, zeroExterior=self.zeroExterior) + + if isinstance(self.s, variableConstFractionalOrder) and self.phi is None: + s = constFractionalOrder(self.s.value) + kernel = getFractionalKernel(self.dim, s, self.horizon, normalized=True) + self.constBuilder = nonlocalBuilder(self.mesh, self.dm, kernel, params=self.params, zeroExterior=self.zeroExterior) + self.baseA = self.constBuilder.getDense() + self.baseLabel = 'dense_const' + else: + self.baseA = self.builder.getDense() + self.baseLabel = 'dense_var' + + def getPnear(self, maxLevels): + boxes, cells = getDoFBoxesAndCells(self.mesh, self.dm) + centers = uninitialized((self.dm.num_dofs, self.mesh.dim), dtype=REAL) + for i in range(self.dm.num_dofs): + centers[i, :] = boxes[i, :, :].mean(axis=1) + blocks, jumps = self.builder.getKernelBlocksAndJumps() + dofs = arrayIndexSet(np.arange(self.dm.num_dofs, dtype=INDEX)) + root = tree_node(None, dofs, boxes) + if len(blocks) > 1: + for key in blocks: + subDofs = arrayIndexSet() + subDofs.fromSet(blocks[key]) + if len(subDofs) > 0: + root.children.append(tree_node(root, subDofs, 
boxes, mixed_node=key == np.inf)) + root._dofs = None + assert self.dm.num_dofs == sum([len(c.dofs) for c in root.children]) + assert len(root.children) > 1 + if maxLevels > 0: + for n in root.leaves(): + n.refine(boxes, centers, maxLevels=maxLevels, maxLevelsMixed=maxLevels) + root.set_id() + # enter cells in leaf nodes + for n in root.leaves(): + myCells = set() + for dof in n.dofs.toSet(): + myCells |= cells[dof] + n._cells = arrayIndexSet() + n._cells.fromSet(myCells) + + diam = 0 + for i in range(self.dim): + diam += (n.box[i, 1]-n.box[i, 0])**2 + diam = np.sqrt(diam) + if 2*diam > self.horizon.value: + print('Clusters of size {} too large for horizon {}'.format(diam, self.horizon.value)) + return [], {} + Pnear = [] + r = list(root.leaves()) + for c in r: + for d in r: + assert c.isLeaf + assert d.isLeaf + Pnear.append(nearFieldClusterPair(c, d)) + return Pnear, jumps + + def constH2(self): + A_h2 = self.constBuilder.getH2() + assert isinstance(A_h2, H2Matrix) + self.compare("{}-h2_const".format(self.baseLabel), self.baseA, A_h2) + + def constCluster(self, maxLevels): + if isinstance(self.s, variableConstFractionalOrder): + s = constFractionalOrder(self.s.value) + Pnear, _ = self.getPnear(maxLevels) + if len(Pnear) > 0: + A_fix_near = self.builder.assembleClusters(Pnear) + self.compare("{}-cluster_const({})".format(self.baseLabel, maxLevels), self.baseA, A_fix_near) + else: + pytest.skip('Only works for variableConstFractionalOrder') + + def testConstCluster(self, levels=[0, 1, 2, 3, 4]): + print() + print(self.s) + if isinstance(levels, int): + levels = [levels] + for maxLevels in levels: + self.constCluster(maxLevels) + + def testConstH2(self): + if isinstance(self.s, variableConstFractionalOrder) and self.dim == 1: + print() + print(self.s) + self.constH2() + elif self.dim == 2: + pytest.skip('Does not work in 2d, since the mesh is too small to get an H2 matrix') + else: + pytest.skip('Only works for variableConstFractionalOrder in 1D') + + def varDense(self): + A_var = self.builder.getDense() + self.compare("{}-dense_var".format(self.baseLabel), self.baseA, A_var) + + def varCluster(self, maxLevels): + Pnear, jumps = self.getPnear(maxLevels) + if len(Pnear) > 0: + print('Jumps: {}'.format(len(jumps))) + A_var_near = self.builder.assembleClusters(Pnear, jumps=jumps) + self.compare("{}-cluster_var({})".format(self.baseLabel, maxLevels), self.baseA, A_var_near) + + def testVarDense(self): + if isinstance(self.s, variableConstFractionalOrder): + print() + print(self.s) + self.varDense() + else: + pytest.skip('Only makes sense for variableConstFractionalOrder') + + def testVarCluster(self, levels=[0, 1, 2, 3, 4, 5]): + print() + print(self.s) + if isinstance(levels, int): + levels = [levels] + for maxLevels in levels: + self.varCluster(maxLevels) + + def compare(self, label, A1, A2): + if isinstance(A1, H2Matrix) or isinstance(A2, H2Matrix): + epsAbs = epsAbsH2 + epsRel = epsRelH2 + else: + epsAbs = epsAbsDense + epsRel = epsRelDense + A1 = A1.toarray() + A2 = A2.toarray() + value = np.absolute(A1-A2).max() + valueRel = np.absolute((A1-A2)/A1)[np.absolute(A1) > 0].max() + print('{}: abs: {} rel: {}'.format(label, value, valueRel)) + if value > epsAbs[(self.dim, self.horizon.value)] or valueRel > epsRel: + print(A1.diagonal()) + print(A2.diagonal()) + print(A1.diagonal()-A2.diagonal()) + print() + try: + import matplotlib + import matplotlib.pyplot as plt + except ImportError: + return + if self.mesh.dim == 1: + x = self.dm.getDoFCoordinates() + indMax = 
np.absolute(A1.diagonal()-A2.diagonal()).argmax() + print(indMax, x[indMax]) + X, Y = np.meshgrid(x, x) + plt.figure() + plt.pcolormesh(X, Y, A1) + plt.colorbar() + plt.title('A1') + + plt.figure() + plt.pcolormesh(X, Y, A2) + plt.colorbar() + plt.title('A2') + + plt.figure() + plt.pcolormesh(X, Y, np.absolute(np.around(A1-A2, 9)), norm=matplotlib.colors.LogNorm()) + plt.colorbar() + plt.title('|A1-A2|') + + plt.figure() + # plt.pcolormesh(X, Y, np.absolute(np.around((A1-A2)/A1, 9)), norm=matplotlib.colors.LogNorm()) + plt.pcolormesh(X, Y, np.log10(np.absolute((A1-A2)/A1))) + plt.colorbar() + plt.title('log |(A1-A2)/A1|') + + plt.show() + else: + plt.figure() + err = self.dm.zeros() + err.assign(np.absolute((A1.diagonal()-A2.diagonal())/(A1.diagonal()))) + err.plot(flat=True) + plt.title('diagonal error') + + plt.figure() + plt.pcolormesh(A1) + plt.colorbar() + plt.title('A1') + + plt.figure() + plt.pcolormesh(A2) + plt.colorbar() + plt.title('A2') + + plt.figure() + plt.pcolormesh(np.absolute(A1-A2), norm=matplotlib.colors.LogNorm()) + plt.colorbar() + plt.title('|A1-A2|') + + plt.figure() + # plt.pcolormesh(np.absolute(np.around((A1-A2)/A1, 9)), norm=matplotlib.colors.LogNorm()) + plt.pcolormesh(np.log10(np.absolute((A1-A2)/A1))) + plt.colorbar() + plt.title('log |(A1-A2)/A1|') + + plt.show() + assert value < epsAbs[(self.dim, self.horizon.value)] and valueRel < epsRel + + +class test1D(test): + dim = 1 + element = 1 + horizon = constant(np.inf) + normalized = True + phi = None + boundaryCondition = HOMOGENEOUS_DIRICHLET + + +class test2D(test): + dim = 2 + element = 1 + horizon = constant(np.inf) + normalized = True + phi = None + boundaryCondition = HOMOGENEOUS_DIRICHLET + + +class const1D_025(test1D): + __test__ = True + s = variableConstFractionalOrder(0.25) + + +class const1D_075(test1D): + __test__ = True + s = variableConstFractionalOrder(0.75) + + +class const1D_025_finiteHorizon(test1D): + __test__ = True + s = variableConstFractionalOrder(0.25) + horizon = constant(1.0) + + +class const1D_075_finiteHorizon(test1D): + __test__ = True + s = variableConstFractionalOrder(0.75) + horizon = constant(1.0) + + +class leftRight1D(test1D): + __test__ = True + s = leftRightFractionalOrder(0.25, 0.75) + + +class leftRight1DfiniteHorizon(test1D): + __test__ = True + s = leftRightFractionalOrder(0.25, 0.75) + horizon = constant(1.0) + + +class const2D_025(test2D): + __test__ = True + s = variableConstFractionalOrder(0.25) + + +class const2D_075(test2D): + __test__ = True + s = variableConstFractionalOrder(0.75) + + +class const2D_025_finiteHorizon(test2D): + __test__ = True + s = variableConstFractionalOrder(0.25) + horizon = constant(1.0) + + +class const2D_075_finiteHorizon(test2D): + __test__ = True + s = variableConstFractionalOrder(0.75) + horizon = constant(1.0) + + +class leftRight2DinfiniteHorizon(test2D): + __test__ = True + s = leftRightFractionalOrder(0.25, 0.75) + + +class leftRight2DfiniteHorizon(test2D): + __test__ = True + s = leftRightFractionalOrder(0.25, 0.75) + horizon = constant(1.0) + + +class layers2D(test2D): + __test__ = True + t = np.linspace(0.2, 0.8, 4, dtype=REAL) + s = np.empty((t.shape[0], t.shape[0]), dtype=REAL) + for i in range(t.shape[0]): + for j in range(t.shape[0]): + s[i, j] = 0.5*(t[i]+t[j]) + s = layersFractionalOrder(2, np.linspace(-1., 1., s.shape[0]+1, dtype=REAL), s) + + + +if __name__ == '__main__': + d = driver(MPI.COMM_WORLD) + d.add('dim', 1, acceptedValues=[2]) + d.add('doVar', False) + d.add('doUnSym', False) + d.add('target_order', 3) + 
d.add('element', 1) + d.add('levels', -1) + params = d.process() + if params['levels'] == -1: + params['levels'] = [0, 1, 2] + + if params['dim'] == 1: + tests = [const1D_025(), const1D_075()] + elif params['dim'] == 2: + tests = [const2D_025(), const2D_075()] + for t in tests: + t.setup_class() + if params['doVar']: + t.testVarDense() + t.testConstCluster(params['levels']) + if params['doVar']: + t.testVarCluster(params['levels']) + +# for s in [variableConstFractionalOrder(params['dim'], 0.25), +# variableConstFractionalOrder(params['dim'], 0.75)]: +# t = test(s) +# t.setup() +# if params['doVar']: +# t.varDense() +# for maxLevels in [0, 1, 2]: +# t.constCluster(maxLevels) +# if params['doVar']: +# t.varCluster(maxLevels) + +# if params['doVar']: +# def sFun(x, y): +# if ((abs(x[0]-0.25) < 0.125 or abs(x[0]+0.25) < 0.125) and +# (abs(y[0]-0.25) < 0.125 or abs(y[0]+0.25) < 0.125)): +# return 0.4 +# elif ((abs(x[0]-0.25) < 0.125 or abs(x[0]+0.25) < 0.125) and +# not (abs(y[0]-0.25) < 0.125 or abs(y[0]+0.25) < 0.125)): +# return 0.2 +# elif (not (abs(x[0]-0.25) < 0.125 or abs(x[0]+0.25) < 0.125) and +# (abs(y[0]-0.25) < 0.125 or abs(y[0]+0.25) < 0.125)): +# return 0.2 +# elif (not (abs(x[0]-0.25) < 0.125 or abs(x[0]+0.25) < 0.125) and +# not (abs(y[0]-0.25) < 0.125 or abs(y[0]+0.25) < 0.125)): +# return 0.75 +# else: +# raise NotImplementedError() + +# sSpecial = lambdaFractionalOrder(params['dim'], 0.2, 0.75, True, sFun) + +# for s in [leftRightFractionalOrder(params['dim'], 0.25, 0.75), +# leftRightFractionalOrder(params['dim'], 0.75, 0.25), +# sSpecial]: +# t = test(s) +# t.setup() +# for maxLevels in [0, 1, 2]: +# t.varCluster(maxLevels) + +# if params['doUnSym']: +# s.symmetric = False +# A_unsym = assembleFractionalLaplacian(mesh, dm, s, params=params, zeroExterior=zeroExterior, interior=interior) + +# print() +# print(sFun) +# compare("var-unsym: ", A_var.toarray(), A_unsym.toarray()) + +# for maxLevels in [0, 1, 2]: +# Pnear = getPnear(mesh, dm, maxLevels) +# A_unsym_near = assembleNearField(Pnear, mesh, dm, sFun, params=params, zeroExterior=zeroExterior, interior=interior) + +# print() +# print(sFun) +# compare("maxLevels: {}\n".format(maxLevels) + +# "var-unsym_near: ", A_var.toarray(), A_unsym_near.toarray()) + +# if params['doUnSym']: +# for sll, srr, slr, srl in [(0.25, 0.75, 0.25, 0.75)]: +# sFun = leftRightFractionalOrder(sll, srr, slr, srl) +# A_var = assembleNonlocalOperator(mesh, dm, sFun, params=params, zeroExterior=zeroExterior, interior=interior) +# for maxLevels in [0, 1, 2]: +# Pnear = getPnear(mesh, dm, maxLevels) +# A_var_near = assembleNearField(Pnear, mesh, dm, sFun, params=params, zeroExterior=zeroExterior, interior=interior) + +# print() +# print(sFun) +# compare("maxLevels: {}\n".format(maxLevels) + +# "unsym-unsym_near: ", A_var.toarray(), A_var_near.toarray()) diff --git a/tests/tupleDict.py b/tests/tupleDict.py new file mode 100644 index 0000000..da42f85 --- /dev/null +++ b/tests/tupleDict.py @@ -0,0 +1,79 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. 
# +################################################################################### + + +import numpy as np +from PyNucleus.base import INDEX +from PyNucleus.base.tupleDict import tupleDictMASK +from copy import deepcopy + + +def test_tupleDict(): + N = 15 + masks = tupleDictMASK(N, deleteHits=False, logicalAndHits=True) + e = np.empty((2), dtype=INDEX) + for k in range(N-2, -1, -1): + e[0] = k + e[1] = k+1 + masks.enterValue_py(e, 1) + assert masks.nnz == N-1, (masks.nnz, N-1) + assert masks.isCorrect() + for k in range(N-1): + e[0] = k + e[1] = k+1 + masks.enterValue_py(e, 2) + assert masks.nnz == N-1 + masks2 = deepcopy(masks) + assert masks2.nnz == N-1 + for k in range(N-1): + e[0] = k + e[1] = k+1 + assert masks2[e] == 3 + masks3 = tupleDictMASK(N, deleteHits=False, logicalAndHits=True) + for k in range(N-1): + e[0] = k + e[1] = k+1 + masks3.enterValue_py(e, 4) + masks2.merge(masks3) + assert masks2.nnz == N-1 + for k in range(N-1): + e[0] = k + e[1] = k+1 + assert masks2[e] == 7 + + masks = tupleDictMASK(1, deleteHits=False, logicalAndHits=True) + for k in range(30, -1, -1): + e[0] = 0 + e[1] = k + masks.enterValue_py(e, k) + for k in range(31): + e[0] = 0 + e[1] = k + masks.enterValue_py(e, k) + assert masks.nnz == 31, (masks.nnz, 31) + assert masks.isCorrect() + for k in range(31): + e[0] = 0 + e[1] = k + assert masks[e] == k + + masks = tupleDictMASK(1, deleteHits=False, logicalAndHits=True) + for k in range(31): + e[0] = 0 + e[1] = k + masks.enterValue_py(e, k) + assert masks.isCorrect(), k + # for k in range(30, -1, -1): + # e[0] = 0 + # e[1] = k + # masks.enterValue_py(e, k) + assert masks.nnz == 31, (masks.nnz, 31) + assert masks.isCorrect() + for k in range(31): + e[0] = 0 + e[1] = k + assert masks[e] == k diff --git a/versioneer.py b/versioneer.py new file mode 100644 index 0000000..d9c300b --- /dev/null +++ b/versioneer.py @@ -0,0 +1,2116 @@ +################################################################################### +# Copyright 2021 National Technology & Engineering Solutions of Sandia, # +# LLC (NTESS). Under the terms of Contract DE-NA0003525 with NTESS, the # +# U.S. Government retains certain rights in this software. # +# If you want to use this code, please refer to the README.rst and LICENSE files. # +################################################################################### + + +# Version: 0.21 + +"""The Versioneer - like a rocketeer, but for versions. + +The Versioneer +============== + +* like a rocketeer, but for versions! +* https://github.com/python-versioneer/python-versioneer +* Brian Warner +* License: Public Domain +* Compatible with: Python 3.6, 3.7, 3.8, 3.9 and pypy3 +* [![Latest Version][pypi-image]][pypi-url] +* [![Build Status][travis-image]][travis-url] + +This is a tool for managing a recorded version number in distutils-based +python projects. The goal is to remove the tedious and error-prone "update +the embedded version string" step from your release process. Making a new +release should be as easy as recording a new tag in your version-control +system, and maybe making new tarballs. 
+ + +## Quick Install + +* `pip install versioneer` to somewhere in your $PATH +* add a `[versioneer]` section to your setup.cfg (see [Install](INSTALL.md)) +* run `versioneer install` in your source tree, commit the results +* Verify version information with `python setup.py version` + +## Version Identifiers + +Source trees come from a variety of places: + +* a version-control system checkout (mostly used by developers) +* a nightly tarball, produced by build automation +* a snapshot tarball, produced by a web-based VCS browser, like github's + "tarball from tag" feature +* a release tarball, produced by "setup.py sdist", distributed through PyPI + +Within each source tree, the version identifier (either a string or a number, +this tool is format-agnostic) can come from a variety of places: + +* ask the VCS tool itself, e.g. "git describe" (for checkouts), which knows + about recent "tags" and an absolute revision-id +* the name of the directory into which the tarball was unpacked +* an expanded VCS keyword ($Id$, etc) +* a `_version.py` created by some earlier build step + +For released software, the version identifier is closely related to a VCS +tag. Some projects use tag names that include more than just the version +string (e.g. "myproject-1.2" instead of just "1.2"), in which case the tool +needs to strip the tag prefix to extract the version identifier. For +unreleased software (between tags), the version identifier should provide +enough information to help developers recreate the same tree, while also +giving them an idea of roughly how old the tree is (after version 1.2, before +version 1.3). Many VCS systems can report a description that captures this, +for example `git describe --tags --dirty --always` reports things like +"0.7-1-g574ab98-dirty" to indicate that the checkout is one revision past the +0.7 tag, has a unique revision id of "574ab98", and is "dirty" (it has +uncommitted changes). + +The version identifier is used for multiple purposes: + +* to allow the module to self-identify its version: `myproject.__version__` +* to choose a name and prefix for a 'setup.py sdist' tarball + +## Theory of Operation + +Versioneer works by adding a special `_version.py` file into your source +tree, where your `__init__.py` can import it. This `_version.py` knows how to +dynamically ask the VCS tool for version information at import time. + +`_version.py` also contains `$Revision$` markers, and the installation +process marks `_version.py` to have this marker rewritten with a tag name +during the `git archive` command. As a result, generated tarballs will +contain enough information to get the proper version. + +To allow `setup.py` to compute a version too, a `versioneer.py` is added to +the top level of your source tree, next to `setup.py` and the `setup.cfg` +that configures it. This overrides several distutils/setuptools commands to +compute the version when invoked, and changes `setup.py build` and `setup.py +sdist` to replace `_version.py` with a small static file that contains just +the generated version data. + +## Installation + +See [INSTALL.md](./INSTALL.md) for detailed installation instructions. + +## Version-String Flavors + +Code which uses Versioneer can learn about its version string at runtime by +importing `_version` from your main `__init__.py` file and running the +`get_versions()` function. From the "outside" (e.g. in `setup.py`), you can +import the top-level `versioneer.py` and run `get_versions()`. 
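+For illustration, a minimal sketch of both access patterns; the package name `myproject` is a placeholder, not something Versioneer itself provides: + + # at runtime, from inside the installed package ('myproject' is a placeholder) + from myproject._version import get_versions + print(get_versions()['version']) + + # at build time, e.g. in setup.py, next to the top-level versioneer.py + import versioneer + print(versioneer.get_versions()['version'])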
+ +Both functions return a dictionary with different flavors of version +information: + +* `['version']`: A condensed version string, rendered using the selected + style. This is the most commonly used value for the project's version + string. The default "pep440" style yields strings like `0.11`, + `0.11+2.g1076c97`, or `0.11+2.g1076c97.dirty`. See the "Styles" section + below for alternative styles. + +* `['full-revisionid']`: detailed revision identifier. For Git, this is the + full SHA1 commit id, e.g. "1076c978a8d3cfc70f408fe5974aa6c092c949ac". + +* `['date']`: Date and time of the latest `HEAD` commit. For Git, it is the + commit date in ISO 8601 format. This will be None if the date is not + available. + +* `['dirty']`: a boolean, True if the tree has uncommitted changes. Note that + this is only accurate if run in a VCS checkout, otherwise it is likely to + be False or None + +* `['error']`: if the version string could not be computed, this will be set + to a string describing the problem, otherwise it will be None. It may be + useful to throw an exception in setup.py if this is set, to avoid e.g. + creating tarballs with a version string of "unknown". + +Some variants are more useful than others. Including `full-revisionid` in a +bug report should allow developers to reconstruct the exact code being tested +(or indicate the presence of local changes that should be shared with the +developers). `version` is suitable for display in an "about" box or a CLI +`--version` output: it can be easily compared against release notes and lists +of bugs fixed in various releases. + +The installer adds the following text to your `__init__.py` to place a basic +version in `YOURPROJECT.__version__`: + + from ._version import get_versions + __version__ = get_versions()['version'] + del get_versions + +## Styles + +The setup.cfg `style=` configuration controls how the VCS information is +rendered into a version string. + +The default style, "pep440", produces a PEP440-compliant string, equal to the +un-prefixed tag name for actual releases, and containing an additional "local +version" section with more detail for in-between builds. For Git, this is +TAG[+DISTANCE.gHEX[.dirty]] , using information from `git describe --tags +--dirty --always`. For example "0.11+2.g1076c97.dirty" indicates that the +tree is like the "1076c97" commit but has uncommitted changes (".dirty"), and +that this commit is two revisions ("+2") beyond the "0.11" tag. For released +software (exactly equal to a known tag), the identifier will only contain the +stripped tag, e.g. "0.11". + +Other styles are available. See [details.md](details.md) in the Versioneer +source tree for descriptions. + +## Debugging + +Versioneer tries to avoid fatal errors: if something goes wrong, it will tend +to return a version of "0+unknown". To investigate the problem, run `setup.py +version`, which will run the version-lookup code in a verbose mode, and will +display the full contents of `get_versions()` (including the `error` string, +which may help identify what went wrong). + +## Known Limitations + +Some situations are known to cause problems for Versioneer. This details the +most significant ones. More can be found on Github +[issues page](https://github.com/python-versioneer/python-versioneer/issues). + +### Subprojects + +Versioneer has limited support for source trees in which `setup.py` is not in +the root directory (e.g. `setup.py` and `.git/` are *not* siblings). 
There are
+two common reasons why `setup.py` might not be in the root:
+
+* Source trees which contain multiple subprojects, such as
+  [Buildbot](https://github.com/buildbot/buildbot), which contains both
+  "master" and "slave" subprojects, each with their own `setup.py`,
+  `setup.cfg`, and `tox.ini`. Projects like these produce multiple PyPI
+  distributions (and upload multiple independently-installable tarballs).
+* Source trees whose main purpose is to contain a C library, but which also
+  provide bindings to Python (and perhaps other languages) in subdirectories.
+
+Versioneer will look for `.git` in parent directories, and most operations
+should get the right version string. However, `pip` and `setuptools` have bugs
+and implementation details which frequently cause `pip install .` from a
+subproject directory to fail to find a correct version string (so it usually
+defaults to `0+unknown`).
+
+`pip install --editable .` should work correctly. `setup.py install` might
+work too.
+
+Pip-8.1.1 is known to have this problem, but hopefully it will get fixed in
+some later version.
+
+[Bug #38](https://github.com/python-versioneer/python-versioneer/issues/38) is tracking
+this issue. The discussion in
+[PR #61](https://github.com/python-versioneer/python-versioneer/pull/61) describes the
+issue from the Versioneer side in more detail.
+[pip PR#3176](https://github.com/pypa/pip/pull/3176) and
+[pip PR#3615](https://github.com/pypa/pip/pull/3615) contain work to improve
+pip to let Versioneer work correctly.
+
+Versioneer-0.16 and earlier only looked for a `.git` directory next to the
+`setup.cfg`, so subprojects were completely unsupported with those releases.
+
+### Editable installs with setuptools <= 18.5
+
+`setup.py develop` and `pip install --editable .` allow you to install a
+project into a virtualenv once, then continue editing the source code (and
+test) without re-installing after every change.
+
+"Entry-point scripts" (`setup(entry_points={"console_scripts": ..})`) are a
+convenient way to specify executable scripts that should be installed along
+with the python package.
+
+These both work as expected when using modern setuptools. When using
+setuptools-18.5 or earlier, however, certain operations will cause
+`pkg_resources.DistributionNotFound` errors when running the entrypoint
+script, which must be resolved by re-installing the package. This happens
+when the install happens with one version, then the egg_info data is
+regenerated while a different version is checked out. Many setup.py commands
+cause egg_info to be rebuilt (including `sdist`, `wheel`, and installing into
+a different virtualenv), so this can be surprising.
+
+[Bug #83](https://github.com/python-versioneer/python-versioneer/issues/83) describes
+this one, but upgrading to a newer version of setuptools should probably
+resolve it.
+
+
+## Updating Versioneer
+
+To upgrade your project to a new release of Versioneer, do the following:
+
+* install the new Versioneer (`pip install -U versioneer` or equivalent)
+* edit `setup.cfg`, if necessary, to include any new configuration settings
+  indicated by the release notes. See [UPGRADING](./UPGRADING.md) for details.
+* re-run `versioneer install` in your source tree, to replace
+  `SRC/_version.py`
+* commit any changed files
+
+## Future Directions
+
+This tool is designed to be easily extended to other version-control
+systems: all VCS-specific components are in separate directories like
+src/git/ .
The top-level `versioneer.py` script is assembled from these
+components by running make-versioneer.py . In the future, make-versioneer.py
+will take a VCS name as an argument, and will construct a version of
+`versioneer.py` that is specific to the given VCS. It might also take the
+configuration arguments that are currently provided manually during
+installation by editing setup.py . Alternatively, it might go the other
+direction and include code from all supported VCS systems, reducing the
+number of intermediate scripts.
+
+## Similar projects
+
+* [setuptools_scm](https://github.com/pypa/setuptools_scm/) - a non-vendored build-time
+  dependency
+* [miniver](https://github.com/jbweston/miniver) - a lightweight reimplementation of
+  versioneer
+* [versioningit](https://github.com/jwodder/versioningit) - a PEP 518-based setuptools
+  plugin
+
+## License
+
+To make Versioneer easier to embed, all its code is dedicated to the public
+domain. The `_version.py` that it creates is also in the public domain.
+Specifically, both are released under the Creative Commons "Public Domain
+Dedication" license (CC0-1.0), as described in
+https://creativecommons.org/publicdomain/zero/1.0/ .
+
+[pypi-image]: https://img.shields.io/pypi/v/versioneer.svg
+[pypi-url]: https://pypi.python.org/pypi/versioneer/
+[travis-image]:
+https://img.shields.io/travis/com/python-versioneer/python-versioneer.svg
+[travis-url]: https://travis-ci.com/github/python-versioneer/python-versioneer
+
+"""
+# pylint:disable=invalid-name,import-outside-toplevel,missing-function-docstring
+# pylint:disable=missing-class-docstring,too-many-branches,too-many-statements
+# pylint:disable=raise-missing-from,too-many-lines,too-many-locals,import-error
+# pylint:disable=too-few-public-methods,redefined-outer-name,consider-using-with
+# pylint:disable=attribute-defined-outside-init,too-many-arguments
+
+import configparser
+import errno
+import json
+import os
+import re
+import subprocess
+import sys
+from typing import Callable, Dict
+
+
+class VersioneerConfig:
+    """Container for Versioneer configuration parameters."""
+
+
+def get_root():
+    """Get the project root directory.
+
+    We require that all commands are run from the project root, i.e. the
+    directory that contains setup.py, setup.cfg, and versioneer.py .
+    """
+    root = os.path.realpath(os.path.abspath(os.getcwd()))
+    setup_py = os.path.join(root, "setup.py")
+    versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        # allow 'python path/to/setup.py COMMAND'
+        root = os.path.dirname(os.path.realpath(os.path.abspath(sys.argv[0])))
+        setup_py = os.path.join(root, "setup.py")
+        versioneer_py = os.path.join(root, "versioneer.py")
+    if not (os.path.exists(setup_py) or os.path.exists(versioneer_py)):
+        err = ("Versioneer was unable to find the project root directory. "
+               "Versioneer requires setup.py to be executed from "
+               "its immediate directory (like 'python setup.py COMMAND'), "
+               "or in a way that lets it use sys.argv[0] to find the root "
+               "(like 'python path/to/setup.py COMMAND').")
+        raise VersioneerBadRootError(err)
+    try:
+        # Certain runtime workflows (setup.py install/develop in a setuptools
+        # tree) execute all dependencies in a single python process, so
+        # "versioneer" may be imported multiple times, and python's shared
+        # module-import table will cache the first one.
So we can't use + # os.path.dirname(__file__), as that will find whichever + # versioneer.py was first imported, even in later projects. + my_path = os.path.realpath(os.path.abspath(__file__)) + me_dir = os.path.normcase(os.path.splitext(my_path)[0]) + vsr_dir = os.path.normcase(os.path.splitext(versioneer_py)[0]) + if me_dir != vsr_dir: + print("Warning: build in %s is using versioneer.py from %s" + % (os.path.dirname(my_path), versioneer_py)) + except NameError: + pass + return root + + +def get_config_from_root(root): + """Read the project setup.cfg file to determine Versioneer config.""" + # This might raise OSError (if setup.cfg is missing), or + # configparser.NoSectionError (if it lacks a [versioneer] section), or + # configparser.NoOptionError (if it lacks "VCS="). See the docstring at + # the top of versioneer.py for instructions on writing your setup.cfg . + setup_cfg = os.path.join(root, "setup.cfg") + parser = configparser.ConfigParser() + with open(setup_cfg, "r") as cfg_file: + parser.read_file(cfg_file) + VCS = parser.get("versioneer", "VCS") # mandatory + + # Dict-like interface for non-mandatory entries + section = parser["versioneer"] + + cfg = VersioneerConfig() + cfg.VCS = VCS + cfg.style = section.get("style", "") + cfg.versionfile_source = section.get("versionfile_source") + cfg.versionfile_build = section.get("versionfile_build") + cfg.tag_prefix = section.get("tag_prefix") + if cfg.tag_prefix in ("''", '""'): + cfg.tag_prefix = "" + cfg.parentdir_prefix = section.get("parentdir_prefix") + cfg.verbose = section.get("verbose") + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +# these dictionaries contain VCS-specific tools +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + HANDLERS.setdefault(vcs, {})[method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %s" % dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %s" % (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %s (error)" % dispcmd) + print("stdout was %s" % stdout) + return None, process.returncode + return stdout, process.returncode + + +LONG_VERSION_PY['git'] = r''' +# This file helps to compute a version number in source trees obtained from +# git-archive tarball (such as those provided by githubs download-from-tag +# feature). Distribution tarballs (built by setup.py sdist) and build +# directories (produced by setup.py build) will contain a much shorter file +# that just contains the computed version number. + +# This file is released into the public domain. 
Generated by +# versioneer-0.21 (https://github.com/python-versioneer/python-versioneer) + +"""Git implementation of _version.py.""" + +import errno +import os +import re +import subprocess +import sys +from typing import Callable, Dict + + +def get_keywords(): + """Get the keywords needed to look up the version information.""" + # these strings will be replaced by git during git-archive. + # setup.py/versioneer.py will grep for the variable names, so they must + # each be defined on a line of their own. _version.py will just call + # get_keywords(). + git_refnames = "%(DOLLAR)sFormat:%%d%(DOLLAR)s" + git_full = "%(DOLLAR)sFormat:%%H%(DOLLAR)s" + git_date = "%(DOLLAR)sFormat:%%ci%(DOLLAR)s" + keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} + return keywords + + +class VersioneerConfig: + """Container for Versioneer configuration parameters.""" + + +def get_config(): + """Create, populate and return the VersioneerConfig() object.""" + # these strings are filled in when 'setup.py versioneer' creates + # _version.py + cfg = VersioneerConfig() + cfg.VCS = "git" + cfg.style = "%(STYLE)s" + cfg.tag_prefix = "%(TAG_PREFIX)s" + cfg.parentdir_prefix = "%(PARENTDIR_PREFIX)s" + cfg.versionfile_source = "%(VERSIONFILE_SOURCE)s" + cfg.verbose = False + return cfg + + +class NotThisMethod(Exception): + """Exception raised if a method is not valid for the current scenario.""" + + +LONG_VERSION_PY: Dict[str, str] = {} +HANDLERS: Dict[str, Dict[str, Callable]] = {} + + +def register_vcs_handler(vcs, method): # decorator + """Create decorator to mark a method as the handler of a VCS.""" + def decorate(f): + """Store f in HANDLERS[vcs][method].""" + if vcs not in HANDLERS: + HANDLERS[vcs] = {} + HANDLERS[vcs][method] = f + return f + return decorate + + +def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False, + env=None): + """Call the given command(s).""" + assert isinstance(commands, list) + process = None + for command in commands: + try: + dispcmd = str([command] + args) + # remember shell=False, so use git.cmd on windows, not just git + process = subprocess.Popen([command] + args, cwd=cwd, env=env, + stdout=subprocess.PIPE, + stderr=(subprocess.PIPE if hide_stderr + else None)) + break + except OSError: + e = sys.exc_info()[1] + if e.errno == errno.ENOENT: + continue + if verbose: + print("unable to run %%s" %% dispcmd) + print(e) + return None, None + else: + if verbose: + print("unable to find command, tried %%s" %% (commands,)) + return None, None + stdout = process.communicate()[0].strip().decode() + if process.returncode != 0: + if verbose: + print("unable to run %%s (error)" %% dispcmd) + print("stdout was %%s" %% stdout) + return None, process.returncode + return stdout, process.returncode + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. 
We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %%s but none started with prefix %%s" %% + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. + keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %%d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%%s', no digits" %% ",".join(refs - tags)) + if verbose: + print("likely tags: %%s" %% ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. 
"2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %%s" %% r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. + + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %%s not under git control" %% root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%%s%%s" %% (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. 
+ git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? + pieces["error"] = ("unable to parse git-describe output: '%%s'" + %% describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%%s' doesn't start with prefix '%%s'" + print(fmt %% (full_tag, tag_prefix)) + pieces["error"] = ("tag '%%s' doesn't start with prefix '%%s'" + %% (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%%d.g%%s" %% (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%%d.g%%s" %% (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. 
+
+    Returns the release segments before the post-release and the
+    post-release version number (or None if no post-release segment is
+    present).
+    """
+    vc = str.split(ver, ".post")
+    return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
+
+
+def render_pep440_pre(pieces):
+    """TAG[.postN.devDISTANCE] -- No -dirty.
+
+    Exceptions:
+    1: no tags. 0.post0.devDISTANCE
+    """
+    if pieces["closest-tag"]:
+        if pieces["distance"]:
+            # update the post release segment
+            tag_version, post_version = pep440_split_post(pieces["closest-tag"])
+            rendered = tag_version
+            if post_version is not None:
+                rendered += ".post%%d.dev%%d" %% (post_version+1, pieces["distance"])
+            else:
+                rendered += ".post0.dev%%d" %% (pieces["distance"])
+        else:
+            # no commits, use the tag as the version
+            rendered = pieces["closest-tag"]
+    else:
+        # exception #1
+        rendered = "0.post0.dev%%d" %% pieces["distance"]
+    return rendered
+
+
+def render_pep440_post(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX] .
+
+    The ".dev0" means dirty. Note that .dev0 sorts backwards
+    (a dirty tree will appear "older" than the corresponding clean one),
+    but you shouldn't be releasing software with -dirty anyways.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+    return rendered
+
+
+def render_pep440_post_branch(pieces):
+    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
+
+    The ".dev0" means not master branch.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["branch"] != "master":
+                rendered += ".dev0"
+            rendered += plus_or_dot(pieces)
+            rendered += "g%%s" %% pieces["short"]
+            if pieces["dirty"]:
+                rendered += ".dirty"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["branch"] != "master":
+            rendered += ".dev0"
+        rendered += "+g%%s" %% pieces["short"]
+        if pieces["dirty"]:
+            rendered += ".dirty"
+    return rendered
+
+
+def render_pep440_old(pieces):
+    """TAG[.postDISTANCE[.dev0]] .
+
+    The ".dev0" means dirty.
+
+    Exceptions:
+    1: no tags. 0.postDISTANCE[.dev0]
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"] or pieces["dirty"]:
+            rendered += ".post%%d" %% pieces["distance"]
+            if pieces["dirty"]:
+                rendered += ".dev0"
+    else:
+        # exception #1
+        rendered = "0.post%%d" %% pieces["distance"]
+        if pieces["dirty"]:
+            rendered += ".dev0"
+    return rendered
+
+
+def render_git_describe(pieces):
+    """TAG[-DISTANCE-gHEX][-dirty].
+
+    Like 'git describe --tags --dirty --always'.
+
+    Exceptions:
+    1: no tags. HEX[-dirty] (note: no 'g' prefix)
+    """
+    if pieces["closest-tag"]:
+        rendered = pieces["closest-tag"]
+        if pieces["distance"]:
+            rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"])
+    else:
+        # exception #1
+        rendered = pieces["short"]
+    if pieces["dirty"]:
+        rendered += "-dirty"
+    return rendered
+
+
+def render_git_describe_long(pieces):
+    """TAG-DISTANCE-gHEX[-dirty].
+
+    Like 'git describe --tags --dirty --always --long'.
+ The distance/hash is unconditional. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%%d-g%%s" %% (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%%s'" %% style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +def get_versions(): + """Get version information or return default if unable to do so.""" + # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have + # __file__, we can work backwards from there to the root. Some + # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which + # case we can only use expanded keywords. + + cfg = get_config() + verbose = cfg.verbose + + try: + return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, + verbose) + except NotThisMethod: + pass + + try: + root = os.path.realpath(__file__) + # versionfile_source is the relative path from the top of the source + # tree (where the .git directory might live) to this file. Invert + # this to find the root from __file__. + for _ in cfg.versionfile_source.split('/'): + root = os.path.dirname(root) + except NameError: + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to find root of source tree", + "date": None} + + try: + pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) + return render(pieces, cfg.style) + except NotThisMethod: + pass + + try: + if cfg.parentdir_prefix: + return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) + except NotThisMethod: + pass + + return {"version": "0+unknown", "full-revisionid": None, + "dirty": None, + "error": "unable to compute version", "date": None} +''' + + +@register_vcs_handler("git", "get_keywords") +def git_get_keywords(versionfile_abs): + """Extract version information from the given file.""" + # the code embedded in _version.py can just fetch the value of these + # keywords. When used from setup.py, we don't want to import _version.py, + # so we do it with a regexp instead. This function is not used from + # _version.py. 
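+    # e.g. in an exported tarball the expanded lines look like
+    #   git_refnames = " (HEAD -> master, tag: v1.0)"
+    #   git_full = "1076c978a8d3cfc70f408fe5974aa6c092c949ac"
+    # (illustrative values; in a plain checkout they remain "$Format:...$")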
+ keywords = {} + try: + with open(versionfile_abs, "r") as fobj: + for line in fobj: + if line.strip().startswith("git_refnames ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["refnames"] = mo.group(1) + if line.strip().startswith("git_full ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["full"] = mo.group(1) + if line.strip().startswith("git_date ="): + mo = re.search(r'=\s*"(.*)"', line) + if mo: + keywords["date"] = mo.group(1) + except OSError: + pass + return keywords + + +@register_vcs_handler("git", "keywords") +def git_versions_from_keywords(keywords, tag_prefix, verbose): + """Get version information from git keywords.""" + if "refnames" not in keywords: + raise NotThisMethod("Short version file found") + date = keywords.get("date") + if date is not None: + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + + # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant + # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 + # -like" string, which we must then edit to make compliant), because + # it's been around since git-1.5.3, and it's too difficult to + # discover which version we're using, or to work around using an + # older one. + date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + refnames = keywords["refnames"].strip() + if refnames.startswith("$Format"): + if verbose: + print("keywords are unexpanded, not using") + raise NotThisMethod("unexpanded keywords, not a git-archive tarball") + refs = {r.strip() for r in refnames.strip("()").split(",")} + # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of + # just "foo-1.0". If we see a "tag: " prefix, prefer those. + TAG = "tag: " + tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} + if not tags: + # Either we're using git < 1.8.3, or there really are no tags. We use + # a heuristic: assume all version tags have a digit. The old git %d + # expansion behaves like git log --decorate=short and strips out the + # refs/heads/ and refs/tags/ prefixes that would let us distinguish + # between branches and tags. By ignoring refnames without digits, we + # filter out many common branch names like "release" and + # "stabilization", as well as "HEAD" and "master". + tags = {r for r in refs if re.search(r'\d', r)} + if verbose: + print("discarding '%s', no digits" % ",".join(refs - tags)) + if verbose: + print("likely tags: %s" % ",".join(sorted(tags))) + for ref in sorted(tags): + # sorting will prefer e.g. "2.0" over "2.0rc1" + if ref.startswith(tag_prefix): + r = ref[len(tag_prefix):] + # Filter out refs that exactly match prefix or that don't start + # with a number once the prefix is stripped (mostly a concern + # when prefix is '') + if not re.match(r'\d', r): + continue + if verbose: + print("picking %s" % r) + return {"version": r, + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": None, + "date": date} + # no suitable tags, so version is "0+unknown", but full hex is still there + if verbose: + print("no suitable tags, using unknown + full revision id") + return {"version": "0+unknown", + "full-revisionid": keywords["full"].strip(), + "dirty": False, "error": "no suitable tags", "date": None} + + +@register_vcs_handler("git", "pieces_from_vcs") +def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command): + """Get version from 'git describe' in the root of the source tree. 
+ + This only gets called if the git-archive 'subst' keywords were *not* + expanded, and _version.py hasn't already been rewritten with a short + version string, meaning we're inside a checked out source tree. + """ + GITS = ["git"] + TAG_PREFIX_REGEX = "*" + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + TAG_PREFIX_REGEX = r"\*" + + _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, + hide_stderr=True) + if rc != 0: + if verbose: + print("Directory %s not under git control" % root) + raise NotThisMethod("'git rev-parse --git-dir' returned error") + + # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] + # if there isn't one, this yields HEX[-dirty] (no NUM) + describe_out, rc = runner(GITS, ["describe", "--tags", "--dirty", + "--always", "--long", + "--match", + "%s%s" % (tag_prefix, TAG_PREFIX_REGEX)], + cwd=root) + # --long was added in git-1.5.5 + if describe_out is None: + raise NotThisMethod("'git describe' failed") + describe_out = describe_out.strip() + full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) + if full_out is None: + raise NotThisMethod("'git rev-parse' failed") + full_out = full_out.strip() + + pieces = {} + pieces["long"] = full_out + pieces["short"] = full_out[:7] # maybe improved later + pieces["error"] = None + + branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], + cwd=root) + # --abbrev-ref was added in git-1.6.3 + if rc != 0 or branch_name is None: + raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") + branch_name = branch_name.strip() + + if branch_name == "HEAD": + # If we aren't exactly on a branch, pick a branch which represents + # the current commit. If all else fails, we are on a branchless + # commit. + branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) + # --contains was added in git-1.5.4 + if rc != 0 or branches is None: + raise NotThisMethod("'git branch --contains' returned error") + branches = branches.split("\n") + + # Remove the first line if we're running detached + if "(" in branches[0]: + branches.pop(0) + + # Strip off the leading "* " from the list of branches. + branches = [branch[2:] for branch in branches] + if "master" in branches: + branch_name = "master" + elif not branches: + branch_name = None + else: + # Pick the first branch that is returned. Good or bad. + branch_name = branches[0] + + pieces["branch"] = branch_name + + # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] + # TAG might have hyphens. + git_describe = describe_out + + # look for -dirty suffix + dirty = git_describe.endswith("-dirty") + pieces["dirty"] = dirty + if dirty: + git_describe = git_describe[:git_describe.rindex("-dirty")] + + # now we have TAG-NUM-gHEX or HEX + + if "-" in git_describe: + # TAG-NUM-gHEX + mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) + if not mo: + # unparsable. Maybe git-describe is misbehaving? 
+ pieces["error"] = ("unable to parse git-describe output: '%s'" + % describe_out) + return pieces + + # tag + full_tag = mo.group(1) + if not full_tag.startswith(tag_prefix): + if verbose: + fmt = "tag '%s' doesn't start with prefix '%s'" + print(fmt % (full_tag, tag_prefix)) + pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" + % (full_tag, tag_prefix)) + return pieces + pieces["closest-tag"] = full_tag[len(tag_prefix):] + + # distance: number of commits since tag + pieces["distance"] = int(mo.group(2)) + + # commit: short hex revision ID + pieces["short"] = mo.group(3) + + else: + # HEX: no tags + pieces["closest-tag"] = None + count_out, rc = runner(GITS, ["rev-list", "HEAD", "--count"], cwd=root) + pieces["distance"] = int(count_out) # total number of commits + + # commit date: see ISO-8601 comment in git_versions_from_keywords() + date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() + # Use only the last line. Previous lines may contain GPG signature + # information. + date = date.splitlines()[-1] + pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) + + return pieces + + +def do_vcs_install(manifest_in, versionfile_source, ipy): + """Git-specific installation logic for Versioneer. + + For Git, this means creating/changing .gitattributes to mark _version.py + for export-subst keyword substitution. + """ + GITS = ["git"] + if sys.platform == "win32": + GITS = ["git.cmd", "git.exe"] + files = [manifest_in, versionfile_source] + if ipy: + files.append(ipy) + try: + my_path = __file__ + if my_path.endswith(".pyc") or my_path.endswith(".pyo"): + my_path = os.path.splitext(my_path)[0] + ".py" + versioneer_file = os.path.relpath(my_path) + except NameError: + versioneer_file = "versioneer.py" + files.append(versioneer_file) + present = False + try: + with open(".gitattributes", "r") as fobj: + for line in fobj: + if line.strip().startswith(versionfile_source): + if "export-subst" in line.strip().split()[1:]: + present = True + break + except OSError: + pass + if not present: + with open(".gitattributes", "a+") as fobj: + fobj.write(f"{versionfile_source} export-subst\n") + files.append(".gitattributes") + run_command(GITS, ["add", "--"] + files) + + +def versions_from_parentdir(parentdir_prefix, root, verbose): + """Try to determine the version from the parent directory name. + + Source tarballs conventionally unpack into a directory that includes both + the project name and a version string. We will also support searching up + two directory levels for an appropriately named parent directory + """ + rootdirs = [] + + for _ in range(3): + dirname = os.path.basename(root) + if dirname.startswith(parentdir_prefix): + return {"version": dirname[len(parentdir_prefix):], + "full-revisionid": None, + "dirty": False, "error": None, "date": None} + rootdirs.append(root) + root = os.path.dirname(root) # up a level + + if verbose: + print("Tried directories %s but none started with prefix %s" % + (str(rootdirs), parentdir_prefix)) + raise NotThisMethod("rootdir doesn't start with parentdir_prefix") + + +SHORT_VERSION_PY = """ +# This file was generated by 'versioneer.py' (0.21) from +# revision-control system data, or from the parent directory name of an +# unpacked source archive. Distribution tarballs contain a pre-generated copy +# of this file. 
+ +import json + +version_json = ''' +%s +''' # END VERSION_JSON + + +def get_versions(): + return json.loads(version_json) +""" + + +def versions_from_file(filename): + """Try to determine the version from _version.py if present.""" + try: + with open(filename) as f: + contents = f.read() + except OSError: + raise NotThisMethod("unable to read _version.py") + mo = re.search(r"version_json = '''\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + mo = re.search(r"version_json = '''\r\n(.*)''' # END VERSION_JSON", + contents, re.M | re.S) + if not mo: + raise NotThisMethod("no version_json in _version.py") + return json.loads(mo.group(1)) + + +def write_to_version_file(filename, versions): + """Write the given version number to the given _version.py file.""" + os.unlink(filename) + contents = json.dumps(versions, sort_keys=True, + indent=1, separators=(",", ": ")) + with open(filename, "w") as f: + f.write(SHORT_VERSION_PY % contents) + + print("set %s to '%s'" % (filename, versions["version"])) + + +def plus_or_dot(pieces): + """Return a + if we don't already have one, else return a .""" + if "+" in pieces.get("closest-tag", ""): + return "." + return "+" + + +def render_pep440(pieces): + """Build up version string, with post-release "local version identifier". + + Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you + get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty + + Exceptions: + 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_branch(pieces): + """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . + + The ".dev0" means not master branch. Note that .dev0 sorts backwards + (a feature branch will appear "older" than the master branch). + + Exceptions: + 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0" + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+untagged.%d.g%s" % (pieces["distance"], + pieces["short"]) + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def pep440_split_post(ver): + """Split pep440 version string at the post-release segment. + + Returns the release segments before the post-release and the + post-release version number (or -1 if no post-release segment is present). + """ + vc = str.split(ver, ".post") + return vc[0], int(vc[1] or 0) if len(vc) == 2 else None + + +def render_pep440_pre(pieces): + """TAG[.postN.devDISTANCE] -- No -dirty. + + Exceptions: + 1: no tags. 
0.post0.devDISTANCE + """ + if pieces["closest-tag"]: + if pieces["distance"]: + # update the post release segment + tag_version, post_version = pep440_split_post(pieces["closest-tag"]) + rendered = tag_version + if post_version is not None: + rendered += ".post%d.dev%d" % (post_version+1, pieces["distance"]) + else: + rendered += ".post0.dev%d" % (pieces["distance"]) + else: + # no commits, use the tag as the version + rendered = pieces["closest-tag"] + else: + # exception #1 + rendered = "0.post0.dev%d" % pieces["distance"] + return rendered + + +def render_pep440_post(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX] . + + The ".dev0" means dirty. Note that .dev0 sorts backwards + (a dirty tree will appear "older" than the corresponding clean one), + but you shouldn't be releasing software with -dirty anyways. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + return rendered + + +def render_pep440_post_branch(pieces): + """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . + + The ".dev0" means not master branch. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += plus_or_dot(pieces) + rendered += "g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["branch"] != "master": + rendered += ".dev0" + rendered += "+g%s" % pieces["short"] + if pieces["dirty"]: + rendered += ".dirty" + return rendered + + +def render_pep440_old(pieces): + """TAG[.postDISTANCE[.dev0]] . + + The ".dev0" means dirty. + + Exceptions: + 1: no tags. 0.postDISTANCE[.dev0] + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"] or pieces["dirty"]: + rendered += ".post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + else: + # exception #1 + rendered = "0.post%d" % pieces["distance"] + if pieces["dirty"]: + rendered += ".dev0" + return rendered + + +def render_git_describe(pieces): + """TAG[-DISTANCE-gHEX][-dirty]. + + Like 'git describe --tags --dirty --always'. + + Exceptions: + 1: no tags. HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + if pieces["distance"]: + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render_git_describe_long(pieces): + """TAG-DISTANCE-gHEX[-dirty]. + + Like 'git describe --tags --dirty --always -long'. + The distance/hash is unconditional. + + Exceptions: + 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) + """ + if pieces["closest-tag"]: + rendered = pieces["closest-tag"] + rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) + else: + # exception #1 + rendered = pieces["short"] + if pieces["dirty"]: + rendered += "-dirty" + return rendered + + +def render(pieces, style): + """Render the given version pieces into the requested style.""" + if pieces["error"]: + return {"version": "unknown", + "full-revisionid": pieces.get("long"), + "dirty": None, + "error": pieces["error"], + "date": None} + + if not style or style == "default": + style = "pep440" # the default + + if style == "pep440": + rendered = render_pep440(pieces) + elif style == "pep440-branch": + rendered = render_pep440_branch(pieces) + elif style == "pep440-pre": + rendered = render_pep440_pre(pieces) + elif style == "pep440-post": + rendered = render_pep440_post(pieces) + elif style == "pep440-post-branch": + rendered = render_pep440_post_branch(pieces) + elif style == "pep440-old": + rendered = render_pep440_old(pieces) + elif style == "git-describe": + rendered = render_git_describe(pieces) + elif style == "git-describe-long": + rendered = render_git_describe_long(pieces) + else: + raise ValueError("unknown style '%s'" % style) + + return {"version": rendered, "full-revisionid": pieces["long"], + "dirty": pieces["dirty"], "error": None, + "date": pieces.get("date")} + + +class VersioneerBadRootError(Exception): + """The project root directory is unknown or missing key files.""" + + +def get_versions(verbose=False): + """Get the project version from whatever source is available. + + Returns dict with two keys: 'version' and 'full'. + """ + if "versioneer" in sys.modules: + # see the discussion in cmdclass.py:get_cmdclass() + del sys.modules["versioneer"] + + root = get_root() + cfg = get_config_from_root(root) + + assert cfg.VCS is not None, "please set [versioneer]VCS= in setup.cfg" + handlers = HANDLERS.get(cfg.VCS) + assert handlers, "unrecognized VCS '%s'" % cfg.VCS + verbose = verbose or cfg.verbose + assert cfg.versionfile_source is not None, \ + "please set versioneer.versionfile_source" + assert cfg.tag_prefix is not None, "please set versioneer.tag_prefix" + + versionfile_abs = os.path.join(root, cfg.versionfile_source) + + # extract version from first of: _version.py, VCS command (e.g. 'git + # describe'), parentdir. This is meant to work for developers using a + # source checkout, for users of a tarball created by 'setup.py sdist', + # and for users of a tarball/zipball created by 'git archive' or github's + # download-from-tag feature or the equivalent in other VCSes. 
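+    # Note that expanded git-archive keywords are tried first, before
+    # _version.py, the VCS itself, and finally the parent directory name.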
+
+    get_keywords_f = handlers.get("get_keywords")
+    from_keywords_f = handlers.get("keywords")
+    if get_keywords_f and from_keywords_f:
+        try:
+            keywords = get_keywords_f(versionfile_abs)
+            ver = from_keywords_f(keywords, cfg.tag_prefix, verbose)
+            if verbose:
+                print("got version from expanded keyword %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        ver = versions_from_file(versionfile_abs)
+        if verbose:
+            print("got version from file %s %s" % (versionfile_abs, ver))
+        return ver
+    except NotThisMethod:
+        pass
+
+    from_vcs_f = handlers.get("pieces_from_vcs")
+    if from_vcs_f:
+        try:
+            pieces = from_vcs_f(cfg.tag_prefix, root, verbose)
+            ver = render(pieces, cfg.style)
+            if verbose:
+                print("got version from VCS %s" % ver)
+            return ver
+        except NotThisMethod:
+            pass
+
+    try:
+        if cfg.parentdir_prefix:
+            ver = versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
+            if verbose:
+                print("got version from parentdir %s" % ver)
+            return ver
+    except NotThisMethod:
+        pass
+
+    if verbose:
+        print("unable to compute version")
+
+    return {"version": "0+unknown", "full-revisionid": None,
+            "dirty": None, "error": "unable to compute version",
+            "date": None}
+
+
+def get_version():
+    """Get the short version string for this project."""
+    return get_versions()["version"]
+
+
+def get_cmdclass(cmdclass=None):
+    """Get the custom setuptools/distutils subclasses used by Versioneer.
+
+    If the package uses a different cmdclass (e.g. one from numpy), it
+    should be provided as an argument.
+    """
+    if "versioneer" in sys.modules:
+        del sys.modules["versioneer"]
+        # this fixes the "python setup.py develop" case (also 'install' and
+        # 'easy_install .'), in which subdependencies of the main project are
+        # built (using setup.py bdist_egg) in the same python process. Assume
+        # a main project A and a dependency B, which use different versions
+        # of Versioneer. A's setup.py imports A's Versioneer, leaving it in
+        # sys.modules by the time B's setup.py is executed, causing B to run
+        # with the wrong versioneer. Setuptools wraps the sub-dep builds in a
+        # sandbox that restores sys.modules to its pre-build state, so the
+        # parent is protected against the child's "import versioneer". By
+        # removing ourselves from sys.modules here, before the child build
+        # happens, we protect the child from the parent's versioneer too.
+        # Also see https://github.com/python-versioneer/python-versioneer/issues/52
+
+    cmds = {} if cmdclass is None else cmdclass.copy()
+
+    # we add "version" to both distutils and setuptools
+    from distutils.core import Command
+
+    class cmd_version(Command):
+        description = "report generated version string"
+        user_options = []
+        boolean_options = []
+
+        def initialize_options(self):
+            pass
+
+        def finalize_options(self):
+            pass
+
+        def run(self):
+            vers = get_versions(verbose=True)
+            print("Version: %s" % vers["version"])
+            print(" full-revisionid: %s" % vers.get("full-revisionid"))
+            print(" dirty: %s" % vers.get("dirty"))
+            print(" date: %s" % vers.get("date"))
+            if vers["error"]:
+                print(" error: %s" % vers["error"])
+    cmds["version"] = cmd_version
+
+    # we override "build_py" in both distutils and setuptools
+    #
+    # most invocation pathways end up running build_py:
+    #  distutils/build -> build_py
+    #  distutils/install -> distutils/build ->..
+    #  setuptools/bdist_wheel -> distutils/install ->..
+    #  setuptools/bdist_egg -> distutils/install_lib -> build_py
+    #  setuptools/install -> bdist_egg ->..
+    #  setuptools/develop -> ?
+ # pip install: + # copies source tree to a tempdir before running egg_info/etc + # if .git isn't copied too, 'git describe' will fail + # then does setup.py bdist_wheel, or sometimes setup.py install + # setup.py egg_info -> ? + + # we override different "build_py" commands for both environments + if 'build_py' in cmds: + _build_py = cmds['build_py'] + elif "setuptools" in sys.modules: + from setuptools.command.build_py import build_py as _build_py + else: + from distutils.command.build_py import build_py as _build_py + + class cmd_build_py(_build_py): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_py.run(self) + # now locate _version.py in the new build/ directory and replace + # it with an updated value + if cfg.versionfile_build: + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_py"] = cmd_build_py + + if 'build_ext' in cmds: + _build_ext = cmds['build_ext'] + elif "setuptools" in sys.modules: + from setuptools.command.build_ext import build_ext as _build_ext + else: + from distutils.command.build_ext import build_ext as _build_ext + + class cmd_build_ext(_build_ext): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + _build_ext.run(self) + if self.inplace: + # build_ext --inplace will only build extensions in + # build/lib<..> dir with no _version.py to write to. + # As in place builds will already have a _version.py + # in the module dir, we do not need to write one. + return + # now locate _version.py in the new build/ directory and replace + # it with an updated value + target_versionfile = os.path.join(self.build_lib, + cfg.versionfile_build) + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + cmds["build_ext"] = cmd_build_ext + + if "cx_Freeze" in sys.modules: # cx_freeze enabled? + from cx_Freeze.dist import build_exe as _build_exe + # nczeczulin reports that py2exe won't like the pep440-style string + # as FILEVERSION, but it can be used for PRODUCTVERSION, e.g. + # setup(console=[{ + # "version": versioneer.get_version().split("+", 1)[0], # FILEVERSION + # "product_version": versioneer.get_version(), + # ... + + class cmd_build_exe(_build_exe): + def run(self): + root = get_root() + cfg = get_config_from_root(root) + versions = get_versions() + target_versionfile = cfg.versionfile_source + print("UPDATING %s" % target_versionfile) + write_to_version_file(target_versionfile, versions) + + _build_exe.run(self) + os.unlink(target_versionfile) + with open(cfg.versionfile_source, "w") as f: + LONG = LONG_VERSION_PY[cfg.VCS] + f.write(LONG % + {"DOLLAR": "$", + "STYLE": cfg.style, + "TAG_PREFIX": cfg.tag_prefix, + "PARENTDIR_PREFIX": cfg.parentdir_prefix, + "VERSIONFILE_SOURCE": cfg.versionfile_source, + }) + cmds["build_exe"] = cmd_build_exe + del cmds["build_py"] + + if 'py2exe' in sys.modules: # py2exe enabled? 
+
+    if 'py2exe' in sys.modules:  # py2exe enabled?
+        from py2exe.distutils_buildexe import py2exe as _py2exe
+
+        class cmd_py2exe(_py2exe):
+            def run(self):
+                root = get_root()
+                cfg = get_config_from_root(root)
+                versions = get_versions()
+                target_versionfile = cfg.versionfile_source
+                print("UPDATING %s" % target_versionfile)
+                write_to_version_file(target_versionfile, versions)
+
+                _py2exe.run(self)
+                os.unlink(target_versionfile)
+                with open(cfg.versionfile_source, "w") as f:
+                    LONG = LONG_VERSION_PY[cfg.VCS]
+                    f.write(LONG %
+                            {"DOLLAR": "$",
+                             "STYLE": cfg.style,
+                             "TAG_PREFIX": cfg.tag_prefix,
+                             "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                             "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                             })
+        cmds["py2exe"] = cmd_py2exe
+
+    # we override different "sdist" commands for both environments
+    if 'sdist' in cmds:
+        _sdist = cmds['sdist']
+    elif "setuptools" in sys.modules:
+        from setuptools.command.sdist import sdist as _sdist
+    else:
+        from distutils.command.sdist import sdist as _sdist
+
+    class cmd_sdist(_sdist):
+        def run(self):
+            versions = get_versions()
+            self._versioneer_generated_versions = versions
+            # unless we update this, the command will keep using the old
+            # version
+            self.distribution.metadata.version = versions["version"]
+            return _sdist.run(self)
+
+        def make_release_tree(self, base_dir, files):
+            root = get_root()
+            cfg = get_config_from_root(root)
+            _sdist.make_release_tree(self, base_dir, files)
+            # now locate _version.py in the new base_dir directory
+            # (remembering that it may be a hardlink) and replace it with an
+            # updated value
+            target_versionfile = os.path.join(base_dir, cfg.versionfile_source)
+            print("UPDATING %s" % target_versionfile)
+            write_to_version_file(target_versionfile,
+                                  self._versioneer_generated_versions)
+    cmds["sdist"] = cmd_sdist
+
+    return cmds
+
+
+CONFIG_ERROR = """
+setup.cfg is missing the necessary Versioneer configuration. You need
+a section like:
+
+ [versioneer]
+ VCS = git
+ style = pep440
+ versionfile_source = src/myproject/_version.py
+ versionfile_build = myproject/_version.py
+ tag_prefix =
+ parentdir_prefix = myproject-
+
+You will also need to edit your setup.py to use the results:
+
+ import versioneer
+ setup(version=versioneer.get_version(),
+       cmdclass=versioneer.get_cmdclass(), ...)
+
+Please read the docstring in ./versioneer.py for configuration instructions,
+edit setup.cfg, and re-run the installer or 'python versioneer.py setup'.
+"""
+
+SAMPLE_CONFIG = """
+# See the docstring in versioneer.py for instructions. Note that you must
+# re-run 'versioneer.py setup' after changing this section, and commit the
+# resulting files.
+
+[versioneer]
+#VCS = git
+#style = pep440
+#versionfile_source =
+#versionfile_build =
+#tag_prefix =
+#parentdir_prefix =
+
+"""
+
+OLD_SNIPPET = """
+from ._version import get_versions
+__version__ = get_versions()['version']
+del get_versions
+"""
+
+INIT_PY_SNIPPET = """
+from . import {0}
+__version__ = {0}.get_versions()['version']
+"""
+
+
+def do_setup():
+    """Do the main VCS-independent setup for installing Versioneer."""
+    root = get_root()
+    try:
+        cfg = get_config_from_root(root)
+    except (OSError, configparser.NoSectionError,
+            configparser.NoOptionError) as e:
+        if isinstance(e, (OSError, configparser.NoSectionError)):
+            print("Adding sample versioneer config to setup.cfg",
+                  file=sys.stderr)
+            with open(os.path.join(root, "setup.cfg"), "a") as f:
+                f.write(SAMPLE_CONFIG)
+        print(CONFIG_ERROR, file=sys.stderr)
+        return 1
+
+    print(" creating %s" % cfg.versionfile_source)
+    with open(cfg.versionfile_source, "w") as f:
+        LONG = LONG_VERSION_PY[cfg.VCS]
+        f.write(LONG % {"DOLLAR": "$",
+                        "STYLE": cfg.style,
+                        "TAG_PREFIX": cfg.tag_prefix,
+                        "PARENTDIR_PREFIX": cfg.parentdir_prefix,
+                        "VERSIONFILE_SOURCE": cfg.versionfile_source,
+                        })
+
+    ipy = os.path.join(os.path.dirname(cfg.versionfile_source),
+                       "__init__.py")
+    if os.path.exists(ipy):
+        try:
+            with open(ipy, "r") as f:
+                old = f.read()
+        except OSError:
+            old = ""
+        module = os.path.splitext(os.path.basename(cfg.versionfile_source))[0]
+        snippet = INIT_PY_SNIPPET.format(module)
+        if OLD_SNIPPET in old:
+            print(" replacing boilerplate in %s" % ipy)
+            with open(ipy, "w") as f:
+                f.write(old.replace(OLD_SNIPPET, snippet))
+        elif snippet not in old:
+            print(" appending to %s" % ipy)
+            with open(ipy, "a") as f:
+                f.write(snippet)
+        else:
+            print(" %s unmodified" % ipy)
+    else:
+        print(" %s doesn't exist, ok" % ipy)
+        ipy = None
+
+    # Make sure both the top-level "versioneer.py" and versionfile_source
+    # (PKG/_version.py, used by runtime code) are in MANIFEST.in, so
+    # they'll be copied into source distributions. Pip won't be able to
+    # install the package without this.
+    manifest_in = os.path.join(root, "MANIFEST.in")
+    simple_includes = set()
+    try:
+        with open(manifest_in, "r") as f:
+            for line in f:
+                if line.startswith("include "):
+                    for include in line.split()[1:]:
+                        simple_includes.add(include)
+    except OSError:
+        pass
+    # That doesn't cover everything MANIFEST.in can do
+    # (http://docs.python.org/2/distutils/sourcedist.html#commands), so
+    # it might give some false negatives. Appending redundant 'include'
+    # lines is safe, though.
+    if "versioneer.py" not in simple_includes:
+        print(" appending 'versioneer.py' to MANIFEST.in")
+        with open(manifest_in, "a") as f:
+            f.write("include versioneer.py\n")
+    else:
+        print(" 'versioneer.py' already in MANIFEST.in")
+    if cfg.versionfile_source not in simple_includes:
+        print(" appending versionfile_source ('%s') to MANIFEST.in" %
+              cfg.versionfile_source)
+        with open(manifest_in, "a") as f:
+            f.write("include %s\n" % cfg.versionfile_source)
+    else:
+        print(" versionfile_source already in MANIFEST.in")
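+
+    # Illustrative sketch (not part of the upstream file): after this step a
+    # minimal MANIFEST.in would contain lines like
+    #
+    #   include versioneer.py
+    #   include src/myproject/_version.py
+    #
+    # where the second path is the (hypothetical) versionfile_source from
+    # the sample configuration in CONFIG_ERROR above.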
+
+    # Make VCS-specific changes. For git, this means creating/changing
+    # .gitattributes to mark _version.py for export-subst keyword
+    # substitution.
+    do_vcs_install(manifest_in, cfg.versionfile_source, ipy)
+    return 0
+
+
+def scan_setup_py():
+    """Validate the contents of setup.py against Versioneer's expectations."""
+    found = set()
+    setters = False
+    errors = 0
+    with open("setup.py", "r") as f:
+        for line in f.readlines():
+            if "import versioneer" in line:
+                found.add("import")
+            if "versioneer.get_cmdclass()" in line:
+                found.add("cmdclass")
+            if "versioneer.get_version()" in line:
+                found.add("get_version")
+            if "versioneer.VCS" in line:
+                setters = True
+            if "versioneer.versionfile_source" in line:
+                setters = True
+    if len(found) != 3:
+        print("")
+        print("Your setup.py appears to be missing some important items")
+        print("(but I might be wrong). Please make sure it has something")
+        print("roughly like the following:")
+        print("")
+        print(" import versioneer")
+        print(" setup( version=versioneer.get_version(),")
+        print("        cmdclass=versioneer.get_cmdclass(),  ...)")
+        print("")
+        errors += 1
+    if setters:
+        print("You should remove lines like 'versioneer.VCS = ' and")
+        print("'versioneer.versionfile_source = ' . This configuration")
+        print("now lives in setup.cfg, and should be removed from setup.py")
+        print("")
+        errors += 1
+    return errors
+
+
+if __name__ == "__main__":
+    cmd = sys.argv[1]
+    if cmd == "setup":
+        errors = do_setup()
+        errors += scan_setup_py()
+        if errors:
+            sys.exit(1)
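+
+# Illustrative usage (not part of the upstream file): installing Versioneer
+# into a project is typically
+#
+#   $ python versioneer.py setup
+#
+# which runs do_setup() to write _version.py, update __init__.py, and extend
+# MANIFEST.in, then scan_setup_py() to sanity-check setup.py; the process
+# exits non-zero if either step reports a problem.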