From c2343375252ffd9b95044027aacaf6e487af3a95 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 28 Sep 2023 14:22:19 -0600
Subject: [PATCH 01/47] Get rid of a Kokkos warning about All_t

---
 external/kokkos-kernels/KokkosBatched_Util.hpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/external/kokkos-kernels/KokkosBatched_Util.hpp b/external/kokkos-kernels/KokkosBatched_Util.hpp
index 46b97ee0..ae71a606 100644
--- a/external/kokkos-kernels/KokkosBatched_Util.hpp
+++ b/external/kokkos-kernels/KokkosBatched_Util.hpp
@@ -772,8 +772,8 @@ KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1,
 }
 template <class ViewType, class IdxType1>
 KOKKOS_INLINE_FUNCTION auto subview_wrapper(ViewType v, IdxType1 i1,
-                                            Kokkos::Impl::ALL_t i2,
-                                            Kokkos::Impl::ALL_t i3,
+                                            Kokkos::ALL_t i2,
+                                            Kokkos::ALL_t i3,
                                             const BatchLayout::Left &layout_tag,
                                             const Trans::Transpose) {
   auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag);
@@ -805,7 +805,7 @@ KOKKOS_INLINE_FUNCTION auto subview_wrapper(
 }
 template <class ViewType, class IdxType1>
 KOKKOS_INLINE_FUNCTION auto subview_wrapper(
-    ViewType v, IdxType1 i1, Kokkos::Impl::ALL_t i2, Kokkos::Impl::ALL_t i3,
+    ViewType v, IdxType1 i1, Kokkos::ALL_t i2, Kokkos::ALL_t i3,
     const BatchLayout::Right &layout_tag, const Trans::Transpose &) {
   auto sv_nt = subview_wrapper(v, i1, i3, i2, layout_tag);
 

From f723196c655010184865df7a77d7b8201a75c18a Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 28 Sep 2023 14:24:07 -0600
Subject: [PATCH 02/47] Compile fixes for using custom linker & for personal
 machines

---
 external/patches/kokkos-rx7800.patch | 13 ++++++++
 machines/bp.sh                       | 48 +++++++++++++++-------------
 make.sh                              |  8 ++---
 3 files changed, 42 insertions(+), 27 deletions(-)
 create mode 100644 external/patches/kokkos-rx7800.patch

diff --git a/external/patches/kokkos-rx7800.patch b/external/patches/kokkos-rx7800.patch
new file mode 100644
index 00000000..80415188
--- /dev/null
+++ b/external/patches/kokkos-rx7800.patch
@@ -0,0 +1,13 @@
+diff --git a/cmake/kokkos_arch.cmake b/cmake/kokkos_arch.cmake
+index 244881187..869b0962b 100644
+--- a/cmake/kokkos_arch.cmake
++++ b/cmake/kokkos_arch.cmake
+@@ -105,7 +105,7 @@ LIST(APPEND SUPPORTED_AMD_ARCHS      VEGA906  AMD_GFX906)
+ LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx906   gfx906)
+ LIST(APPEND SUPPORTED_AMD_GPUS       RX7900XTX  RX7900XTX    V620/W6800  V620/W6800)
+ LIST(APPEND SUPPORTED_AMD_ARCHS      NAVI1100   AMD_GFX1100  NAVI1030    AMD_GFX1030)
+-LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx1100    gfx1100      gfx1030     gfx1030)
++LIST(APPEND CORRESPONDING_AMD_FLAGS  gfx1100    gfx1101      gfx1030     gfx1030)
+ 
+ #FIXME CAN BE REPLACED WITH LIST_ZIP IN CMAKE 3.17
+ FOREACH(ARCH IN LISTS SUPPORTED_AMD_ARCHS)
diff --git a/machines/bp.sh b/machines/bp.sh
index 366ea959..c746b042 100644
--- a/machines/bp.sh
+++ b/machines/bp.sh
@@ -9,12 +9,13 @@ if [[ $HOST == "cheshire"* ]]; then
   if [[ "$ARGS" == *"cuda"* ]]; then
     # NVHPC. Compiler is chosen automatically now
     module load nvhpc
+    NPROC=8 # so much memory
   else
     # Intel oneAPI
     module load compiler mpi/2021
+    NPROC=24
   fi
-
-  NPROC=8
+  # Even CPU kharma is unkillable without this
   MPI_EXE=mpirun
 fi
 
@@ -24,13 +25,16 @@ fi
 
 if [[ $METAL_HOSTNAME == "fermium" ]]; then
   HOST_ARCH="AMDAVX"
-  DEVICE_ARCH="TURING75"
-  # Nvidia MPI hangs unless I do this
-  MPI_EXE=mpirun
+  # We patch Kokkos to make this gfx1101==rx7800xt
+  DEVICE_ARCH="AMD_GFX1100"
+  # MPI & Kokkos separately dislike running the bin alone
+  #MPI_EXE=mpirun
+  NPROC=24
 
-  if [[ "$ARGS" == *"cuda"* ]]; then
-    # Container default is the wrong NVHPC package
-    module swap nvhpc-hpcx nvhpc
+  if [[ "$ARGS" == *"hip"* ]]; then
+    # AMD for GPUs (this will be run in container, no modules)
+    C_NATIVE=hipcc
+    CXX_NATIVE=hipcc
   else
     # AMD for CPUs
     module load aocc-compiler-4.1.0 mpi
@@ -41,7 +45,6 @@ fi
 
 if [[ $METAL_HOSTNAME == "ferrum" ]]; then
   HOST_ARCH="HSW"
-  DEVICE_ARCH="INTEL_GEN"
   NPROC=6
 
   if [[ "$ARGS" == *"gcc"* ]]; then
@@ -52,12 +55,12 @@ if [[ $METAL_HOSTNAME == "ferrum" ]]; then
     # Intel compiler
     module purge
     module load compiler mpi
-    PREFIX_PATH="$HOME/libs/hdf5-oneapi"
+    C_NATIVE="icc"
+    CXX_NATIVE="icpc"
   else
     # Intel SYCL implementation "DPC++"
     module purge
     module load compiler mpi
-    PREFIX_PATH="$HOME/libs/hdf5-oneapi"
     C_NATIVE="icx"
     CXX_NATIVE="icpx"
   fi
@@ -69,15 +72,18 @@ if [[ $HOST == "cinnabar"* ]]; then
 
   module purge # Handle modules inside this script
   HOST_ARCH="HSW" # This won't change
+  DEVICE_ARCH="TURING75"
+
+  # Runtime
+  MPI_NUM_PROCS=1
+
+  # TODO container:
+  # module swap nvhpc-hpcx nvhpc
 
   if [[ "$ARGS" == *"cuda"* ]]; then
-    # Use NVHPC libraries (GPU-aware OpenMPI!)
-    DEVICE_ARCH="KEPLER35"
-    MPI_NUM_PROCS=1
+    # Runtime
     MPI_EXTRA_ARGS="--map-by ppr:1:numa:pe=14"
 
-    # Quash warning about my old gpus
-    export NVCC_WRAPPER_CUDA_EXTRA_FLAGS="-Wno-deprecated-gpu-targets"
     # System CUDA path
     EXTRA_FLAGS="-DCUDAToolkit_INCLUDE_DIR=/usr/include/cuda $EXTRA_FLAGS"
 
@@ -86,28 +92,24 @@ if [[ $HOST == "cinnabar"* ]]; then
       module load mpi/mpich-x86_64 nvhpc-nompi
       C_NATIVE="gcc"
       CXX_NATIVE="g++"
-      # Uses system GCC, which is old
-      EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
     else
       module load nvhpc
       PREFIX_PATH="$HOME/libs/hdf5-nvhpc"
       C_NATIVE="nvc"
       CXX_NATIVE="nvc++"
-      export CXXFLAGS="-mp"
+      #export CXXFLAGS="-mp"
     fi
   else
-    MPI_NUM_PROCS=1
     if [[ "$ARGS" == *"gcc"* ]]; then
       # GCC
       module load mpi/mpich-x86_64
       C_NATIVE="gcc"
       CXX_NATIVE="g++"
-      # Uses system GCC, which is old
-      EXTRA_FLAGS="-DPARTHENON_DISABLE_HDF5_COMPRESSION=ON $EXTRA_FLAGS"
     else
       # Intel by default
       module load compiler mpi
-      PREFIX_PATH="$HOME/libs/hdf5-oneapi"
+      C_NATIVE="icx"
+      CXX_NATIVE="icpx"
     fi
   fi
 fi
diff --git a/make.sh b/make.sh
index 6235e789..6dc9e585 100755
--- a/make.sh
+++ b/make.sh
@@ -201,10 +201,10 @@ fi
 # Allow for a custom linker program, but use CXX by
 # default as system linker may be older/incompatible
 if [[ -v LINKER ]]; then
-  EXTRA_FLAGS="-DCMAKE_LINKER=$LINKER"
+  EXTRA_FLAGS="$EXTRA_FLAGS -DCMAKE_LINKER=$LINKER"
 fi
 if [[ "$ARGS" == *"special_link_line"* ]]; then
-  EXTRA_FLAGS="-DCMAKE_CXX_LINK_EXECUTABLE='<CMAKE_LINKER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>'"
+  EXTRA_FLAGS="$EXTRA_FLAGS -DCMAKE_CXX_LINK_EXECUTABLE='<CMAKE_LINKER> <FLAGS> <CMAKE_CXX_LINK_FLAGS> <LINK_FLAGS> <OBJECTS> -o <TARGET> <LINK_LIBRARIES>'"
 fi
 
 # Avoid warning on nvcc pragmas Intel doesn't like
@@ -284,7 +284,7 @@ fi
 if [[ "$ARGS" == *"clean"* ]]; then
 
   cd external/parthenon
-  if [[ $(( $(git --version | cut -d '.' -f 2) > 35 )) ]]; then
+  if [[ $(( $(git --version | cut -d '.' -f 2) > 35 )) == "1" ]]; then
     git apply --quiet ../patches/parthenon-*.patch
   else
     echo "make.sh note: You may see errors applying patches below. These are normal."
@@ -314,7 +314,7 @@ if [[ "$ARGS" == *"clean"* ]]; then
     -DKokkos_ENABLE_CUDA=$ENABLE_CUDA \
     -DKokkos_ENABLE_SYCL=$ENABLE_SYCL \
     -DKokkos_ENABLE_HIP=$ENABLE_HIP \
-    $EXTRA_FLAGS
+    "$EXTRA_FLAGS"
 
   if [[ "$ARGS" == *"dryrun"* ]]; then
     set +x

From b99b16077165c58ce1fd7d1a54e4a4c40e0e67ce Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 29 Sep 2023 10:45:40 -0600
Subject: [PATCH 03/47] Parthenon bump, callbacks

Rename two very Athena-sounding callbacks more inline, add 'PostExecute'
---
 external/parthenon              |  2 +-
 kharma/b_cd/b_cd.cpp            |  2 +-
 kharma/driver/kharma_driver.cpp |  6 ++++++
 kharma/driver/kharma_driver.hpp | 10 +++++++---
 kharma/implicit/implicit.cpp    |  5 ++++-
 kharma/kharma.cpp               | 12 ++++++------
 kharma/kharma.hpp               |  5 +++--
 kharma/kharma_package.cpp       | 33 +++++++++++++++++++++++----------
 kharma/kharma_package.hpp       | 16 ++++++++++++----
 kharma/main.cpp                 |  4 ++--
 10 files changed, 65 insertions(+), 30 deletions(-)

diff --git a/external/parthenon b/external/parthenon
index eede5cd0..4b5f5026 160000
--- a/external/parthenon
+++ b/external/parthenon
@@ -1 +1 @@
-Subproject commit eede5cd09f4d669d4fc97923d51eeca35f4dcd29
+Subproject commit 4b5f5026e79fa81ff61a1806c1a4dbe09e673269
diff --git a/kharma/b_cd/b_cd.cpp b/kharma/b_cd/b_cd.cpp
index e008c1af..7d37399f 100644
--- a/kharma/b_cd/b_cd.cpp
+++ b/kharma/b_cd/b_cd.cpp
@@ -88,7 +88,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     pkg->BlockUtoP = B_CD::BlockUtoP;
 
     pkg->PostStepDiagnosticsMesh = B_CD::PostStepDiagnostics;
-    pkg->MeshPostStepUserWorkInLoop = B_CD::UpdateCtopMax;
+    pkg->PostStepWork = B_CD::UpdateCtopMax;
 
     // List (vector) of HistoryOutputVar that will all be enrolled as output variables
     parthenon::HstVar_list hst_vars = {};
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 84387958..e66fed60 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -290,3 +290,9 @@ void KHARMADriver::SetGlobalTimeStep()
       (tm.tlim - tm.time) < tm.dt) // timestep would take us past desired endpoint
     tm.dt = tm.tlim - tm.time;
 }
+
+void KHARMADriver::PostExecute(DriverStatus status)
+{
+    Packages::PostExecute(pmesh, pinput, tm);
+    EvolutionDriver::PostExecute(status);
+}
diff --git a/kharma/driver/kharma_driver.hpp b/kharma/driver/kharma_driver.hpp
index 208c472a..7bcc8d56 100644
--- a/kharma/driver/kharma_driver.hpp
+++ b/kharma/driver/kharma_driver.hpp
@@ -57,11 +57,14 @@ class KHARMADriver : public MultiStageDriver {
         static std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<Packages_t>& packages);
 
         // Eliminate Parthenon's print statements when starting up the driver, we have a bunch of our own
-        void PreExecute() { timer_main.reset(); }
+        void PreExecute() override { timer_main.reset(); }
 
         // Also override the timestep calculation, so we can start moving options etc out of GRMHD package
         void SetGlobalTimeStep();
 
+        // And the PostExecute, so we can add a package callback here
+        void PostExecute(DriverStatus status) override;
+
         /**
          * A Driver object orchestrates everything that has to be done to a mesh to take a step.
          * The function MakeTaskCollection outlines everything to be done in one sub-step,
@@ -83,7 +86,7 @@ class KHARMADriver : public MultiStageDriver {
          * All task lists proceed roughly in this order, but differ in which variables they synchronize via MPI,
          * or whether they synchronize at all.
          */
-        TaskCollection MakeTaskCollection(BlockList_t &blocks, int stage);
+        TaskCollection MakeTaskCollection(BlockList_t &blocks, int stage) override;
 
         /**
          * The default step, synchronizing conserved variables and then recovering primitive variables in the ghost zones.
@@ -158,7 +161,8 @@ class KHARMADriver : public MultiStageDriver {
         }
 
         static TaskStatus WeightedSumDataFace(const std::vector<MetadataFlag> &flags, MeshData<Real> *in1, MeshData<Real> *in2, const Real w1, const Real w2,
-                                MeshData<Real> *out) {
+                                MeshData<Real> *out)
+        {
             Kokkos::Profiling::pushRegion("Task_WeightedSumData");
             const auto &x = in1->PackVariables(flags);
             const auto &y = in2->PackVariables(flags);
diff --git a/kharma/implicit/implicit.cpp b/kharma/implicit/implicit.cpp
index 826a4665..f0ddd5d4 100644
--- a/kharma/implicit/implicit.cpp
+++ b/kharma/implicit/implicit.cpp
@@ -593,7 +593,10 @@ TaskStatus Implicit::Step(MeshData<Real> *md_full_step_init, MeshData<Real> *md_
 
             // Finally, break if max_norm is less than the total tolerance we set
             // TODO per-zone tolerance with masks?
-            if (iter >= iter_min && max_norm < rootfind_tol) break;
+            if (iter >= iter_min && max_norm < rootfind_tol) {
+                EndFlag();
+                break;
+            }
         }
         EndFlag();
     }
diff --git a/kharma/kharma.cpp b/kharma/kharma.cpp
index 6d68279e..07ec8d42 100644
--- a/kharma/kharma.cpp
+++ b/kharma/kharma.cpp
@@ -96,8 +96,8 @@ std::shared_ptr<KHARMAPackage> KHARMA::InitializeGlobals(ParameterInput *pin, st
     params.Add("branch", KHARMA::Version::GIT_REFSPEC);
 
     // Update the times with callbacks
-    pkg->MeshPreStepUserWorkInLoop = KHARMA::MeshPreStepUserWorkInLoop;
-    pkg->MeshPostStepUserWorkInLoop = KHARMA::MeshPostStepUserWorkInLoop;
+    pkg->PreStepWork = KHARMA::PreStepWork;
+    pkg->PostStepWork = KHARMA::PostStepWork;
 
     return pkg;
 }
@@ -116,7 +116,7 @@ void KHARMA::ResetGlobals(ParameterInput *pin, Mesh *pmesh)
     // to be restored by Parthenon
 }
 
-void KHARMA::MeshPreStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
+void KHARMA::PreStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
 {
     auto& globals = pmesh->packages.Get("Globals")->AllParams();
     if (!globals.Get<bool>("in_loop")) {
@@ -126,11 +126,11 @@ void KHARMA::MeshPreStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const S
     globals.Update<double>("time", tm.time);
 }
 
-void KHARMA::MeshPostStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
+void KHARMA::PostStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
 {
-    // Knowing this works took a little digging into Parthenon's EvolutionDriver.
+    // Knowing that this works took a little digging into Parthenon's EvolutionDriver.
     // The order of operations after calling Step() is:
-    // 1. Call PostStepUserWorkInLoop and PostStepDiagnostics (this function and following)
+    // 1. Call PostStepWork and PostStepDiagnostics (this function and following)
     // 2. Set the timestep tm.dt to the minimum from the EstimateTimestep calls
     // 3. Generate any outputs, e.g. jcon
     // Thus we preserve tm.dt (which has not yet been reset) as dt_last for Current::FillOutput
diff --git a/kharma/kharma.hpp b/kharma/kharma.hpp
index 91c9a99c..a772e9f5 100644
--- a/kharma/kharma.hpp
+++ b/kharma/kharma.hpp
@@ -56,11 +56,11 @@ void ResetGlobals(ParameterInput *pin, Mesh *pmesh);
 /**
  * Update variables in Globals package based on Parthenon state incl. SimTime struct
  */
-void MeshPreStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
+void PreStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
 /**
  * Update variables in Globals package based on Parthenon state incl. SimTime struct
  */
-void MeshPostStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
+void PostStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
 
 /**
  * Task to add a package.  Lets us queue up all the packages we want in a task list, *then* load them
@@ -126,6 +126,7 @@ inline int PackDimension(Packages_t* packages, Metadata::FlagCollection fc)
     int nvar = 0;
     for (auto pkg : packages->AllPackages()) {
         nvar += pkg.second->GetPackDimension(fc);
+        std::cout << pkg.first << " variables: " << pkg.second->GetPackDimension(fc) << std::endl;
     }
     return nvar;
 }
diff --git a/kharma/kharma_package.cpp b/kharma/kharma_package.cpp
index 594bc1c3..68dcb66e 100644
--- a/kharma/kharma_package.cpp
+++ b/kharma/kharma_package.cpp
@@ -204,28 +204,28 @@ void Packages::UserWorkBeforeOutput(MeshBlock *pmb, ParameterInput *pin)
     EndFlag();
 }
 
-void Packages::PreStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
+void Packages::PreStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
 {
-    Flag("PreStepUserWorkInLoop");
+    Flag("PreStepWork");
     auto kpackages = pmesh->packages.AllPackagesOfType<KHARMAPackage>();
     for (auto kpackage : kpackages) {
-        if (kpackage.second->MeshPreStepUserWorkInLoop != nullptr) {
-            Flag("PreStepUserWorkInLoop_"+kpackage.first);
-            kpackage.second->MeshPreStepUserWorkInLoop(pmesh, pin, tm);
+        if (kpackage.second->PreStepWork != nullptr) {
+            Flag("PreStepWork_"+kpackage.first);
+            kpackage.second->PreStepWork(pmesh, pin, tm);
             EndFlag();
         }
     }
     EndFlag();
 }
 
-void Packages::PostStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
+void Packages::PostStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
 {
-    Flag("PostStepUserWorkInLoop");
+    Flag("PostStepWork");
     auto kpackages = pmesh->packages.AllPackagesOfType<KHARMAPackage>();
     for (auto kpackage : kpackages) {
-        if (kpackage.second->MeshPostStepUserWorkInLoop != nullptr) {
-            Flag("PostStepUserWorkInLoop_"+kpackage.first);
-            kpackage.second->MeshPostStepUserWorkInLoop(pmesh, pin, tm);
+        if (kpackage.second->PostStepWork != nullptr) {
+            Flag("PostStepWork_"+kpackage.first);
+            kpackage.second->PostStepWork(pmesh, pin, tm);
             EndFlag();
         }
     }
@@ -250,3 +250,16 @@ void Packages::PostStepDiagnostics(Mesh *pmesh, ParameterInput *pin, const SimTi
     EndFlag();
 }
 
+void Packages::PostExecute(Mesh *pmesh, ParameterInput *pin, const SimTime &tm)
+{
+    Flag("KHARMAPostExecute");
+    auto kpackages = pmesh->packages.AllPackagesOfType<KHARMAPackage>();
+    for (auto kpackage : kpackages) {
+        if (kpackage.second->PostExecute != nullptr) {
+            Flag("PostExecute_"+kpackage.first);
+            kpackage.second->PostExecute(pmesh, pin, tm);
+            EndFlag();
+        }
+    }
+    EndFlag();
+}
diff --git a/kharma/kharma_package.hpp b/kharma/kharma_package.hpp
index 7c43bbc7..a3f02620 100644
--- a/kharma/kharma_package.hpp
+++ b/kharma/kharma_package.hpp
@@ -52,6 +52,9 @@ using namespace parthenon;
 class KHARMAPackage : public StateDescriptor {
     public:
         KHARMAPackage(std::string name) : StateDescriptor(name) {}
+#if TRACE
+        ~KHARMAPackage() { std::cerr << "Destroying package " << label_ << std::endl; }
+#endif
 
         // PHYSICS
         // Recovery of primitive variables from conserved.
@@ -88,15 +91,19 @@ class KHARMAPackage : public StateDescriptor {
 
         // CONVENIENCE
         // Anything to be done before each step begins -- currently just updating global "in_loop"
-        std::function<void(Mesh*, ParameterInput*, const SimTime&)> MeshPreStepUserWorkInLoop = nullptr;
+        std::function<void(Mesh*, ParameterInput*, const SimTime&)> PreStepWork = nullptr;
         // Anything to be done after every step is fully complete -- usually reductions or preservation of variables
-        std::function<void(Mesh*, ParameterInput*, const SimTime&)> MeshPostStepUserWorkInLoop = nullptr;
+        // Note that most diagnostics should go in "PostStepDiagnosticsMesh" instead
+        std::function<void(Mesh*, ParameterInput*, const SimTime&)> PostStepWork = nullptr;
 
         // Anything to be done just before any outputs (dump files, restarts, history files) are made
         // Usually for filling output-only variables
         // TODO Add MeshUserWorkBeforeOutput to Parthenon
         std::function<void(MeshBlock*, ParameterInput*)> BlockUserWorkBeforeOutput = nullptr;
 
+        // Anything at the very end of simulation. Cleanup, summaries, outputs if you're brave
+        std::function<void(Mesh*, ParameterInput*, const SimTime&)> PostExecute = nullptr;
+
         // BOUNDARIES
         // Currently only used by the "boundaries" package
         // Note these functions take the boundary IndexDomain as an argument, so you can assign the same function to multiple boundaries.
@@ -160,7 +167,8 @@ TaskStatus MeshApplyFloors(MeshData<Real> *md, IndexDomain domain);
 // These are already Parthenon global callbacks -- see their documentation
 // I define them here so I can pass them on to packages
 void UserWorkBeforeOutput(MeshBlock *pmb, ParameterInput *pin);
-void PreStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
-void PostStepUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
+void PreStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
+void PostStepWork(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
 void PostStepDiagnostics(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
+void PostExecute(Mesh *pmesh, ParameterInput *pin, const SimTime &tm);
 }
diff --git a/kharma/main.cpp b/kharma/main.cpp
index 7052099a..724057d0 100644
--- a/kharma/main.cpp
+++ b/kharma/main.cpp
@@ -109,8 +109,8 @@ int main(int argc, char *argv[])
     pman.app_input->ProblemGenerator = KHARMA::ProblemGenerator;
     // A few are passed on to be implemented by packages as they see fit
     pman.app_input->MeshBlockUserWorkBeforeOutput = Packages::UserWorkBeforeOutput;
-    pman.app_input->PreStepMeshUserWorkInLoop = Packages::PreStepUserWorkInLoop;
-    pman.app_input->PostStepMeshUserWorkInLoop = Packages::PostStepUserWorkInLoop;
+    pman.app_input->PreStepMeshUserWorkInLoop = Packages::PreStepWork;
+    pman.app_input->PostStepMeshUserWorkInLoop = Packages::PostStepWork;
     pman.app_input->PostStepDiagnosticsInLoop = Packages::PostStepDiagnostics;
 
     // Registering KHARMA's boundary functions here doesn't mean they will *always* run:

From c1c940e9e8206a15f4c04e0e4abe54a78854ebd8 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 29 Sep 2023 10:47:29 -0600
Subject: [PATCH 04/47] Remove unused reducers, add comments

---
 kharma/b_ct/b_ct.cpp             | 3 ---
 kharma/b_flux_ct/b_flux_ct.cpp   | 2 --
 kharma/boundaries/boundaries.cpp | 1 +
 kharma/boundaries/dirichlet.cpp  | 2 +-
 kharma/emhd/emhd.cpp             | 5 ++++-
 5 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index 485f349a..02659f1c 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -78,9 +78,6 @@ std::shared_ptr<KHARMAPackage> B_CT::Initialize(ParameterInput *pin, std::shared
     if (lazy_prolongation && pin->GetString("parthenon/mesh", "refinement") == "adaptive")
         throw std::runtime_error("Cannot use non-preserving prolongation in AMR!");
 
-    // Add a reducer object (MPI communicator) for divB to params
-    params.Add("divb_reducer", AllReduce<Real>());
-
     // FIELDS
 
     // Flags for B fields on faces.
diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index 9665e2f2..fd3fbeb3 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -92,8 +92,6 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     bool implicit_b = pin->GetOrAddBoolean("b_field", "implicit", false);
     params.Add("implicit", implicit_b);
 
-    params.Add("divb_reducer", AllReduce<Real>());
-
     // FIELDS
     // Vector size: 3x[grid shape]
     std::vector<int> s_vector({NVEC});
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index 41ef993e..5aaeb2d7 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -245,6 +245,7 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
     EndFlag();
 
     // Exit immediately if we're syncing emf alone
+    // TODO can we check name?
     if (rc->GetVariableVector().size() == 1) {
         EndFlag();
         return;
diff --git a/kharma/boundaries/dirichlet.cpp b/kharma/boundaries/dirichlet.cpp
index e90d5c47..c3b63f8f 100644
--- a/kharma/boundaries/dirichlet.cpp
+++ b/kharma/boundaries/dirichlet.cpp
@@ -56,7 +56,7 @@ void KBoundaries::DirichletImpl(std::shared_ptr<MeshBlockData<Real>> &rc, Bounda
     auto bound = rc->Get("bounds." + BoundaryName(bface)).data;
 
     if (q.GetDim(4) != bound.GetDim(4)) {
-        std::cerr << "Boundary cache mismatch! " << bound.GetDim(4) << " vs " << q.GetDim(4) << std::endl;
+        std::cerr << "Boundary cache mismatch! boundaries: " << bound.GetDim(4) << " vs pack: " << q.GetDim(4) << std::endl;
         std::cerr << "Variables with ghost zones:" << std::endl;
         ghostmap.print();
     }
diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp
index 02e73749..80da5f4c 100644
--- a/kharma/emhd/emhd.cpp
+++ b/kharma/emhd/emhd.cpp
@@ -73,6 +73,8 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     bool viscosity = pin->GetOrAddBoolean("emhd", "viscosity", true);
     params.Add("viscosity", viscosity);
 
+    // TODO consider erroring when (the correct subset of) these aren't present,
+    // rather than have defaults that won't work well
     Real tau              = pin->GetOrAddReal("emhd", "tau", 1.0);
     Real conduction_alpha = pin->GetOrAddReal("emhd", "conduction_alpha", 1.0);
     params.Add("conduction_alpha", conduction_alpha);
@@ -122,7 +124,8 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // Only enable limits internally if we're actually doing EMHD
     params.Add("enable_emhd_limits", enable_emhd_limits);
 
-
+    // Parthenon adds a flag consisting of just the package name,
+    // but it's useless to us since we want just the important variables to carry a name
     Metadata::AddUserFlag("EMHDVar");
 
     // General options for primitive and conserved scalar variables in ImEx driver

From 17b5b81db4825b531fa6ce73bb3d1eaecc1d8b41 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 29 Sep 2023 10:50:20 -0600
Subject: [PATCH 05/47] Organize prob/, make problems request B fields

The idea is to have every problem capable of either B field transport,
but this requires defining a function for A or B rather than setting it,
since you don't know where it lives.
This solution is not ideal, but I think it'll work okay
---
 kharma/CMakeLists.txt                     |   2 +
 kharma/prob/emhd/emhdmodes.hpp            |   2 +
 kharma/prob/fm_torus.cpp                  |  59 -----
 kharma/prob/fm_torus.hpp                  |  10 -
 kharma/prob/kelvin_helmholtz.hpp          |  69 +----
 kharma/prob/mhdmodes.hpp                  |  95 ++++---
 kharma/prob/orszag_tang.hpp               |  53 +---
 kharma/prob/post_initialize.cpp           |   4 +-
 kharma/prob/problem.cpp                   |   8 +-
 kharma/prob/seed_B.cpp                    | 274 +++++++++++++++++++-
 kharma/prob/seed_B.hpp                    |  63 +++--
 kharma/prob/seed_B_impl.hpp               | 295 ----------------------
 kharma/prob/{ => utils}/blob.hpp          |   0
 kharma/prob/{ => utils}/hdf5_utils.cpp    |   0
 kharma/prob/{ => utils}/hdf5_utils.h      |   0
 kharma/prob/{ => utils}/interpolation.hpp |   0
 kharma/prob/utils/perturbation.hpp        | 102 ++++++++
 17 files changed, 495 insertions(+), 541 deletions(-)
 delete mode 100644 kharma/prob/seed_B_impl.hpp
 rename kharma/prob/{ => utils}/blob.hpp (100%)
 rename kharma/prob/{ => utils}/hdf5_utils.cpp (100%)
 rename kharma/prob/{ => utils}/hdf5_utils.h (100%)
 rename kharma/prob/{ => utils}/interpolation.hpp (100%)
 create mode 100644 kharma/prob/utils/perturbation.hpp

diff --git a/kharma/CMakeLists.txt b/kharma/CMakeLists.txt
index 2a9c53a9..aec167cc 100644
--- a/kharma/CMakeLists.txt
+++ b/kharma/CMakeLists.txt
@@ -15,6 +15,7 @@ AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} EXE_NAME_SRC)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/prob EXE_NAME_SRC)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/prob/elec EXE_NAME_SRC)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/prob/emhd EXE_NAME_SRC)
+AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/prob/utils EXE_NAME_SRC)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/coordinates EXE_NAME_SRC)
 AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR}/flux EXE_NAME_SRC)
 
@@ -40,6 +41,7 @@ include_directories(${CMAKE_CURRENT_SOURCE_DIR})
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/prob)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/prob/elec)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/prob/emhd)
+include_directories(${CMAKE_CURRENT_SOURCE_DIR}/prob/utils)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/coordinates)
 include_directories(${CMAKE_CURRENT_SOURCE_DIR}/flux)
 
diff --git a/kharma/prob/emhd/emhdmodes.hpp b/kharma/prob/emhd/emhdmodes.hpp
index 2848075f..7c9c7f9d 100644
--- a/kharma/prob/emhd/emhdmodes.hpp
+++ b/kharma/prob/emhd/emhdmodes.hpp
@@ -89,6 +89,8 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     const Real k2 = 4. * M_PI;
     // END POSSIBLE ARGS
 
+    // TODO SET B PARAMS HERE
+
     IndexDomain domain = IndexDomain::interior;
     IndexRange ib = pmb->cellbounds.GetBoundsI(domain);
     IndexRange jb = pmb->cellbounds.GetBoundsJ(domain);
diff --git a/kharma/prob/fm_torus.cpp b/kharma/prob/fm_torus.cpp
index e0441445..879c1b34 100644
--- a/kharma/prob/fm_torus.cpp
+++ b/kharma/prob/fm_torus.cpp
@@ -38,9 +38,6 @@
 #include "coordinate_utils.hpp"
 #include "types.hpp"
 
-#include <random>
-#include "Kokkos_Random.hpp"
-
 TaskStatus InitializeFMTorus(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInput *pin)
 {
     auto pmb        = rc->GetBlockPointer();
@@ -198,59 +195,3 @@ TaskStatus InitializeFMTorus(std::shared_ptr<MeshBlockData<Real>>& rc, Parameter
 
     return TaskStatus::complete;
 }
-
-// TODO move this to a different file
-TaskStatus PerturbU(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInput *pin)
-{
-    auto pmb = rc->GetBlockPointer();
-    auto rho = rc->Get("prims.rho").data;
-    auto u = rc->Get("prims.u").data;
-
-    const Real u_jitter = pin->GetReal("perturbation", "u_jitter");
-    // Don't jitter values set by floors
-    const Real jitter_above_rho = pin->GetReal("floors", "rho_min_geom") + 1e-10;
-    // Note we add the MeshBlock gid to this value when seeding RNG,
-    // to get a new sequence for every block
-    const int rng_seed = pin->GetOrAddInteger("perturbation", "rng_seed", 31337);
-    // Print real seed used for all blocks, to ensure they're different
-    if (pmb->packages.Get("Globals")->Param<int>("verbose") > 1) {
-        std::cout << "Seeding RNG in block " << pmb->gid << " with value " << rng_seed + pmb->gid << std::endl;
-    }
-    const bool serial = pin->GetOrAddInteger("perturbation", "serial", false);
-
-    // Should we jitter ghosts? If first boundary sync doesn't work it's marginally less disruptive
-    IndexDomain domain = IndexDomain::interior;
-    const int is = pmb->cellbounds.is(domain), ie = pmb->cellbounds.ie(domain);
-    const int js = pmb->cellbounds.js(domain), je = pmb->cellbounds.je(domain);
-    const int ks = pmb->cellbounds.ks(domain), ke = pmb->cellbounds.ke(domain);
-
-    if (serial) {
-        // Serial version
-        // Probably guarantees better determinism, but CPU single-thread only
-        std::mt19937 gen(rng_seed + pmb->gid);
-        std::uniform_real_distribution<Real> dis(-u_jitter/2, u_jitter/2);
-
-        auto u_host = u.GetHostMirrorAndCopy();
-        for(int k=ks; k <= ke; k++)
-            for(int j=js; j <= je; j++)
-                for(int i=is; i <= ie; i++)
-                    u_host(k, j, i) *= 1. + dis(gen);
-        u.DeepCopy(u_host);
-    } else {
-        // Kokkos version
-        typedef typename Kokkos::Random_XorShift64_Pool<> RandPoolType;
-        RandPoolType rand_pool(rng_seed + pmb->gid);
-        typedef typename RandPoolType::generator_type gen_type;
-        pmb->par_for("perturb_u", ks, ke, js, je, is, ie,
-            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-                if (rho(k, j, i) > jitter_above_rho) {
-                    gen_type rgen = rand_pool.get_state();
-                    u(k, j, i) *= 1. + Kokkos::rand<gen_type, Real>::draw(rgen, -u_jitter/2, u_jitter/2);
-                    rand_pool.free_state(rgen);
-                }
-            }
-        );
-    }
-
-    return TaskStatus::complete;
-}
diff --git a/kharma/prob/fm_torus.hpp b/kharma/prob/fm_torus.hpp
index 987d33c5..211fe1e7 100644
--- a/kharma/prob/fm_torus.hpp
+++ b/kharma/prob/fm_torus.hpp
@@ -12,16 +12,6 @@
  */
 TaskStatus InitializeFMTorus(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInput *pin);
 
-/**
- * Perturb the internal energy by a uniform random proportion per cell.
- * Resulting internal energies will be between u \pm u*u_jitter/2
- * i.e. u_jitter=0.1 -> \pm 5% randomization, 0.95u to 1.05u
- *
- * @param u_jitter see description
- * @param rng_seed is added to the MPI rank to seed the GSL RNG
- */
-TaskStatus PerturbU(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInput *pin);
-
 /**
  * Torus solution for ln h, See Fishbone and Moncrief eqn. 3.6. 
  */
diff --git a/kharma/prob/kelvin_helmholtz.hpp b/kharma/prob/kelvin_helmholtz.hpp
index 7a7e9958..25c79033 100644
--- a/kharma/prob/kelvin_helmholtz.hpp
+++ b/kharma/prob/kelvin_helmholtz.hpp
@@ -66,7 +66,6 @@ TaskStatus InitializeKelvinHelmholtz(std::shared_ptr<MeshBlockData<Real>>& rc, P
     const Real amp = pin->GetOrAddReal("kelvin_helmholtz", "amp", 0.01);
     const Real z1 = pin->GetOrAddReal("kelvin_helmholtz", "z1", 0.5);
     const Real z2 = pin->GetOrAddReal("kelvin_helmholtz", "z2", 1.5);
-    const Real added_b = pin->GetOrAddReal("kelvin_helmholtz", "added_b", 0.0);
 
     const auto& G = pmb->coords;
     const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
@@ -77,68 +76,18 @@ TaskStatus InitializeKelvinHelmholtz(std::shared_ptr<MeshBlockData<Real>>& rc, P
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             GReal X[GR_DIM];
             G.coord_embed(k, j, i, Loci::center, X);
-
             // Lecoanet's x <-> x1; z <-> x2
-            GReal x = X[1];
-            GReal z = X[2];
+            GReal zdist1 = X[2] - z1;
+            GReal zdist2 = X[2] - z2;
 
             rho(k, j, i) =
-                rho0 + Drho * 0.5 * (tanh((z - z1) / a) - tanh((z - z2) / a));
-            u(k, j, i) = P0 / (gam - 1.);
-            uvec(0, k, j, i) = uflow * (tanh((z - z1) / a) - tanh((z - z2) / a) - 1.);
-            uvec(1, k, j, i) = amp * sin(2. * M_PI * x) *
-                        (m::exp(-(z - z1) * (z - z1) / (sigma * sigma)) +
-                        m::exp(-(z - z2) * (z - z2) / (sigma * sigma)));
-            uvec(2, k, j, i) = 0;
-        }
-    );
-
-    // if (pmb->packages.AllPackages().count("B_CT")) {
-    //     auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
-    //     // Halo one zone right for faces
-    //     // We don't need any more than that, since curls never take d1dx1
-    //     IndexRange3 bA = KDomain::GetRange(rc, IndexDomain::entire, 0, 0);
-    //     IndexSize3 s = KDomain::GetBlockSize(rc);
-    //     GridVector A("A", NVEC, s.n3, s.n2, s.n1);
-    //     pmb->par_for("ot_A", bA.ks, bA.ke, bA.js, bA.je, bA.is, bA.ie,
-    //         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-    //             Real Xembed[GR_DIM];
-    //             G.coord(k, j, i, Loci::corner, Xembed);
-    //             A(V3, k, j, i)  = added_b * (Xembed[1]/G.Dxc<1>(i) + Xembed[2]/G.Dxc<2>(j)) * tscale;
-    //         }
-    //     );
-    //     // This fills a couple zones outside the exact interior with bad data
-    //     IndexRange3 bB = KDomain::GetRange(rc, domain, 0, -1);
-    //     pmb->par_for("ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
-    //         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-    //             B_CT::curl_2D(G, A, B_Uf, k, j, i);
-    //         }
-    //     );
-    //     B_CT::BlockUtoP(rc.get(), IndexDomain::entire, false);
-    //     double max_divb = B_CT::BlockMaxDivB(rc.get());
-    //     std::cout << "Block max DivB: " << max_divb << std::endl;
-
-    // } else if (pmb->packages.AllPackages().count("B_FluxCT") ||
-    //            pmb->packages.AllPackages().count("B_CD")) {
-    //     GridVector B_P = rc->Get("prims.B").data;
-    //     pmb->par_for("ot_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-    //         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-    //             Real X[GR_DIM];
-    //             G.coord(k, j, i, Loci::center, X);
-    //             B_P(V1, k, j, i) = added_b * tscale;
-    //             B_P(V2, k, j, i) = added_b * tscale;
-    //             B_P(V3, k, j, i) = 0.;
-    //         }
-    //     );
-    //     B_FluxCT::BlockPtoU(rc.get(), IndexDomain::entire, false);
-    // }
-
-    // Rescale primitive velocities by tscale, and internal energy by the square.
-    pmb->par_for("kh_renorm", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-        KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-            u(k, j, i) *= tscale * tscale;
-            VLOOP uvec(v, k, j, i) *= tscale;
-            //VLOOP B_P(v, k, j, i) *= tscale; //already done
+                rho0 + Drho * 0.5 * (m::tanh(zdist1 / a) - m::tanh(zdist2 / a));
+            u(k, j, i) = P0 / (gam - 1.) * tscale * tscale;
+            uvec(0, k, j, i) = uflow * (m::tanh(zdist1 / a) - m::tanh(zdist2 / a) - 1.) * tscale;
+            uvec(1, k, j, i) = amp * m::sin(2. * M_PI * X[1]) *
+                        (m::exp(-(zdist1 * zdist1) / (sigma * sigma)) +
+                        m::exp(-(zdist2 * zdist2) / (sigma * sigma))) * tscale;
+            uvec(2, k, j, i) = 0.;
         }
     );
 
diff --git a/kharma/prob/mhdmodes.hpp b/kharma/prob/mhdmodes.hpp
index c12e4255..4a8167f0 100644
--- a/kharma/prob/mhdmodes.hpp
+++ b/kharma/prob/mhdmodes.hpp
@@ -68,51 +68,38 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
     const auto& G = pmb->coords;
 
     const int nmode = pin->GetOrAddInteger("mhdmodes", "nmode", 1);
-    const int dir = pin->GetOrAddInteger("mhdmodes", "dir", 0);
     const bool one_period = pin->GetOrAddBoolean("mhdmodes", "one_period", true);
 
-    // if (pin->GetInteger("parthenon/mesh", "nx1")) {
-    //     dir = 3;
-    // }
-
-    // START POSSIBLE ARGS: take all these as parameters in pin?
     // Mean state
-    Real rho0 = 1.;
-    Real u0 = 1.;
-    Real u10 = 0.;
-    Real u20 = 0.;
-    Real u30 = 0.;
-
-    // Wavevector
-    Real k1 = 2. * M_PI;
-    Real k2 = 2. * M_PI;
-    Real k3 = 2. * M_PI;
-    // "Faux-2D" plane orientation
-    // Set to 0 for "full" 3D wave
-    if (dir == 1)
-        k1 = 0;
-    if (dir == 2)
-        k2 = 0;
-    if (dir == 3)
-        k3 = 0;
-
-    Real amp = 1.e-4;
-    // END POSSIBLE ARGS
-
-    // B is set later, see below
-    Real B10 = 0.;
-    Real B20 = 0.;
-    Real B30 = 0.;
+    const Real rho0 = pin->GetOrAddReal("mhdmodes", "rho0", 1.);
+    const Real u0 = pin->GetOrAddReal("mhdmodes", "u0", 1.);
+    const Real u10 = pin->GetOrAddReal("mhdmodes", "u10", 0.);
+    const Real u20 = pin->GetOrAddReal("mhdmodes", "u20", 0.);
+    const Real u30 = pin->GetOrAddReal("mhdmodes", "u30", 0.);
+
+    // Wave parameters
+    // dir sets "Faux-2D" plane orientation, good for asymmetry bugs
+    // Set to 0 for "full" 3D wave.
+    const int dir = pin->GetOrAddInteger("mhdmodes", "dir", 0);
+    const Real amp = pin->GetOrAddReal("mhdmodes", "amp", 1.e-4);
+
+    // Note the modes below don't work right if you manually set these
+    // TODO generate modes on the fly for any k values
+    const Real k1 = pin->GetOrAddReal("mhdmodes", "k1", (dir == 1) ? 0. : 2. * M_PI);
+    const Real k2 = pin->GetOrAddReal("mhdmodes", "k2", (dir == 2) ? 0. : 2. * M_PI);
+    const Real k3 = pin->GetOrAddReal("mhdmodes", "k3", (dir == 3) ? 0. : 2. * M_PI);
+    // Likewise
+    const Real B10 = pin->GetOrAddReal("mhdmodes", "B10", (dir == 0 || dir == 3) ? 1.0 : 0. );
+    const Real B20 = pin->GetOrAddReal("mhdmodes", "B20", (dir == 1) ? 1.0 : 0. );
+    const Real B30 = pin->GetOrAddReal("mhdmodes", "B30", (dir == 2) ? 1.0 : 0. );
 
     std::complex<Real> omega;
     Real drho = 0, du = 0;
     Real du1 = 0, du2 = 0, du3 = 0;
     Real dB1 = 0, dB2 = 0, dB3 = 0;
-
     // Eigenmode definitions
     if (dir == 0) {
         // 3D (1,1,1) wave
-        B10 = 1.;
         if (nmode == 0) { // Entropy
             drho = 1.;
         } else if (nmode == 1) { // Slow
@@ -146,15 +133,6 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
     else
     {
         // 2D (1,1,0), (1,0,1), (0,1,1) wave
-        // Constant field direction
-        if (dir == 1) {
-            B20 = 1.;
-        } else if (dir == 2) {
-            B30 = 1.;
-        } else if (dir == 3) {
-            B10 = 1.;
-        }
-
         if (nmode == 0) { // Entropy
             drho = 1.;
         } else if (nmode == 1) { // Slow
@@ -212,6 +190,31 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
         }
     }
 
+    // Record the parameters
+    // This might be useful to read when checking, too...
+    // TODO 
+    pin->SetReal("mhdmodes", "omega_real", omega.real());
+    pin->SetReal("mhdmodes", "omega_imag", omega.imag());
+
+    pin->SetReal("mhdmodes", "drho", drho);
+    pin->SetReal("mhdmodes", "du", du);
+    pin->SetReal("mhdmodes", "du1", du1);
+    pin->SetReal("mhdmodes", "du2", du2);
+    pin->SetReal("mhdmodes", "du3", du3);
+    pin->SetReal("mhdmodes", "dB1", dB1);
+    pin->SetReal("mhdmodes", "dB2", dB2);
+    pin->SetReal("mhdmodes", "dB3", dB3);
+
+    // Set B field parameters for our mode
+    pin->GetOrAddString("b_field", "type", "wave");
+    pin->GetOrAddReal("b_field", "b10", B10);
+    pin->GetOrAddReal("b_field", "b20", B20);
+    pin->GetOrAddReal("b_field", "b30", B30);
+    pin->GetOrAddReal("b_field", "amp_B1", amp*dB1);
+    pin->GetOrAddReal("b_field", "amp_B2", amp*dB2);
+    pin->GetOrAddReal("b_field", "amp_B3", amp*dB3);
+    pin->GetOrAddReal("b_field", "phase", 0.);
+
     IndexDomain domain = IndexDomain::interior;
     IndexRange ib = pmb->cellbounds.GetBoundsI(domain);
     IndexRange jb = pmb->cellbounds.GetBoundsJ(domain);
@@ -220,16 +223,12 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             Real X[GR_DIM];
             G.coord_embed(k, j, i, Loci::center, X);
-
-            Real mode = amp * cos(k1 * X[1] + k2 * X[2] + k3 * X[3]);
+            Real mode = amp * m::cos(k1 * X[1] + k2 * X[2] + k3 * X[3]);
             rho(k, j, i) = rho0 + drho * mode;
             u(k, j, i) = u0 + du * mode;
             uvec(0, k, j, i) = u10 + du1 * mode;
             uvec(1, k, j, i) = u20 + du2 * mode;
             uvec(2, k, j, i) = u30 + du3 * mode;
-            B_P(0, k, j, i) = B10 + dB1 * mode;
-            B_P(1, k, j, i) = B20 + dB2 * mode;
-            B_P(2, k, j, i) = B30 + dB3 * mode;
         }
     );
 
diff --git a/kharma/prob/orszag_tang.hpp b/kharma/prob/orszag_tang.hpp
index 41fd17f0..099edbec 100644
--- a/kharma/prob/orszag_tang.hpp
+++ b/kharma/prob/orszag_tang.hpp
@@ -36,14 +36,20 @@ TaskStatus InitializeOrszagTang(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
     // Default phase puts the current sheet in the middle of the domain
     const Real phase = pin->GetOrAddReal("orszag_tang", "phase", M_PI);
 
-    // TODO coord_embed for snake coords?
-
+    // Set parameters for B field, which will get added differently for flux vs face
+    // In a questionable decision, we allow overriding these
+    pin->GetOrAddString("b_field", "type", "orszag_tang_a");
+    pin->GetOrAddReal("b_field", "amp_B1", tscale);
+    pin->GetOrAddReal("b_field", "amp_B2", tscale);
+    pin->GetOrAddReal("b_field", "phase", phase);
+
+    // Set the non-B values
     IndexDomain domain = IndexDomain::entire;
     IndexRange3 b = KDomain::GetRange(rc, domain);
     pmb->par_for("ot_init", b.ks, b.ke, b.js, b.je, b.is, b.ie,
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             Real X[GR_DIM];
-            G.coord(k, j, i, Loci::center, X);
+            G.coord_embed(k, j, i, Loci::center, X);
             rho(k, j, i) = 25./9.;
             u(k, j, i) = 5./(3.*(gam - 1.)) * tscale * tscale;
             uvec(0, k, j, i) = -m::sin(X[2] + phase) * tscale;
@@ -52,46 +58,5 @@ TaskStatus InitializeOrszagTang(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
         }
     );
 
-    if (pmb->packages.AllPackages().count("B_CT")) {
-        auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
-        // Halo one zone right for faces
-        // We don't need any more than that, since curls never take d1dx1
-        IndexRange3 bA = KDomain::GetRange(rc, IndexDomain::entire, 0, 0);
-        IndexSize3 s = KDomain::GetBlockSize(rc);
-        GridVector A("A", NVEC, s.n3, s.n2, s.n1);
-        pmb->par_for("ot_A", bA.ks, bA.ke, bA.js, bA.je, bA.is, bA.ie,
-            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-                Real Xembed[GR_DIM];
-                G.coord(k, j, i, Loci::corner, Xembed);
-                A(V3, k, j, i)  = (-0.5*std::cos(2*Xembed[1] + phase)
-                                   + std::cos(Xembed[2] + phase)) * tscale;
-            }
-        );
-        // This fills a couple zones outside the exact interior with bad data
-        IndexRange3 bB = KDomain::GetRange(rc, domain, 0, -1);
-        pmb->par_for("ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
-            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-                B_CT::curl_2D(G, A, B_Uf, k, j, i);
-            }
-        );
-        B_CT::BlockUtoP(rc.get(), IndexDomain::entire, false);
-        double max_divb = B_CT::BlockMaxDivB(rc.get());
-        std::cout << "Block max DivB: " << max_divb << std::endl;
-
-    } else if (pmb->packages.AllPackages().count("B_FluxCT") ||
-               pmb->packages.AllPackages().count("B_CD")) {
-        GridVector B_P = rc->Get("prims.B").data;
-        pmb->par_for("ot_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-                Real X[GR_DIM];
-                G.coord(k, j, i, Loci::center, X);
-                B_P(V1, k, j, i) = -m::sin(X[2] + phase) * tscale;
-                B_P(V2, k, j, i) = m::sin(2.*(X[1] + phase)) * tscale;
-                B_P(V3, k, j, i) = 0.;
-            }
-        );
-        B_FluxCT::BlockPtoU(rc.get(), IndexDomain::entire, false);
-    }
-
     return TaskStatus::complete;
 }
diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp
index cd2c0000..1fc7a790 100644
--- a/kharma/prob/post_initialize.cpp
+++ b/kharma/prob/post_initialize.cpp
@@ -82,7 +82,7 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
             SeedBField(md.get(), pin);
 
             // If we're doing a torus problem or explicitly ask for it,
-            // normalize the magnetic field according to the density
+            // normalize the magnetic field according to the max density
             bool is_torus = pin->GetString("parthenon/job", "problem_id") == "torus";
             if (pin->GetOrAddBoolean("b_field", "norm", is_torus)) {
                 NormalizeBField(md.get(), pin);
@@ -142,4 +142,6 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
     KHARMADriver::SyncAllBounds(md);
     // And make sure the trivial primitive values are up-to-date
     //Packages::MeshUtoPExceptMHD(md.get(), IndexDomain::entire, false);
+
+    // TODO output parsed parameters now we have *everything* including any problem configs for B field
 }
diff --git a/kharma/prob/problem.cpp b/kharma/prob/problem.cpp
index 42f85a40..2c0a8b11 100644
--- a/kharma/prob/problem.cpp
+++ b/kharma/prob/problem.cpp
@@ -41,6 +41,7 @@
 #include "gr_coordinates.hpp"
 #include "grmhd.hpp"
 #include "grmhd_functions.hpp"
+#include "perturbation.hpp"
 #include "types.hpp"
 
 // Problem initialization headers
@@ -99,8 +100,6 @@ void KHARMA::ProblemGenerator(MeshBlock *pmb, ParameterInput *pin)
     // GRMHD
     } else if (prob == "bondi") {
         status = InitializeBondi(rc, pin);
-    } else if (prob == "bz_monopole") {
-        status = InitializeBZMonopole(rc, pin);
     // Electrons
     } else if (prob == "noh") {
         status = InitializeNoh(rc, pin);
@@ -122,10 +121,13 @@ void KHARMA::ProblemGenerator(MeshBlock *pmb, ParameterInput *pin)
         status = InitializeFMTorus(rc, pin);
     } else if (prob == "resize_restart") {
         status = ReadIharmRestart(rc, pin);
-    } else if (prob == "resize_restart_kharma") { // Hyerin
+    } else if (prob == "resize_restart_kharma") {
         status = ReadKharmaRestart(rc, pin);
     } else if (prob == "gizmo") {
         status = InitializeGIZMO(rc, pin);
+    } else if (prob == "vacuum") {
+        // No need for a separate initializer, just seed w/floors
+        status = Floors::ApplyInitialFloors(pin, rc.get(), IndexDomain::interior);
     }
 
     // If we didn't initialize a problem, yell
diff --git a/kharma/prob/seed_B.cpp b/kharma/prob/seed_B.cpp
index 5476331c..c85b5fd9 100644
--- a/kharma/prob/seed_B.cpp
+++ b/kharma/prob/seed_B.cpp
@@ -33,10 +33,12 @@
  */
 #include "seed_B.hpp"
 
-#include "seed_B_impl.hpp"
+#include "b_ct.hpp"
+#include "b_flux_ct.hpp"
 
 #include "boundaries.hpp"
 #include "coordinate_utils.hpp"
+#include "domain.hpp"
 #include "fm_torus.hpp"
 #include "grmhd_functions.hpp"
 
@@ -74,6 +76,268 @@ Real MinBeta(MeshData<Real> *md)
     return Reductions::DomainReduction<Reductions::Var::beta, Real>(md, UserHistoryOperation::min);
 }
 
+
+template <BSeedType Seed>
+TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDomain domain = IndexDomain::entire)
+{
+    auto pmb = rc->GetBlockPointer();
+    auto pkgs = pmb->packages.AllPackages();
+
+    // Fields
+    GridScalar rho = rc->Get("prims.rho").data;
+    const auto &G = pmb->coords;
+
+    // Parameters
+    std::string b_field_type = pin->GetString("b_field", "type");
+    auto prob = pin->GetString("parthenon/job", "problem_id");
+    bool is_torus = (prob == "torus");
+
+    // Indices
+    // TODO handle filling faces with domain < entire more gracefully
+    IndexRange3 b = KDomain::GetRange(rc, domain);
+    int ndim = pmb->pmy_mesh->ndim;
+
+    // Shortcut to field values for easy fields
+    if constexpr (Seed == BSeedType::constant ||
+                  Seed == BSeedType::monopole ||
+                  Seed == BSeedType::monopole_cube ||
+                  Seed == BSeedType::orszag_tang)
+    {
+        // All custom B fields should set what they need of these
+        const Real b10 = pin->GetOrAddReal("b_field", "B10", 0.);
+        const Real b20 = pin->GetOrAddReal("b_field", "B20", 0.);
+        const Real b30 = pin->GetOrAddReal("b_field", "B30", 0.);
+        const Real k1 = pin->GetOrAddReal("b_field", "k1", 0.);
+        const Real k2 = pin->GetOrAddReal("b_field", "k2", 0.);
+        const Real k3 = pin->GetOrAddReal("b_field", "k3", 0.);
+        const Real phase = pin->GetOrAddReal("b_field", "phase", 0.);
+        const Real amp_B1 = pin->GetOrAddReal("b_field", "amp_B1", 0.);
+        const Real amp_B2 = pin->GetOrAddReal("b_field", "amp_B2", 0.);
+        const Real amp_B3 = pin->GetOrAddReal("b_field", "amp_B3", 0.);
+        const Real amp2_B1 = pin->GetOrAddReal("b_field", "amp2_B1", 0.);
+        const Real amp2_B2 = pin->GetOrAddReal("b_field", "amp2_B2", 0.);
+        const Real amp2_B3 = pin->GetOrAddReal("b_field", "amp2_B3", 0.);
+
+        if (pkgs.count("B_CT")) {
+            auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
+            // Fill at 3 different locations
+            pmb->par_for(
+                "B_field_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
+                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                    GReal Xembed[GR_DIM];
+                    double null1, null2;
+                    double B_Pf1, B_Pf2, B_Pf3;
+                    G.coord_embed(k, j, i, Loci::face1, Xembed);
+                    GReal gdet = G.gdet(Loci::face1, j, i);
+                    B_Pf1 = b10;
+                    seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
+                                 amp_B1, amp_B2, amp_B3,
+                                 amp2_B1, amp2_B2, amp2_B3,
+                                 B_Pf1, null1, null2);
+                    B_Uf(F1, 0, k, j, i) = B_Pf1 * gdet;
+
+                    G.coord_embed(k, j, i, Loci::face2, Xembed);
+                    gdet = G.gdet(Loci::face2, j, i);
+                    B_Pf2 = b20;
+                    seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
+                                 amp_B1, amp_B2, amp_B3,
+                                 amp2_B1, amp2_B2, amp2_B3,
+                                 null1, B_Pf2, null2);
+                    B_Uf(F2, 0, k, j, i) = B_Pf2;
+
+                    G.coord_embed(k, j, i, Loci::face3, Xembed);
+                    gdet = G.gdet(Loci::face3, j, i);
+                    B_Pf3 = b30;
+                    seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
+                                 amp_B1, amp_B2, amp_B3,
+                                 amp2_B1, amp2_B2, amp2_B3,
+                                 null1, null2, B_Pf3);
+                    B_Uf(F3, 0, k, j, i) = B_Pf3 * gdet;
+                }
+            );
+            // Update primitive variables
+            B_CT::BlockUtoP(rc, domain);
+        } else if (pkgs.count("B_FluxCT")) {
+            GridVector B_P = rc->Get("prims.B").data;
+            pmb->par_for(
+                "B_field_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
+                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                    GReal Xembed[GR_DIM];
+                    G.coord_embed(k, j, i, Loci::center, Xembed);
+                    const GReal gdet = G.gdet(Loci::center, j, i);
+                    seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
+                                 amp_B1, amp_B2, amp_B3,
+                                 amp2_B1, amp2_B2, amp2_B3,
+                                 B_P(V1, k, j, i),
+                                 B_P(V2, k, j, i),
+                                 B_P(V3, k, j, i));
+                }
+            );
+            // We still need to update conserved flux values, but then we're done
+            B_FluxCT::BlockPtoU(rc, domain);
+        }
+        return TaskStatus::complete;
+    } else { // Seed with vector potential A otherwise
+        // Require and load what we need if necessary
+        Real A0 = pin->GetOrAddReal("b_field", "A0", 0.);
+        Real min_A = pin->GetOrAddReal("b_field", "min_A", 0.2);
+        // Init-specific loads
+        Real a, rin, rmax, gam, kappa, rho_norm, arg1;
+        Real tilt = 0; // Needs to be initialized
+        switch (Seed) {
+        case BSeedType::sane:
+        case BSeedType::mad:
+        case BSeedType::mad_quadrupole:
+        case BSeedType::r3s3:
+        case BSeedType::r5s5:
+        case BSeedType::gaussian:
+            // Torus parameters
+            rin = pin->GetReal("torus", "rin");
+            rmax = pin->GetReal("torus", "rmax");
+            kappa = pin->GetReal("torus", "kappa");
+            tilt = pin->GetReal("torus", "tilt") / 180. * M_PI;
+            // Other things we need only for torus evaluation
+            gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
+            rho_norm = pmb->packages.Get("GRMHD")->Param<Real>("rho_norm");
+            a = G.coords.get_a();
+            break;
+        case BSeedType::orszag_tang_a:
+            A0 = pin->GetReal("orszag_tang", "tscale");
+            arg1 = pin->GetReal("orszag_tang", "phase");
+            break;
+        default:
+            break;
+        }
+
+        // For all other fields...
+        // Find the magnetic vector potential.  In X3 symmetry only A_phi is non-zero,
+        // But for tilted conditions we must keep track of all components
+        IndexSize3 sz = KDomain::GetBlockSize(rc);
+        ParArrayND<double> A("A", NVEC, sz.n3, sz.n2, sz.n1);
+        pmb->par_for(
+            "B_field_A", b.ks, b.ke, b.js, b.je, b.is, b.ie,
+            KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                GReal Xnative[GR_DIM];
+                GReal Xembed[GR_DIM], Xmidplane[GR_DIM];
+                G.coord(k, j, i, Loci::corner, Xnative);
+                G.coord_embed(k, j, i, Loci::corner, Xembed);
+                // What are our corresponding "midplane" values for evaluating the function?
+                rotate_polar(Xembed, tilt, Xmidplane);
+                const GReal r = Xmidplane[1], th = Xmidplane[2];
+
+                // In case we need zone sizes
+                const GReal dxc[GR_DIM] = {0., G.Dxc<1>(i), G.Dxc<2>(j), G.Dxc<3>(k)};
+
+                // This is written under the assumption re-computed rho is more accurate than a bunch
+                // of averaging in a meaningful way.  Just use the average if not.
+                Real rho_av;
+                if (is_torus) {
+                    // Find rho at corner directly for torii
+                    rho_av = fm_torus_rho(a, rin, rmax, gam, kappa, r, th) / rho_norm;
+                } else {
+                    // Use averages for anything else
+                    // This loop runs over every corner. Centers do not exist before the first
+                    // or after the last, so use the last (ghost) zones available.
+                    const int ii = clip((uint)i, b.is + 1, b.ie);
+                    const int jj = clip((uint)j, b.js + 1, b.je);
+                    const int kk = clip((uint)k, b.ks + 1, b.ke);
+                    if (ndim > 2)
+                    {
+                        rho_av = (rho(kk, jj, ii) + rho(kk, jj, ii - 1) +
+                                rho(kk, jj - 1, ii) + rho(kk, jj - 1, ii - 1) +
+                                rho(kk - 1, jj, ii) + rho(kk - 1, jj, ii - 1) +
+                                rho(kk - 1, jj - 1, ii) + rho(kk - 1, jj - 1, ii - 1)) /
+                                8;
+                    }
+                    else
+                    {
+                        rho_av = (rho(kk, jj, ii) + rho(kk, jj, ii - 1) +
+                                rho(kk, jj - 1, ii) + rho(kk, jj - 1, ii - 1)) /
+                                4;
+                    }
+                }
+
+                Real Aphi = seed_a<Seed>(Xmidplane, dxc, rho_av, rin, min_A, A0, arg1);
+
+                if (tilt != 0.0) {
+                    // This is *covariant* A_mu of an untilted disk
+                    const double A_untilt_lower[GR_DIM] = {0., 0., 0., Aphi};
+                    // Raise to contravariant vector, since rotate_polar_vec will need that.
+                    // Note we have to do this in the midplane!
+                    // The coord_to_native calculation involves an iterative solve for MKS/FMKS
+                    GReal Xnative_midplane[GR_DIM] = {0}, gcon_midplane[GR_DIM][GR_DIM] = {0};
+                    G.coords.coord_to_native(Xmidplane, Xnative_midplane);
+                    G.coords.gcon_native(Xnative_midplane, gcon_midplane);
+                    double A_untilt[GR_DIM] = {0};
+                    DLOOP2 A_untilt[mu] += gcon_midplane[mu][nu] * A_untilt_lower[nu];
+
+                    // Then rotate
+                    double A_tilt[GR_DIM] = {0};
+                    double A_untilt_embed[GR_DIM] = {0}, A_tilt_embed[GR_DIM] = {0};
+                    G.coords.con_vec_to_embed(Xnative_midplane, A_untilt, A_untilt_embed);
+                    rotate_polar_vec(Xmidplane, A_untilt_embed, -tilt, Xembed, A_tilt_embed);
+                    G.coords.con_vec_to_native(Xnative, A_tilt_embed, A_tilt);
+
+                    // Lower the result as we need curl(A_mu).  Done at local zone.
+                    double A_tilt_lower[GR_DIM] = {0};
+                    G.lower(A_tilt, A_tilt_lower, k, j, i, Loci::corner);
+                    VLOOP A(v, k, j, i) = A_tilt_lower[1 + v];
+                } else {
+                    // Some problems rely on a very accurate A->B, which the rotation lacks.
+                    // So, we preserve exact values in the no-tilt case.
+                    A(V3, k, j, i) = Aphi;
+                }
+            });
+
+        if (pkgs.count("B_CT"))
+        {
+            auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
+            // This fills a couple zones outside the exact interior with bad data
+            // Careful of that w/e.g. Dirichlet bounds.
+            IndexRange3 bB = KDomain::GetRange(rc, domain, 0, -1);
+            if (ndim > 2) {
+                pmb->par_for(
+                    "ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
+                    KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                        B_CT::curl_3D(G, A, B_Uf, k, j, i);
+                    });
+            } else if (ndim > 1) {
+                pmb->par_for(
+                    "ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
+                    KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                        B_CT::curl_2D(G, A, B_Uf, k, j, i);
+                    });
+            } else {
+                throw std::runtime_error("Must initialize 1D field directly!");
+            }
+            B_CT::BlockUtoP(rc, domain);
+        } else if (pkgs.count("B_FluxCT")) {
+            // Calculate B-field
+            GridVector B_U = rc->Get("cons.B").data;
+            IndexRange3 bl = KDomain::GetRange(rc, domain, 0, -1); // TODO will need changes if domain < entire
+            if (ndim > 2) {
+                pmb->par_for(
+                    "B_field_B_3D", bl.ks, bl.ke, bl.js, bl.je, bl.is, bl.ie,
+                    KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                        B_FluxCT::averaged_curl_3D(G, A, B_U, k, j, i);
+                    });
+            } else if (ndim > 1) {
+                pmb->par_for(
+                    "B_field_B_2D", bl.ks, bl.ke, bl.js, bl.je, bl.is, bl.ie,
+                    KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
+                        B_FluxCT::averaged_curl_2D(G, A, B_U, k, j, i);
+                    });
+            } else {
+                throw std::runtime_error("Must initialize 1D field directly!");
+            }
+            // Finally, make sure we initialize the primitive field too
+            B_FluxCT::BlockUtoP(rc, domain);
+        }
+
+        return TaskStatus::complete;
+    }
+}
+
 TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin)
 {
     Flag("SeedBField");
@@ -81,7 +345,7 @@ TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin)
     auto pmesh = md->GetMeshPointer();
     const int verbose = pmesh->packages.Get("Globals")->Param<int>("verbose");
 
-    if (verbose) {
+    if (MPIRank0() && verbose) {
         std::cout << "Seeding B field with type " << b_field_type << std::endl;
     }
 
@@ -115,6 +379,12 @@ TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin)
             status = SeedBFieldType<BSeedType::bz_monopole>(rc, pin);
         } else if (b_field_type == "vertical") {
             status = SeedBFieldType<BSeedType::vertical>(rc, pin);
+        } else if (b_field_type == "orszag_tang") {
+            status = SeedBFieldType<BSeedType::orszag_tang>(rc, pin);
+        } else if (b_field_type == "orszag_tang_a") {
+            status = SeedBFieldType<BSeedType::orszag_tang_a>(rc, pin);
+        } else if (b_field_type == "wave") {
+            status = SeedBFieldType<BSeedType::wave>(rc, pin);
         } else {
             throw std::invalid_argument("Magnetic field seed type not supported: " + b_field_type);
         }
diff --git a/kharma/prob/seed_B.hpp b/kharma/prob/seed_B.hpp
index 0f08487d..61537a2f 100644
--- a/kharma/prob/seed_B.hpp
+++ b/kharma/prob/seed_B.hpp
@@ -40,11 +40,11 @@ TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin);
 
 TaskStatus NormalizeBField(MeshData<Real> *md, ParameterInput *pin);
 
-// Internal representation of the field initialization preference for quick switch
-// Avoids string comparsion in kernels
-enum BSeedType{constant, monopole, monopole_cube, sane, mad, mad_quadrupole, r3s3, r5s5, gaussian, bz_monopole, vertical};
+// Internal representation of the field initialization preference, used for templating
+enum BSeedType{constant, monopole, monopole_cube, orszag_tang, orszag_tang_a, wave,
+                sane, mad, mad_quadrupole, r3s3, r5s5, gaussian, bz_monopole, vertical};
 
-#define SEEDA_ARGS GReal *x, double rho, double rin, double min_A, double A0
+#define SEEDA_ARGS GReal *x, const GReal *dxc, double rho, double rin, double min_A, double A0, double arg1
 
 // This will also act as the default implementation for unspecified types,
 // which should all be filled as B field by seed_b below.
@@ -98,7 +98,6 @@ KOKKOS_INLINE_FUNCTION Real seed_a<BSeedType::r5s5>(SEEDA_ARGS)
 
 // Pure vertical threaded field of gaussian strength with FWHM 2*rin (i.e. HM@rin)
 // centered at BH center
-// Block is to avoid compiler whinging about initialization
 template<>
 KOKKOS_INLINE_FUNCTION Real seed_a<BSeedType::gaussian>(SEEDA_ARGS)
 {
@@ -114,31 +113,57 @@ KOKKOS_INLINE_FUNCTION Real seed_a<BSeedType::vertical>(SEEDA_ARGS)
     return A0 * x[1] * m::sin(x[2]) / 2.;
 }
 
-#define SEEDB_ARGS GReal *x, GReal gdet, double b10, double b20, double b30, double &B1, double &B2, double &B3
+template<>
+KOKKOS_INLINE_FUNCTION Real seed_a<BSeedType::orszag_tang_a>(SEEDA_ARGS)
+{
+    return A0 * (-0.5 * std::cos(2*x[1] + arg1)
+                        + std::cos(x[2] + arg1));
+}
+
+#undef SEEDA_ARGS
+#define SEEDB_ARGS GReal *x, GReal gdet, double k1, double k2, double k3, double phase, \
+                    double amp_B1, double amp_B2, double amp_B3, \
+                    double amp2_B1, double amp2_B2, double amp2_B3, \
+                    double &B1, double &B2, double &B3
 
 template<BSeedType T>
-KOKKOS_INLINE_FUNCTION void seed_b(SEEDB_ARGS) {}
+KOKKOS_INLINE_FUNCTION void seed_b(SEEDB_ARGS) { B1 = 0./0.; B2 = 0./0.; B3 = 0./0.; }
 
+// Constant field of B10, B20, B30 is always set
 template<>
-KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::constant>(SEEDB_ARGS)
-{
-    B1 = b10;
-    B2 = b20;
-    B3 = b30;
-}
+KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::constant>(SEEDB_ARGS) {}
 
+// Reduce radial component by gdet for constant flux
 template<>
 KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::monopole>(SEEDB_ARGS)
 {
-    B1 = b10 / gdet;
-    B2 = 0.;
-    B3 = 0.;
+    B1 /= gdet;
 }
 
+// Reduce radial component by the cube of radius
 template<>
 KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::monopole_cube>(SEEDB_ARGS)
 {
-    B1 = 1 / (x[1]*x[1]*x[1]);
-    B2 = 0.;
-    B3 = 0.;
+    B1 /= (x[1]*x[1]*x[1]);
 }
+
+// For mhdmodes or linear waves tests
+template<>
+KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::wave>(SEEDB_ARGS)
+{
+    const Real smode = m::cos(k1 * x[1] + k2 * x[2] + k3 * x[3] + phase);
+    const Real cmode = m::cos(k1 * x[1] + k2 * x[2] + k3 * x[3] + phase);
+    B1 += amp_B1 * cmode + amp2_B1 * smode;
+    B2 += amp_B2 * cmode + amp2_B2 * smode;
+    B3 += amp_B3 * cmode + amp2_B3 * smode;
+}
+
+// For Orszag-Tang vortex
+template<>
+KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::orszag_tang>(SEEDB_ARGS)
+{
+    B1 -= amp_B1 * m::sin(    x[2] + phase );
+    B2 += amp_B2 * m::sin(2.*(x[1] + phase));
+}
+
+#undef SEEDB_ARGS
\ No newline at end of file
diff --git a/kharma/prob/seed_B_impl.hpp b/kharma/prob/seed_B_impl.hpp
deleted file mode 100644
index bf7dbea8..00000000
--- a/kharma/prob/seed_B_impl.hpp
+++ /dev/null
@@ -1,295 +0,0 @@
-/*
- *  File: seed_B.hpp
- *
- *  BSD 3-Clause License
- *
- *  Copyright (c) 2020, AFD Group at UIUC
- *  All rights reserved.
- *
- *  Redistribution and use in source and binary forms, with or without
- *  modification, are permitted provided that the following conditions are met:
- *
- *  1. Redistributions of source code must retain the above copyright notice, this
- *     list of conditions and the following disclaimer.
- *
- *  2. Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *  3. Neither the name of the copyright holder nor the names of its
- *     contributors may be used to endorse or promote products derived from
- *     this software without specific prior written permission.
- *
- *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
- *  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
- *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
- *  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
- *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
- *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-#pragma once
-
-#include "seed_B.hpp"
-
-#include "b_flux_ct.hpp"
-#include "b_ct.hpp"
-#include "boundaries.hpp"
-#include "domain.hpp"
-#include "fm_torus.hpp"
-
-template <BSeedType Seed>
-TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDomain domain = IndexDomain::entire)
-{
-    auto pmb = rc->GetBlockPointer();
-    auto pkgs = pmb->packages.AllPackages();
-
-    // Fields
-    GridScalar rho = rc->Get("prims.rho").data;
-    const auto &G = pmb->coords;
-
-    // Parameters
-    std::string b_field_type = pin->GetString("b_field", "type");
-    auto prob = pin->GetString("parthenon/job", "problem_id");
-    bool is_torus = (prob == "torus");
-
-    // Indices
-    IndexRange3 b = KDomain::GetRange(rc, domain);
-    int ndim = pmb->pmy_mesh->ndim;
-
-    // Shortcut to field values for easy fields
-    if constexpr (Seed == BSeedType::constant ||
-                  Seed == BSeedType::monopole ||
-                  Seed == BSeedType::monopole_cube)
-    {
-        if (pkgs.count("B_CT"))
-        {
-            auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
-            Real b10 = pin->GetOrAddReal("b_field", "b10", 0.);
-            Real b20 = pin->GetOrAddReal("b_field", "b20", 0.);
-            Real b30 = pin->GetOrAddReal("b_field", "b30", 0.);
-            // Fill at 3 different locations
-            // TODO this would need to be extended for domain < entire
-            pmb->par_for(
-                "B_field_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    GReal Xembed[GR_DIM];
-                    G.coord_embed(k, j, i, Loci::face1, Xembed);
-                    GReal gdet = G.gdet(Loci::face1, j, i);
-                    double tmp1, tmp2;
-                    seed_b<Seed>(Xembed, gdet, b10, b20, b30,
-                                 B_Uf(F1, 0, k, j, i), tmp1, tmp2);
-
-                    G.coord_embed(k, j, i, Loci::face2, Xembed);
-                    gdet = G.gdet(Loci::face2, j, i);
-                    seed_b<Seed>(Xembed, gdet, b10, b20, b30,
-                                 tmp1, B_Uf(F2, 0, k, j, i), tmp2);
-
-                    G.coord_embed(k, j, i, Loci::face3, Xembed);
-                    gdet = G.gdet(Loci::face3, j, i);
-                    seed_b<Seed>(Xembed, gdet, b10, b20, b30,
-                                 tmp1, tmp2, B_Uf(F3, 0, k, j, i));
-                });
-            // Update primitive variables
-            B_CT::BlockUtoP(rc, domain);
-        }
-        else if (pkgs.count("B_FluxCT"))
-        {
-            GridVector B_P = rc->Get("prims.B").data;
-            Real b10 = pin->GetOrAddReal("b_field", "b10", 0.);
-            Real b20 = pin->GetOrAddReal("b_field", "b20", 0.);
-            Real b30 = pin->GetOrAddReal("b_field", "b30", 0.);
-            pmb->par_for(
-                "B_field_B", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    GReal Xembed[GR_DIM];
-                    G.coord_embed(k, j, i, Loci::center, Xembed);
-                    const GReal gdet = G.gdet(Loci::center, j, i);
-                    seed_b<Seed>(Xembed, gdet, b10, b20, b30,
-                                 B_P(V1, k, j, i),
-                                 B_P(V2, k, j, i),
-                                 B_P(V3, k, j, i));
-                });
-            // We still need to update conserved flux values, but then we're done
-            B_FluxCT::BlockPtoU(rc, domain);
-        }
-        return TaskStatus::complete;
-    }
-
-    // Require and load what we need if necessary
-    // TODO this seems very inelegant. Also most of these should support non-FM-torii
-    // as long as we don't call fm_torus_rho below
-    Real a, rin, rmax, gam, kappa, rho_norm;
-    Real tilt = 0; // Needs to be initialized
-    switch (Seed)
-    {
-    case BSeedType::sane:
-    case BSeedType::mad:
-    case BSeedType::mad_quadrupole:
-    case BSeedType::r3s3:
-    case BSeedType::r5s5:
-    case BSeedType::gaussian:
-        if (!is_torus)
-            throw std::invalid_argument("Magnetic field seed " + b_field_type + " supports only torus problems!");
-        // Torus parameters
-        rin = pin->GetReal("torus", "rin");
-        rmax = pin->GetReal("torus", "rmax");
-        kappa = pin->GetReal("torus", "kappa");
-        tilt = pin->GetReal("torus", "tilt") / 180. * M_PI;
-        // Other things we need only for torus evaluation
-        gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
-        rho_norm = pmb->packages.Get("GRMHD")->Param<Real>("rho_norm");
-        a = G.coords.get_a();
-        break;
-    default:
-        break;
-    }
-
-    Real A0 = pin->GetOrAddReal("b_field", "A0", 0.);
-    Real min_A = pin->GetOrAddReal("b_field", "min_A", 0.2); // TODO back compat?  Doubtful was used
-
-    // For all other fields...
-    // Find the magnetic vector potential.  In X3 symmetry only A_phi is non-zero,
-    // But for tilted conditions we must keep track of all components
-    IndexSize3 sz = KDomain::GetBlockSize(rc);
-    ParArrayND<double> A("A", NVEC, sz.n3, sz.n2, sz.n1);
-    pmb->par_for(
-        "B_field_A", b.ks, b.ke, b.js, b.je, b.is, b.ie,
-        KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-            GReal Xnative[GR_DIM];
-            GReal Xembed[GR_DIM], Xmidplane[GR_DIM];
-            G.coord(k, j, i, Loci::corner, Xnative);
-            G.coord_embed(k, j, i, Loci::corner, Xembed);
-            // What are our corresponding "midplane" values for evaluating the function?
-            rotate_polar(Xembed, tilt, Xmidplane);
-            const GReal r = Xmidplane[1], th = Xmidplane[2];
-
-            // This is written under the assumption re-computed rho is more accurate than a bunch
-            // of averaging in a meaningful way.  Just use the average if not.
-            Real rho_av;
-            if (is_torus)
-            {
-                // Find rho at corner directly for torii
-                rho_av = fm_torus_rho(a, rin, rmax, gam, kappa, r, th) / rho_norm;
-            }
-            else
-            {
-                // Use averages for anything else
-                // This loop runs over every corner. Centers do not exist before the first
-                // or after the last, so use the last (ghost) zones available.
-                const int ii = clip((uint)i, b.is + 1, b.ie);
-                const int jj = clip((uint)j, b.js + 1, b.je);
-                const int kk = clip((uint)k, b.ks + 1, b.ke);
-                if (ndim > 2)
-                {
-                    rho_av = (rho(kk, jj, ii) + rho(kk, jj, ii - 1) +
-                              rho(kk, jj - 1, ii) + rho(kk, jj - 1, ii - 1) +
-                              rho(kk - 1, jj, ii) + rho(kk - 1, jj, ii - 1) +
-                              rho(kk - 1, jj - 1, ii) + rho(kk - 1, jj - 1, ii - 1)) /
-                             8;
-                }
-                else
-                {
-                    rho_av = (rho(kk, jj, ii) + rho(kk, jj, ii - 1) +
-                              rho(kk, jj - 1, ii) + rho(kk, jj - 1, ii - 1)) /
-                             4;
-                }
-            }
-
-            Real Aphi = seed_a<Seed>(Xmidplane, rho_av, rin, min_A, A0);
-
-            if (tilt != 0.0)
-            {
-                // This is *covariant* A_mu of an untilted disk
-                const double A_untilt_lower[GR_DIM] = {0., 0., 0., Aphi};
-                // Raise to contravariant vector, since rotate_polar_vec will need that.
-                // Note we have to do this in the midplane!
-                // The coord_to_native calculation involves an iterative solve for MKS/FMKS
-                GReal Xnative_midplane[GR_DIM] = {0}, gcon_midplane[GR_DIM][GR_DIM] = {0};
-                G.coords.coord_to_native(Xmidplane, Xnative_midplane);
-                G.coords.gcon_native(Xnative_midplane, gcon_midplane);
-                double A_untilt[GR_DIM] = {0};
-                DLOOP2 A_untilt[mu] += gcon_midplane[mu][nu] * A_untilt_lower[nu];
-
-                // Then rotate
-                double A_tilt[GR_DIM] = {0};
-                double A_untilt_embed[GR_DIM] = {0}, A_tilt_embed[GR_DIM] = {0};
-                G.coords.con_vec_to_embed(Xnative_midplane, A_untilt, A_untilt_embed);
-                rotate_polar_vec(Xmidplane, A_untilt_embed, -tilt, Xembed, A_tilt_embed);
-                G.coords.con_vec_to_native(Xnative, A_tilt_embed, A_tilt);
-
-                // Lower the result as we need curl(A_mu).  Done at local zone.
-                double A_tilt_lower[GR_DIM] = {0};
-                G.lower(A_tilt, A_tilt_lower, k, j, i, Loci::corner);
-                VLOOP A(v, k, j, i) = A_tilt_lower[1 + v];
-            }
-            else
-            {
-                // Some problems rely on a very accurate A->B, which the rotation lacks.
-                // So, we preserve exact values in the no-tilt case.
-                A(V3, k, j, i) = Aphi;
-            }
-        });
-
-    if (pkgs.count("B_CT"))
-    {
-        auto B_Uf = rc->PackVariables(std::vector<std::string>{"cons.fB"});
-        // This fills a couple zones outside the exact interior with bad data
-        // Careful of that w/e.g. Dirichlet bounds.
-        IndexRange3 bB = KDomain::GetRange(rc, domain, 0, -1);
-        if (ndim > 2)
-        {
-            pmb->par_for(
-                "ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    B_CT::curl_3D(G, A, B_Uf, k, j, i);
-                });
-        }
-        else if (ndim > 1)
-        {
-            pmb->par_for(
-                "ot_B", bB.ks, bB.ke, bB.js, bB.je, bB.is, bB.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    B_CT::curl_2D(G, A, B_Uf, k, j, i);
-                });
-        }
-        else
-        {
-            throw std::runtime_error("Must initialize 1D field directly!");
-        }
-        B_CT::BlockUtoP(rc, domain);
-    }
-    else if (pkgs.count("B_FluxCT"))
-    {
-        // Calculate B-field
-        GridVector B_U = rc->Get("cons.B").data;
-        IndexRange3 bl = KDomain::GetRange(rc, domain, 0, -1); // TODO will need changes if domain < entire
-        if (ndim > 2)
-        {
-            pmb->par_for(
-                "B_field_B_3D", bl.ks, bl.ke, bl.js, bl.je, bl.is, bl.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    B_FluxCT::averaged_curl_3D(G, A, B_U, k, j, i);
-                });
-        }
-        else if (ndim > 1)
-        {
-            pmb->par_for(
-                "B_field_B_2D", bl.ks, bl.ke, bl.js, bl.je, bl.is, bl.ie,
-                KOKKOS_LAMBDA(const int &k, const int &j, const int &i) {
-                    B_FluxCT::averaged_curl_2D(G, A, B_U, k, j, i);
-                });
-        }
-        else
-        {
-            throw std::runtime_error("Must initialize 1D field directly!");
-        }
-        // Finally, make sure we initialize the primitive field too
-        B_FluxCT::BlockUtoP(rc, domain);
-    }
-
-    return TaskStatus::complete;
-}
\ No newline at end of file
diff --git a/kharma/prob/blob.hpp b/kharma/prob/utils/blob.hpp
similarity index 100%
rename from kharma/prob/blob.hpp
rename to kharma/prob/utils/blob.hpp
diff --git a/kharma/prob/hdf5_utils.cpp b/kharma/prob/utils/hdf5_utils.cpp
similarity index 100%
rename from kharma/prob/hdf5_utils.cpp
rename to kharma/prob/utils/hdf5_utils.cpp
diff --git a/kharma/prob/hdf5_utils.h b/kharma/prob/utils/hdf5_utils.h
similarity index 100%
rename from kharma/prob/hdf5_utils.h
rename to kharma/prob/utils/hdf5_utils.h
diff --git a/kharma/prob/interpolation.hpp b/kharma/prob/utils/interpolation.hpp
similarity index 100%
rename from kharma/prob/interpolation.hpp
rename to kharma/prob/utils/interpolation.hpp
diff --git a/kharma/prob/utils/perturbation.hpp b/kharma/prob/utils/perturbation.hpp
new file mode 100644
index 00000000..7a453302
--- /dev/null
+++ b/kharma/prob/utils/perturbation.hpp
@@ -0,0 +1,102 @@
+/* 
+ *  File: perturbation.hpp
+ *  
+ *  BSD 3-Clause License
+ *  
+ *  Copyright (c) 2020, AFD Group at UIUC
+ *  All rights reserved.
+ *  
+ *  Redistribution and use in source and binary forms, with or without
+ *  modification, are permitted provided that the following conditions are met:
+ *  
+ *  1. Redistributions of source code must retain the above copyright notice, this
+ *     list of conditions and the following disclaimer.
+ *  
+ *  2. Redistributions in binary form must reproduce the above copyright notice,
+ *     this list of conditions and the following disclaimer in the documentation
+ *     and/or other materials provided with the distribution.
+ *  
+ *  3. Neither the name of the copyright holder nor the names of its
+ *     contributors may be used to endorse or promote products derived from
+ *     this software without specific prior written permission.
+ *  
+ *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ *  AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ *  IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ *  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ *  FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ *  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ *  SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ *  CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ *  OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#pragma once
+
+#include "decs.hpp"
+
+#include <random>
+#include "Kokkos_Random.hpp"
+
+/**
+ * Perturb the internal energy by a uniform random proportion per cell.
+ * Resulting internal energies will be between u \pm u*u_jitter/2
+ * i.e. u_jitter=0.1 -> \pm 5% randomization, 0.95u to 1.05u
+ *
+ * @param u_jitter see description
+ * @param rng_seed is added to the MPI rank to seed the GSL RNG
+ */
+TaskStatus PerturbU(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInput *pin)
+{
+    auto pmb = rc->GetBlockPointer();
+    auto rho = rc->Get("prims.rho").data;
+    auto u = rc->Get("prims.u").data;
+
+    const Real u_jitter = pin->GetReal("perturbation", "u_jitter");
+    // Don't jitter values set by floors
+    const Real jitter_above_rho = pin->GetReal("floors", "rho_min_geom") + 1e-10;
+    // Note we add the MeshBlock gid to this value when seeding RNG,
+    // to get a new sequence for every block
+    const int rng_seed = pin->GetOrAddInteger("perturbation", "rng_seed", 31337);
+    // Print real seed used for all blocks, to ensure they're different
+    if (pmb->packages.Get("Globals")->Param<int>("verbose") > 1) {
+        std::cout << "Seeding RNG in block " << pmb->gid << " with value " << rng_seed + pmb->gid << std::endl;
+    }
+    const bool serial = pin->GetOrAddInteger("perturbation", "serial", false);
+
+    // Should we jitter ghosts? If first boundary sync doesn't work it's marginally less disruptive
+    IndexDomain domain = IndexDomain::interior;
+    const int is = pmb->cellbounds.is(domain), ie = pmb->cellbounds.ie(domain);
+    const int js = pmb->cellbounds.js(domain), je = pmb->cellbounds.je(domain);
+    const int ks = pmb->cellbounds.ks(domain), ke = pmb->cellbounds.ke(domain);
+
+    if (serial) {
+        // Serial version
+        // Probably guarantees better determinism, but CPU single-thread only
+        std::mt19937 gen(rng_seed + pmb->gid);
+        std::uniform_real_distribution<Real> dis(-u_jitter/2, u_jitter/2);
+
+        auto u_host = u.GetHostMirrorAndCopy();
+        for(int k=ks; k <= ke; k++)
+            for(int j=js; j <= je; j++)
+                for(int i=is; i <= ie; i++)
+                    u_host(k, j, i) *= 1. + dis(gen);
+        u.DeepCopy(u_host);
+    } else {
+        // Kokkos version
+        typedef typename Kokkos::Random_XorShift64_Pool<> RandPoolType;
+        RandPoolType rand_pool(rng_seed + pmb->gid);
+        typedef typename RandPoolType::generator_type gen_type;
+        pmb->par_for("perturb_u", ks, ke, js, je, is, ie,
+            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
+                if (rho(k, j, i) > jitter_above_rho) {
+                    gen_type rgen = rand_pool.get_state();
+                    u(k, j, i) *= 1. + Kokkos::rand<gen_type, Real>::draw(rgen, -u_jitter/2, u_jitter/2);
+                    rand_pool.free_state(rgen);
+                }
+            }
+        );
+    }
+
+    return TaskStatus::complete;
+}

From ca3b994154c73997a6fce7aa42e7e886bdb00732 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 3 Oct 2023 13:56:17 -0600
Subject: [PATCH 06/47] Reorganize parameter files

This finally organizes the different parameter files into folders.
Plenty of parfiles fit more than one category, feel free to move them.

Better parfile documentation is planned as we bring up the rest of
the tests.
---
 pars/{ => benchmark}/sane_perf.par            |  0
 pars/{ => benchmark}/scaling_torus.par        |  0
 pars/{ => bondi}/bondi.par                    |  0
 pars/{ => bondi}/bondi_b.par                  |  0
 pars/{ => bondi}/bondi_b_vertical.par         |  0
 pars/conducting_atmosphere.par                | 98 -------------------
 pars/{ => electrons}/driven_turbulence.par    |  0
 pars/{ => electrons}/hubble.par               |  0
 pars/{ => electrons}/noh.par                  | 12 +--
 pars/{ => electrons}/rest_conserve.par        |  2 +-
 pars/{ => emhd}/anisotropic_conduction.par    |  0
 pars/{ => emhd}/bondi_viscous.par             | 16 +--
 pars/{ => emhd}/emhdmodes.par                 |  0
 pars/{ => emhd}/orszag_tang_viscous.par       |  4 +-
 pars/emhdshock.par                            | 94 ------------------
 pars/orszag_tang_face_ct.par                  | 67 -------------
 pars/{ => restarts}/resize_restart.par        |  0
 pars/shocks/komissarov_collision.par          |  2 +-
 pars/shocks/komissarov_fast.par               |  2 +-
 pars/shocks/komissarov_shock_1.par            |  2 +-
 pars/shocks/komissarov_shock_2.par            |  2 +-
 pars/shocks/komissarov_slow.par               |  2 +-
 pars/shocks/komissarov_switch_off.par         |  2 +-
 pars/shocks/komissarov_switch_on.par          |  2 +-
 .../orszag_tang_refined.par}                  |  0
 pars/{ => smr}/sane2d_refined.par             |  0
 pars/{ => smr}/sane3d_refined.par             |  0
 pars/{ => tests}/bz_monopole.par              |  2 +-
 pars/{ => tests}/explosion.par                |  0
 pars/{ => tests}/kelvin_helmholtz.par         |  0
 pars/{ => tests}/mhdmodes.par                 | 12 +--
 pars/{ => tests}/orszag_tang.par              |  0
 pars/{ => tori_2d}/sane2d.par                 | 14 +--
 pars/{ => tori_2d}/sane2d_cooling.par         |  0
 pars/{ => tori_2d}/sane_divb_2d.par           |  0
 pars/{ => tori_3d}/eht_comp.par               |  0
 pars/{ => tori_3d}/mad.par                    |  0
 pars/{ => tori_3d}/mad_test.par               |  0
 pars/{ => tori_3d}/mad_tilt.par               |  0
 pars/{ => tori_3d}/sane.par                   |  0
 pars/{ => tori_3d}/sane_emhd.par              |  0
 pars/{ => tori_3d}/sane_imex.par              |  0
 pars/{ => tori_3d}/sane_tilt.par              |  0
 scripts/batch/polaris.qsub                    |  2 +-
 scripts/batch/scaling_delta.sb                |  4 +-
 scripts/batch/scaling_frontera.sb             |  4 +-
 scripts/batch/scaling_longhorn.sb             |  4 +-
 scripts/batch/scaling_polaris.qsub            |  2 +-
 scripts/batch/scaling_summit.bsub             |  2 +-
 tests/all_pars/run.sh                         | 10 +-
 tests/run_all.sh                              | 11 +++
 51 files changed, 64 insertions(+), 310 deletions(-)
 rename pars/{ => benchmark}/sane_perf.par (100%)
 rename pars/{ => benchmark}/scaling_torus.par (100%)
 rename pars/{ => bondi}/bondi.par (100%)
 rename pars/{ => bondi}/bondi_b.par (100%)
 rename pars/{ => bondi}/bondi_b_vertical.par (100%)
 delete mode 100644 pars/conducting_atmosphere.par
 rename pars/{ => electrons}/driven_turbulence.par (100%)
 rename pars/{ => electrons}/hubble.par (100%)
 rename pars/{ => electrons}/noh.par (90%)
 rename pars/{ => electrons}/rest_conserve.par (97%)
 rename pars/{ => emhd}/anisotropic_conduction.par (100%)
 rename pars/{ => emhd}/bondi_viscous.par (86%)
 rename pars/{ => emhd}/emhdmodes.par (100%)
 rename pars/{ => emhd}/orszag_tang_viscous.par (95%)
 delete mode 100644 pars/emhdshock.par
 delete mode 100644 pars/orszag_tang_face_ct.par
 rename pars/{ => restarts}/resize_restart.par (100%)
 rename pars/{orszag_tang_smr.par => smr/orszag_tang_refined.par} (100%)
 rename pars/{ => smr}/sane2d_refined.par (100%)
 rename pars/{ => smr}/sane3d_refined.par (100%)
 rename pars/{ => tests}/bz_monopole.par (96%)
 rename pars/{ => tests}/explosion.par (100%)
 rename pars/{ => tests}/kelvin_helmholtz.par (100%)
 rename pars/{ => tests}/mhdmodes.par (95%)
 rename pars/{ => tests}/orszag_tang.par (100%)
 rename pars/{ => tori_2d}/sane2d.par (79%)
 rename pars/{ => tori_2d}/sane2d_cooling.par (100%)
 rename pars/{ => tori_2d}/sane_divb_2d.par (100%)
 rename pars/{ => tori_3d}/eht_comp.par (100%)
 rename pars/{ => tori_3d}/mad.par (100%)
 rename pars/{ => tori_3d}/mad_test.par (100%)
 rename pars/{ => tori_3d}/mad_tilt.par (100%)
 rename pars/{ => tori_3d}/sane.par (100%)
 rename pars/{ => tori_3d}/sane_emhd.par (100%)
 rename pars/{ => tori_3d}/sane_imex.par (100%)
 rename pars/{ => tori_3d}/sane_tilt.par (100%)
 create mode 100755 tests/run_all.sh

diff --git a/pars/sane_perf.par b/pars/benchmark/sane_perf.par
similarity index 100%
rename from pars/sane_perf.par
rename to pars/benchmark/sane_perf.par
diff --git a/pars/scaling_torus.par b/pars/benchmark/scaling_torus.par
similarity index 100%
rename from pars/scaling_torus.par
rename to pars/benchmark/scaling_torus.par
diff --git a/pars/bondi.par b/pars/bondi/bondi.par
similarity index 100%
rename from pars/bondi.par
rename to pars/bondi/bondi.par
diff --git a/pars/bondi_b.par b/pars/bondi/bondi_b.par
similarity index 100%
rename from pars/bondi_b.par
rename to pars/bondi/bondi_b.par
diff --git a/pars/bondi_b_vertical.par b/pars/bondi/bondi_b_vertical.par
similarity index 100%
rename from pars/bondi_b_vertical.par
rename to pars/bondi/bondi_b_vertical.par
diff --git a/pars/conducting_atmosphere.par b/pars/conducting_atmosphere.par
deleted file mode 100644
index 694a8b1b..00000000
--- a/pars/conducting_atmosphere.par
+++ /dev/null
@@ -1,98 +0,0 @@
-# Hydrostatic conducting atmosphere
-# Try to maintain the ODE solution that represnts hydrostatic equilibrium
-# Checks the geometrical terms
-# IMPORTANT: This test is different from the other tests in its initialization
-#            It reads in ".txt" files that correspond to the ODE solution (set input to "ODE" in <conducting_atmosphere>)
-#            Run it with a single MPI task
-
-<parthenon/job>
-problem_id = conducting_atmosphere
-
-<parthenon/mesh>
-refinement = none
-numlevel   = 1
-nx1 = 256
-nx2 = 256
-nx3 = 1
-
-<parthenon/meshblock>
-nx1 = 256
-nx2 = 256
-nx3 = 1
-
-
-<coordinates>
-base      = ks
-transform = mks
-a         = 0.0
-hslope    = 1.0
-r_in      = 200.
-r_out     = 300.
-
-<boundaries>
-inner_x1 = dirichlet
-outer_x1 = dirichlet
-check_inflow_inner_x1 = false
-check_inflow_outer_x1 = false
-
-<parthenon/time>
-tlim       = 150.
-
-<driver>
-type = imex
-
-<GRMHD>
-implicit       = true
-cfl            = 0.9
-gamma          = 1.333333
-reconstruction = weno5
-
-<b_field>
-implicit        = false
-initial_cleanup = false
-
-<implicit>
-max_nonlinear_iter  = 3
-rootfind_tol        = 1.e-20
-jacobian_delta      = 4.e-8
-linesearch          = true
-max_linesearch_iter = 3
-linesearch_eps      = 1.e-4
-
-# IMPORTANT: This block must be present and values filled in all EGRMHD simulations
-<emhd>
-on                 = true
-higher_order_terms = true
-feedback           = true
-stability_limits   = false
-
-conduction = true
-viscosity  = false
-
-closure_type = kappa_eta
-tau   = 10.
-kappa = 0.1
-eta   = 0.0
-
-<conducting_atmosphere>
-input = ODE
-
-<floors>
-disable_floors = true
-
-<debug>
-verbose = 1
-flag_verbose = 2
-extra_checks = 1
-
-<parthenon/output0>
-file_type = hdf5
-dt = 10
-single_precision_output = false
-#variables = prims, cons, solve_norm, solve_fail
-variables = prims.rho, prims.u, prims.uvec, prims.B, prims.q, solve_norm, solve_fail
-ghost_zones = true
-
-<parthenon/output1>
-file_type = hst
-dt = 100
diff --git a/pars/driven_turbulence.par b/pars/electrons/driven_turbulence.par
similarity index 100%
rename from pars/driven_turbulence.par
rename to pars/electrons/driven_turbulence.par
diff --git a/pars/hubble.par b/pars/electrons/hubble.par
similarity index 100%
rename from pars/hubble.par
rename to pars/electrons/hubble.par
diff --git a/pars/noh.par b/pars/electrons/noh.par
similarity index 90%
rename from pars/noh.par
rename to pars/electrons/noh.par
index d7af06c7..015b9057 100644
--- a/pars/noh.par
+++ b/pars/electrons/noh.par
@@ -51,11 +51,11 @@ solver = none
 <electrons>
 on = true
 constant = true
-gamma_e = 1.333333
+gamma_e = 1.666667
 fel_0 = 0.
 fel_constant = 0.5
-diss_sign = false
-kel_min = false
+enforce_positive_dissipation = false
+limit_kel = false
 
 <noh>
 mach = 49.
@@ -65,8 +65,8 @@ zero_ug = false
 set_tlim = true
 centered = false
 
-<floors>
-disable_floors = true
+#<floors>
+#disable_floors = true
 
 <driver>
 type = imex
@@ -76,7 +76,7 @@ verbose = 0
 
 <parthenon/output0>
 file_type = hdf5
-dt = 0.1
+dt = 10
 single_precision_output = false
 variables = prims.rho, prims.u, prims.uvec, prims.Ktot, prims.Kel_Constant
 
diff --git a/pars/rest_conserve.par b/pars/electrons/rest_conserve.par
similarity index 97%
rename from pars/rest_conserve.par
rename to pars/electrons/rest_conserve.par
index 327b9b5c..86504eb3 100644
--- a/pars/rest_conserve.par
+++ b/pars/electrons/rest_conserve.par
@@ -2,7 +2,7 @@
 # Try to propagate several analytically-amenable linear modes of the MHD equations
 
 <parthenon/job>
-problem_id = rest_conserve
+problem_id = hubble
 
 <parthenon/mesh>
 refinement = none
diff --git a/pars/anisotropic_conduction.par b/pars/emhd/anisotropic_conduction.par
similarity index 100%
rename from pars/anisotropic_conduction.par
rename to pars/emhd/anisotropic_conduction.par
diff --git a/pars/bondi_viscous.par b/pars/emhd/bondi_viscous.par
similarity index 86%
rename from pars/bondi_viscous.par
rename to pars/emhd/bondi_viscous.par
index d7b10a2b..f7b15d23 100644
--- a/pars/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -4,9 +4,6 @@
 problem_id = bondi
 
 <parthenon/mesh>
-# Full mesh size, no refinement
-refinement = none
-numlevel   = 1
 nx1 = 128
 nx2 = 128
 nx3 = 1
@@ -21,10 +18,8 @@ base      = ks
 transform = mks
 a         = 0.0
 hslope    = 1.0
-r_out     = 20
-# Needed to place 5 zones inside 3M,
-# to match the analytic files.
-Rhor      = 3
+r_out     = 20.0
+r_in      = 5.0
 
 <parthenon/time>
 tlim = 400.0
@@ -32,7 +27,6 @@ tlim = 400.0
 <GRMHD>
 cfl            = 0.9
 gamma          = 1.666667
-reconstruction = weno5
 implicit       = true
 
 <b_field>
@@ -40,9 +34,6 @@ type            = monopole_cube
 implicit        = false
 initial_cleanup = false
 
-<driver>
-type = imex
-
 <implicit>
 min_nonlinear_iter  = 1
 max_nonlinear_iter  = 3
@@ -75,7 +66,9 @@ disable_floors = true
 
 <boundaries>
 outer_x1 = dirichlet
+inner_x1 = dirichlet
 check_inflow_outer_x1 = false
+#check_inflow_inner_x1 = false
 
 <debug>
 verbose = 1
@@ -86,6 +79,7 @@ extra_checks = 1
 file_type               = hdf5
 dt                      = 100.0
 single_precision_output = false
+ghost_zones             = true
 variables               = prims, solve_norm, solve_fail
 
 <parthenon/output1>
diff --git a/pars/emhdmodes.par b/pars/emhd/emhdmodes.par
similarity index 100%
rename from pars/emhdmodes.par
rename to pars/emhd/emhdmodes.par
diff --git a/pars/orszag_tang_viscous.par b/pars/emhd/orszag_tang_viscous.par
similarity index 95%
rename from pars/orszag_tang_viscous.par
rename to pars/emhd/orszag_tang_viscous.par
index d566e5ed..cdec24fc 100644
--- a/pars/orszag_tang_viscous.par
+++ b/pars/emhd/orszag_tang_viscous.par
@@ -44,8 +44,10 @@ cfl = 0.9
 gamma = 1.666667
 reconstruction = weno5
 
-<viscosity>
+<emhd>
 on = true
+viscosity = true
+conduction = false
 
 <debug>
 verbose = 0
diff --git a/pars/emhdshock.par b/pars/emhdshock.par
deleted file mode 100644
index 26e191b0..00000000
--- a/pars/emhdshock.par
+++ /dev/null
@@ -1,94 +0,0 @@
-# EMHD Shock problem
-# Try to maintain the BVP solution to a discontuinity
-# Checks the higher order terms implementation in flat space
-# IMPORTANT: This test is different from the other tests in its initialization
-#            It reads in ".txt" files that correspond to the BVP solution (set input to "BVP" in <emhdshock>)
-#            One, in principle, can run this problem with the usual ideal MHD jump conditions but this
-#            may not allow a quantitative check
-#            Run it with a single MPI task
-
-<parthenon/job>
-problem_id = emhdshock
-
-<parthenon/mesh>
-refinement = none
-numlevel   = 1
-
-nx1 = 1024
-x1min  = -0.5
-x1max  = 1.5
-ix1_bc = outflow
-ox1_bc = outflow
-
-nx2 = 1
-x2min  = 0.0
-x2max  = 1.0
-ix2_bc = periodic
-ox2_bc = periodic
-
-nx3 = 1
-x3min  = 0.0
-x3max  = 1.0
-ix3_bc = periodic
-ox3_bc = periodic
-
-<parthenon/meshblock>
-nx1 = 1024
-nx2 = 1
-nx3 = 1
-
-<coordinates>
-base      = cartesian_minkowski
-transform = null
-
-<parthenon/time>
-# "RK2" is the only option for implicit solver
-tlim       = 0.5
-integrator = rk2
-dt_min     = 1.e-6
-
-<GRMHD>
-cfl            = 0.25
-gamma          = 1.333333
-reconstruction = linear_mc
-
-<b_field>
-implicit        = true
-initial_cleanup = false
-
-# IMPORTANT: This block must be present and values filled in all EGRMHD simulations
-<emhd>
-on                 = true
-higher_order_terms = true
-
-closure_type       = soundspeed
-tau                = 0.1
-conduction_alpha   = 5.0
-viscosity_alpha    = 3.0
-
-<implicit>
-max_nonlinear_iter = 3
-rootfind_tol       = 1.e-20
-jacobian_delta     = 4.e-8
-
-<emhdshock>
-# The input can be the BVP solution or the ideal MHD Rankine-Hugoniot jump conditions
-input = BVP
-
-<floors>
-disable_floors = true
-
-<debug>
-verbose      = 1
-flag_verbose = 2
-extra_checks = 1
-
-<parthenon/output0>
-file_type               = hdf5
-dt                      = 0.05
-single_precision_output = false
-variables               = prims.rho, prims.u, prims.uvec, prims.B, prims.q, prims.dP
-
-<parthenon/output1>
-file_type = hst
-dt        = 0.1
\ No newline at end of file
diff --git a/pars/orszag_tang_face_ct.par b/pars/orszag_tang_face_ct.par
deleted file mode 100644
index 3a7eca6d..00000000
--- a/pars/orszag_tang_face_ct.par
+++ /dev/null
@@ -1,67 +0,0 @@
-# Orszag-Tang Vortex problem:
-# Generate current sheets on short timescales
-
-<parthenon/job>
-problem_id = orszag_tang
-
-<parthenon/mesh>
-nx1 = 256
-x1min = -3.141592653589793
-x1max = 3.141592653589793
-
-nx2 = 128
-x2min = -3.141592653589793
-x2max = 3.141592653589793
-
-nx3 = 1
-x3min = -0.01
-x3max = 0.01
-
-<parthenon/meshblock>
-nx1 = 128
-nx2 = 128
-nx3 = 1
-
-<coordinates>
-base = cartesian_minkowski
-transform = null
-
-<parthenon/time>
-tlim = 100.0
-integrator = rk2
-
-<GRMHD>
-cfl = 0.9
-gamma = 1.666667
-reconstruction = weno5
-
-<b_field>
-solver = face_ct
-kill_on_large_divb = true
-ct_scheme = bs99
-
-<debug>
-verbose = 1
-flag_verbose = 2
-extra_checks = 1
-
-<floors>
-disable_floors = true
-
-<parthenon/output0>
-file_type = hdf5
-dt = 1
-single_precision_output = true
-# TODO just prims when face fields supported
-variables = prims.rho, prims.u, prims.uvec, prims.B, divB, jcon
-ghost_zones = true
-
-<parthenon/output1>
-file_type = hst
-dt = 0.1
-
-# This problem is generally much too short to need
-# checkpointing.  However, we have a test which uses it.
-#<parthenon/output2>
-#file_type = rst
-#dt = 10.0
diff --git a/pars/resize_restart.par b/pars/restarts/resize_restart.par
similarity index 100%
rename from pars/resize_restart.par
rename to pars/restarts/resize_restart.par
diff --git a/pars/shocks/komissarov_collision.par b/pars/shocks/komissarov_collision.par
index fbefb3a8..9da07f16 100644
--- a/pars/shocks/komissarov_collision.par
+++ b/pars/shocks/komissarov_collision.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.
diff --git a/pars/shocks/komissarov_fast.par b/pars/shocks/komissarov_fast.par
index 22d25c40..4b1a3d55 100644
--- a/pars/shocks/komissarov_fast.par
+++ b/pars/shocks/komissarov_fast.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.4
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.
diff --git a/pars/shocks/komissarov_shock_1.par b/pars/shocks/komissarov_shock_1.par
index 29273e3d..8aa79aa7 100644
--- a/pars/shocks/komissarov_shock_1.par
+++ b/pars/shocks/komissarov_shock_1.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.
diff --git a/pars/shocks/komissarov_shock_2.par b/pars/shocks/komissarov_shock_2.par
index 72941ec0..afde2d88 100644
--- a/pars/shocks/komissarov_shock_2.par
+++ b/pars/shocks/komissarov_shock_2.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.
diff --git a/pars/shocks/komissarov_slow.par b/pars/shocks/komissarov_slow.par
index 6f319699..9fb91b67 100644
--- a/pars/shocks/komissarov_slow.par
+++ b/pars/shocks/komissarov_slow.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.
diff --git a/pars/shocks/komissarov_switch_off.par b/pars/shocks/komissarov_switch_off.par
index 17ea2977..1afa3ded 100644
--- a/pars/shocks/komissarov_switch_off.par
+++ b/pars/shocks/komissarov_switch_off.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 0.1
diff --git a/pars/shocks/komissarov_switch_on.par b/pars/shocks/komissarov_switch_on.par
index 4fbf9159..cd094493 100644
--- a/pars/shocks/komissarov_switch_on.par
+++ b/pars/shocks/komissarov_switch_on.par
@@ -43,7 +43,7 @@ dt_min = 0.0001
 <GRMHD>
 cfl = 0.5
 gamma = 1.333333
-reconstruction = linear_vl
+reconstruction = linear_mc
 
 <shock>
 rhoL = 1.78e-3
diff --git a/pars/orszag_tang_smr.par b/pars/smr/orszag_tang_refined.par
similarity index 100%
rename from pars/orszag_tang_smr.par
rename to pars/smr/orszag_tang_refined.par
diff --git a/pars/sane2d_refined.par b/pars/smr/sane2d_refined.par
similarity index 100%
rename from pars/sane2d_refined.par
rename to pars/smr/sane2d_refined.par
diff --git a/pars/sane3d_refined.par b/pars/smr/sane3d_refined.par
similarity index 100%
rename from pars/sane3d_refined.par
rename to pars/smr/sane3d_refined.par
diff --git a/pars/bz_monopole.par b/pars/tests/bz_monopole.par
similarity index 96%
rename from pars/bz_monopole.par
rename to pars/tests/bz_monopole.par
index cbf3b13f..5ed92c7f 100644
--- a/pars/bz_monopole.par
+++ b/pars/tests/bz_monopole.par
@@ -37,7 +37,7 @@ flag_verbose = 0
 <GRMHD>
 cfl = 0.7
 gamma = 1.444444
-reconstruction = weno5
+reconstruction = linear_mc
 
 <b_field>
 type = bz_monopole
diff --git a/pars/explosion.par b/pars/tests/explosion.par
similarity index 100%
rename from pars/explosion.par
rename to pars/tests/explosion.par
diff --git a/pars/kelvin_helmholtz.par b/pars/tests/kelvin_helmholtz.par
similarity index 100%
rename from pars/kelvin_helmholtz.par
rename to pars/tests/kelvin_helmholtz.par
diff --git a/pars/mhdmodes.par b/pars/tests/mhdmodes.par
similarity index 95%
rename from pars/mhdmodes.par
rename to pars/tests/mhdmodes.par
index 02e01285..e8b7f5a7 100644
--- a/pars/mhdmodes.par
+++ b/pars/tests/mhdmodes.par
@@ -26,13 +26,13 @@ dir = 3
 refinement = none
 numlevel = 1
 
-nx1 = 1024
+nx1 = 64
 x1min = 0.0
 x1max = 1.0
 ix1_bc = periodic
 ox1_bc = periodic
 
-nx2 = 1024
+nx2 = 64
 x2min = 0.0
 x2max = 1.0
 ix2_bc = periodic
@@ -48,8 +48,8 @@ ox3_bc = periodic
 # # of meshblocks must be >= the number of MPI ranks,
 # however there may be multiple blocks per rank
 <parthenon/meshblock>
-nx1 = 1024
-nx2 = 1024
+nx1 = 64
+nx2 = 64
 nx3 = 1
 
 # Set boring box coordinates. Explanations in bondi.par
@@ -60,7 +60,7 @@ transform = null
 <parthenon/time>
 # tlim will be overridden depending on the problem
 tlim = 5.0
-integrator = vl2
+integrator = rk2
 # Minimum is also the starting timestep
 dt_min = 0.0001
 
@@ -105,7 +105,7 @@ file_type = hdf5
 # This is so as to output only the final state
 dt = 0.5
 single_precision_output = true
-variables = prims.rho, prims.u, prims.uvec, prims.B
+variables = prims.rho, prims.u, prims.uvec, prims.B, divB
 
 # Text file with statistics (e.g. fluxes, floors hit)
 # recorded over time
diff --git a/pars/orszag_tang.par b/pars/tests/orszag_tang.par
similarity index 100%
rename from pars/orszag_tang.par
rename to pars/tests/orszag_tang.par
diff --git a/pars/sane2d.par b/pars/tori_2d/sane2d.par
similarity index 79%
rename from pars/sane2d.par
rename to pars/tori_2d/sane2d.par
index 0c24d995..a2fa5b5e 100644
--- a/pars/sane2d.par
+++ b/pars/tori_2d/sane2d.par
@@ -31,6 +31,7 @@ poly_alpha = 14.0
 <parthenon/time>
 tlim = 3000.0
 nlim = -1
+integrator = vl2
 
 <debug>
 verbose = 1
@@ -42,6 +43,9 @@ cfl = 0.9
 gamma = 1.666667
 reconstruction = weno5
 
+<driver>
+type = kharma
+
 <torus>
 rin = 6.0
 rmax = 12.0
@@ -75,12 +79,10 @@ on = false
 file_type = hdf5
 dt = 10.0
 single_precision_output = true
-# Any fields listed here which are not present (e.g. electrons if disabled)
-# will be silently skipped. '&' character is a line continuation, like '\'
-# Remember that the commas are still necessary, and unknown fields will silently fail!
-variables = prims.rho, prims.u, prims.uvec, prims.B, prims.Ktot, &
-            prims.Kel_Howes, prims.Kel_Kawazura, prims.Kel_Werner, prims.Kel_Rowan, prims.Kel_Sharma, &
-            pflag, fflag
+# Fields beginning with a specifier here will be included (e.g., all prims.XXX)
+# Fields specified but not present are silently skipped
+# If you need a line break, use '&' at line end
+variables = prims, jcon, pflag, fflag
 
 <parthenon/output1>
 file_type = rst
diff --git a/pars/sane2d_cooling.par b/pars/tori_2d/sane2d_cooling.par
similarity index 100%
rename from pars/sane2d_cooling.par
rename to pars/tori_2d/sane2d_cooling.par
diff --git a/pars/sane_divb_2d.par b/pars/tori_2d/sane_divb_2d.par
similarity index 100%
rename from pars/sane_divb_2d.par
rename to pars/tori_2d/sane_divb_2d.par
diff --git a/pars/eht_comp.par b/pars/tori_3d/eht_comp.par
similarity index 100%
rename from pars/eht_comp.par
rename to pars/tori_3d/eht_comp.par
diff --git a/pars/mad.par b/pars/tori_3d/mad.par
similarity index 100%
rename from pars/mad.par
rename to pars/tori_3d/mad.par
diff --git a/pars/mad_test.par b/pars/tori_3d/mad_test.par
similarity index 100%
rename from pars/mad_test.par
rename to pars/tori_3d/mad_test.par
diff --git a/pars/mad_tilt.par b/pars/tori_3d/mad_tilt.par
similarity index 100%
rename from pars/mad_tilt.par
rename to pars/tori_3d/mad_tilt.par
diff --git a/pars/sane.par b/pars/tori_3d/sane.par
similarity index 100%
rename from pars/sane.par
rename to pars/tori_3d/sane.par
diff --git a/pars/sane_emhd.par b/pars/tori_3d/sane_emhd.par
similarity index 100%
rename from pars/sane_emhd.par
rename to pars/tori_3d/sane_emhd.par
diff --git a/pars/sane_imex.par b/pars/tori_3d/sane_imex.par
similarity index 100%
rename from pars/sane_imex.par
rename to pars/tori_3d/sane_imex.par
diff --git a/pars/sane_tilt.par b/pars/tori_3d/sane_tilt.par
similarity index 100%
rename from pars/sane_tilt.par
rename to pars/tori_3d/sane_tilt.par
diff --git a/scripts/batch/polaris.qsub b/scripts/batch/polaris.qsub
index 5cb698d5..a2524d52 100644
--- a/scripts/batch/polaris.qsub
+++ b/scripts/batch/polaris.qsub
@@ -9,7 +9,7 @@
 
 KHARMA_DIR=~/kharma-dev
 WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
-KHARMA_ARGS="-i $KHARMA_DIR/pars/sane_perf.par"
+KHARMA_ARGS="-i $KHARMA_DIR/pars/benchmark/sane_perf.par"
 
 # Print ranks
 NNODES=`wc -l < $PBS_NODEFILE`
diff --git a/scripts/batch/scaling_delta.sb b/scripts/batch/scaling_delta.sb
index d96e5212..3ff110bc 100755
--- a/scripts/batch/scaling_delta.sb
+++ b/scripts/batch/scaling_delta.sb
@@ -65,7 +65,7 @@ if [[ $DO_STRONG == "true" ]]; then
 
         echo "cycle=100 Running $size cubed problem with KHARMA on $nodes nodes with $tpn tasks each (blocksize $msize)"
 
-        srun -n $np $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        srun -n $np $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                              parthenon/mesh/nx1=$size parthenon/mesh/nx2=$size parthenon/mesh/nx3=$size \
                                              parthenon/meshblock/nx1=$msize parthenon/meshblock/nx2=$msize parthenon/meshblock/nx3=$msize
 
@@ -130,7 +130,7 @@ if [[ $DO_WEAK == "true" ]]; then
         nblock=$(( $mul1 * $mul2 * $mul3 ))
         echo "cycle=100 Running $size per node problem with KHARMA on $nodes nodes with $tpn tasks each (total size ${tsize1}x${tsize2}x${tsize3}, $nblock blocks)"
 
-        srun -n $np $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        srun -n $np $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                              parthenon/mesh/nx1=$tsize1 parthenon/mesh/nx2=$tsize2 parthenon/mesh/nx3=$tsize3 \
                                              parthenon/meshblock/nx1=$size parthenon/meshblock/nx2=$size parthenon/meshblock/nx3=$size
 
diff --git a/scripts/batch/scaling_frontera.sb b/scripts/batch/scaling_frontera.sb
index 17833c65..0ca0593c 100755
--- a/scripts/batch/scaling_frontera.sb
+++ b/scripts/batch/scaling_frontera.sb
@@ -54,7 +54,7 @@ if [[ $DO_STRONG == "true" ]]; then
 
         echo "cycle=100 Running $size cubed problem with KHARMA on $nodes nodes with $tpn tasks each (blocksize $msize)"
 
-        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.host -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.host -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                     parthenon/mesh/nx1=$size parthenon/mesh/nx2=$size parthenon/mesh/nx3=$size \
                                     parthenon/meshblock/nx1=$msize parthenon/meshblock/nx2=$msize parthenon/meshblock/nx3=$msize
 
@@ -119,7 +119,7 @@ if [[ $DO_WEAK == "true" ]]; then
         nblock=$(( $mul1 * $mul2 * $mul3 ))
         echo "cycle=100 Running $size per node problem with KHARMA on $nodes nodes with $tpn tasks each (total size ${tsize1}x${tsize2}x${tsize3}, $nblock blocks)"
 
-        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.host -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.host -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                     parthenon/mesh/nx1=$tsize1 parthenon/mesh/nx2=$tsize2 parthenon/mesh/nx3=$tsize3 \
                                     parthenon/meshblock/nx1=$size parthenon/meshblock/nx2=$size parthenon/meshblock/nx3=$size
 
diff --git a/scripts/batch/scaling_longhorn.sb b/scripts/batch/scaling_longhorn.sb
index 3e2b097b..8c4f41f8 100755
--- a/scripts/batch/scaling_longhorn.sb
+++ b/scripts/batch/scaling_longhorn.sb
@@ -52,7 +52,7 @@ if [[ $DO_STRONG == "true" ]]; then
 
         echo "cycle=100 Running $size cubed problem with KHARMA on $nodes nodes with $tpn tasks each (blocksize $msize)"
 
-        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                     parthenon/mesh/nx1=$size parthenon/mesh/nx2=$size parthenon/mesh/nx3=$size \
                                     parthenon/meshblock/nx1=$msize parthenon/meshblock/nx2=$msize parthenon/meshblock/nx3=$msize
 
@@ -117,7 +117,7 @@ if [[ $DO_WEAK == "true" ]]; then
         nblock=$(( $mul1 * $mul2 * $mul3 ))
         echo "cycle=100 Running $size per node problem with KHARMA on $nodes nodes with $tpn tasks each (total size ${tsize1}x${tsize2}x${tsize3}, $nblock blocks)"
 
-        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/scaling_torus.par parthenon/time/nlim=102 \
+        ibrun -np $np tacc_affinity $KHARMA_DIR/kharma.cuda -i $KHARMA_DIR/pars/benchmark/scaling_torus.par parthenon/time/nlim=102 \
                                     parthenon/mesh/nx1=$tsize1 parthenon/mesh/nx2=$tsize2 parthenon/mesh/nx3=$tsize3 \
                                     parthenon/meshblock/nx1=$size parthenon/meshblock/nx2=$size parthenon/meshblock/nx3=$size
 
diff --git a/scripts/batch/scaling_polaris.qsub b/scripts/batch/scaling_polaris.qsub
index 3e975aac..5c82de34 100755
--- a/scripts/batch/scaling_polaris.qsub
+++ b/scripts/batch/scaling_polaris.qsub
@@ -20,7 +20,7 @@ KHARMA_DIR=~/kharma-dev
 WRAPPER=$KHARMA_DIR/bin/select_gpu_polaris
 
 # Gotta specify this inline since bsub doesn't do arguments
-PARFILE=~/kharma-dev/pars/scaling_torus.par
+PARFILE=~/kharma-dev/pars/benchmark/scaling_torus.par
 # Allocate in full nodes, vs individual gpus
 min_nodes=1
 min_gpus=1 #$(( $NRANKS * $min_nodes ))
diff --git a/scripts/batch/scaling_summit.bsub b/scripts/batch/scaling_summit.bsub
index d0ece6f9..9f0f45f8 100755
--- a/scripts/batch/scaling_summit.bsub
+++ b/scripts/batch/scaling_summit.bsub
@@ -19,7 +19,7 @@ DO_WEAK=true
 KHARMA_DIR=~/kharma
 
 # Gotta specify this inline since bsub doesn't do arguments
-PARFILE=~/kharma/pars/scaling_torus.par
+PARFILE=~/kharma/pars/benchmark/scaling_torus.par
 # Allocate in full nodes, vs individual gpus
 min_nodes=1
 min_gpus=1 #$(( 6 * $min_nodes ))
diff --git a/tests/all_pars/run.sh b/tests/all_pars/run.sh
index 9b561044..d2e654c9 100755
--- a/tests/all_pars/run.sh
+++ b/tests/all_pars/run.sh
@@ -1,8 +1,12 @@
 #!/bin/bash
 set -euo pipefail
 
-for fil in ../../pars/*.par
+# Skip testing the restarting & benchmark scripts
+for folder in bondi electrons emhd shocks smr tests tori_2d tori_3d
 do
-  ../../run.sh -n 1 -i $fil parthenon/time/nlim=2
-  rm *.{hst,phdf,rhdf,xdmf}
+  for fil in ../../pars/$folder/*.par
+  do
+    ../../run.sh -n 1 -i $fil parthenon/time/nlim=2
+    rm -f *.{hst,phdf,rhdf,xdmf}
+  done
 done
diff --git a/tests/run_all.sh b/tests/run_all.sh
new file mode 100755
index 00000000..ea0da229
--- /dev/null
+++ b/tests/run_all.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+
+for dir in */
+do
+  cd $dir
+  if [ -f ./run.sh ]; then
+    echo "Running $dir"
+    ./run.sh
+  fi
+  cd -
+done

From 0e416cdca04486daf2cbc194091ae57baf6882bc Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 3 Oct 2023 14:13:52 -0600
Subject: [PATCH 07/47] Better boundaries, options

* Try to make block and domain boundaries clear but
flexible. Now supports, between blocks:
sync cons
sync prims
sync prims (but under the hood sync cons for AMR)
and for domain boundaries:
prims are marked sync ->
    PtoU everything
cons,GRHD prims are marked sync ->
    UtoP except PtoU on MHD
cons,GRHD prims are marked sync ->
    UtoP everything
* Dirichlet boundary fixes
* More options to try inadvisable things on boundaries,
    & to record exactly what was applied
* Drivers now have a type
* Put in structure for limiting MPI sync vars
* Clearly deprecate B_CD
* Rename B_Cleanup -> general StartupOnly flag
* Don't allocate in current calc
---
 kharma/b_cd/b_cd.cpp             |   2 +
 kharma/b_cleanup/b_cleanup.cpp   |  18 +++---
 kharma/b_flux_ct/b_flux_ct.cpp   |  17 +++--
 kharma/b_flux_ct/b_flux_ct.hpp   |   2 +-
 kharma/boundaries/boundaries.cpp | 107 +++++++++++++++++++++++-------
 kharma/boundaries/boundaries.hpp |  13 +---
 kharma/boundaries/dirichlet.cpp  | 108 ++++++++++++++++---------------
 kharma/current/current.cpp       |  24 ++++---
 kharma/driver/kharma_driver.cpp  |  89 +++++++++++++++++--------
 kharma/driver/kharma_driver.hpp  |  10 +--
 kharma/driver/kharma_step.cpp    |  76 +++++++++++-----------
 kharma/driver/simple_step.cpp    |   2 +-
 kharma/electrons/electrons.cpp   |  25 +++----
 kharma/emhd/emhd.cpp             |  96 ++++++++++++++-------------
 kharma/flux/flux.cpp             |   8 ++-
 kharma/grmhd/grmhd.cpp           |  58 ++++++-----------
 kharma/inverter/inverter.cpp     |   6 +-
 kharma/kharma.cpp                |   7 +-
 kharma/kharma.hpp                |  10 +--
 kharma/kharma_package.cpp        |  18 +++---
 kharma/kharma_package.hpp        |  29 +++++----
 kharma/main.cpp                  |   6 +-
 22 files changed, 417 insertions(+), 314 deletions(-)

diff --git a/kharma/b_cd/b_cd.cpp b/kharma/b_cd/b_cd.cpp
index 7d37399f..4f69a6da 100644
--- a/kharma/b_cd/b_cd.cpp
+++ b/kharma/b_cd/b_cd.cpp
@@ -43,6 +43,8 @@ namespace B_CD
 
 std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<Packages_t>& packages)
 {
+    throw std::runtime_error("Constraint-damping transport is not functional with modern B field initialization!");
+
     auto pkg = std::make_shared<KHARMAPackage>("B_CD");
     Params &params = pkg->AllParams();
 
diff --git a/kharma/b_cleanup/b_cleanup.cpp b/kharma/b_cleanup/b_cleanup.cpp
index 831b920b..3d47e557 100644
--- a/kharma/b_cleanup/b_cleanup.cpp
+++ b/kharma/b_cleanup/b_cleanup.cpp
@@ -68,8 +68,7 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
     auto pkg = std::make_shared<KHARMAPackage>("B_Cleanup");
     Params &params = pkg->AllParams();
 
-    // The solver needs this flag
-    Metadata::AddUserFlag("B_Cleanup");
+    // TODO also support face divB!!
 
     // Solver options
     // Allow setting tolerance relative to starting value.  Off by default
@@ -104,8 +103,9 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
     // RHS.  Must not just be "divB" as that field does not sync boundaries
     pkg->AddParam<std::string>("rhs_name", "divB_RHS");
     // Construct a solver. We don't need the template parameter, so we use 'int'
-    // TODO TODO
-    BiCGStabSolver<int> solver(pkg.get(), rel_tolerance, SparseMatrixAccessor(), {}); //, {Metadata::GetUserFlag("B_Cleanup")});
+    // The flag "StartupOnly" marks solver variables not to be sync'd later,
+    // even though they're also marked FillGhost
+    BiCGStabSolver<int> solver(pkg.get(), rel_tolerance, SparseMatrixAccessor(), {}, {Metadata::GetUserFlag("StartupOnly")});
     // Set callback
     solver.user_MatVec = B_Cleanup::CornerLaplacian;
 
@@ -113,7 +113,7 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
 
     // FIELDS
     std::vector<int> s_vector({NVEC});
-    std::vector<MetadataFlag> cleanup_flags({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::GetUserFlag("B_Cleanup")});
+    std::vector<MetadataFlag> cleanup_flags({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::GetUserFlag("StartupOnly")});
     auto cleanup_flags_ghost = cleanup_flags;
     cleanup_flags_ghost.push_back(Metadata::FillGhost);
     // Scalar potential, solution to del^2 p = div B
@@ -135,10 +135,8 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
 
     // Declare fields if we're doing that
     if (manage_field) {
-        // Stolen verbatim from FluxCT, except we don't register the FixFlux step obvs
-        // Probably will crash due to not having the right parameters: add as needed.
-        // Best to crash, this mode is very not supported.
-        // TODO preserve an easier form of divB in this case?
+        // Stolen verbatim from FluxCT, will need updates to actually use
+        throw std::runtime_error("B field cleanup/projection is set as B field transport! If you really want this, disable this error in source!");
 
         // Mark if we're evolving implicitly
         bool implicit_b = pin->GetOrAddBoolean("b_field", "implicit", false);
@@ -237,7 +235,7 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
     KHARMADriver::SyncAllBounds(md);
 
     // Add a solver container and associated MeshData
-    std::vector<std::string> names = KHARMA::GetVariableNames(&pmesh->packages, Metadata::GetUserFlag("B_Cleanup"));
+    std::vector<std::string> names = KHARMA::GetVariableNames(&pmesh->packages, {Metadata::GetUserFlag("B_Cleanup"), Metadata::GetUserFlag("StartupOnly")});
     auto &msolve = pmesh->mesh_data.Add("solve", names);
 
     // Create a TaskCollection of just the solve,
diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index fd3fbeb3..d4b4dd63 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -100,11 +100,14 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     MetadataFlag areWeImplicit = (implicit_b) ? Metadata::GetUserFlag("Implicit")
                                               : Metadata::GetUserFlag("Explicit");
 
-    // Flags for B fields. "primitive" form is field, "conserved" is flux
-    std::vector<MetadataFlag> flags_prim = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::GetUserFlag("Primitive"),
-                                            Metadata::Restart, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
-    std::vector<MetadataFlag> flags_cons = {Metadata::Real, Metadata::Cell, Metadata::Independent, Metadata::Conserved, Metadata::Conserved,
-                                            Metadata::WithFluxes, Metadata::FillGhost, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
+    // Flags for B fields
+    std::vector<MetadataFlag> flags_b = {Metadata::Cell, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
+
+    // "primitive" B field is field, "conserved" is flux
+    auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
+    flags_prim.insert(flags_prim.end(), flags_b.begin(), flags_b.end());
+    auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
+    flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
 
     auto m = Metadata(flags_prim, s_vector);
     pkg->AddField("prims.B", m);
@@ -112,7 +115,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     pkg->AddField("cons.B", m);
 
     // Declare EMF temporary variables, to avoid malloc/free during each step
-    // These are edge-centered but we only need the interior + 1-zone halo anyway
+    // Technically these are edge-centered but we only need the interior + 1-zone halo anyway, so we store as a vector
     std::vector<MetadataFlag> flags_emf = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy};
     m = Metadata(flags_emf, s_vector);
     pkg->AddField("emf", m);
@@ -465,6 +468,7 @@ double MaxDivB(MeshData<Real> *md)
 {
     auto pmesh = md->GetMeshPointer();
     const int ndim = pmesh->ndim;
+    if (ndim < 2) return 0.;
 
     // Packing out here avoids frequent per-mesh packs.  Do we need to?
     auto B_U = md->PackVariables(std::vector<std::string>{"cons.B"});
@@ -545,6 +549,7 @@ void CalcDivB(MeshData<Real> *md, std::string divb_field_name)
 {
     auto pmesh = md->GetMeshPointer();
     const int ndim = pmesh->ndim;
+    if (ndim < 2) return;
 
     // Packing out here avoids frequent per-mesh packs.  Do we need to?
     auto B_U = md->PackVariables(std::vector<std::string>{"cons.B"});
diff --git a/kharma/b_flux_ct/b_flux_ct.hpp b/kharma/b_flux_ct/b_flux_ct.hpp
index 7080b269..2fc4c461 100644
--- a/kharma/b_flux_ct/b_flux_ct.hpp
+++ b/kharma/b_flux_ct/b_flux_ct.hpp
@@ -68,7 +68,7 @@ void MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
  * Reverse of the above.  Only used alone during initialization.
- * Generally, use Flux::BlockPtoU or Flux::BlockPtoUExceptMHD.
+ * Generally, use Flux::BlockPtoU
  */
 void BlockPtoU(MeshBlockData<Real> *md, IndexDomain domain, bool coarse=false);
 
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index 5aaeb2d7..a256dd1f 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -59,6 +59,11 @@ std::shared_ptr<KHARMAPackage> KBoundaries::Initialize(ParameterInput *pin, std:
     bool zero_polar_flux = pin->GetOrAddBoolean("boundaries", "zero_polar_flux", spherical);
     params.Add("zero_polar_flux", zero_polar_flux);
 
+    // Apply physical boundaries to conserved GRMHD variables rho u^r, T^mu_nu
+    // Probably inadvisable?
+    bool domain_bounds_on_conserved = pin->GetOrAddBoolean("boundaries", "domain_bounds_on_conserved", false);
+    params.Add("domain_bounds_on_conserved", domain_bounds_on_conserved);
+
     // Fix the X1/X2 corner by replacing the reflecting condition with the inflow
     // Never use this if not in spherical coordinates
     // Activates by default only with reflecting X2/outflow X1 and interior boundary inside EH
@@ -71,8 +76,11 @@ std::shared_ptr<KHARMAPackage> KBoundaries::Initialize(ParameterInput *pin, std:
              pin->GetString("boundaries", "inner_x1") == "outflow");
         bool inside_eh = pin->GetBoolean("coordinates", "domain_intersects_eh");
         fix_corner = pin->GetOrAddBoolean("boundaries", "fix_corner", correct_bounds && inside_eh);
+        // Allow overriding with specific name
+        fix_corner = pin->GetOrAddBoolean("boundaries", "fix_corner_inner", fix_corner);
     }
-    params.Add("fix_corner", fix_corner);
+    params.Add("fix_corner_inner", fix_corner);
+    params.Add("fix_corner_outer", pin->GetOrAddBoolean("boundaries", "fix_corner_outer", false));
 
     Metadata m_x1, m_x2, m_x3;
     {
@@ -99,8 +107,7 @@ std::shared_ptr<KHARMAPackage> KBoundaries::Initialize(ParameterInput *pin, std:
     }
 
     // Set options for each boundary
-    for (int i = 0; i < BOUNDARY_NFACES; i++)
-    {
+    for (int i = 0; i < BOUNDARY_NFACES; i++) {
         const auto bface = (BoundaryFace) i;
         const auto bdomain = BoundaryDomain(bface);
         const auto bname = BoundaryName(bface);
@@ -259,18 +266,57 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
         EndFlag();
     }
 
-    // If specified, fix corner values when applying X2 boundaries (see function)
-    if (bdir == X2DIR && params.Get<bool>("fix_corner")) {
-        Flag("FixCorner");
-        FixCorner(rc, domain, coarse);
-        EndFlag();
+    /*
+    * KHARMA is very particular about corner boundaries.
+    * In particular, we apply the outflow boundary over ALL X2 & X3.
+    * Then we apply the polar bound only where outflow is not applied,
+    * and periodic bounds only where neither other bound applies.
+    * The latter is accomplished regardless of Parthenon's definitions,
+    * since these functions are run after Parthenon's MPI boundary syncs &
+    * replace whatever they've done.
+    * However, the former must be added after the X2 boundary call,
+    * replacing the reflecting conditions in the X1/X2 corner (or in 3D, edge)
+    * with outflow conditions based on the updated ghost cells.
+    */
+    if (bdir == X2DIR) {
+        // If we're on the interior edge, re-apply that edge for our block by calling
+        // exactly the same function that Parthenon does.  This ensures we're applying
+        // the same thing, just emulating calling it after X2.
+        if (params.Get<bool>("fix_corner_inner")) {
+            if (pmb->boundary_flag[BoundaryFace::inner_x1] == BoundaryFlag::user) {
+                Flag("FixCorner");
+                ApplyBoundary(rc, IndexDomain::inner_x1, coarse);
+                EndFlag();
+            }
+        }
+        if (params.Get<bool>("fix_corner_outer")) {
+            if (pmb->boundary_flag[BoundaryFace::outer_x1] == BoundaryFlag::user) {
+                Flag("FixCorner");
+                ApplyBoundary(rc, IndexDomain::outer_x1, coarse);
+                EndFlag();
+            }
+        }
     }
 
-    // Respect the fluid primitives on boundaries (does not include B)
-    // Also currently the EMHD extra variables q, dP
-    Packages::BoundaryPtoU(rc.get(), domain, coarse);
-    // For everything else, respect conserved variables
-    Packages::BoundaryUtoP(rc.get(), domain, coarse);
+    // If we applied the domain boundary to primitives (as we usually do)...
+    if (!params.Get<bool>("domain_bounds_on_conserved")) {
+        bool sync_prims = rc->GetBlockPointer()->packages.Get("Driver")->Param<bool>("sync_prims");
+        // There are two modes of operation here:
+        if (sync_prims) {
+            // 1. ImEx w/o AMR:
+            //    PRIMITIVE variables (only) are marked FillGhost
+            //    So, run PtoU on EVERYTHING (and correct the B field)
+            CorrectBPrimitive(rc, domain, coarse);
+            Flux::BlockPtoU(rc.get(), domain, coarse);
+        } else {
+            // 2. Normal (KHARMA driver, ImEx w/AMR):
+            //    CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
+            //    So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
+            Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
+        }
+    } else {
+        Packages::BlockUtoP(rc.get(), domain, coarse);
+    }
 
     EndFlag();
 }
@@ -295,19 +341,34 @@ void KBoundaries::CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDom
     );
 }
 
-void KBoundaries::FixCorner(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDomain domain, bool coarse)
+void KBoundaries::CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse)
 {
+    Flag("CorrectBPrimitive");
     std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
-    if (pmb->pmy_mesh->ndim < 2)
-        return;
+    const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
 
-    // If we're on the interior edge, re-apply that edge for our block by calling
-    // whatever the X1 boundary is, again.  This ensures we're applying
-    // the same thing, just emulating calling it after X2.
-    if (pmb->boundary_flag[BoundaryFace::inner_x1] == BoundaryFlag::user)
-    {
-        ApplyBoundary(rc, IndexDomain::inner_x1, coarse);
-    }
+    auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
+    // Return if no field to correct
+    if (B_P.GetDim(4) == 0) return;
+
+    const auto& G = pmb->coords;
+
+    const auto &bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+    const int dir = BoundaryDirection(domain);
+    const auto &range = (dir == 1) ? bounds.GetBoundsI(IndexDomain::interior)
+                            : (dir == 2 ? bounds.GetBoundsJ(IndexDomain::interior)
+                                : bounds.GetBoundsK(IndexDomain::interior));
+    const int ref = BoundaryIsInner(domain) ? range.s : range.e;
+
+    pmb->par_for_bndry(
+        "Correct_B_P", IndexRange{0,NVEC-1}, domain, CC, coarse,
+        KOKKOS_LAMBDA (const int &v, const int &k, const int &j, const int &i) {
+            B_P(v, k, j, i) *= G.gdet(Loci::center, (dir == 2) ? ref : j, (dir == 1) ? ref : i)
+                                / G.gdet(Loci::center, j, i);
+        }
+    );
+
+    EndFlag();
 }
 
 TaskStatus KBoundaries::FixFlux(MeshData<Real> *md)
diff --git a/kharma/boundaries/boundaries.hpp b/kharma/boundaries/boundaries.hpp
index cf412551..dde70a60 100644
--- a/kharma/boundaries/boundaries.hpp
+++ b/kharma/boundaries/boundaries.hpp
@@ -85,18 +85,9 @@ TaskStatus FixFlux(MeshData<Real> *rc);
 void CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDomain domain, bool coarse);
 
 /**
- * KHARMA is very particular about corner boundaries.
- * In particular, we apply the outflow boundary over ALL X2 & X3.
- * Then we apply the polar bound only where outflow is not applied,
- * and periodic bounds only where neither other bound applies.
- * The latter is accomplished regardless of Parthenon's definitions,
- * since these functions are run after Parthenon's MPI boundary syncs &
- * replace whatever they've done.
- * However, the former must be added after the X2 boundary call,
- * replacing the reflecting conditions in the X1/X2 corner (or in 3D, edge)
- * with outflow conditions based on the updated ghost cells.
+ * Correct for geometry when applying primitive B field boundaries
  */
-void FixCorner(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDomain domain, bool coarse);
+void CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse);
 
 /**
  * Check for velocity toward the simulation domain in a zone, and eliminate it.
diff --git a/kharma/boundaries/dirichlet.cpp b/kharma/boundaries/dirichlet.cpp
index c3b63f8f..809ae84e 100644
--- a/kharma/boundaries/dirichlet.cpp
+++ b/kharma/boundaries/dirichlet.cpp
@@ -40,6 +40,7 @@
 
 using namespace parthenon;
 
+// TODO can SetDirichlet be folded into this?
 void KBoundaries::DirichletImpl(std::shared_ptr<MeshBlockData<Real>> &rc, BoundaryFace bface, bool coarse)
 {
     std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
@@ -47,16 +48,18 @@ void KBoundaries::DirichletImpl(std::shared_ptr<MeshBlockData<Real>> &rc, Bounda
 
     // Get all ghosts, minus those in the B_Cleanup package if it is present
     using FC = Metadata::FlagCollection;
-    FC main_ghosts = pmb->packages.AllPackages().count("B_Cleanup")
-                            ? FC({Metadata::FillGhost}) - FC({Metadata::GetUserFlag("B_Cleanup")})
-                            : FC({Metadata::FillGhost});
+    FC ghost_vars = FC({Metadata::FillGhost, Metadata::Conserved})
+                  + FC({Metadata::FillGhost, Metadata::GetUserFlag("Primitive")})
+                  - FC({Metadata::GetUserFlag("StartupOnly")});
     PackIndexMap ghostmap;
-    auto q = rc->PackVariables(main_ghosts, ghostmap, coarse);
-    const int q_index = ghostmap["prims.q"].first;
+    auto q = rc->PackVariables(ghost_vars, ghostmap, coarse);
     auto bound = rc->Get("bounds." + BoundaryName(bface)).data;
 
+    // We're sometimes called without any variables to sync (e.g. syncing flags, EMFs), just return
+    if (q.GetDim(4) == 0) return;
+
     if (q.GetDim(4) != bound.GetDim(4)) {
-        std::cerr << "Boundary cache mismatch! boundaries: " << bound.GetDim(4) << " vs pack: " << q.GetDim(4) << std::endl;
+        std::cerr << "Dirichlet boundary mismatch! Boundary cache: " << bound.GetDim(4) << " for pack: " << q.GetDim(4) << std::endl;
         std::cerr << "Variables with ghost zones:" << std::endl;
         ghostmap.print();
     }
@@ -73,7 +76,7 @@ void KBoundaries::DirichletImpl(std::shared_ptr<MeshBlockData<Real>> &rc, Bounda
 
     const auto &G = pmb->coords;
 
-    // printf("Freezing bounds:\n");
+    // const int q_index = ghostmap["prims.q"].first;
     const auto domain = BoundaryDomain(bface);
     pmb->par_for_bndry(
         "dirichlet_boundary", vars, domain, CC, coarse,
@@ -86,8 +89,53 @@ void KBoundaries::DirichletImpl(std::shared_ptr<MeshBlockData<Real>> &rc, Bounda
             // if (p == q_index) printf("%g ", q(p, k, j, i));
         }
     );
-    // Kokkos::fence();
-    // printf("\n\n");
+}
+
+void KBoundaries::SetDomainDirichlet(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
+{
+    std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
+    const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
+    const BoundaryFace bface = BoundaryFaceOf(domain);
+
+    using FC = Metadata::FlagCollection;
+    FC ghost_vars = FC({Metadata::FillGhost, Metadata::Conserved}) + FC({Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
+    FC main_ghosts = ghost_vars - FC({Metadata::GetUserFlag("StartupOnly")});
+    PackIndexMap ghostmap;
+    auto q = rc->PackVariables(main_ghosts, ghostmap, coarse);
+    const int q_index = ghostmap["prims.q"].first;
+    auto bound = rc->Get("bounds." + BoundaryName(bface)).data;
+
+    // We're sometimes called without any variables to sync (e.g. syncing flags, EMFs), just return
+    if (q.GetDim(4) == 0) return;
+
+    if (q.GetDim(4) != bound.GetDim(4)) {
+        std::cerr << "Dirichlet boundary mismatch! Boundary cache: " << bound.GetDim(4) << " for pack: " << q.GetDim(4) << std::endl;
+        std::cerr << "Variables with ghost zones:" << std::endl;
+        ghostmap.print();
+    }
+
+    const IndexRange vars = IndexRange{0, q.GetDim(4) - 1};
+    const bool right = !BoundaryIsInner(domain);
+
+    // Subtract off the starting index if we're on the right
+    const auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+    const int dir = BoundaryDirection(bface);
+    const int ie = (dir == 1) ? bounds.ie(IndexDomain::interior) + 1 : 0;
+    const int je = (dir == 2) ? bounds.je(IndexDomain::interior) + 1 : 0;
+    const int ke = (dir == 3) ? bounds.ke(IndexDomain::interior) + 1 : 0;
+
+    const auto &G = pmb->coords;
+
+    pmb->par_for_bndry(
+        "dirichlet_boundary", vars, domain, CC, coarse,
+        KOKKOS_LAMBDA(const int &p, const int &k, const int &j, const int &i) {
+            if (right) {
+                bound(p, k - ke, j - je, i - ie) = q(p, k, j, i);
+            } else {
+                bound(p, k, j, i) = q(p, k, j, i);
+            }
+        }
+    );
 }
 
 void KBoundaries::FreezeDirichlet(std::shared_ptr<MeshData<Real>> &md)
@@ -127,45 +175,3 @@ void KBoundaries::FreezeDirichletBlock(MeshBlockData<Real> *rc)
         }
     }
 }
-
-void KBoundaries::SetDomainDirichlet(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
-{
-    std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
-    const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
-    const BoundaryFace bface = BoundaryFaceOf(domain);
-
-    using FC = Metadata::FlagCollection;
-    FC main_ghosts = pmb->packages.AllPackages().count("B_Cleanup")
-                            ? FC({Metadata::FillGhost}) - FC({Metadata::GetUserFlag("B_Cleanup")})
-                            : FC({Metadata::FillGhost});
-    auto q = rc->PackVariables(main_ghosts, coarse);
-    auto bound = rc->Get("bounds." + BoundaryName(bface)).data;
-
-    // TODO error?
-    if (q.GetDim(4) != bound.GetDim(4)) {
-        std::cerr << "Dirichlet boundary cache mismatch! " << bound.GetDim(4) << " vs " << q.GetDim(4) << std::endl;
-    }
-
-    const IndexRange vars = IndexRange{0, q.GetDim(4) - 1};
-    const bool right = !BoundaryIsInner(domain);
-
-    // Subtract off the starting index if we're on the right
-    const auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
-    const int dir = BoundaryDirection(bface);
-    const int ie = (dir == 1) ? bounds.ie(IndexDomain::interior) + 1 : 0;
-    const int je = (dir == 2) ? bounds.je(IndexDomain::interior) + 1 : 0;
-    const int ke = (dir == 3) ? bounds.ke(IndexDomain::interior) + 1 : 0;
-
-    const auto &G = pmb->coords;
-
-    pmb->par_for_bndry(
-        "dirichlet_boundary", vars, domain, CC, coarse,
-        KOKKOS_LAMBDA(const int &p, const int &k, const int &j, const int &i) {
-            if (right) {
-                bound(p, k - ke, j - je, i - ie) = q(p, k, j, i);
-            } else {
-                bound(p, k, j, i) = q(p, k, j, i);
-            }
-        }
-    );
-}
diff --git a/kharma/current/current.cpp b/kharma/current/current.cpp
index a3e481fa..b29e9604 100644
--- a/kharma/current/current.cpp
+++ b/kharma/current/current.cpp
@@ -44,6 +44,12 @@ std::shared_ptr<KHARMAPackage> Current::Initialize(ParameterInput *pin, std::sha
     auto m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, s_fourvector);
     pkg->AddField("jcon", m);
 
+    // Temporaries
+    std::vector<int> s_vector({NVEC});
+    m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy}, s_vector);
+    pkg->AddField("Current.uvec_c", m);
+    pkg->AddField("Current.B_P_c", m);
+
     pkg->BlockUserWorkBeforeOutput = Current::FillOutput;
 
     return pkg;
@@ -57,15 +63,12 @@ TaskStatus Current::CalculateCurrent(MeshBlockData<Real> *rc0, MeshBlockData<Rea
     GridVector uvec_new = rc1->Get("prims.uvec").data;
     GridVector B_P_new = rc1->Get("prims.B").data;
     GridVector jcon = rc1->Get("jcon").data;
-    const auto& G = pmb->coords;
 
-    int n1 = pmb->cellbounds.ncellsi(IndexDomain::entire);
-    int n2 = pmb->cellbounds.ncellsj(IndexDomain::entire);
-    int n3 = pmb->cellbounds.ncellsk(IndexDomain::entire);
-    const int ndim = pmb->pmy_mesh->ndim;
+    GridVector uvec_c = rc1->Get("Current.uvec_c").data;
+    GridVector B_P_c = rc1->Get("Current.B_P_c").data;
 
-    GridVector uvec_c("uvec_c", NVEC, n3, n2, n1);
-    GridVector B_P_c("B_P_c", NVEC, n3, n2, n1);
+    const auto& G = pmb->coords;
+    const int ndim = pmb->pmy_mesh->ndim;
 
     // Calculate time-centered primitives
     // We could pack, but we just need the vectors, U1,2,3 and B1,2,3
@@ -89,6 +92,7 @@ TaskStatus Current::CalculateCurrent(MeshBlockData<Real> *rc0, MeshBlockData<Rea
     pmb->par_for("jcon_calc", n4v.s, n4v.e, kb_i.s, kb_i.e, jb_i.s, jb_i.e, ib_i.s, ib_i.e,
         KOKKOS_LAMBDA (const int &mu, const int &k, const int &j, const int &i) {
             // Get sqrt{-g}*F^{mu nu} at neighboring points
+            // TODO(BSP) this recalculates Fcon a lot...
             const Real gF0p = get_gdet_Fcon(G, uvec_new, B_P_new, 0, mu, k, j, i);
             const Real gF0m = get_gdet_Fcon(G, uvec_old, B_P_old, 0, mu, k, j, i);
             const Real gF1p = get_gdet_Fcon(G, uvec_c, B_P_c, 1, mu, k, j, i+1);
@@ -101,9 +105,9 @@ TaskStatus Current::CalculateCurrent(MeshBlockData<Real> *rc0, MeshBlockData<Rea
             // Difference: D_mu F^{mu nu} = 4 \pi j^nu
             jcon(mu, k, j, i) = 1. / (m::sqrt(4. * M_PI) * G.gdet(Loci::center, j, i)) *
                                 ((gF0p - gF0m) / dt +
-                                (gF1p - gF1m) / (2. * G.Dxc<1>(i)) +
-                                (gF2p - gF2m) / (2. * G.Dxc<2>(j)) +
-                                (gF3p - gF3m) / (2. * G.Dxc<3>(k)));
+                                (gF1p - gF1m) / (2 * G.Dxc<1>(i)) +
+                                (gF2p - gF2m) / (2 * G.Dxc<2>(j)) +
+                                (gF3p - gF3m) / (2 * G.Dxc<3>(k)));
         }
     );
 
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index e66fed60..5c7b30b8 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -57,15 +57,19 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     // driver (formerly HARM driver), and the latter supporting implicit stepping of some or all variables
     // Mostly, packages should react to e.g. the "sync_prims" option rather than the driver name
     bool do_emhd = pin->GetOrAddBoolean("emhd", "on", false);
-    std::string driver_type = pin->GetOrAddString("driver", "type", (do_emhd) ? "imex" : "kharma");
-    if (driver_type == "harm") driver_type = "kharma"; // TODO enum rather than strings?
+    std::string driver_type_s = pin->GetOrAddString("driver", "type", (do_emhd) ? "imex" : "kharma");
+    DriverType driver_type;
+    if (driver_type_s == "harm" || driver_type_s == "kharma") {
+        driver_type = DriverType::kharma;
+    } else if (driver_type_s == "imex") {
+        driver_type = DriverType::imex;
+    } else if (driver_type_s == "simple") {
+        driver_type = DriverType::simple;
+    } else {
+        throw std::invalid_argument("Driver type must be one of: simple, kharma, imex");
+    }
     params.Add("type", driver_type);
-
-    // Record whether we marked the prims or cons as "FillGhost." This also translates to whether we consider
-    // primitive or conserved state to be the ground truth when updating values in a step.
-    // Currently "imex" and "simple" drivers both sync primitive vars
-    bool sync_prims = !(driver_type == "kharma");
-    params.Add("sync_prims", sync_prims);
+    params.Add("name", driver_type_s);
 
     // Synchronize boundary variables twice. Ensures KHARMA is agnostic to the breakdown
     // of meshblocks, at the cost of twice the MPI overhead, for potentially worse strong scaling.
@@ -87,9 +91,9 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     if (recon == "donor_cell") {
         params.Add("recon", KReconstruction::Type::donor_cell);
         stencil = 1;
-    } else if (recon == "linear_vl") {
-        params.Add("recon", KReconstruction::Type::linear_vl);
-        stencil = 3;
+    // } else if (recon == "linear_vl") {
+    //     params.Add("recon", KReconstruction::Type::linear_vl);
+    //     stencil = 3;
     } else if (recon == "linear_mc") {
         params.Add("recon", KReconstruction::Type::linear_mc);
         stencil = 3;
@@ -104,7 +108,7 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
         stencil = 5;
     } else {
         std::cerr << "Reconstruction type not supported!  Supported reconstructions:" << std::endl;
-        std::cerr << "donor_cell, linear_mc, linear_vl, weno5" << std::endl;
+        std::cerr << "donor_cell, linear_mc, weno5, weno5_lower_edges, weno5_lower_poles (linear_vl coming back soon!)" << std::endl;
         throw std::invalid_argument("Unsupported reconstruction algorithm!");
     }
     // Warn if using less than 3 ghost zones w/WENO etc, 2 w/Linear, etc.
@@ -112,13 +116,42 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
         throw std::runtime_error("Not enough ghost zones for specified reconstruction!");
     }
 
-    // Field flags related to driver operation are defined outside any particular driver
-    // When using the Implicit package we need to globally distinguish implicitly and explicitly-updated variables
+    // When using the Implicit package we need to globally distinguish implicit & explicit vars
     // All independent variables should be marked one or the other,
     // so we define the flags here to avoid loading order issues
     Metadata::AddUserFlag("Implicit");
     Metadata::AddUserFlag("Explicit");
 
+    // 1. One flag to mark the primitive variables specifically
+    // (Parthenon has Metadata::Conserved already)
+    Metadata::AddUserFlag("Primitive");
+
+    // Finally, a flag for anything used (and possibly sync'd) during startup,
+    // but which should not be evolved (or more importantly, sync'd) during main stepping
+    Metadata::AddUserFlag("StartupOnly");
+
+    // This marks whether we consider primitive or conserved state to be
+    // the ground truth when updating values in a step.
+    // Currently "imex" and "simple" drivers both update primitive vars
+    bool prims_are_fundamental = driver_type != DriverType::kharma;
+    params.Add("prims_are_fundamental", prims_are_fundamental);
+
+    // Finally, we set default flags for primitive and conserved variables
+    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
+    bool sync_prims = prims_are_fundamental &&
+                        (!pin->DoesParameterExist("parthenon/mesh", "numlevel") ||
+                         pin->GetInteger("parthenon/mesh", "numlevel") == 1);
+    params.Add("sync_prims", sync_prims);
+    if (sync_prims) {
+        // If we're not in AMR, we can sync primitive variables directly
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::Restart, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::WithFluxes, Metadata::Conserved});
+    } else {
+        // If we're in AMR or using the KHARMA driver anyway, sync conserved vars
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::Restart, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
+    }
+
     return pkg;
 }
 
@@ -126,26 +159,32 @@ void KHARMADriver::AddFullSyncRegion(TaskCollection& tc, std::shared_ptr<MeshDat
 {
     const TaskID t_none(0);
 
-    bool sync_prims = pmesh->packages.Get("Driver")->Param<bool>("sync_prims");
-
     // MPI boundary exchange, done over MeshData objects/partitions at once
     // Parthenon includes physical bounds
     const int num_partitions = pmesh->DefaultNumPartitions(); // Usually 1
     TaskRegion &bound_sync = tc.AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
         auto &tl = bound_sync[i];
-        AddMPIBoundarySync(t_none, tl, md_sync, sync_prims, pmesh->multilevel);
+        AddMPIBoundarySync(t_none, tl, md_sync);
     }
 }
 
-// We take the extra bools to make this a static method, so SyncAllBounds can be static
-TaskID KHARMADriver::AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &mc1,
-                                        bool sync_prims, bool multilevel)
+TaskID KHARMADriver::AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &mc1)
 {
     Flag("AddBoundarySync");
     auto t_start_sync = t_start;
 
-    if (sync_prims) {
+    // Pull the mesh pointer from mc1 so we can be a static method
+    auto &params = mc1->GetMeshPointer()->packages.Get("Driver")->AllParams();
+    bool multilevel = mc1->GetMeshPointer()->multilevel;
+
+    // If we're "syncing primitive variables" but must exchange conserved vars to prolong/restrict them,
+    // make sure to run P->U, then sync, then U->P
+    // Note this has the side effect of filling U in some zones,
+    // which must be replaced during e.g. startup code when primitive values should be truth
+    bool prims_are_fundamental = params.Get<bool>("prims_are_fundamental");
+    bool sync_prims = params.Get<bool>("sync_prims");
+    if (prims_are_fundamental && !sync_prims) {
         TaskID t_all_ptou[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_ptou_final(0);
         int i_task = 0;
@@ -171,7 +210,7 @@ TaskID KHARMADriver::AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std:
     EndFlag();
 
     // If we're "syncing primitive variables" but just exchanged conserved variables (B, implicit, etc), we need to recover the prims
-    if (sync_prims) {
+    if (prims_are_fundamental && !sync_prims) {
         TaskID t_all_utop[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_utop_final(0);
         int i_task = 0;
@@ -194,16 +233,14 @@ TaskID KHARMADriver::AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std:
     return t_bounds;
 }
 
-TaskStatus KHARMADriver::SyncAllBounds(std::shared_ptr<MeshData<Real>> &md, bool sync_prims, bool multilevel)
+TaskStatus KHARMADriver::SyncAllBounds(std::shared_ptr<MeshData<Real>> &md)
 {
     Flag("SyncAllBounds");
     TaskID t_none(0);
 
-    // 1. Sync MPI bounds
-    // This call syncs the primitive variables when using the ImEx driver, and cons
     TaskCollection tc;
     auto tr = tc.AddRegion(1);
-    AddMPIBoundarySync(t_none, tr[0], md, sync_prims, multilevel);
+    AddMPIBoundarySync(t_none, tr[0], md);
     while (!tr.Execute());
 
     EndFlag();
diff --git a/kharma/driver/kharma_driver.hpp b/kharma/driver/kharma_driver.hpp
index 7bcc8d56..18c943c5 100644
--- a/kharma/driver/kharma_driver.hpp
+++ b/kharma/driver/kharma_driver.hpp
@@ -40,6 +40,9 @@
 
 using namespace parthenon;
 
+// See Initialize()
+enum class DriverType{kharma, imex, simple};
+
 /**
  * This is the "Driver" class for KHARMA.
  * A Driver object orchestrates everything that has to be done to a mesh to constitute a step.
@@ -124,8 +127,7 @@ class KHARMADriver : public MultiStageDriver {
          * This sequence is used identically in several places, so it makes sense
          * to define once and use elsewhere.
          */
-        static TaskID AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &md,
-                                         bool sync_prims=false, bool multilevel=false);
+        static TaskID AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &md);
 
         /**
          * Calculate the fluxes in each direction
@@ -136,9 +138,9 @@ class KHARMADriver : public MultiStageDriver {
          * Single call to sync all boundary conditions (MPI/internal and domain/physical boundaries)
          * Used anytime boundary sync is needed outside the usual loop of steps.
          * 
-         * Only use this as a task each step when debugging!
+         * Only use this during the run if you're debugging!
          */
-        static TaskStatus SyncAllBounds(std::shared_ptr<MeshData<Real>> &md, bool sync_prims=false, bool multilevel=false);
+        static TaskStatus SyncAllBounds(std::shared_ptr<MeshData<Real>> &md);
 
         // TODO swapped versions of these
         /**
diff --git a/kharma/driver/kharma_step.cpp b/kharma/driver/kharma_step.cpp
index cdd0380d..86730060 100644
--- a/kharma/driver/kharma_step.cpp
+++ b/kharma/driver/kharma_step.cpp
@@ -55,15 +55,19 @@
 
 TaskCollection KHARMADriver::MakeTaskCollection(BlockList_t &blocks, int stage)
 {
-    std::string driver_type = blocks[0]->packages.Get("Driver")->Param<std::string>("type");
-    Flag("MakeTaskCollection_"+driver_type);
+    DriverType driver_type = blocks[0]->packages.Get("Driver")->Param<DriverType>("type");
+    Flag("MakeTaskCollection");
     TaskCollection tc;
-    if (driver_type == "imex") {
+    switch (driver_type) {
+    case DriverType::kharma:
+        tc = MakeDefaultTaskCollection(blocks, stage);
+        break;
+    case DriverType::imex:
         tc = MakeImExTaskCollection(blocks, stage);
-    } else if (driver_type == "simple") {
+        break;
+    case DriverType::simple:
         tc = MakeSimpleTaskCollection(blocks, stage);
-    } else {
-        tc = MakeDefaultTaskCollection(blocks, stage);
+        break;
     }
     EndFlag();
     return tc;
@@ -100,22 +104,25 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
             if (use_jcon) {
                 // At the end of the step, updating "mbd_sub_step_final" updates the base
                 // So we have to keep a copy at the beginning to calculate jcon
-                pmb->meshblock_data.Add("preserve", base);
-                // Above only copies on allocate -- ensure we copy every step
-                Copy<MeshBlockData<Real>>({}, base.get(), pmb->meshblock_data.Get("preserve").get());
+                // We have to explicitly copy, since after the first step `Add`==`Get`
+                Copy<MeshBlockData<Real>>({}, base.get(), pmb->meshblock_data.Add("preserve").get());
             }
         }
     }
+    //Copy<MeshData<Real>>({}, pmesh->mesh_data.Get().get(), pmesh->mesh_data.Add("preserve").get());
 
     Flag("MakeTaskCollection::fluxes");
 
-    // Build the list of variables we'll be syncing during "normal" boundary exchanges.
-    // This *excludes* anything related to divergence cleaning (which have their own syncs during the clean),
-    // and the EMF (or other edge variables) which are really part of the flux correction sync
-    using FC = Metadata::FlagCollection;
-    auto sync_flags = FC(Metadata::FillGhost) - FC(Metadata::Edge);
-    if (pkgs.count("B_Cleanup")) sync_flags = sync_flags - FC(Metadata::GetUserFlag("B_Cleanup"));
-    std::vector<std::string> sync_vars = KHARMA::GetVariableNames(&(pmesh->packages), sync_flags);
+    // TODO when we can make shallow copies work, copy based on this list for MPI syncs
+    // static std::vector<std::string> sync_vars;
+    // if (sync_vars.size() == 0) {
+    //     // Build the list of variables we'll be syncing during "normal" boundary exchanges.
+    //     // This *excludes* anything related to divergence cleaning (which have their own syncs during the clean),
+    //     // and the EMF (or other edge variables) which are really part of the flux correction sync
+    //     using FC = Metadata::FlagCollection;
+    //     auto sync_flags = FC(Metadata::FillGhost) - FC(Metadata::Edge) - FC(Metadata::GetUserFlag("StartupOnly"));
+    //     sync_vars = KHARMA::GetVariableNames(&(pmesh->packages), sync_flags);
+    // }
 
     // Big packed region: get and apply new fluxes on all the zones we control
     const int num_partitions = pmesh->DefaultNumPartitions();
@@ -131,9 +138,11 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
         auto &md_sub_step_init  = pmesh->mesh_data.GetOrAdd(integrator->stage_name[stage - 1], i);
         auto &md_sub_step_final = pmesh->mesh_data.GetOrAdd(integrator->stage_name[stage], i);
         auto &md_flux_src       = pmesh->mesh_data.GetOrAdd("dUdt", i);
+        // TODO this doesn't work still for some reason, even if the shallow copy has all variables
+        auto &md_sync = md_sub_step_final; //pmesh->mesh_data.AddShallow("sync", md_sub_step_final);
 
         // Start receiving flux corrections and ghost cells
-        auto t_start_recv_bound = tl.AddTask(t_none, parthenon::StartReceiveBoundBufs<parthenon::BoundaryType::any>, md_sub_step_final);
+        auto t_start_recv_bound = tl.AddTask(t_none, parthenon::StartReceiveBoundBufs<parthenon::BoundaryType::any>, md_sync);
         auto t_start_recv_flux = t_start_recv_bound;
         if (pmesh->multilevel || use_b_ct)
             t_start_recv_flux = tl.AddTask(t_none, parthenon::StartReceiveFluxCorrections, md_sub_step_init);
@@ -148,7 +157,6 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
         auto t_flux_bounds = t_fluxes;
         if (pmesh->multilevel || use_b_ct) {
             auto t_emf = t_fluxes;
-            // TODO this MPI sync should be bundled into fluxcorr
             if (use_b_ct) {
                 // Pull out a container of only EMF to synchronize
                 auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
@@ -198,10 +206,10 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
         auto t_update = t_update_c;
         if (use_b_ct) {
             t_update = tl.AddTask(t_update_c, WeightedSumDataFace,
-                                  std::vector<MetadataFlag>({Metadata::Independent, Metadata::Face}),
-                                  md_sub_step_final.get(), md_flux_src.get(),
-                                  1.0, integrator->beta[stage-1] * integrator->dt,
-                                  md_sub_step_final.get());
+                                    std::vector<MetadataFlag>({Metadata::Independent, Metadata::Face}),
+                                    md_sub_step_final.get(), md_flux_src.get(),
+                                    1.0, integrator->beta[stage-1] * integrator->dt,
+                                    md_sub_step_final.get());
         }
 
         // UtoP needs a guess in order to converge, so we copy in sc0
@@ -213,22 +221,17 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
                                                 md_sub_step_init.get(), md_sub_step_final.get());
         }
 
-        // TODO the pointers here are weird
-        //auto &md_sync = pmesh->mesh_data.AddShallow("sync", md_sub_step_final, sync_vars);
-        //md_sync->SetMeshPointer(pmesh);
-        KHARMADriver::AddMPIBoundarySync(t_copy_prims, tl, md_sub_step_final);
+        KHARMADriver::AddMPIBoundarySync(t_copy_prims, tl, md_sync);
     }
 
     EndFlag();
     Flag("MakeTaskCollection::fixes");
 
-    // Smaller meshblock region.  This gets touchy because we want to keep ghost zones updated,
-    // so very commented
+    // Smaller meshblock region.  This gets touchy because we want to keep ghost zones updated, so it's very commented
     TaskRegion &async_region = tc.AddRegion(blocks.size());
     for (int i = 0; i < blocks.size(); i++) {
         auto &pmb = blocks[i];
         auto &tl = async_region[i];
-        //auto &base = pmb->meshblock_data.Get();
         auto &mbd_sub_step_init = pmb->meshblock_data.Get(integrator->stage_name[stage-1]);
         auto &mbd_sub_step_final = pmb->meshblock_data.Get(integrator->stage_name[stage]);
 
@@ -305,12 +308,10 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
     // B Field cleanup: this is a separate solve so it's split out
     // It's also really slow when enabled so we don't care too much about limiting regions, etc.
     if (use_b_cleanup && (stage == integrator->nstages) && B_Cleanup::CleanupThisStep(pmesh, tm.ncycle)) {
-        TaskRegion &cleanup_region = tc.AddRegion(num_partitions);
-        for (int i = 0; i < num_partitions; i++) {
-            auto &tl = cleanup_region[i];
-            auto &md_sub_step_final = pmesh->mesh_data.GetOrAdd(integrator->stage_name[stage], i);
-            tl.AddTask(t_none, B_Cleanup::CleanupDivergence, md_sub_step_final);
-        }
+        TaskRegion &cleanup_region = tc.AddRegion(1);
+        auto &tl = cleanup_region[0];
+        auto &md_sub_step_final = pmesh->mesh_data.Get(integrator->stage_name[stage]);
+        tl.AddTask(t_none, B_Cleanup::CleanupDivergence, md_sub_step_final);
     }
 
     // Second boundary sync:
@@ -320,9 +321,8 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
     const auto &two_sync = pkgs.at("Driver")->Param<bool>("two_sync");
     if (two_sync) {
         auto &md_sub_step_final = pmesh->mesh_data.GetOrAdd(integrator->stage_name[stage], 0);
-        // TODO this gets weird if we partition
-        //auto &md_sync = pmesh->mesh_data.AddShallow("sync", md_sub_step_final, sync_vars);
-        KHARMADriver::AddFullSyncRegion(tc, md_sub_step_final);
+        auto &md_sync = md_sub_step_final; //pmesh->mesh_data.AddShallow("sync", md_sub_step_final);
+        KHARMADriver::AddFullSyncRegion(tc, md_sync);
     }
 
     EndFlag();
diff --git a/kharma/driver/simple_step.cpp b/kharma/driver/simple_step.cpp
index 2d68b8f0..ea30839e 100644
--- a/kharma/driver/simple_step.cpp
+++ b/kharma/driver/simple_step.cpp
@@ -60,7 +60,7 @@ TaskCollection KHARMADriver::MakeSimpleTaskCollection(BlockList_t &blocks, int s
         }
     }
 
-    //auto t_heating_test = tl.AddTask(t_none, Electrons::ApplyHeating, base.get());
+
 
     // Big synchronous region: get & apply fluxes to advance the fluid state
     // num_partitions is nearly always 1
diff --git a/kharma/electrons/electrons.cpp b/kharma/electrons/electrons.cpp
index dd73df5c..ff4b2dff 100644
--- a/kharma/electrons/electrons.cpp
+++ b/kharma/electrons/electrons.cpp
@@ -34,6 +34,7 @@
 #include "electrons.hpp"
 
 #include "decs.hpp"
+#include "kharma_driver.hpp"
 #include "flux.hpp"
 #include "grmhd.hpp"
 #include "kharma.hpp"
@@ -115,21 +116,21 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
         }
     }
 
-    // Default implicit iff GRMHD is done implicitly. TODO can we do explicit?
+    // Evolving e- implicitly is not tested.  Shouldn't be necessary even in EMHD
     auto& driver = packages->Get("Driver")->AllParams();
-    auto driver_type = driver.Get<std::string>("type");
-    bool grmhd_implicit = packages->Get("GRMHD")->Param<bool>("implicit"); // usually false
-    bool implicit_e = (driver_type == "imex" && pin->GetOrAddBoolean("electrons", "implicit", grmhd_implicit)); // so this false too
+    auto driver_type = driver.Get<DriverType>("type");
+    bool implicit_e = (driver_type == DriverType::imex && pin->GetOrAddBoolean("electrons", "implicit", false));
     params.Add("implicit", implicit_e);
 
-    Metadata::AddUserFlag("Electrons");
+    Metadata::AddUserFlag("Elec");
     MetadataFlag areWeImplicit = (implicit_e) ? Metadata::GetUserFlag("Implicit")
                                               : Metadata::GetUserFlag("Explicit");
+    std::vector<MetadataFlag> flags_elec = {Metadata::Cell, areWeImplicit, Metadata::GetUserFlag("Elec")};
 
-    std::vector<MetadataFlag> flags_cons = {Metadata::Real, Metadata::Cell, Metadata::Independent, Metadata::Conserved, Metadata::Conserved,
-                                            Metadata::WithFluxes, Metadata::FillGhost, areWeImplicit, Metadata::GetUserFlag("Electrons")};
-    std::vector<MetadataFlag> flags_prim = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::GetUserFlag("Primitive"),
-                                            Metadata::Restart, areWeImplicit, Metadata::GetUserFlag("Electrons")};
+    auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
+    flags_prim.insert(flags_prim.end(), flags_elec.begin(), flags_elec.end());
+    auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
+    flags_cons.insert(flags_cons.end(), flags_elec.begin(), flags_elec.end());
 
     // Total entropy, used to track changes
     int nKs = 1;
@@ -201,7 +202,7 @@ TaskStatus InitElectrons(std::shared_ptr<MeshBlockData<Real>>& rc, ParameterInpu
 
     // Need to distinguish KTOT from the other variables, so we record which it is
     PackIndexMap prims_map;
-    auto& e_P = rc->PackVariables({Metadata::GetUserFlag("Electrons"), Metadata::GetUserFlag("Primitive")}, prims_map);
+    auto& e_P = rc->PackVariables({Metadata::GetUserFlag("Elec"), Metadata::GetUserFlag("Primitive")}, prims_map);
     const int ktot_index = prims_map["prims.Ktot"].first;
     // Just need these two from the rest of Prims
     GridScalar rho = rc->Get("prims.rho").data;
@@ -238,8 +239,8 @@ void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     auto pmb = rc->GetBlockPointer();
 
     // No need for a "map" here, we just want everything that fits these
-    auto& e_P = rc->PackVariables({Metadata::GetUserFlag("Electrons"), Metadata::GetUserFlag("Primitive")});
-    auto& e_U = rc->PackVariables({Metadata::GetUserFlag("Electrons"), Metadata::Conserved});
+    auto& e_P = rc->PackVariables({Metadata::GetUserFlag("Elec"), Metadata::GetUserFlag("Primitive")});
+    auto& e_U = rc->PackVariables({Metadata::GetUserFlag("Elec"), Metadata::Conserved});
     // And then the local density
     GridScalar rho_U = rc->Get("cons.rho").data;
 
diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp
index 80da5f4c..65ea482b 100644
--- a/kharma/emhd/emhd.cpp
+++ b/kharma/emhd/emhd.cpp
@@ -124,26 +124,28 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // Only enable limits internally if we're actually doing EMHD
     params.Add("enable_emhd_limits", enable_emhd_limits);
 
-    // Parthenon adds a flag consisting of just the package name,
-    // but it's useless to us since we want just the important variables to carry a name
-    Metadata::AddUserFlag("EMHDVar");
-
     // General options for primitive and conserved scalar variables in ImEx driver
     // EMHD is supported only with imex driver and implicit evolution,
     // synchronizing primitive variables
-    Metadata m_con  = Metadata({Metadata::Real, Metadata::Cell, Metadata::Independent, Metadata::GetUserFlag("Implicit"),
-                                Metadata::WithFluxes, Metadata::Conserved, Metadata::Conserved, Metadata::GetUserFlag("EMHDVar")});
-    Metadata m_prim = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::GetUserFlag("Implicit"),
-                                Metadata::Restart, Metadata::FillGhost, Metadata::GetUserFlag("Primitive"), Metadata::GetUserFlag("EMHDVar")});
+    Metadata::AddUserFlag("EMHDVar"); // "EMHD" name now taken by Parthenon for general flag, we want this one specific
+    std::vector<MetadataFlag> emhd_flags = {Metadata::Cell, Metadata::GetUserFlag("Implicit"), Metadata::GetUserFlag("EMHD")};
+
+    auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
+    flags_prim.insert(flags_prim.end(), emhd_flags.begin(), emhd_flags.end());
+    auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
+    flags_cons.insert(flags_cons.end(), emhd_flags.begin(), emhd_flags.end());
+
+    Metadata m_cons = Metadata(flags_cons);
+    Metadata m_prim = Metadata(flags_prim);
 
     // Heat conduction
     if (conduction) {
-        pkg->AddField("cons.q", m_con);
+        pkg->AddField("cons.q", m_cons);
         pkg->AddField("prims.q", m_prim);
     }
     // Pressure anisotropy
     if (viscosity) {
-        pkg->AddField("cons.dP", m_con);
+        pkg->AddField("cons.dP", m_cons);
         pkg->AddField("prims.dP", m_prim);
     }
 
@@ -156,19 +158,15 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     Metadata m_temp_vec = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::GetUserFlag("EMHDTemporary")}, fourv);
     pkg->AddField("ucov", m_temp_vec);
 
-    // This works similarly to the fflag --
+    // This works similarly to the fflag:
     // we register zones where limits on q and dP are hit
     Metadata m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy});
     pkg->AddField("eflag", m);
 
     // Callbacks
 
-    // UtoP is *only* for boundary syncs and output, only register that function
-    // TODO support syncing cons someday
-    //pkg->BoundaryUtoP = EMHD::BlockUtoP;
-
-    // For now, sync primitive variables & call PtoU on physical boundaries
-    pkg->BoundaryPtoU = EMHD::BlockPtoU;
+    // UtoP function specifically for boundary sync (KHARMA must sync cons for AMR) and output
+    pkg->BoundaryUtoP = EMHD::BlockUtoP;
 
     // Add all explicit source terms -- implicit terms are called from Implicit::Step
     pkg->AddSource = EMHD::AddSource;
@@ -183,37 +181,37 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
 
 // TODO is relying on GRMHD P variables a mistake here?  They're available on physical boundaries at least,
 // maybe not internal?
-// void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
-// {
-//     auto pmb = rc->GetBlockPointer();
-
-//     PackIndexMap prims_map, cons_map;
-//     auto U_E = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map);
-//     auto P = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("Primitive")}, prims_map);
-//     const VarMap m_p(prims_map, false), m_u(cons_map, true);
-
-//     const auto& G = pmb->coords;
-
-//     auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
-//     const IndexRange ib = bounds.GetBoundsI(domain);
-//     const IndexRange jb = bounds.GetBoundsJ(domain);
-//     const IndexRange kb = bounds.GetBoundsK(domain);
-
-//     pmb->par_for("UtoP_EMHD", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
-//         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
-//             const Real gamma = GRMHD::lorentz_calc(G, P, m_p, k, j, i, Loci::center);
-//             const Real inv_alpha = m::sqrt(-G.gcon(Loci::center, j, i, 0, 0));
-//             const Real ucon0 = gamma * inv_alpha;
-
-//             // Update the primitive EMHD fields
-//             if (m_p.Q >= 0)
-//                 P(m_p.Q, k, j, i) = U_E(m_u.Q, k, j, i) / (ucon0 * G.gdet(Loci::center, j, i));
-//             if (m_p.DP >= 0)
-//                 P(m_p.DP, k, j, i) = U_E(m_u.DP, k, j, i) / (ucon0 * G.gdet(Loci::center, j, i));
-//         }
-//     );
-//     Kokkos::fence();
-// }
+void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
+{
+    auto pmb = rc->GetBlockPointer();
+
+    PackIndexMap prims_map, cons_map;
+    auto U_E = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("EMHD"), Metadata::Conserved}, cons_map);
+    auto P = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("Primitive")}, prims_map);
+    const VarMap m_p(prims_map, false), m_u(cons_map, true);
+
+    const auto& G = pmb->coords;
+
+    auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+    const IndexRange ib = bounds.GetBoundsI(domain);
+    const IndexRange jb = bounds.GetBoundsJ(domain);
+    const IndexRange kb = bounds.GetBoundsK(domain);
+
+    pmb->par_for("UtoP_EMHD", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
+            const Real gamma = GRMHD::lorentz_calc(G, P, m_p, k, j, i, Loci::center);
+            const Real inv_alpha = m::sqrt(-G.gcon(Loci::center, j, i, 0, 0));
+            const Real ucon0 = gamma * inv_alpha;
+
+            // Update the primitive EMHD fields
+            if (m_p.Q >= 0)
+                P(m_p.Q, k, j, i) = U_E(m_u.Q, k, j, i) / (ucon0 * G.gdet(Loci::center, j, i));
+            if (m_p.DP >= 0)
+                P(m_p.DP, k, j, i) = U_E(m_u.DP, k, j, i) / (ucon0 * G.gdet(Loci::center, j, i));
+        }
+    );
+    Kokkos::fence();
+}
 
 void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
@@ -231,7 +229,7 @@ void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     const IndexRange jb = bounds.GetBoundsJ(domain);
     const IndexRange kb = bounds.GetBoundsK(domain);
 
-    pmb->par_for("UtoP_EMHD", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+    pmb->par_for("PtoU_EMHD", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             const Real gamma = GRMHD::lorentz_calc(G, P, m_p, k, j, i, Loci::center);
             const Real inv_alpha = m::sqrt(-G.gcon(Loci::center, j, i, 0, 0));
diff --git a/kharma/flux/flux.cpp b/kharma/flux/flux.cpp
index 6f474d13..c00e54b0 100644
--- a/kharma/flux/flux.cpp
+++ b/kharma/flux/flux.cpp
@@ -52,7 +52,7 @@ std::shared_ptr<KHARMAPackage> Flux::Initialize(ParameterInput *pin, std::shared
     // That's what this function is for.
     int nvar = KHARMA::PackDimension(packages.get(), Metadata::WithFluxes);
     std::vector<int> s_flux({nvar});
-    // TODO optionally move all these to faces? Not important yet, no output, more memory
+    // TODO optionally move all these to faces? Not important yet, & faces have no output, more memory
     std::vector<MetadataFlag> flags_flux = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy};
     Metadata m = Metadata(flags_flux, s_flux);
     pkg->AddField("Flux.Pr", m);
@@ -62,7 +62,6 @@ std::shared_ptr<KHARMAPackage> Flux::Initialize(ParameterInput *pin, std::shared
     pkg->AddField("Flux.Fr", m);
     pkg->AddField("Flux.Fl", m);
 
-    // TODO could formally move this to face
     std::vector<int> s_vector({NVEC});
     std::vector<MetadataFlag> flags_speed = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy};
     m = Metadata(flags_speed, s_vector);
@@ -70,13 +69,16 @@ std::shared_ptr<KHARMAPackage> Flux::Initialize(ParameterInput *pin, std::shared
     pkg->AddField("Flux.cmin", m);
 
     // Preserve all velocities at faces, for upwinded constrained transport
-    if (packages->AllPackages().count("B_CT")) {
+    if (packages->AllPackages().count("B_CT")) { // TODO & GS05_c
         std::vector<MetadataFlag> flags_vel = {Metadata::Real, Metadata::Face, Metadata::Derived, Metadata::OneCopy};
         m = Metadata(flags_vel, s_vector);
         pkg->AddField("Flux.vr", m);
         pkg->AddField("Flux.vl", m);
     }
 
+    // We register the geometric (\Gamma*T) source here
+    pkg->AddSource = Flux::AddGeoSource;
+
     EndFlag();
     return pkg;
 }
diff --git a/kharma/grmhd/grmhd.cpp b/kharma/grmhd/grmhd.cpp
index 8e82e224..153cc5a4 100644
--- a/kharma/grmhd/grmhd.cpp
+++ b/kharma/grmhd/grmhd.cpp
@@ -46,6 +46,7 @@
 #include "gr_coordinates.hpp"
 #include "grmhd_functions.hpp"
 #include "kharma.hpp"
+#include "kharma_driver.hpp"
 
 #include <memory>
 
@@ -106,7 +107,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // updates for GRMHD vars is useful for testing, or if adding just a couple of implicit variables
     // Doing EGRMHD requires implicit evolution of GRMHD variables, of course
     auto& driver = packages->Get("Driver")->AllParams();
-    auto implicit_grmhd = (driver.Get<std::string>("type") == "imex") &&
+    auto implicit_grmhd = (driver.Get<DriverType>("type") == DriverType::imex) &&
                           (pin->GetBoolean("emhd", "on") || pin->GetOrAddBoolean("GRMHD", "implicit", false));
     params.Add("implicit", implicit_grmhd);
 
@@ -126,38 +127,25 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // closely-related size (for "Face" and "Edge" fields)
 
     // Add flags to distinguish groups of fields.
-    // 1. One flag to mark the primitive variables specifically
-    // (Parthenon has Metadata::Conserved already, but that has special meanings for it)
-    Metadata::AddUserFlag("Primitive");
-    // 2. And one for hydrodynamics (everything we directly handle in this package)
+    // Hydrodynamics (everything we directly handle in this package)
     Metadata::AddUserFlag("HD");
-    // 3. And one for magnetohydrodynamics
-    // (all HD fields plus B field, which we'll need to make use of)
+    // Magnetohydrodynamics (all HD fields plus B field, which we'll need to make use of)
     Metadata::AddUserFlag("MHD");
     // Mark whether to evolve our variables via the explicit or implicit step inside the driver
     MetadataFlag areWeImplicit = (implicit_grmhd) ? Metadata::GetUserFlag("Implicit")
                                                   : Metadata::GetUserFlag("Explicit");
-
-    std::vector<MetadataFlag> flags_prim = {Metadata::Real, Metadata::Cell, Metadata::Derived, areWeImplicit,
-                                            Metadata::Restart, Metadata::GetUserFlag("Primitive"),
-                                            Metadata::GetUserFlag("HD"), Metadata::GetUserFlag("MHD")};
-    std::vector<MetadataFlag> flags_cons = {Metadata::Real, Metadata::Cell, Metadata::Independent, areWeImplicit,
-                                            Metadata::WithFluxes, Metadata::Conserved, Metadata::Conserved,
-                                            Metadata::GetUserFlag("HD"), Metadata::GetUserFlag("MHD")};
-
-    bool sync_prims = packages->Get("Driver")->Param<bool>("sync_prims");
-    if (!sync_prims) { // Normal operation
-        // As mentioned elsewhere, KHARMA treats the conserved variables as the independent ones,
-        // and the primitives as "Derived"
-        // Primitives are still used for reconstruction, physical boundaries, and output, and are
-        // generally the easier to understand quantities
-        // TODO can we not sync prims if we're using two_sync?
-        flags_cons.push_back(Metadata::FillGhost);
-        flags_prim.push_back(Metadata::FillGhost);
-    } else { // Treat primitive vars as fundamental
-        // When evolving (E)GRMHD implicitly, we just mark the primitive variables to be synchronized.
-        // This won't work for AMR, but it fits much better with the implicit solver, which expects
-        // primitive variable inputs and produces primitive variable results.
+    std::vector<MetadataFlag> flags_grmhd = {Metadata::Cell, areWeImplicit, Metadata::GetUserFlag("HD"), Metadata::GetUserFlag("MHD")};
+
+    auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
+    flags_prim.insert(flags_prim.end(), flags_grmhd.begin(), flags_grmhd.end());
+    auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
+    flags_cons.insert(flags_cons.end(), flags_grmhd.begin(), flags_grmhd.end());
+
+    // We must additionally fill ghost zones of primitive variables in GRMHD, to seed the solver
+    // Only necessary to add here if syncing conserved vars
+    // Note some startup behavior relies on having the GRHD prims marked for syncing,
+    // so disable sync_utop_seed at your peril
+    if (!driver.Get<bool>("sync_prims") && pin->GetOrAddBoolean("GRMHD", "sync_utop_seed", true)) {
         flags_prim.push_back(Metadata::FillGhost);
     }
 
@@ -189,16 +177,11 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // Generally, see the headers for function descriptions.
 
     //pkg->BlockUtoP // Taken care of by the inverter package since it's hard to do
-    // There's no "Flux" package, so we register the geometric (\Gamma*T) source here. I think it makes sense.
-    pkg->AddSource = Flux::AddGeoSource;
 
     // On physical boundaries, even if we've sync'd both, respect the application to primitive variables
-    pkg->BoundaryPtoU = Flux::BlockPtoUMHD;
+    pkg->DomainBoundaryPtoU = Flux::BlockPtoUMHD;
 
-    // Finally, the StateDescriptor/Package object determines the Callbacks Parthenon makes to
-    // a particular package -- that is, some portion of the things that the package needs done
-    // at each step, which must be done at specific times.
-    // See the header files defining each of these functions for their purpose and call context.
+    // AMR-related
     pkg->CheckRefinementBlock    = GRMHD::CheckRefinement;
     pkg->EstimateTimestepBlock   = GRMHD::EstimateTimestep;
     pkg->PostStepDiagnosticsMesh = GRMHD::PostStepDiagnostics;
@@ -222,8 +205,9 @@ Real EstimateTimestep(MeshBlockData<Real> *rc)
     auto& cmax = rc->Get("Flux.cmax").data;
     auto& cmin = rc->Get("Flux.cmin").data;
 
-    // TODO: move timestep limiter into an override of SetGlobalTimestep
-    // TODO: keep location of the max, or be able to look it up in diagnostics
+    // TODO: move timestep limiters into KHARMADriver::SetGlobalTimestep
+    // TODO: option to keep location (in embedding coords) of zone which sets step.
+    //       (this will likely be very slow, but we should do it anyway)
 
     auto& globals = pmb->packages.Get("Globals")->AllParams();
     const auto& grmhd_pars = pmb->packages.Get("GRMHD")->AllParams();
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index 62f70e84..e538350c 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -66,10 +66,10 @@ std::shared_ptr<KHARMAPackage> Inverter::Initialize(ParameterInput *pin, std::sh
     // Flag denoting UtoP inversion failures
     // Only needed if we're actually calling UtoP, but always allocated as it's retrieved often
     // Needs boundary sync if treating primitive variables as fundamental
-    bool sync_prims = packages->Get("Driver")->Param<bool>("sync_prims");
+    bool prims_are_fundamental = packages->Get("Driver")->Param<bool>("prims_are_fundamental");
     bool implicit_grmhd = packages->Get("GRMHD")->Param<bool>("implicit");
     Metadata m;
-    if (sync_prims && !implicit_grmhd) {
+    if (prims_are_fundamental && !implicit_grmhd) {
         m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::FillGhost});
     } else {
         m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy});
@@ -151,7 +151,7 @@ TaskStatus Inverter::PostStepDiagnostics(const SimTime& tm, MeshData<Real> *md)
     // Debugging/diagnostic info about floor and inversion flags
     // TODO grab the total and die on too many
     if (flag_verbose >= 1) {
-        // TODO this should move into BlockUtoP when everything goes MeshData
+        // TODO this should move into UtoP when everything goes MeshData
         Reductions::StartFlagReduce(md, "pflag", Inverter::status_names, IndexDomain::interior, false, 1);
         Reductions::CheckFlagReduceAndPrintHits(md, "pflag", Inverter::status_names, IndexDomain::interior, false, 1);
     }
diff --git a/kharma/kharma.cpp b/kharma/kharma.cpp
index 07ec8d42..65748f00 100644
--- a/kharma/kharma.cpp
+++ b/kharma/kharma.cpp
@@ -209,6 +209,10 @@ void KHARMA::FixParameters(ParameterInput *pin)
                     pin->GetOrAddReal("coordinates", "r_in", tmp_coords.X1_to_embed(x1min));
                 }
             }
+        } else {
+            // Add the coordinate versions if they don't exist (usually restarts)
+            pin->GetOrAddReal("coordinates", "r_in", tmp_coords.X1_to_embed(pin->GetReal("parthenon/mesh", "x1min")));
+            pin->GetOrAddReal("coordinates", "r_out", tmp_coords.X1_to_embed(pin->GetReal("parthenon/mesh", "x1max")));
         }
 
         // If the simulation domain extends inside the EH, we change some boundary options
@@ -241,7 +245,8 @@ void KHARMA::FixParameters(ParameterInput *pin)
     //             << tmp_coords.stopx(1) << " "
     //             << tmp_coords.stopx(2) << " "
     //             << tmp_coords.stopx(3) << std::endl;
-    // TODO(BSP) is this worth looping?  I say probably no.
+    // In any coordinate system which sets boundaries (i.e. not Cartesian),
+    // stopx > startx > 0. In Cartesian xNmin/xNmax are required
     if (tmp_coords.startx(1) >= 0)
         pin->GetOrAddReal("parthenon/mesh", "x1min", tmp_coords.startx(1));
     if (tmp_coords.stopx(1) >= 0)
diff --git a/kharma/kharma.hpp b/kharma/kharma.hpp
index a772e9f5..17e1deb3 100644
--- a/kharma/kharma.hpp
+++ b/kharma/kharma.hpp
@@ -117,16 +117,16 @@ inline bool FieldIsOutput(ParameterInput *pin, std::string name)
  */
 inline int PackDimension(Packages_t* packages, Metadata::FlagCollection fc)
 {
-    // We want to exclude anything specific to B field cleanup & not used elsewhere
-    // (confusingly, this isn't *necessarily* everything in the B_Cleanup package)
-    if (packages->AllPackages().count("B_Cleanup"))
-        fc = fc - Metadata::GetUserFlag("B_Cleanup");
+    // We want to exclude anything specific to startup processes e.g. B field cleanup,
+    // & not used elsewhere
+    if (packages->AllPackages().count("StartupOnly"))
+        fc = fc - Metadata::GetUserFlag("StartupOnly");
 
     // Count dimensions (1 for scalars + vector lengths) of each package's variables
     int nvar = 0;
     for (auto pkg : packages->AllPackages()) {
         nvar += pkg.second->GetPackDimension(fc);
-        std::cout << pkg.first << " variables: " << pkg.second->GetPackDimension(fc) << std::endl;
+        // std::cout << pkg.first << " variables: " << pkg.second->GetPackDimension(fc) << std::endl;
     }
     return nvar;
 }
diff --git a/kharma/kharma_package.cpp b/kharma/kharma_package.cpp
index 68dcb66e..bb0b7aea 100644
--- a/kharma/kharma_package.cpp
+++ b/kharma/kharma_package.cpp
@@ -35,7 +35,6 @@
 
 #include "types.hpp"
 
-// PHYSICS-RELATED
 // TODO take & accumulate TaskStatus?  Useful for ::incomplete if we ever want to do that
 // TODO continue meshification until all is mesh
 
@@ -57,8 +56,7 @@ TaskStatus Packages::FixFlux(MeshData<Real> *md)
 TaskStatus Packages::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     Flag("BlockUtoP");
-    // Apply UtoP from B_CT, as this fills B primitive var for the GRMHD UtoP
-    // TODO could maybe call this in Inverter, or handle all ordering there, or something
+    // Apply UtoP from B_CT first, as this fills cons.B at cell centers
     auto pmb = rc->GetBlockPointer();
     auto pkgs = pmb->packages.AllPackages();
     if (pkgs.count("B_CT")) {
@@ -105,14 +103,18 @@ TaskStatus Packages::BoundaryUtoP(MeshBlockData<Real> *rc, IndexDomain domain, b
     return TaskStatus::complete;
 }
 
-TaskStatus Packages::BoundaryPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
+TaskStatus Packages::BoundaryPtoUElseUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
-    Flag("BoundaryPtoU");
+    Flag("DomainBoundaryLockstep");
     auto kpackages = rc->GetBlockPointer()->packages.AllPackagesOfType<KHARMAPackage>();
     for (auto kpackage : kpackages) {
-        if (kpackage.second->BoundaryPtoU != nullptr) {
-            Flag("BoundaryPtoU_"+kpackage.first);
-            kpackage.second->BoundaryPtoU(rc, domain, coarse);
+        if (kpackage.second->DomainBoundaryPtoU != nullptr) {
+            Flag("DomainBoundaryPtoU_"+kpackage.first);
+            kpackage.second->DomainBoundaryPtoU(rc, domain, coarse);
+            EndFlag();
+        } else if (kpackage.second->BoundaryUtoP != nullptr) {
+            Flag("DomainBoundaryUtoP_"+kpackage.first);
+            kpackage.second->BoundaryUtoP(rc, domain, coarse);
             EndFlag();
         }
     }
diff --git a/kharma/kharma_package.hpp b/kharma/kharma_package.hpp
index a3f02620..6075ea4a 100644
--- a/kharma/kharma_package.hpp
+++ b/kharma/kharma_package.hpp
@@ -62,15 +62,16 @@ class KHARMAPackage : public StateDescriptor {
         // rather, they are called on zone center values once per step only.
         std::function<void(MeshBlockData<Real>*, IndexDomain, bool)> BlockUtoP = nullptr;
         std::function<void(MeshData<Real>*, IndexDomain, bool)> MeshUtoP = nullptr;
-        // Allow applying UtoP only/separately for physical boundary domains after sync/prolong/restrict
-        // e.g., GRMHD does *not* register this as boundaries are applied to prims,
-        // whereas implicitly-evolved vars *only* register this.
+        // Allow applying UtoP only/separately for boundary domains after sync/prolong/restrict ops
+        // All packages with independent variables should register this for AMR
         std::function<void(MeshBlockData<Real>*, IndexDomain, bool)> BoundaryUtoP = nullptr;
-        // Same thing, the other way. For packages syncing primitives, e.g. GRMHD
-        std::function<void(MeshBlockData<Real>*, IndexDomain, bool)> BoundaryPtoU = nullptr;
+        // On domain boundaries, however, we sometimes need to respect the primitive variables.
+        // Currently only the GRMHD primitives (rho, u, uvec) do this
+        std::function<void(MeshBlockData<Real>*, IndexDomain, bool)> DomainBoundaryPtoU = nullptr;
 
         // Going the other way, however, is handled by Flux::PtoU.
-        // All PtoU implementations are device-side (called prim_to_flux)
+        // All PtoU implementations are device-side (called prim_to_flux),
+        // so we do not need something like
         //std::function<void(MeshBlockData<Real>*, IndexDomain, bool)> BlockPtoU = nullptr;
 
         // Source term to add to the conserved variables during each step
@@ -85,7 +86,6 @@ class KHARMAPackage : public StateDescriptor {
         std::function<void(MeshData<Real>*)> FixFlux = nullptr;
 
         // Apply any floors or limiters specific to the package (that is, on the package's variables)
-        // Called by Floors::*ApplyFloors
         std::function<void(MeshBlockData<Real>*, IndexDomain)> BlockApplyFloors = nullptr;
         std::function<void(MeshData<Real>*, IndexDomain)> MeshApplyFloors = nullptr;
 
@@ -129,18 +129,21 @@ TaskStatus BlockUtoP(MeshBlockData<Real> *mbd, IndexDomain domain, bool coarse=f
 TaskStatus MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
- * Version of UtoP specifically for boundaries. Some packages sync & apply boundaries to
- * conserved variables, some to primitive variables.
+ * U to P specifically for boundaries (domain and MPI).
+ * All packages must define this, even if not using UtoP, as KHARMA must sync conserved
+ * variables in AMR mode.
  */
 TaskStatus BoundaryUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse=false);
 /**
- * P to U for boundaries.  As it's internal to the flux updates, the "normal" PtoU is
- * implemented device-side and called from the "Flux" package
+ * For each package, run DomainBoundaryPtoU if available, otherwise BoundaryUtoP.
+ * This is for domain boundaries: if we're syncing the conserved variables, we still
+ * want to apply domain boundaries to the GRHD primitive variables
+ * See KBoundaries::ApplyBoundary for details
  */
-TaskStatus BoundaryPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse=false);
+TaskStatus BoundaryPtoUElseUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse=false);
 
 /**
- * Fill all conserved variables (U) from primitive variables (P), over a whole block
+ * Fill all conserved variables (U) from primitive variables (P), over a domain on a single block
  */
 // TaskStatus BlockPtoU(MeshBlockData<Real> *mbd, IndexDomain domain, bool coarse=false);
 
diff --git a/kharma/main.cpp b/kharma/main.cpp
index 724057d0..baf7f1a0 100644
--- a/kharma/main.cpp
+++ b/kharma/main.cpp
@@ -207,8 +207,10 @@ int main(int argc, char *argv[])
 
     // Begin code block to ensure driver is cleaned up
     {
-        std::string driver_type = pmesh->packages.Get("Driver")->Param<std::string>("type");
-        if (MPIRank0()) std::cout << "Running " << driver_type << " driver" << std::endl;
+        if (MPIRank0()) {
+            std::string driver_name = pmesh->packages.Get("Driver")->Param<std::string>("name");
+            std::cout << "Running " << driver_name << " driver" << std::endl;
+        }
 
         // Pull out things we need to give the driver
         auto pin = pman.pinput.get(); // All parameters in the input file or command line

From efcf77e8be86f5a8564446fcd3c7e44aacdbf5c9 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 3 Oct 2023 14:21:08 -0600
Subject: [PATCH 08/47] Problem init & tests (round 1)

This moves the rest of the problem initializations to using the
unified field init.  It also fixes some issues starting up problems when
magnetic field has been added, which may have affected the
starting internal energy of tori at times in the past (will check).

It also moves (restores) a few very specific/brittle parameter files
into their specific test dirs, rather than global 'pars' dir.
---
 kharma/prob/bondi.cpp                         |  26 ++
 kharma/prob/bz_monopole.cpp                   |   1 -
 kharma/prob/emhd/anisotropic_conduction.hpp   |  14 +-
 kharma/prob/emhd/conducting_atmosphere.cpp    |  10 +-
 kharma/prob/emhd/emhdmodes.hpp                |  27 +-
 kharma/prob/emhd/emhdshock.hpp                |  23 +-
 kharma/prob/explosion.hpp                     |   1 -
 kharma/prob/fm_torus.hpp                      |  32 +--
 kharma/prob/kelvin_helmholtz.hpp              |   1 -
 kharma/prob/mhdmodes.hpp                      |  27 +-
 kharma/prob/post_initialize.cpp               |  13 +-
 kharma/prob/problem.cpp                       |  18 +-
 kharma/prob/seed_B.cpp                        |  30 +-
 kharma/prob/seed_B.hpp                        |  24 +-
 kharma/prob/shock_tube.hpp                    |  20 +-
 .../bondi_analytic_128.txt                    | 128 ---------
 .../bondi_analytic_256.txt                    | 256 ------------------
 .../bondi_analytic_64.txt                     |  64 -----
 tests/bondi_viscous/check.py                  |  69 +++--
 tests/bondi_viscous/run.sh                    |   6 +-
 tests/conducting_atmosphere/check.py          |  15 +-
 .../conducting_atmosphere.par                 |  97 +++++++
 tests/conducting_atmosphere/run.sh            |   2 +-
 tests/emhdshock/emhdshock.par                 |  94 +++++++
 tests/mhdmodes/check.py                       |  35 +--
 tests/mhdmodes/run.sh                         |  63 +++--
 tests/noh/check.py                            |   2 +-
 tests/noh/run.sh                              |   7 +-
 28 files changed, 481 insertions(+), 624 deletions(-)
 delete mode 100644 tests/bondi_viscous/bondi_viscous_128_default/bondi_analytic_128.txt
 delete mode 100644 tests/bondi_viscous/bondi_viscous_256_default/bondi_analytic_256.txt
 delete mode 100644 tests/bondi_viscous/bondi_viscous_64_default/bondi_analytic_64.txt
 create mode 100644 tests/conducting_atmosphere/conducting_atmosphere.par
 create mode 100644 tests/emhdshock/emhdshock.par

diff --git a/kharma/prob/bondi.cpp b/kharma/prob/bondi.cpp
index d97d448b..4e0c11d7 100644
--- a/kharma/prob/bondi.cpp
+++ b/kharma/prob/bondi.cpp
@@ -187,5 +187,31 @@ TaskStatus SetBondiImpl(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain do
         }
     );
 
+    // Generally I avoid this, but the viscous Bondi test problem has very unique
+    // boundary requirements to converge.  The GRMHD vars must be held constant,
+    // but the pressure anisotropy allowed to change as necessary with outflow conditions
+    if (pmb->packages.Get("Globals")->Param<std::string>("problem") == "bondi_viscous") {
+        BoundaryFace bface = KBoundaries::BoundaryFaceOf(domain);
+        bool inner = KBoundaries::BoundaryIsInner(bface);
+        IndexRange ib_i = bounds.GetBoundsI(domain);
+        int ref = inner ? ib_i.s : ib_i.e;
+        pmb->par_for("bondi_viscous_boundary", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+            KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
+                GReal Xembed[GR_DIM];
+                G.coord_embed(k, j, i, Loci::center, Xembed);
+                GReal r = Xembed[1];
+                // TODO more general?
+                if (m_p.B1 >= 0) {
+                    P(m_p.B1, k, j, i) = 1/(r*r*r);
+                    P(m_p.B2, k, j, i) = 0.;
+                    P(m_p.B3, k, j, i) = 0.;
+                }
+                if (m_p.DP >= 0) {
+                    P(m_p.DP, k, j, i) = P(m_p.DP, k, j, ref);
+                }
+            }
+        );
+    }
+
     return TaskStatus::complete;
 }
diff --git a/kharma/prob/bz_monopole.cpp b/kharma/prob/bz_monopole.cpp
index c5c4ee0e..3a442091 100644
--- a/kharma/prob/bz_monopole.cpp
+++ b/kharma/prob/bz_monopole.cpp
@@ -46,7 +46,6 @@ TaskStatus InitializeBZMonopole(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P = rc->Get("prims.B").data;
 
     Real bsq_o_rho_max = pin->GetOrAddReal("floors", "bsq_over_rho_max", 1.e2);
     Real rho_min_limit = pin->GetOrAddReal("floors", "rho_min_geom", 1.e-6);
diff --git a/kharma/prob/emhd/anisotropic_conduction.hpp b/kharma/prob/emhd/anisotropic_conduction.hpp
index b467efce..26220c7e 100644
--- a/kharma/prob/emhd/anisotropic_conduction.hpp
+++ b/kharma/prob/emhd/anisotropic_conduction.hpp
@@ -48,8 +48,7 @@ TaskStatus InitializeAnisotropicConduction(std::shared_ptr<MeshBlockData<Real>>&
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    // It is well and good this problem should cry if B/EMHD are disabled.
-    GridVector B_P = rc->Get("prims.B").data;
+    // It is well and good this problem should cry if EMHD is disabled.
     GridVector q = rc->Get("prims.q").data;
     GridVector dP = rc->Get("prims.dP").data;
 
@@ -62,6 +61,14 @@ TaskStatus InitializeAnisotropicConduction(std::shared_ptr<MeshBlockData<Real>>&
 
     const Real R = m::sqrt(Rsq);
 
+    pin->GetOrAddString("b_field", "type", "wave");
+    pin->GetOrAddReal("b_field", "phase", 0.);
+    // Constant B1
+    pin->GetOrAddReal("b_field", "B10", B0);
+    // Amp & wavenumber of sin() for B2
+    pin->GetOrAddReal("b_field", "amp2_B2", B0);
+    pin->GetOrAddReal("b_field", "k1", 2*M_PI*k0);
+
     IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
     IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
     IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);
@@ -77,9 +84,6 @@ TaskStatus InitializeAnisotropicConduction(std::shared_ptr<MeshBlockData<Real>>&
             uvec(0, k, j, i) = 0.;
             uvec(1, k, j, i) = 0.;
             uvec(2, k, j, i) = 0.;
-            B_P(0, k, j, i) = B0;
-            B_P(1, k, j, i) = B0 * sin(2*M_PI*k0*X[1]);
-            B_P(2, k, j, i) = 0;
             q(k, j, i) = 0.;
             dP(k, j, i) = 0.;
         }
diff --git a/kharma/prob/emhd/conducting_atmosphere.cpp b/kharma/prob/emhd/conducting_atmosphere.cpp
index 170cd914..271b2111 100644
--- a/kharma/prob/emhd/conducting_atmosphere.cpp
+++ b/kharma/prob/emhd/conducting_atmosphere.cpp
@@ -71,6 +71,10 @@ TaskStatus InitializeAtmosphere(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
     // Type of input to the problem
     const std::string input = pin->GetOrAddString("conducting_atmosphere", "input", "ODE");
 
+    // Set default B field parameters
+    pin->GetOrAddString("b_field", "type", "monopole_cube");
+    pin->GetOrAddReal("b_field", "B10", 1.);
+
     // Bounds of the domain
     IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
     IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
@@ -104,13 +108,11 @@ TaskStatus InitializeAtmosphere(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
     GridScalar rho  = rc->Get("prims.rho").data; 
     GridScalar u    = rc->Get("prims.u").data; 
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P  = rc->Get("prims.B").data;
 
     // Host side mirror of primitives
     auto rho_host   = rho.GetHostMirror();
     auto u_host     = u.GetHostMirror();
     auto uvec_host  = uvec.GetHostMirror();
-    auto B_host     = B_P.GetHostMirror();
 
     // Then for EMHD if enabled
     GridScalar q;
@@ -167,9 +169,6 @@ TaskStatus InitializeAtmosphere(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
                     q_host(k, j, i) = q_temp;
 
                 // Now the remaining primitives
-                B_host(V1, k, j, i)    = 1./(Xembed[1]*Xembed[1]*Xembed[1]);
-                B_host(V2, k, j, i)    = 0.;
-                B_host(V3, k, j, i)    = 0.;
                 if (use_emhd && emhd_params.viscosity)
                     dP_host(k, j, i)   = 0.;
 
@@ -223,7 +222,6 @@ TaskStatus InitializeAtmosphere(std::shared_ptr<MeshBlockData<Real>>& rc, Parame
     rho.DeepCopy(rho_host);
     u.DeepCopy(u_host);
     uvec.DeepCopy(uvec_host);
-    B_P.DeepCopy(B_host);
     if (use_emhd && emhd_params.conduction)
         q.DeepCopy(q_host);
     if (use_emhd && emhd_params.viscosity)
diff --git a/kharma/prob/emhd/emhdmodes.hpp b/kharma/prob/emhd/emhdmodes.hpp
index 7c9c7f9d..371575d2 100644
--- a/kharma/prob/emhd/emhdmodes.hpp
+++ b/kharma/prob/emhd/emhdmodes.hpp
@@ -53,8 +53,7 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     GridScalar rho  = rc->Get("prims.rho").data;
     GridScalar u    = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    // It is well and good this problem should cry if B/EMHD are disabled.
-    GridVector B_P = rc->Get("prims.B").data;
+    // It is well and good this problem should cry if EMHD is disabled.
     GridVector q   = rc->Get("prims.q").data;
     GridVector dP  = rc->Get("prims.dP").data;
 
@@ -89,7 +88,19 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     const Real k2 = 4. * M_PI;
     // END POSSIBLE ARGS
 
-    // TODO SET B PARAMS HERE
+    // Set magnetic field parameters for our field transport package
+    pin->GetOrAddString("b_field", "type", "wave");
+    pin->GetOrAddReal("b_field", "B10", B10);
+    pin->GetOrAddReal("b_field", "B20", B20);
+    pin->GetOrAddReal("b_field", "B30", B30);
+    pin->GetOrAddReal("b_field", "k1", k1);
+    pin->GetOrAddReal("b_field", "k2", k2);
+
+    pin->GetOrAddReal("b_field", "amp_B1", amp * (-0.05973794979640743));
+    pin->GetOrAddReal("b_field", "amp2_B1", amp * (0.03351707506150924));
+
+    pin->GetOrAddReal("b_field", "amp_B2", amp * (0.02986897489820372));
+    pin->GetOrAddReal("b_field", "amp2_B2", amp * (-0.016758537530754618));
 
     IndexDomain domain = IndexDomain::interior;
     IndexRange ib = pmb->cellbounds.GetBoundsI(domain);
@@ -99,8 +110,8 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             Real X[GR_DIM];
             G.coord_embed(k, j, i, Loci::center, X);
-            const Real cos_phi = cos(k1*X[1] + k2*X[2]);
-            const Real sin_phi = sin(k1*X[1] + k2*X[2]);
+            const Real cos_phi = m::cos(k1*X[1] + k2*X[2]);
+            const Real sin_phi = m::sin(k1*X[1] + k2*X[2]);
 
             // Perturbations: no higher-order terms
             const Real drho     = amp * (((-0.518522524082246)*cos_phi) + ((0.1792647678001878)*sin_phi));
@@ -108,9 +119,6 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
             const Real du1      = amp * (((0.008463122479547856)*cos_phi) + ((-0.011862022608466367)*sin_phi));
             const Real du2      = amp * (((-0.16175466371870734)*cos_phi) + ((0.034828080823603294)*sin_phi));
             const Real du3      = 0.;
-            const Real dB1      = amp * (((-0.05973794979640743)*cos_phi) + ((0.03351707506150924)*sin_phi));
-            const Real dB2      = amp * (((0.02986897489820372)*cos_phi) - ((0.016758537530754618)*sin_phi));
-            const Real dB3      = 0.;
             const Real dq       = amp * (((0.5233486841539436)*cos_phi) - ((0.04767672501939603)*sin_phi));
             const Real ddelta_p = amp * (((0.2909106062057657)*cos_phi) - ((0.02159452055336572)*sin_phi));
 
@@ -120,9 +128,6 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
             uvec(V1, k, j, i) = u10 + du1;
             uvec(V2, k, j, i) = u20 + du2;
             uvec(V3, k, j, i) = u30 + du3;
-            B_P(V1, k, j, i) = B10 + dB1;
-            B_P(V2, k, j, i) = B20 + dB2;
-            B_P(V3, k, j, i) = B30 + dB3;
             q(k, j, i) = q0 + dq;
             dP(k, j, i) = delta_p0 + ddelta_p;
 
diff --git a/kharma/prob/emhd/emhdshock.hpp b/kharma/prob/emhd/emhdshock.hpp
index c61e07c1..5a55d8ae 100644
--- a/kharma/prob/emhd/emhdshock.hpp
+++ b/kharma/prob/emhd/emhdshock.hpp
@@ -61,7 +61,6 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     GridScalar rho  = rc->Get("prims.rho").data;
     GridScalar u    = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P  = rc->Get("prims.B").data;
     GridVector q    = rc->Get("prims.q").data;
     GridVector dP   = rc->Get("prims.dP").data;
 
@@ -70,6 +69,11 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     // Type of input to the problem
     const std::string input = pin->GetOrAddString("emhdshock", "input", "BVP");
 
+    // Both shocks do not have a B jump condition, just set B10
+    // TODO take magnetization?
+    pin->GetOrAddString("b_field", "type", "constant");
+    pin->GetOrAddReal("b_field", "B10", 1.e-5);
+
     // Obtain EMHD params
     const EMHD::EMHD_parameters& emhd_params = EMHD::GetEMHDParameters(pmb->packages);
     // Obtain GRMHD params
@@ -100,10 +104,13 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
         fp_q   = fopen(fbvp_q,   "r");
         fp_dP  = fopen(fbvp_dP,  "r");
 
+        if (fp_rho == NULL || fp_u == NULL || fp_u1 == NULL || fp_q == NULL || fp_dP == NULL) {
+            throw std::runtime_error("Could not open conducting atmosphere solution!");
+        }
+
         auto rho_host   = rho.GetHostMirror();
         auto u_host     = u.GetHostMirror();
         auto uvec_host  = uvec.GetHostMirror();
-        auto B_host     = B_P.GetHostMirror();
         auto q_host     = q.GetHostMirror();
         auto dP_host    = dP.GetHostMirror();
 
@@ -124,9 +131,6 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
                     // Now the remaining primitives
                     uvec_host(1, k, j, i) = 0.;
                     uvec_host(2, k, j, i) = 0.;
-                    B_host(V1, k, j, i)  = 1.e-5;
-                    B_host(V2, k, j, i)  = 0.;
-                    B_host(V3, k, j, i)  = 0.;
 
                     if (emhd_params.higher_order_terms) {
 
@@ -165,7 +169,6 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
         rho.DeepCopy(rho_host);
         u.DeepCopy(u_host);
         uvec.DeepCopy(uvec_host);
-        B_P.DeepCopy(B_host);
         q.DeepCopy(q_host);
         dP.DeepCopy(dP_host);
         Kokkos::fence();
@@ -182,16 +185,13 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
         double u1L  = 1.,     u1R  = 0.32434571;
         double u2L  = 0.,     u2R  = 0.;
         double u3L  = 0.,     u3R  = 0.;
-        double B1L  = 1.e-5,  B1R  = 1.e-5;
-        double B2L  = 0,      B2R  = 0.;
-        double B3L  = 0.,     B3R  = 0.;
+        const GReal x1_center = (x1min + x1max) / 2.;
 
         pmb->par_for("emhdshock_init", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
             KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
 
                 Real X[GR_DIM];
                 G.coord_embed(k, j, i, Loci::center, X);
-                const Real x1_center = (x1min + x1max) / 2.;
 
                 bool lhs = X[1] < x1_center;
 
@@ -201,9 +201,6 @@ TaskStatus InitializeEMHDShock(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
                 uvec(V1, k, j, i) = (lhs) ? u1L : u1R;
                 uvec(V2, k, j, i) = (lhs) ? u2L : u2R;
                 uvec(V3, k, j, i) = (lhs) ? u3L : u3R;
-                B_P(V1, k, j, i)  = (lhs) ? B1L : B1R;
-                B_P(V2, k, j, i)  = (lhs) ? B2L : B2R;
-                B_P(V3, k, j, i)  = (lhs) ? B3L : B3R;
                 q(k ,j, i)       = 0.;   
                 dP(k ,j, i)      = 0.;   
 
diff --git a/kharma/prob/explosion.hpp b/kharma/prob/explosion.hpp
index 65102d4f..5ed7a73c 100644
--- a/kharma/prob/explosion.hpp
+++ b/kharma/prob/explosion.hpp
@@ -55,7 +55,6 @@ TaskStatus InitializeExplosion(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P = rc->Get("prims.B").data;
 
     const auto& G = pmb->coords;
 
diff --git a/kharma/prob/fm_torus.hpp b/kharma/prob/fm_torus.hpp
index 211fe1e7..326edf1e 100644
--- a/kharma/prob/fm_torus.hpp
+++ b/kharma/prob/fm_torus.hpp
@@ -17,11 +17,14 @@ TaskStatus InitializeFMTorus(std::shared_ptr<MeshBlockData<Real>>& rc, Parameter
  */
 KOKKOS_INLINE_FUNCTION Real lnh_calc(const GReal a, const Real l, const GReal rin, const GReal r, const GReal th)
 {
-    Real sth = sin(th);
-    Real cth = cos(th);
+    // TODO this isn't faster than splitting into two evaluations of a sub-function,
+    // and it doesn't matter anyway.  Make it clearer
+    Real sth = m::sin(th);
+    Real cth = m::cos(th);
 
-    Real r2 = m::pow(r, 2);
-    Real a2 = m::pow(a, 2);
+    Real r2 = r*r;
+    Real a2 = a*a;
+    // Metric 
     Real DD = r2 - 2. * r + a2;
     Real AA = m::pow(r2 + a2, 2) - DD * a2 * sth * sth;
     Real SS = r2 + a2 * cth * cth;
@@ -46,7 +49,7 @@ KOKKOS_INLINE_FUNCTION Real lnh_calc(const GReal a, const Real l, const GReal ri
                         4. * (l * l * SS * SS) * DD /
                             (AA * AA * sth * sth)) -
             2. * a * r * l / AA -
-            (0.5 *
+                (0.5 *
                     m::log((1. +
                         m::sqrt(1. +
                             4. * (l * l * SSin * SSin) * DDin /
@@ -69,17 +72,14 @@ KOKKOS_INLINE_FUNCTION Real lnh_calc(const GReal a, const Real l, const GReal ri
  */
 KOKKOS_INLINE_FUNCTION Real lfish_calc(const GReal a, const GReal r)
 {
-    return (((m::pow(a, 2) - 2. * a * m::sqrt(r) + m::pow(r, 2)) *
-             ((-2. * a * r *
-               (m::pow(a, 2) - 2. * a * m::sqrt(r) +
-                m::pow(r,
-                    2))) /
-                  m::sqrt(2. * a * m::sqrt(r) + (-3. + r) * r) +
-              ((a + (-2. + r) * m::sqrt(r)) * (m::pow(r, 3) + m::pow(a, 2) *
-                                                            (2. + r))) /
+    GReal sqtr = m::sqrt(r);
+    return ((a*a - 2. * a * sqtr + r*r) *
+             ((-2. * a * r * (a*a - 2. * a * sqtr + r*r)) /
+                  m::sqrt(2. * a * sqtr + (-3. + r) * r) +
+              ((a + (-2. + r) * sqtr) * (r*r*r + a*a * (2. + r))) /
                   m::sqrt(1 + (2. * a) / m::pow(r, 1.5) - 3. / r))) /
-            (m::pow(r, 3) * m::sqrt(2. * a * m::sqrt(r) + (-3. + r) * r) *
-             (m::pow(a, 2) + (-2. + r) * r)));
+            (r*r*r * m::sqrt(2. * a * sqtr + (-3. + r) * r) *
+             (a*a + (-2. + r) * r));
 }
 
 /**
@@ -88,7 +88,7 @@ KOKKOS_INLINE_FUNCTION Real lfish_calc(const GReal a, const GReal r)
  * This function is *not* used for the actual initialization (where rho is calculated
  * alongside the other primitive variables).  Rather, it is for:
  * 1. Normalization, in which the max of this function over the domain is calculated.
- * 2. B field initialization, which requires density the untilted disk for simplicity
+ * 2. B field initialization, which requires density of the untilted disk for simplicity
  */
 KOKKOS_INLINE_FUNCTION Real fm_torus_rho(const GReal a, const GReal rin, const GReal rmax, const Real gam,
                                          const Real kappa, const GReal r, const GReal th)
diff --git a/kharma/prob/kelvin_helmholtz.hpp b/kharma/prob/kelvin_helmholtz.hpp
index 25c79033..921f97da 100644
--- a/kharma/prob/kelvin_helmholtz.hpp
+++ b/kharma/prob/kelvin_helmholtz.hpp
@@ -53,7 +53,6 @@ TaskStatus InitializeKelvinHelmholtz(std::shared_ptr<MeshBlockData<Real>>& rc, P
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P = rc->Get("prims.B").data;
 
     // follows notation of Lecoanet et al. eq. 8 et seq.
     const Real tscale = pin->GetOrAddReal("kelvin_helmholtz", "tscale", 0.05);
diff --git a/kharma/prob/mhdmodes.hpp b/kharma/prob/mhdmodes.hpp
index 4a8167f0..b7d49ce0 100644
--- a/kharma/prob/mhdmodes.hpp
+++ b/kharma/prob/mhdmodes.hpp
@@ -63,12 +63,11 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P = rc->Get("prims.B").data;
 
     const auto& G = pmb->coords;
 
     const int nmode = pin->GetOrAddInteger("mhdmodes", "nmode", 1);
-    const bool one_period = pin->GetOrAddBoolean("mhdmodes", "one_period", true);
+    const bool one_period = pin->GetOrAddBoolean("mhdmodes", "one_period", nmode != 0);
 
     // Mean state
     const Real rho0 = pin->GetOrAddReal("mhdmodes", "rho0", 1.);
@@ -82,6 +81,7 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
     // Set to 0 for "full" 3D wave.
     const int dir = pin->GetOrAddInteger("mhdmodes", "dir", 0);
     const Real amp = pin->GetOrAddReal("mhdmodes", "amp", 1.e-4);
+    const Real phase = pin->GetOrAddReal("mhdmodes", "phase", 0.);
 
     // Note the modes below don't work right if you manually set these
     // TODO generate modes on the fly for any k values
@@ -190,12 +190,10 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
         }
     }
 
-    // Record the parameters
+    // Record the parameters we set via nmode
     // This might be useful to read when checking, too...
-    // TODO 
     pin->SetReal("mhdmodes", "omega_real", omega.real());
     pin->SetReal("mhdmodes", "omega_imag", omega.imag());
-
     pin->SetReal("mhdmodes", "drho", drho);
     pin->SetReal("mhdmodes", "du", du);
     pin->SetReal("mhdmodes", "du1", du1);
@@ -207,13 +205,16 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
 
     // Set B field parameters for our mode
     pin->GetOrAddString("b_field", "type", "wave");
-    pin->GetOrAddReal("b_field", "b10", B10);
-    pin->GetOrAddReal("b_field", "b20", B20);
-    pin->GetOrAddReal("b_field", "b30", B30);
+    pin->GetOrAddReal("b_field", "B10", B10);
+    pin->GetOrAddReal("b_field", "B20", B20);
+    pin->GetOrAddReal("b_field", "B30", B30);
     pin->GetOrAddReal("b_field", "amp_B1", amp*dB1);
     pin->GetOrAddReal("b_field", "amp_B2", amp*dB2);
     pin->GetOrAddReal("b_field", "amp_B3", amp*dB3);
-    pin->GetOrAddReal("b_field", "phase", 0.);
+    pin->GetOrAddReal("b_field", "k1", k1);
+    pin->GetOrAddReal("b_field", "k2", k2);
+    pin->GetOrAddReal("b_field", "k3", k3);
+    pin->GetOrAddReal("b_field", "phase", phase);
 
     IndexDomain domain = IndexDomain::interior;
     IndexRange ib = pmb->cellbounds.GetBoundsI(domain);
@@ -226,14 +227,14 @@ TaskStatus InitializeMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramete
             Real mode = amp * m::cos(k1 * X[1] + k2 * X[2] + k3 * X[3]);
             rho(k, j, i) = rho0 + drho * mode;
             u(k, j, i) = u0 + du * mode;
-            uvec(0, k, j, i) = u10 + du1 * mode;
-            uvec(1, k, j, i) = u20 + du2 * mode;
-            uvec(2, k, j, i) = u30 + du3 * mode;
+            uvec(V1, k, j, i) = u10 + du1 * mode;
+            uvec(V2, k, j, i) = u20 + du2 * mode;
+            uvec(V3, k, j, i) = u30 + du3 * mode;
         }
     );
 
     // Override end time to be exactly 1 period for moving modes, unless we set otherwise
-    if (nmode != 0 && one_period) {
+    if (one_period) {
         pin->SetReal("parthenon/time", "tlim", 2. * M_PI / m::abs(omega.imag()));
     }
 
diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp
index 1fc7a790..8dae599c 100644
--- a/kharma/prob/post_initialize.cpp
+++ b/kharma/prob/post_initialize.cpp
@@ -104,8 +104,8 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
         }
     }
 
-    // Add any hotspots.
-    // Note any other modifications made when restarting should be made around here
+    // Add any hotspots *after* we've seeded fields,
+    // since seeding may be based on density
     if (pin->GetOrAddBoolean("blob", "add_blob", false)) {
         for (auto &pmb : pmesh->block_list) {
             auto rc = pmb->meshblock_data.Get();
@@ -121,7 +121,7 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
         KHARMA::ResetGlobals(pin, pmesh);
     }
 
-    // Clean the B field if we've introduced a divergence somewhere
+    // Clean the B field, generally for resizing/restarting
     // We call this function any time the package is loaded:
     // if we decided to load it in kharma.cpp, we need to clean.
     if (pkgs.count("B_Cleanup")) {
@@ -135,13 +135,16 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
         B_Cleanup::CleanupDivergence(md);
     }
 
+    // If PtoU was called before the B field was initialized or corrected,
+    // the total energy might be wrong.  Now that we have the field,
+    // wipe away any temporary "totals" which may have omitted it
+    Flux::MeshPtoU(md.get(), IndexDomain::entire);
+
     // Finally, synchronize boundary values.
     // Freeze any Dirichlet physical boundaries as they are now, after cleanup/sync/etc.
     KBoundaries::FreezeDirichlet(md);
     // This is the first sync if there is no B field
     KHARMADriver::SyncAllBounds(md);
-    // And make sure the trivial primitive values are up-to-date
-    //Packages::MeshUtoPExceptMHD(md.get(), IndexDomain::entire, false);
 
     // TODO output parsed parameters now we have *everything* including any problem configs for B field
 }
diff --git a/kharma/prob/problem.cpp b/kharma/prob/problem.cpp
index 2c0a8b11..01897edb 100644
--- a/kharma/prob/problem.cpp
+++ b/kharma/prob/problem.cpp
@@ -125,7 +125,7 @@ void KHARMA::ProblemGenerator(MeshBlock *pmb, ParameterInput *pin)
         status = ReadKharmaRestart(rc, pin);
     } else if (prob == "gizmo") {
         status = InitializeGIZMO(rc, pin);
-    } else if (prob == "vacuum") {
+    } else if (prob == "vacuum" || prob == "bz_monopole") {
         // No need for a separate initializer, just seed w/floors
         status = Floors::ApplyInitialFloors(pin, rc.get(), IndexDomain::interior);
     }
@@ -153,20 +153,16 @@ void KHARMA::ProblemGenerator(MeshBlock *pmb, ParameterInput *pin)
         }
     }
 
-    // TODO blob here?
-
     // Floors are NOT automatically applied at this point anymore.
     // If needed, they are applied within the problem-specific call.
     // See InitializeFMTorus in fm_torus.cpp for the details for torus problems.
 
-    // Fill the conserved variables U,
-    // which we'll usually treat as the independent/fundamental state.
-    // This will need to be repeated once magnetic field is seeded
-    // Note we do the whole domain, in case we're using Dirichlet conditions
-    Flux::BlockPtoU(rc.get(), IndexDomain::entire);
-
-    // Finally, freeze in the current ghost zone values if using Dirichlet conditions
-    KBoundaries::FreezeDirichletBlock(rc.get());
+    // Note we no longer call PtoU here either, as GRMHD variables' PtoU requires
+    // the magnetic field, which is added in PostInitialize, after all blocks
+    // are filled with other variables (it can be related to density averages which
+    // require correct ghost zones)
+    // ALL OTHER VARIABLES, however, must fill U if a magnetic field will depend on
+    // them in any way, as conserved variables are MPI-synchronized
 
     EndFlag();
 }
diff --git a/kharma/prob/seed_B.cpp b/kharma/prob/seed_B.cpp
index c85b5fd9..a335dc26 100644
--- a/kharma/prob/seed_B.cpp
+++ b/kharma/prob/seed_B.cpp
@@ -101,12 +101,16 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
     if constexpr (Seed == BSeedType::constant ||
                   Seed == BSeedType::monopole ||
                   Seed == BSeedType::monopole_cube ||
-                  Seed == BSeedType::orszag_tang)
+                  Seed == BSeedType::orszag_tang ||
+                  Seed == BSeedType::wave || 
+                  Seed == BSeedType::shock_tube)
     {
-        // All custom B fields should set what they need of these
-        const Real b10 = pin->GetOrAddReal("b_field", "B10", 0.);
-        const Real b20 = pin->GetOrAddReal("b_field", "B20", 0.);
-        const Real b30 = pin->GetOrAddReal("b_field", "B30", 0.);
+        // All custom B fields should set what they need of these.
+        // We take the same names, but they may mean different things to the
+        // particular init function, check seed_B.hpp
+        const Real B10 = pin->GetOrAddReal("b_field", "B10", 0.);
+        const Real B20 = pin->GetOrAddReal("b_field", "B20", 0.);
+        const Real B30 = pin->GetOrAddReal("b_field", "B30", 0.);
         const Real k1 = pin->GetOrAddReal("b_field", "k1", 0.);
         const Real k2 = pin->GetOrAddReal("b_field", "k2", 0.);
         const Real k3 = pin->GetOrAddReal("b_field", "k3", 0.);
@@ -127,9 +131,10 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
                     GReal Xembed[GR_DIM];
                     double null1, null2;
                     double B_Pf1, B_Pf2, B_Pf3;
+                    // TODO handle calling Seed() mid-run and adding field
                     G.coord_embed(k, j, i, Loci::face1, Xembed);
                     GReal gdet = G.gdet(Loci::face1, j, i);
-                    B_Pf1 = b10;
+                    B_Pf1 = B10;
                     seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
                                  amp_B1, amp_B2, amp_B3,
                                  amp2_B1, amp2_B2, amp2_B3,
@@ -138,7 +143,7 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
 
                     G.coord_embed(k, j, i, Loci::face2, Xembed);
                     gdet = G.gdet(Loci::face2, j, i);
-                    B_Pf2 = b20;
+                    B_Pf2 = B20;
                     seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
                                  amp_B1, amp_B2, amp_B3,
                                  amp2_B1, amp2_B2, amp2_B3,
@@ -147,7 +152,7 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
 
                     G.coord_embed(k, j, i, Loci::face3, Xembed);
                     gdet = G.gdet(Loci::face3, j, i);
-                    B_Pf3 = b30;
+                    B_Pf3 = B30;
                     seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
                                  amp_B1, amp_B2, amp_B3,
                                  amp2_B1, amp2_B2, amp2_B3,
@@ -165,6 +170,9 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
                     GReal Xembed[GR_DIM];
                     G.coord_embed(k, j, i, Loci::center, Xembed);
                     const GReal gdet = G.gdet(Loci::center, j, i);
+                    B_P(V1, k, j, i) = B10;
+                    B_P(V2, k, j, i) = B20;
+                    B_P(V3, k, j, i) = B30;
                     seed_b<Seed>(Xembed, gdet, k1, k2, k3, phase,
                                  amp_B1, amp_B2, amp_B3,
                                  amp2_B1, amp2_B2, amp2_B3,
@@ -175,7 +183,7 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
             );
             // We still need to update conserved flux values, but then we're done
             B_FluxCT::BlockPtoU(rc, domain);
-        }
+        } // TODO B_CD!!
         return TaskStatus::complete;
     } else { // Seed with vector potential A otherwise
         // Require and load what we need if necessary
@@ -332,7 +340,7 @@ TaskStatus SeedBFieldType(MeshBlockData<Real> *rc, ParameterInput *pin, IndexDom
             }
             // Finally, make sure we initialize the primitive field too
             B_FluxCT::BlockUtoP(rc, domain);
-        }
+        } // TODO B_CD!!
 
         return TaskStatus::complete;
     }
@@ -385,6 +393,8 @@ TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin)
             status = SeedBFieldType<BSeedType::orszag_tang_a>(rc, pin);
         } else if (b_field_type == "wave") {
             status = SeedBFieldType<BSeedType::wave>(rc, pin);
+        } else if (b_field_type == "shock_tube") {
+            status = SeedBFieldType<BSeedType::shock_tube>(rc, pin);
         } else {
             throw std::invalid_argument("Magnetic field seed type not supported: " + b_field_type);
         }
diff --git a/kharma/prob/seed_B.hpp b/kharma/prob/seed_B.hpp
index 61537a2f..dea13857 100644
--- a/kharma/prob/seed_B.hpp
+++ b/kharma/prob/seed_B.hpp
@@ -40,8 +40,18 @@ TaskStatus SeedBField(MeshData<Real> *md, ParameterInput *pin);
 
 TaskStatus NormalizeBField(MeshData<Real> *md, ParameterInput *pin);
 
+/*
+ * B field initializations.
+ * TO ADD A FIELD:
+ * 1. add its internal name to the enum below
+ * 2. Implement the template specialization for your field, either from seed_a<> or seed_b<>
+ * 3. Add your specialization to the `if` statements in SeedBField
+ * 4. If you used seed_b<>, add your case where SeedBFieldType<> selects direct initialization
+ * 5. If you added arguments, make sure the calls in SeedBFieldType<> are up-to-date
+ */
+
 // Internal representation of the field initialization preference, used for templating
-enum BSeedType{constant, monopole, monopole_cube, orszag_tang, orszag_tang_a, wave,
+enum BSeedType{constant, monopole, monopole_cube, orszag_tang, orszag_tang_a, wave, shock_tube,
                 sane, mad, mad_quadrupole, r3s3, r5s5, gaussian, bz_monopole, vertical};
 
 #define SEEDA_ARGS GReal *x, const GReal *dxc, double rho, double rin, double min_A, double A0, double arg1
@@ -151,13 +161,23 @@ KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::monopole_cube>(SEEDB_ARGS)
 template<>
 KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::wave>(SEEDB_ARGS)
 {
-    const Real smode = m::cos(k1 * x[1] + k2 * x[2] + k3 * x[3] + phase);
+    const Real smode = m::sin(k1 * x[1] + k2 * x[2] + k3 * x[3] + phase);
     const Real cmode = m::cos(k1 * x[1] + k2 * x[2] + k3 * x[3] + phase);
     B1 += amp_B1 * cmode + amp2_B1 * smode;
     B2 += amp_B2 * cmode + amp2_B2 * smode;
     B3 += amp_B3 * cmode + amp2_B3 * smode;
 }
 
+// Shock tube init
+template<>
+KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::shock_tube>(SEEDB_ARGS)
+{
+    const bool lhs = x[1] < phase;
+    B1 += (lhs) ? amp_B1 : amp2_B1;
+    B2 += (lhs) ? amp_B2 : amp2_B2;
+    B3 += (lhs) ? amp_B3 : amp2_B3;
+}
+
 // For Orszag-Tang vortex
 template<>
 KOKKOS_INLINE_FUNCTION void seed_b<BSeedType::orszag_tang>(SEEDB_ARGS)
diff --git a/kharma/prob/shock_tube.hpp b/kharma/prob/shock_tube.hpp
index b41eaf6a..b9f5abcf 100644
--- a/kharma/prob/shock_tube.hpp
+++ b/kharma/prob/shock_tube.hpp
@@ -16,7 +16,6 @@ TaskStatus InitializeShockTube(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     GridScalar rho = rc->Get("prims.rho").data;
     GridScalar u = rc->Get("prims.u").data;
     GridVector uvec = rc->Get("prims.uvec").data;
-    GridVector B_P = rc->Get("prims.B").data;
 
     const auto& G = pmb->coords;
 
@@ -32,6 +31,7 @@ TaskStatus InitializeShockTube(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     const Real u2R = pin->GetOrAddReal("shock", "u2R", 0.0);
     const Real u3L = pin->GetOrAddReal("shock", "u3L", 0.0);
     const Real u3R = pin->GetOrAddReal("shock", "u3R", 0.0);
+
     const Real B1L = pin->GetOrAddReal("shock", "B1L", 0.0);
     const Real B1R = pin->GetOrAddReal("shock", "B1R", 0.0);
     const Real B2L = pin->GetOrAddReal("shock", "B2L", 0.0);
@@ -48,6 +48,15 @@ TaskStatus InitializeShockTube(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
     const Real x1max = pin->GetReal("parthenon/mesh", "x1max");
     const Real center = (x1min + x1max) / 2.;
 
+    pin->GetOrAddString("b_field", "type", "shock_tube");
+    pin->GetOrAddReal("b_field", "phase", center);
+    pin->GetOrAddReal("b_field", "amp_B1", B1L);
+    pin->GetOrAddReal("b_field", "amp_B2", B2L);
+    pin->GetOrAddReal("b_field", "amp_B3", B3L);
+    pin->GetOrAddReal("b_field", "amp2_B1", B1R);
+    pin->GetOrAddReal("b_field", "amp2_B2", B2R);
+    pin->GetOrAddReal("b_field", "amp2_B3", B3R);
+
     pmb->par_for("ot_init", kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             Real X[GR_DIM];
@@ -59,17 +68,8 @@ TaskStatus InitializeShockTube(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
             uvec(0, k, j, i) = (lhs) ? u1L : u1R;
             uvec(1, k, j, i) = (lhs) ? u2L : u2R;
             uvec(2, k, j, i) = (lhs) ? u3L : u3R;
-            B_P(0, k, j, i)  = (lhs) ? B1L : B1R;
-            B_P(1, k, j, i)  = (lhs) ? B2L : B2R;
-            B_P(2, k, j, i)  = (lhs) ? B3L : B3R;
         }
     );
 
-    if(pmb->packages.AllPackages().count("Electrons")) {
-        // Get e- starting parameters
-
-        // Set e- starting state
-    }
-
     return TaskStatus::complete;
 }
diff --git a/tests/bondi_viscous/bondi_viscous_128_default/bondi_analytic_128.txt b/tests/bondi_viscous/bondi_viscous_128_default/bondi_analytic_128.txt
deleted file mode 100644
index 97df8b68..00000000
--- a/tests/bondi_viscous/bondi_viscous_128_default/bondi_analytic_128.txt
+++ /dev/null
@@ -1,128 +0,0 @@
-1.804604567587375641e-02 3.334784880280494690e-03 -5.720360875129699707e-01 3.203265241319423057e-03
-1.769512519240379333e-02 3.227407578378915787e-03 -5.655881166458129883e-01 3.141545548595221261e-03
-1.735138520598411560e-02 3.123594913631677628e-03 -5.592013597488403320e-01 3.080914918348832011e-03
-1.701467670500278473e-02 3.023225814104080200e-03 -5.528752207756042480e-01 3.021340354083649083e-03
-1.668484508991241455e-02 2.926182234659790993e-03 -5.466093420982360840e-01 2.962831289634906308e-03
-1.636174879968166351e-02 2.832352416589856148e-03 -5.404032468795776367e-01 2.905403743434595114e-03
-1.604524627327919006e-02 2.741627395153045654e-03 -5.342563390731811523e-01 2.849007060224692609e-03
-1.573519408702850342e-02 2.653900301083922386e-03 -5.281683802604675293e-01 2.793589218016250420e-03
-1.543146185576915741e-02 2.569071482867002487e-03 -5.221387147903442383e-01 2.739166120096183776e-03
-1.513390894979238510e-02 2.487041056156158447e-03 -5.161669850349426270e-01 2.685726415972453356e-03
-1.484241150319576263e-02 2.407715423032641411e-03 -5.102526545524597168e-01 2.633225577159857556e-03
-1.455683726817369461e-02 2.331002615392208099e-03 -5.043954849243164062e-01 2.581643180119114721e-03
-1.427706424146890640e-02 2.256814856082201004e-03 -4.985947608947753906e-01 2.530984252484574926e-03
-1.400297041982412338e-02 2.185066696256399155e-03 -4.928501248359680176e-01 2.481241882113827645e-03
-1.373443286865949631e-02 2.115675015375018120e-03 -4.871611893177032471e-01 2.432399938156275775e-03
-1.347134262323379517e-02 2.048562280833721161e-03 -4.815274775028228760e-01 2.384406996040078046e-03
-1.321357581764459610e-02 1.983649795874953270e-03 -4.759485125541687012e-01 2.337276681629182895e-03
-1.296102628111839294e-02 1.920864568091928959e-03 -4.704238772392272949e-01 2.291014141894550434e-03
-1.271358411759138107e-02 1.860134885646402836e-03 -4.649530947208404541e-01 2.245568208908161906e-03
-1.247114315629005432e-02 1.801391597837209702e-03 -4.595358073711395264e-01 2.200939176100989974e-03
-1.223359536379575729e-02 1.744568115100264549e-03 -4.541715979576110840e-01 2.157122854302493363e-03
-1.200084201991558075e-02 1.689600176177918911e-03 -4.488598704338073730e-01 2.114060868088609314e-03
-1.177278161048889160e-02 1.636425382457673550e-03 -4.436002969741821289e-01 2.071770489416943726e-03
-1.154931634664535522e-02 1.584983663633465767e-03 -4.383924603462219238e-01 2.030269705892970289e-03
-1.133034937083721161e-02 1.535217510536313057e-03 -4.332359433174133301e-01 1.989515835773508854e-03
-1.111578755080699921e-02 1.487069996073842049e-03 -4.281302988529205322e-01 1.949498268381351677e-03
-1.090554054826498032e-02 1.440488267689943314e-03 -4.230750501155853271e-01 1.910206118756374105e-03
-1.069951709359884262e-02 1.395419007167220116e-03 -4.180698990821838379e-01 1.871588708969029518e-03
-1.049762964248657227e-02 1.351812505163252354e-03 -4.131143093109130859e-01 1.833661157484072326e-03
-1.029979344457387924e-02 1.309619750827550888e-03 -4.082079529762268066e-01 1.796440371627679427e-03
-1.010592095553874969e-02 1.268793246708810329e-03 -4.033503532409667969e-01 1.759924705383275612e-03
-9.915933012962341309e-03 1.229287940077483654e-03 -3.985411226749420166e-01 1.724055372964755596e-03
-9.729747660458087921e-03 1.191060058772563934e-03 -3.937799334526062012e-01 1.688805942792860528e-03
-9.547286666929721832e-03 1.154066878370940685e-03 -3.890663385391235352e-01 1.654217684394830015e-03
-9.368475526571273804e-03 1.118268002755939960e-03 -3.843997716903686523e-01 1.620231830373676908e-03
-9.193234145641326904e-03 1.083622919395565987e-03 -3.797800242900848389e-01 1.586873097588673833e-03
-9.021490812301635742e-03 1.050094026140868664e-03 -3.752066791057586670e-01 1.554134133672500570e-03
-8.853174746036529541e-03 1.017644419334828854e-03 -3.706792891025543213e-01 1.521989997213422149e-03
-8.688212372362613678e-03 9.862381266430020332e-04 -3.661974966526031494e-01 1.490461414923649258e-03
-8.526538498699665070e-03 9.558408055454492569e-04 -3.617607951164245605e-01 1.459449767754728532e-03
-8.368079550564289093e-03 9.264187538065016270e-04 -3.573690056800842285e-01 1.429012699391340969e-03
-8.212774991989135742e-03 8.979404228739440441e-04 -3.530215322971343994e-01 1.399152004113709885e-03
-8.060555905103683472e-03 8.703742059879004955e-04 -3.487181067466735840e-01 1.369818157141651982e-03
-7.911361753940582275e-03 8.436903008259832859e-04 -3.444583415985107422e-01 1.341002475825115196e-03
-7.765128277242183685e-03 8.178594871424138546e-04 -3.402418196201324463e-01 1.312704698153458046e-03
-7.621795870363712311e-03 7.928538252599537373e-04 -3.360682427883148193e-01 1.284932081461519736e-03
-7.481303531676530838e-03 7.686461904086172581e-04 -3.319371938705444336e-01 1.257708139553503045e-03
-7.343596313148736954e-03 7.452104473486542702e-04 -3.278482556343078613e-01 1.230985147337607805e-03
-7.208613213151693344e-03 7.225210429169237614e-04 -3.238011002540588379e-01 1.204750552347711497e-03
-7.076301146298646927e-03 7.005537627264857292e-04 -3.197953701019287109e-01 1.178989571961009449e-03
-6.946604233235120773e-03 6.792847416363656521e-04 -3.158307075500488281e-01 1.153691907111484326e-03
-6.819469388574361801e-03 6.586912204511463642e-04 -3.119067549705505371e-01 1.128875833546346850e-03
-6.694845389574766159e-03 6.387513712979853153e-04 -3.080230653285980225e-01 1.104517199013304923e-03
-6.572678219527006149e-03 6.194434245117008686e-04 -3.041794598102569580e-01 1.080599920285228461e-03
-6.452920846641063690e-03 6.007468327879905701e-04 -3.003753721714019775e-01 1.057094160332787370e-03
-6.335521582514047623e-03 5.826417473144829273e-04 -2.966106235980987549e-01 1.034038106400140579e-03
-6.220433861017227173e-03 5.651088431477546692e-04 -2.928847670555114746e-01 1.011439583019826472e-03
-6.107610184699296951e-03 5.481294938363134861e-04 -2.891975343227386475e-01 9.892395393871365652e-04
-5.997005384415388107e-03 5.316857132129371166e-04 -2.855485379695892334e-01 9.674366098181498737e-04
-5.888572428375482559e-03 5.157600389793515205e-04 -2.819374501705169678e-01 9.460302288950747087e-04
-5.782269407063722610e-03 5.003356491215527058e-04 -2.783638834953308105e-01 9.250196446214486827e-04
-5.678051151335239410e-03 4.853962745983153582e-04 -2.748275995254516602e-01 9.043954937958295333e-04
-5.575876682996749878e-03 4.709262284450232983e-04 -2.713282108306884766e-01 8.841678397662819508e-04
-5.475703626871109009e-03 4.569101438391953707e-04 -2.678653895854949951e-01 8.643277044767471405e-04
-5.377492401748895645e-03 4.433335852809250355e-04 -2.644387781620025635e-01 8.448558086077091528e-04
-5.281202495098114014e-03 4.301820881664752960e-04 -2.610480785369873047e-01 8.257276186450014958e-04
-5.186795257031917572e-03 4.174419445917010307e-04 -2.576930224895477295e-01 8.069410885907020870e-04
-5.094233434647321701e-03 4.051000869367271662e-04 -2.543732225894927979e-01 7.885034504824877614e-04
-5.003478843718767166e-03 3.931433602701872587e-04 -2.510883510112762451e-01 7.704145432179249336e-04
-4.914494697004556656e-03 3.815595991909503937e-04 -2.478381842374801636e-01 7.526604366432750786e-04
-4.827246069908142090e-03 3.703365800902247429e-04 -2.446223348379135132e-01 7.352219628433730517e-04
-4.741697106510400772e-03 3.594628069549798965e-04 -2.414404898881912231e-01 7.180950180851005575e-04
-4.657814744859933853e-03 3.489270166028290987e-04 -2.382924109697341919e-01 7.012796204896215623e-04
-4.575564526021480560e-03 3.387183242011815310e-04 -2.351776659488677979e-01 6.847753276983956430e-04
-4.494913853704929352e-03 3.288263396825641394e-04 -2.320961058139801025e-01 6.685767101385736407e-04
-4.415831528604030609e-03 3.192407602909952402e-04 -2.290472984313964844e-01 6.526807964704674789e-04
-4.338284023106098175e-03 3.099518071394413710e-04 -2.260310500860214233e-01 6.370788345620336445e-04
-4.262241534888744354e-03 3.009499632753431797e-04 -2.230470478534698486e-01 6.217579052427622778e-04
-4.187673795968294144e-03 2.922260900959372520e-04 -2.200949490070343018e-01 6.067054265421926518e-04
-4.114551935344934464e-03 2.837712818291038275e-04 -2.171744853258132935e-01 5.919184176374028307e-04
-4.042845685034990311e-03 2.755769528448581696e-04 -2.142854034900665283e-01 5.773955800511261331e-04
-3.972528036683797836e-03 2.676347503438591957e-04 -2.114273905754089355e-01 5.631334470306947459e-04
-3.903570352122187614e-03 2.599366707727313042e-04 -2.086001485586166382e-01 5.491285438773987025e-04
-3.835946321487426758e-03 2.524750307202339172e-04 -2.058034241199493408e-01 5.353795032711571684e-04
-3.769627306610345840e-03 2.452420594636350870e-04 -2.030369490385055542e-01 5.218729260418854743e-04
-3.704589325934648514e-03 2.382306265644729137e-04 -2.003004252910614014e-01 5.085891625483255404e-04
-3.640806069597601891e-03 2.314337907591834664e-04 -1.975935697555541992e-01 4.955169158844020556e-04
-3.578251926228404045e-03 2.248445816803723574e-04 -1.949161738157272339e-01 4.826513946301850405e-04
-3.516903379932045937e-03 2.184564800700172782e-04 -1.922678947448730469e-01 4.699861290170068043e-04
-3.456735517829656601e-03 2.122630685335025191e-04 -1.896485239267349243e-01 4.575215013801592996e-04
-3.397725289687514305e-03 2.062582352664321661e-04 -1.870577782392501831e-01 4.452655575464078866e-04
-3.339849645271897316e-03 2.004359848797321320e-04 -1.844954341650009155e-01 4.332181376867249483e-04
-3.283086465671658516e-03 1.947906275745481253e-04 -1.819611340761184692e-01 4.213643944662569946e-04
-3.227412700653076172e-03 1.893164298962801695e-04 -1.794546991586685181e-01 4.096883395634948470e-04
-3.172806696966290474e-03 1.840080803958699107e-04 -1.769759356975555420e-01 3.981777951074119779e-04
-3.119248431175947189e-03 1.788603694876655936e-04 -1.745244562625885010e-01 3.868214075353400305e-04
-3.066716017201542854e-03 1.738681894494220614e-04 -1.721000969409942627e-01 3.756189938886759843e-04
-3.015189897269010544e-03 1.690266799414530396e-04 -1.697025746107101440e-01 3.645674179774591704e-04
-2.964649349451065063e-03 1.643310824874788523e-04 -1.673316955566406250e-01 3.536379485230422984e-04
-2.915075747296214104e-03 1.597768568899482489e-04 -1.649871468544006348e-01 3.428143930642417870e-04
-2.866449300199747086e-03 1.553595502628013492e-04 -1.626687347888946533e-01 3.321145229563386052e-04
-2.818751381710171700e-03 1.510748406872153282e-04 -1.603762358427047729e-01 3.215299602465088581e-04
-2.771963831037282944e-03 1.469186099711805582e-04 -1.581093966960906982e-01 3.110228113625697322e-04
-2.726068720221519470e-03 1.428868272341787815e-04 -1.558679640293121338e-01 3.005641086759121999e-04
-2.681048586964607239e-03 1.389756216667592525e-04 -1.536517292261123657e-01 2.901593240276960708e-04
-2.636884804815053940e-03 1.351811370113864541e-04 -1.514604836702346802e-01 2.798175067280202970e-04
-2.593562472611665726e-03 1.314998662564903498e-04 -1.492939442396163940e-01 2.695193068518350848e-04
-2.551063895225524902e-03 1.279282296309247613e-04 -1.471519470214843750e-01 2.592452550406087108e-04
-2.509373473003506660e-03 1.244628365384414792e-04 -1.450342237949371338e-01 2.489698182455574453e-04
-2.468474442139267921e-03 1.211003036587499082e-04 -1.429405808448791504e-01 2.386650229364721586e-04
-2.428351901471614838e-03 1.178375096060335636e-04 -1.408708095550537109e-01 2.283028982002951708e-04
-2.388990484178066254e-03 1.146713257185183465e-04 -1.388246715068817139e-01 2.178570031434298368e-04
-2.350375289097428322e-03 1.115987906814552844e-04 -1.368019580841064453e-01 2.073025985383840049e-04
-2.312491647899150848e-03 1.086169941117987037e-04 -1.348024457693099976e-01 1.966037089493451401e-04
-2.275324892252683640e-03 1.057230911101214588e-04 -1.328259557485580444e-01 1.857279496639460388e-04
-2.238861285150051117e-03 1.029143968480639160e-04 -1.308722496032714844e-01 1.746309161373481398e-04
-2.203087555244565010e-03 1.001883065328001976e-04 -1.289411485195159912e-01 1.632670303924783634e-04
-2.167989034205675125e-03 9.754221537150442600e-05 -1.270324140787124634e-01 1.515961107366246938e-04
-2.133553382009267807e-03 9.497370047029107809e-05 -1.251458823680877686e-01 1.395781407012574595e-04
-2.099767560139298439e-03 9.248035348718985915e-05 -1.232813224196434021e-01 1.270995360625891857e-04
-2.066618530079722404e-03 9.005988249555230141e-05 -1.214385703206062317e-01 1.141313115277284139e-04
-2.034094184637069702e-03 8.771003194851800799e-05 -1.196173727512359619e-01 1.005667518635686771e-04
-2.002182183787226677e-03 8.542864088667556643e-05 -1.178175881505012512e-01 8.631312882876906220e-05
-1.970870885998010635e-03 8.321363566210493445e-05 -1.160390079021453857e-01 7.123526749421901360e-05
-1.940148184075951576e-03 8.106292079901322722e-05 -1.142814531922340393e-01 5.524471272396668599e-05
-1.910003134980797768e-03 7.897462637629359961e-05 -1.125446930527687073e-01 3.816948319780611462e-05
-1.880423631519079208e-03 7.694675150560215116e-05 -1.108285933732986450e-01 1.981819949089328931e-05
-1.851399429142475128e-03 7.497750630136579275e-05 -1.091329231858253479e-01 0.000000000000000000e+00
diff --git a/tests/bondi_viscous/bondi_viscous_256_default/bondi_analytic_256.txt b/tests/bondi_viscous/bondi_viscous_256_default/bondi_analytic_256.txt
deleted file mode 100644
index 6961d76b..00000000
--- a/tests/bondi_viscous/bondi_viscous_256_default/bondi_analytic_256.txt
+++ /dev/null
@@ -1,256 +0,0 @@
-1.716369204223155975e-02 3.067483427003026009e-03 -5.556835532188415527e-01 3.049203811044207384e-03
-1.700004935264587402e-02 3.018894931301474571e-03 -5.525988340377807617e-01 3.020226009540082687e-03
-1.683804951608181000e-02 2.971101086586713791e-03 -5.495284795761108398e-01 2.991511476279015008e-03
-1.667767763137817383e-02 2.924087690189480782e-03 -5.464724898338317871e-01 2.963051834283930534e-03
-1.651891879737377167e-02 2.877843100577592850e-03 -5.434306263923645020e-01 2.934845993569251341e-03
-1.636174879968166351e-02 2.832352416589856148e-03 -5.404032468795776367e-01 2.906891689220341378e-03
-1.620615832507610321e-02 2.787604695186018944e-03 -5.373899340629577637e-01 2.879178593732568351e-03
-1.605212502181529999e-02 2.743586199358105659e-03 -5.343906879425048828e-01 2.851706132566705736e-03
-1.589963771402835846e-02 2.700285986065864563e-03 -5.314055085182189941e-01 2.824472806500916144e-03
-1.574867591261863708e-02 2.657691249623894691e-03 -5.284343957901000977e-01 2.797477775134330023e-03
-1.559922657907009125e-02 2.615790115669369698e-03 -5.254773497581481934e-01 2.770721421767756432e-03
-1.545127574354410172e-02 2.574571641162037849e-03 -5.225340723991394043e-01 2.744196954001513599e-03
-1.530480384826660156e-02 2.534023951739072800e-03 -5.196046829223632812e-01 2.717898689670209238e-03
-1.515979599207639694e-02 2.494135405868291855e-03 -5.166891217231750488e-01 2.691826628521836167e-03
-1.501624006778001785e-02 2.454895991832017899e-03 -5.137872695922851562e-01 2.665981145554064888e-03
-1.487411931157112122e-02 2.416294533759355545e-03 -5.108991861343383789e-01 2.640359626369303755e-03
-1.473341975361108780e-02 2.378320321440696716e-03 -5.080246925354003906e-01 2.614961421988014258e-03
-1.459412463009357452e-02 2.340962411835789680e-03 -5.051638484001159668e-01 2.589786532410197264e-03
-1.445621810853481293e-02 2.304211026057600975e-03 -5.023164749145507812e-01 2.564834852098720353e-03
-1.431969180703163147e-02 2.268056152388453484e-03 -4.994826614856719971e-01 2.540097515067705775e-03
-1.418452616780996323e-02 2.232488011941313744e-03 -4.966621398925781250e-01 2.515572079118275073e-03
-1.405070815235376358e-02 2.197495894506573677e-03 -4.938551485538482666e-01 2.491258925292234860e-03
-1.391822472214698792e-02 2.163071185350418091e-03 -4.910614490509033203e-01 2.467157288170522371e-03
-1.378706470131874084e-02 2.129204804077744484e-03 -4.882809519767761230e-01 2.443266254180473471e-03
-1.365720760077238083e-02 2.095885574817657471e-03 -4.855137765407562256e-01 2.419591966115367844e-03
-1.352864690124988556e-02 2.063106512650847435e-03 -4.827596843242645264e-01 2.396132757416540113e-03
-1.340136490762233734e-02 2.030857140198349953e-03 -4.800187945365905762e-01 2.372885027313342485e-03
-1.327534951269626617e-02 1.999129774048924446e-03 -4.772909283638000488e-01 2.349846284483063461e-03
-1.315058674663305283e-02 1.967914635315537453e-03 -4.745761156082153320e-01 2.327013727258641501e-03
-1.302706636488437653e-02 1.937204273417592049e-03 -4.718742370605468750e-01 2.304381027282849468e-03
-1.290477439761161804e-02 1.906990073621273041e-03 -4.691852331161499023e-01 2.281946551531343573e-03
-1.278369780629873276e-02 1.877263421192765236e-03 -4.665091931819915771e-01 2.259709375005557793e-03
-1.266382355242967606e-02 1.848016283474862576e-03 -4.638459086418151855e-01 2.237668918101621152e-03
-1.254514046013355255e-02 1.819240977056324482e-03 -4.611953496932983398e-01 2.215823963044098472e-03
-1.242763455957174301e-02 1.790929585695266724e-03 -4.585576057434082031e-01 2.194177055875227034e-03
-1.231129560619592667e-02 1.763074425980448723e-03 -4.559324085712432861e-01 2.172726156298820932e-03
-1.219611149281263351e-02 1.735668280161917210e-03 -4.533198475837707520e-01 2.151468464358420336e-03
-1.208206918090581894e-02 1.708702882751822472e-03 -4.507198333740234375e-01 2.130400177798243751e-03
-1.196915749460458755e-02 1.682171714492142200e-03 -4.481323659420013428e-01 2.109518053658852432e-03
-1.185736339539289474e-02 1.656067208386957645e-03 -4.455573558807373047e-01 2.088816329759587108e-03
-1.174667850136756897e-02 1.630382379516959190e-03 -4.429947435855865479e-01 2.068292396165747997e-03
-1.163708884268999100e-02 1.605110592208802700e-03 -4.404445290565490723e-01 2.047945549788011792e-03
-1.152858510613441467e-02 1.580244977958500385e-03 -4.379065036773681641e-01 2.027774760267228062e-03
-1.142115518450737000e-02 1.555778435431420803e-03 -4.353808760643005371e-01 2.007779310617372869e-03
-1.131478603929281235e-02 1.531704328954219818e-03 -4.328674077987670898e-01 1.987957540992423434e-03
-1.120947208255529404e-02 1.508017187006771564e-03 -4.303660392761230469e-01 1.968308505367164125e-03
-1.110519655048847198e-02 1.484709326177835464e-03 -4.278768301010131836e-01 1.948831054143558604e-03
-1.100195012986660004e-02 1.461774809285998344e-03 -4.253996908664703369e-01 1.929524618834849853e-03
-1.089972723275423050e-02 1.439208630472421646e-03 -4.229345023632049561e-01 1.910387720606013598e-03
-1.079851109534502029e-02 1.417003222741186619e-03 -4.204812943935394287e-01 1.891419803632207524e-03
-1.069829706102609634e-02 1.395153929479420185e-03 -4.180400073528289795e-01 1.872620546658754041e-03
-1.059906929731369019e-02 1.373653532937169075e-03 -4.156106412410736084e-01 1.853988453947166088e-03
-1.050082128494977951e-02 1.352497376501560211e-03 -4.131931066513061523e-01 1.835522756295321746e-03
-1.040354277938604355e-02 1.331679755821824074e-03 -4.107872545719146729e-01 1.817222387261382532e-03
-1.030722074210643768e-02 1.311194035224616528e-03 -4.083931446075439453e-01 1.799086790844584569e-03
-1.021185051649808884e-02 1.291036256588995457e-03 -4.060106873512268066e-01 1.781115911258556676e-03
-1.011741999536752701e-02 1.271200366318225861e-03 -4.036398530006408691e-01 1.763308319400915111e-03
-1.002391800284385681e-02 1.251680776476860046e-03 -4.012806415557861328e-01 1.745662050104283824e-03
-9.931336157023906708e-03 1.232472364790737629e-03 -3.989329934120178223e-01 1.728174306184958537e-03
-9.839666076004505157e-03 1.213570241816341877e-03 -3.965966999530792236e-01 1.710842846316475195e-03
-9.748898446559906006e-03 1.194969867356121540e-03 -3.942719101905822754e-01 1.693664638210693818e-03
-9.659022092819213867e-03 1.176665420643985271e-03 -3.919585049152374268e-01 1.676638252107266530e-03
-9.570030495524406433e-03 1.158652361482381821e-03 -3.896564245223999023e-01 1.659761580260549728e-03
-9.481912478804588318e-03 1.140926149673759937e-03 -3.873656392097473145e-01 1.643033320121943617e-03
-9.394660592079162598e-03 1.123482012189924717e-03 -3.850860595703125000e-01 1.626450455508494626e-03
-9.308265522122383118e-03 1.106315408833324909e-03 -3.828177452087402344e-01 1.610009999202180768e-03
-9.222717024385929108e-03 1.089421333745121956e-03 -3.805605769157409668e-01 1.593711806123459694e-03
-9.138009510934352875e-03 1.072795828804373741e-03 -3.783144652843475342e-01 1.577556590123219528e-03
-9.054132737219333649e-03 1.056434120982885361e-03 -3.760794401168823242e-01 1.561545057842212043e-03
-8.971079252660274506e-03 1.040332601405680180e-03 -3.738554120063781738e-01 1.545680374815922237e-03
-8.888838812708854675e-03 1.024486147798597813e-03 -3.716423511505126953e-01 1.529964152353707751e-03
-8.807404898107051849e-03 1.008890918456017971e-03 -3.694402277469635010e-01 1.514393849019976881e-03
-8.726768195629119873e-03 9.935431880876421928e-04 -3.672490417957305908e-01 1.498967165093053298e-03
-8.646920323371887207e-03 9.784383000805974007e-04 -3.650685548782348633e-01 1.483680988364736650e-03
-8.567855693399906158e-03 9.635728201828896999e-04 -3.628988564014434814e-01 1.468531124423920952e-03
-8.489564992487430573e-03 9.489428484812378883e-04 -3.607399463653564453e-01 1.453515324863183505e-03
-8.412038907408714294e-03 9.345440194010734558e-04 -3.585917353630065918e-01 1.438632721688875129e-03
-8.335271850228309631e-03 9.203731315210461617e-04 -3.564541637897491455e-01 1.423881745168323202e-03
-8.259254507720470428e-03 9.064262267202138901e-04 -3.543271720409393311e-01 1.409261041534629879e-03
-8.183981291949748993e-03 8.926997543312609196e-04 -3.522107601165771484e-01 1.394766584808358398e-03
-8.109441958367824554e-03 8.791898144409060478e-04 -3.501048386096954346e-01 1.380398176507262060e-03
-8.035630919039249420e-03 8.658931474201381207e-04 -3.480094075202941895e-01 1.366149015207681564e-03
-7.962542586028575897e-03 8.528066100552678108e-04 -3.459243476390838623e-01 1.352030190515267783e-03
-7.890164852142333984e-03 8.399261278100311756e-04 -3.438497781753540039e-01 1.338040051945830209e-03
-7.818494923412799835e-03 8.272488485090434551e-04 -3.417854011058807373e-01 1.324178164677941496e-03
-7.747523020952939987e-03 8.147713961079716682e-04 -3.397313654422760010e-01 1.310445994716974080e-03
-7.677243091166019440e-03 8.024902781471610069e-04 -3.376876115798950195e-01 1.296844702623601133e-03
-7.607648149132728577e-03 7.904025260359048843e-04 -3.356540501117706299e-01 1.283373860206222421e-03
-7.538730744272470474e-03 7.785049383528530598e-04 -3.336306214332580566e-01 1.270026869655093002e-03
-7.470485288649797440e-03 7.667945465072989464e-04 -3.316173851490020752e-01 1.256793546582370266e-03
-7.402903400361537933e-03 7.552680326625704765e-04 -3.296141326427459717e-01 1.243673998322951261e-03
-7.335981354117393494e-03 7.439230103045701981e-04 -3.276209831237792969e-01 1.230667811851342839e-03
-7.269707974046468735e-03 7.327557541429996490e-04 -3.256377875804901123e-01 1.217775194574115217e-03
-7.204080466181039810e-03 7.217639940790832043e-04 -3.236645162105560303e-01 1.204996402013013652e-03
-7.139089517295360565e-03 7.109445286914706230e-04 -3.217011690139770508e-01 1.192333423628720709e-03
-7.074732333421707153e-03 7.002949714660644531e-04 -3.197476863861083984e-01 1.179786118810683022e-03
-7.010999135673046112e-03 6.898121791891753674e-04 -3.178039789199829102e-01 1.167353694549179552e-03
-6.947885267436504364e-03 6.794935907237231731e-04 -3.158700764179229736e-01 1.155036166428871245e-03
-6.885385606437921524e-03 6.693368777632713318e-04 -3.139458596706390381e-01 1.142835260183226774e-03
-6.823491305112838745e-03 6.593388970941305161e-04 -3.120313882827758789e-01 1.130751647216891127e-03
-6.762197706848382950e-03 6.494973786175251007e-04 -3.101266026496887207e-01 1.118766102407034013e-03
-6.701497826725244522e-03 6.398097029887139797e-04 -3.082313239574432373e-01 1.106874679518922346e-03
-6.641387473791837692e-03 6.302733672782778740e-04 -3.063457012176513672e-01 1.095102677810591649e-03
-6.581859663128852844e-03 6.208861595951020718e-04 -3.044695258140563965e-01 1.083457693127233903e-03
-6.522908341139554977e-03 6.116455188021063805e-04 -3.026028871536254883e-01 1.071909760724144714e-03
-6.464528385549783707e-03 6.025490001775324345e-04 -3.007456958293914795e-01 1.060459203069826937e-03
-6.406713742762804031e-03 5.935943918302655220e-04 -2.988978624343872070e-01 1.049106255682725379e-03
-6.349458824843168259e-03 5.847794818691909313e-04 -2.970594167709350586e-01 1.037851560346049083e-03
-6.292757578194141388e-03 5.761018837802112103e-04 -2.952302694320678711e-01 1.026695617426719121e-03
-6.236605346202850342e-03 5.675595020875334740e-04 -2.934104204177856445e-01 1.015638266020308343e-03
-6.180995143949985504e-03 5.591499502770602703e-04 -2.915998399257659912e-01 1.004680043498215646e-03
-6.125923711806535721e-03 5.508714239113032818e-04 -2.897984087467193604e-01 9.938222184690970878e-04
-6.071383599191904068e-03 5.427215946838259697e-04 -2.880061566829681396e-01 9.830641840937958363e-04
-6.017371546477079391e-03 5.346985417418181896e-04 -2.862230241298675537e-01 9.724045250379492701e-04
-5.963879637420177460e-03 5.267999949865043163e-04 -2.844489514827728271e-01 9.618423363107919026e-04
-5.910905078053474426e-03 5.190241499803960323e-04 -2.826839387416839600e-01 9.513767083394826431e-04
-5.858441814780235291e-03 5.113690276630222797e-04 -2.809278964996337891e-01 9.410062396597822362e-04
-5.806484259665012360e-03 5.038327071815729141e-04 -2.791808545589447021e-01 9.307299890634868587e-04
-5.755027756094932556e-03 4.964131512679159641e-04 -2.774427235126495361e-01 9.205457889794694745e-04
-5.704067647457122803e-03 4.891086718998849392e-04 -2.757135033607482910e-01 9.104527456725462417e-04
-5.653598811477422714e-03 4.819173482246696949e-04 -2.739930748939514160e-01 9.004501095154510831e-04
-5.603616125881671906e-03 4.748373758047819138e-04 -2.722814679145812988e-01 8.905374972466838799e-04
-5.554114468395709991e-03 4.678669210989028215e-04 -2.705786228179931641e-01 8.807142550528743261e-04
-5.505089648067951202e-03 4.610042378772050142e-04 -2.688845098018646240e-01 8.709866752252799034e-04
-5.456537473946809769e-03 4.542477836366742849e-04 -2.671990990638732910e-01 8.613466821769578561e-04
-5.408450961112976074e-03 4.475955502130091190e-04 -2.655223309993743896e-01 8.517974887135010406e-04
-5.360827781260013580e-03 4.410461697261780500e-04 -2.638542056083679199e-01 8.423379009388863971e-04
-5.313663277775049210e-03 4.345979250501841307e-04 -2.621946036815643311e-01 8.329671765935310265e-04
-5.266950465738773346e-03 4.282489826437085867e-04 -2.605435848236083984e-01 8.236844223944004726e-04
-5.220687948167324066e-03 4.219980619382113218e-04 -2.589010298252105713e-01 8.144890184164349902e-04
-5.174869671463966370e-03 4.158435040153563023e-04 -2.572669684886932373e-01 8.053814647854373298e-04
-5.129490513354539871e-03 4.097837081644684076e-04 -2.556413114070892334e-01 7.963603765456862046e-04
-5.084547679871320724e-03 4.038171900901943445e-04 -2.540240585803985596e-01 7.874243208051845020e-04
-5.040036048740148544e-03 3.979424654971808195e-04 -2.524151802062988281e-01 7.785710220533770655e-04
-4.995950963348150253e-03 3.921581374015659094e-04 -2.508145868778228760e-01 7.697990500394148250e-04
-4.952289164066314697e-03 3.864626633003354073e-04 -2.492222487926483154e-01 7.611062041465502508e-04
-4.909045994281768799e-03 3.808547917287796736e-04 -2.476381957530975342e-01 7.524894314773492498e-04
-4.866217263042926788e-03 3.753329510800540447e-04 -2.460623532533645630e-01 7.439477227848116138e-04
-4.823798313736915588e-03 3.698958607856184244e-04 -2.444946318864822388e-01 7.354802405103347913e-04
-4.781785886734724045e-03 3.645421820692718029e-04 -2.429351061582565308e-01 7.270864102050059738e-04
-4.740175791084766388e-03 3.592705761548131704e-04 -2.413836866617202759e-01 7.187648658200237409e-04
-4.698963835835456848e-03 3.540797333698719740e-04 -2.398402690887451172e-01 7.105162996604768915e-04
-4.658146295696496964e-03 3.489684313535690308e-04 -2.383048981428146362e-01 7.023417542940500616e-04
-4.617718979716300964e-03 3.439353022258728743e-04 -2.367774844169616699e-01 6.942460084408869124e-04
-4.577678628265857697e-03 3.389791818335652351e-04 -2.352580875158309937e-01 6.862193925077211603e-04
-4.538020119071006775e-03 3.340988478157669306e-04 -2.337465882301330566e-01 6.782626330966285127e-04
-4.498741123825311661e-03 3.292930778115987778e-04 -2.322429716587066650e-01 6.703731151626043201e-04
-4.459837451577186584e-03 3.245607367716729641e-04 -2.307472229003906250e-01 6.625544620541861307e-04
-4.421305377036333084e-03 3.199006023351103067e-04 -2.292592525482177734e-01 6.548140987906500132e-04
-4.383140243589878082e-03 3.153115394525229931e-04 -2.277791053056716919e-01 6.471380672606776149e-04
-4.345340188592672348e-03 3.107925294898450375e-04 -2.263066619634628296e-01 6.395275755562245109e-04
-4.307900555431842804e-03 3.063423500861972570e-04 -2.248419523239135742e-01 6.319944922475439217e-04
-4.270818084478378296e-03 3.019599535036832094e-04 -2.233849167823791504e-01 6.245220323809081439e-04
-4.234088584780693054e-03 2.976442046929150820e-04 -2.219355702400207520e-01 6.171247693731173349e-04
-4.197709728032350540e-03 2.933942305389791727e-04 -2.204937487840652466e-01 6.098014397079750933e-04
-4.161676857620477676e-03 2.892088086809962988e-04 -2.190595716238021851e-01 6.025366092545816595e-04
-4.125987179577350616e-03 2.850869786925613880e-04 -2.176329195499420166e-01 5.953300076761356948e-04
-4.090637899935245514e-03 2.810277801472693682e-04 -2.162137776613235474e-01 5.881819441415299431e-04
-4.055623896420001984e-03 2.770301653072237968e-04 -2.148021310567855835e-01 5.810927401363955979e-04
-4.020944237709045410e-03 2.730932610575109720e-04 -2.133978605270385742e-01 5.740627171463639610e-04
-3.986593801528215408e-03 2.692160196602344513e-04 -2.120010554790496826e-01 5.670922067790906467e-04
-3.952570259571075439e-03 2.653975388966500759e-04 -2.106116265058517456e-01 5.601838904420140819e-04
-3.918869886547327042e-03 2.616368874441832304e-04 -2.092295140027999878e-01 5.533382330263037722e-04
-3.885489422827959061e-03 2.579331339802592993e-04 -2.078547477722167969e-01 5.465542640870832414e-04
-3.852426540106534958e-03 2.542854344937950373e-04 -2.064872235059738159e-01 5.398307946884053996e-04
-3.819677047431468964e-03 2.506928285583853722e-04 -2.051269561052322388e-01 5.331666358943238076e-04
-3.787239082157611847e-03 2.471546176820993423e-04 -2.037739306688308716e-01 5.265605174594351371e-04
-3.755108453333377838e-03 2.436697832308709621e-04 -2.024280577898025513e-01 5.200102419282768944e-04
-3.723283065482974052e-03 2.402375685051083565e-04 -2.010893523693084717e-01 5.135149841417092778e-04
-3.691758494824171066e-03 2.368570858379825950e-04 -1.997577548027038574e-01 5.070737409437619950e-04
-3.660533810034394264e-03 2.335276221856474876e-04 -1.984332650899887085e-01 5.006861234772110644e-04
-3.629604354500770569e-03 2.302482753293588758e-04 -1.971158385276794434e-01 4.943515375402611879e-04
-3.598967567086219788e-03 2.270182740176096559e-04 -1.958054006099700928e-01 4.880697455889978142e-04
-3.568622050806879997e-03 2.238369488622993231e-04 -1.945019662380218506e-01 4.818415107802307708e-04
-3.538563381880521774e-03 2.207034849561750889e-04 -1.932054907083511353e-01 4.756656405661530249e-04
-3.508788766339421272e-03 2.176170237362384796e-04 -1.919159442186355591e-01 4.695408860803005030e-04
-3.479296108707785606e-03 2.145770267816260457e-04 -1.906332969665527344e-01 4.634654208185696967e-04
-3.450082847848534584e-03 2.115826500812545419e-04 -1.893575340509414673e-01 4.574377992204147207e-04
-3.421144559979438782e-03 2.086331223836168647e-04 -1.880886107683181763e-01 4.514554198958551398e-04
-3.392480313777923584e-03 2.057278761640191078e-04 -1.868265122175216675e-01 4.455173169720519962e-04
-3.364087548106908798e-03 2.028661838266998529e-04 -1.855711638927459717e-01 4.396228530571216590e-04
-3.335962537676095963e-03 2.000473323278129101e-04 -1.843225508928298950e-01 4.337713907591806057e-04
-3.308102721348404884e-03 1.972706813830882311e-04 -1.830806583166122437e-01 4.279624486041275254e-04
-3.280506236478686333e-03 1.945355761563405395e-04 -1.818454861640930176e-01 4.221947602560124175e-04
-3.253170521929860115e-03 1.918413472594693303e-04 -1.806169450283050537e-01 4.164679452094941252e-04
-3.226092318072915077e-03 1.891873835120350122e-04 -1.793950349092483521e-01 4.107815452911755313e-04
-3.199269063770771027e-03 1.865730009740218520e-04 -1.781797409057617188e-01 4.051351023276596273e-04
-3.172699129208922386e-03 1.839976757764816284e-04 -1.769709885120391846e-01 3.995282361776110653e-04
-3.146379021927714348e-03 1.814606948755681515e-04 -1.757688075304031372e-01 3.939620477415104317e-04
-3.120307112112641335e-03 1.789615635061636567e-04 -1.745731234550476074e-01 3.884358706928163658e-04
-3.094481071457266808e-03 1.764996704878285527e-04 -1.733839064836502075e-01 3.829483301959528082e-04
-3.068897407501935959e-03 1.740743755362927914e-04 -1.722011715173721313e-01 3.774975337659158738e-04
-3.043555421754717827e-03 1.716852129902690649e-04 -1.710248142480850220e-01 3.720820830274438403e-04
-3.018451156094670296e-03 1.693315280135720968e-04 -1.698549091815948486e-01 3.666989791877355519e-04
-2.993583446368575096e-03 1.670128112891688943e-04 -1.686913520097732544e-01 3.613466050450033222e-04
-2.968949032947421074e-03 1.647285243961960077e-04 -1.675341129302978516e-01 3.560245038614123152e-04
-2.944546518847346306e-03 1.624781143618747592e-04 -1.663831919431686401e-01 3.507322188991276947e-04
-2.920373110100626945e-03 1.602610864210873842e-04 -1.652385741472244263e-01 3.454694601942537389e-04
-2.896426944062113762e-03 1.580769167048856616e-04 -1.641002148389816284e-01 3.402372239346000997e-04
-2.872705226764082909e-03 1.559250667924061418e-04 -1.629680842161178589e-01 3.350357685022379954e-04
-2.849206561222672462e-03 1.538051001261919737e-04 -1.618421077728271484e-01 3.298631060671725784e-04
-2.825927920639514923e-03 1.517164491815492511e-04 -1.607223302125930786e-01 3.247172487994090008e-04
-2.802868140861392021e-03 1.496587065048515797e-04 -1.596087068319320679e-01 3.195962088689523065e-04
-2.780024195089936256e-03 1.476313045714050531e-04 -1.585011929273605347e-01 3.144986389545947696e-04
-2.757394919171929359e-03 1.456338795833289623e-04 -1.573997586965560913e-01 3.094231595509212168e-04
-2.734976587817072868e-03 1.436658640159294009e-04 -1.563044041395187378e-01 3.043667975233969110e-04
-2.712769201025366783e-03 1.417268940713256598e-04 -1.552150845527648926e-01 2.993270540885746046e-04
-2.690769499167799950e-03 1.398164749843999743e-04 -1.541317850351333618e-01 2.943012703445045545e-04
-2.668975619599223137e-03 1.379341701976954937e-04 -1.530544459819793701e-01 2.892860938907345779e-04
-2.647385932505130768e-03 1.360795722575858235e-04 -1.519830375909805298e-01 2.842780121386620962e-04
-2.625998109579086304e-03 1.342522300546988845e-04 -1.509176045656204224e-01 2.792752394478310277e-04
-2.604810288175940514e-03 1.324517361354082823e-04 -1.498580425977706909e-01 2.742761881126264427e-04
-2.583820139989256859e-03 1.306776539422571659e-04 -1.488044112920761108e-01 2.692788494981652992e-04
-2.563026966527104378e-03 1.289296487811952829e-04 -1.477565765380859375e-01 2.642830869035813672e-04
-2.542427508160471916e-03 1.272072258871048689e-04 -1.467145979404449463e-01 2.592869564356481936e-04
-2.522020600736141205e-03 1.255100505659356713e-04 -1.456783860921859741e-01 2.542875410952232743e-04
-2.501803915947675705e-03 1.238377153640612960e-04 -1.446479707956314087e-01 2.492819238831642139e-04
-2.481776056811213493e-03 1.221898564836010337e-04 -1.436233073472976685e-01 2.442668749403794064e-04
-2.461935626342892647e-03 1.205661246785894036e-04 -1.426043212413787842e-01 2.392384546478730219e-04
-2.442279830574989319e-03 1.189660833915695548e-04 -1.415910869836807251e-01 2.341945958740678760e-04
-2.422806806862354279e-03 1.173893761006183922e-04 -1.405835002660751343e-01 2.291329466902329885e-04
-2.403516089543700218e-03 1.158357263193465769e-04 -1.395815759897232056e-01 2.240516312331043746e-04
-2.384404651820659637e-03 1.143046974902972579e-04 -1.385852396488189697e-01 2.189506539387458823e-04
-2.365471329540014267e-03 1.127959912992082536e-04 -1.375945359468460083e-01 2.138289358672349719e-04
-2.346714027225971222e-03 1.113092221203260124e-04 -1.366094052791595459e-01 2.086822238298576080e-04
-2.328131580725312233e-03 1.098440989153459668e-04 -1.356297880411148071e-01 2.035062605766995401e-04
-2.309722360223531723e-03 1.084002942661754787e-04 -1.346557140350341797e-01 1.982973030628560902e-04
-2.291484270244836807e-03 1.069774516508914530e-04 -1.336870938539505005e-01 1.930542857261014552e-04
-2.273415448144078255e-03 1.055752509273588657e-04 -1.327240318059921265e-01 1.877718818306050805e-04
-2.255514729768037796e-03 1.041934156091883779e-04 -1.317663341760635376e-01 1.824431088285549609e-04
-2.237780252471566200e-03 1.028315964504145086e-04 -1.308141052722930908e-01 1.770601590664857476e-04
-2.220210852101445198e-03 1.014895242406055331e-04 -1.298672854900360107e-01 1.716159733773617475e-04
-2.202804666012525558e-03 1.001668715616688132e-04 -1.289258003234863281e-01 1.661048447839135660e-04
-2.185559598729014397e-03 9.886331827146932483e-05 -1.279897093772888184e-01 1.605216576102571865e-04
-2.168474718928337097e-03 9.757863881532102823e-05 -1.270589232444763184e-01 1.548613459695540105e-04
-2.151548629626631737e-03 9.631252760300412774e-05 -1.261334717273712158e-01 1.491170823572880515e-04
-2.134779468178749084e-03 9.506466449238359928e-05 -1.252132654190063477e-01 1.432817942106481607e-04
-2.118165371939539909e-03 9.383480210090056062e-05 -1.242983415722846985e-01 1.373506795068198756e-04
-2.101705642417073250e-03 9.262267121812328696e-05 -1.233886554837226868e-01 1.313152452121739563e-04
-2.085398184135556221e-03 9.142798808170482516e-05 -1.224841699004173279e-01 1.251654102457772891e-04
-2.069242065772414207e-03 9.025049803312867880e-05 -1.215848773717880249e-01 1.188925280985824145e-04
-2.053235657513141632e-03 8.908996096579357982e-05 -1.206907629966735840e-01 1.124867771767234148e-04
-2.037377096712589264e-03 8.794607128947973251e-05 -1.198017895221710205e-01 1.059364497370102708e-04
-2.021665219217538834e-03 8.681863255333155394e-05 -1.189179345965385437e-01 9.922700588765054623e-05
-2.006098860874772072e-03 8.570733916712924838e-05 -1.180391684174537659e-01 9.234439165620919588e-05
-1.990676159039139748e-03 8.461198012810200453e-05 -1.171654835343360901e-01 8.528151142389726240e-05
-1.975396648049354553e-03 8.353232260560616851e-05 -1.162968501448631287e-01 7.801994594430422341e-05
-1.960257766768336296e-03 8.246811194112524390e-05 -1.154332458972930908e-01 7.054967210153930432e-05
-1.945258933119475842e-03 8.141912985593080521e-05 -1.145746633410453796e-01 6.284519422005574061e-05
-1.930398400872945786e-03 8.038512169150635600e-05 -1.137210652232170105e-01 5.489284034868858705e-05
-1.915675238706171513e-03 7.936589827295392752e-05 -1.128724217414855957e-01 4.669301142784747346e-05
-1.901088049635291100e-03 7.836121949367225170e-05 -1.120287403464317322e-01 3.822608322807355164e-05
-1.886635436676442623e-03 7.737085979897528887e-05 -1.111899539828300476e-01 2.944199581811238962e-05
-1.872316002845764160e-03 7.639460818609222770e-05 -1.103560924530029297e-01 2.032280144551531251e-05
-1.858128467574715614e-03 7.543223910033702850e-05 -1.095270961523056030e-01 1.085142089491031454e-05
-1.844071783125400543e-03 7.448357064276933670e-05 -1.087029650807380676e-01 9.999999999999999547e-07
diff --git a/tests/bondi_viscous/bondi_viscous_64_default/bondi_analytic_64.txt b/tests/bondi_viscous/bondi_viscous_64_default/bondi_analytic_64.txt
deleted file mode 100644
index 095bf35c..00000000
--- a/tests/bondi_viscous/bondi_viscous_64_default/bondi_analytic_64.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-2.009973116219043732e-02 3.990998491644859314e-03 -6.084101200103759766e-01 3.558629102739358906e-03
-1.928591914474964142e-02 3.725331742316484451e-03 -5.942628383636474609e-01 3.417428464649169099e-03
-1.850670017302036285e-02 3.477865131571888924e-03 -5.803928971290588379e-01 3.281391178847760642e-03
-1.776055805385112762e-02 3.247322514653205872e-03 -5.667958855628967285e-01 3.150485599348396611e-03
-1.704603619873523712e-02 3.032518550753593445e-03 -5.534673929214477539e-01 3.024416906864235005e-03
-1.636174879968166351e-02 2.832352416589856148e-03 -5.404032468795776367e-01 2.903056149472242956e-03
-1.570637151598930359e-02 2.645803149789571762e-03 -5.275989770889282227e-01 2.786120135274582337e-03
-1.507864054292440414e-02 2.471921965479850769e-03 -5.150505304336547852e-01 2.673418275879855741e-03
-1.447734516113996506e-02 2.309826202690601349e-03 -5.027536153793334961e-01 2.564948929317165376e-03
-1.390133798122406006e-02 2.158698625862598419e-03 -4.907041788101196289e-01 2.460588167217996058e-03
-1.334951352328062057e-02 2.017778344452381134e-03 -4.788980782032012939e-01 2.360069053203179938e-03
-1.282082404941320419e-02 1.886358717456459999e-03 -4.673311710357666016e-01 2.263244180108050195e-03
-1.231426373124122620e-02 1.763783046044409275e-03 -4.559995234012603760e-01 2.170060123178299723e-03
-1.182887796312570572e-02 1.649441663175821304e-03 -4.448990821838378906e-01 2.080273269952252931e-03
-1.136374846100807190e-02 1.542767276987433434e-03 -4.340259432792663574e-01 1.993762851774091415e-03
-1.091799978166818619e-02 1.443232176825404167e-03 -4.233760833740234375e-01 1.910557190014116214e-03
-1.049079839140176773e-02 1.350346719846129417e-03 -4.129458069801330566e-01 1.830522350370494995e-03
-1.008134800940752029e-02 1.263655489310622215e-03 -4.027311205863952637e-01 1.753453532136987555e-03
-9.688883088529109955e-03 1.182734384201467037e-03 -3.927283883094787598e-01 1.679298756798206860e-03
-9.312675334513187408e-03 1.107188872992992401e-03 -3.829338252544403076e-01 1.607920904118859086e-03
-8.952028118073940277e-03 1.036652945913374424e-03 -3.733437657356262207e-01 1.539189869474771313e-03
-8.606277406215667725e-03 9.707855642773211002e-04 -3.639545440673828125e-01 1.473051234712312915e-03
-8.274787105619907379e-03 9.092690888792276382e-04 -3.547626435756683350e-01 1.409389964401903646e-03
-7.956949062645435333e-03 8.518085232935845852e-04 -3.457643985748291016e-01 1.348190137276036871e-03
-7.652181200683116913e-03 7.981288945302367210e-04 -3.369564712047576904e-01 1.289244561101438903e-03
-7.359929848462343216e-03 7.479749619960784912e-04 -3.283352851867675781e-01 1.232524270093200876e-03
-7.079660892486572266e-03 7.011082489043474197e-04 -3.198975920677185059e-01 1.178063252421932775e-03
-6.810868624597787857e-03 6.573073915205895901e-04 -3.116399049758911133e-01 1.125660012524668484e-03
-6.553068757057189941e-03 6.163661601021885872e-04 -3.035589754581451416e-01 1.075158179971555340e-03
-6.305793765932321548e-03 5.780923529528081417e-04 -2.956515550613403320e-01 1.026517007545504163e-03
-6.068601738661527634e-03 5.423071561381220818e-04 -2.879144847393035889e-01 9.797906614376157477e-04
-5.841067060828208923e-03 5.088439211249351501e-04 -2.803445756435394287e-01 9.348581871019535049e-04
-5.622782744467258453e-03 4.775473498739302158e-04 -2.729387581348419189e-01 8.917221202554105131e-04
-5.413361359387636185e-03 4.482730582822114229e-04 -2.656939029693603516e-01 8.502178397566080093e-04
-5.212431773543357849e-03 4.208863538224250078e-04 -2.586071193218231201e-01 8.102325998417623382e-04
-5.019636359065771103e-03 3.952616243623197079e-04 -2.516754269599914551e-01 7.718161158785515844e-04
-4.834636114537715912e-03 3.712819598149508238e-04 -2.448958456516265869e-01 7.348557939475704182e-04
-4.657104611396789551e-03 3.488383081275969744e-04 -2.382656186819076538e-01 6.993182359984295542e-04
-4.486731719225645065e-03 3.278292715549468994e-04 -2.317818999290466309e-01 6.651178522425728540e-04
-4.323217086493968964e-03 3.081597969867289066e-04 -2.254419028759002686e-01 6.322289686902037671e-04
-4.166277591139078140e-03 2.897418162319809198e-04 -2.192430198192596436e-01 6.005251088069833623e-04
-4.015638027340173721e-03 2.724928781390190125e-04 -2.131824940443038940e-01 5.699934050505552465e-04
-3.871038323268294334e-03 2.563362650107592344e-04 -2.072577327489852905e-01 5.406458495437987781e-04
-3.732228418812155724e-03 2.412003523204475641e-04 -2.014662474393844604e-01 5.122905712814128983e-04
-3.598967567086219788e-03 2.270182740176096559e-04 -1.958054006099700928e-01 4.848685563758513219e-04
-3.471027826890349388e-03 2.137278206646442413e-04 -1.902728080749511719e-01 4.583627467159794955e-04
-3.348189173266291618e-03 2.012708428082987666e-04 -1.848660111427307129e-01 4.327539851236528678e-04
-3.230241360142827034e-03 1.895930763566866517e-04 -1.795825809240341187e-01 4.079802091157575406e-04
-3.116982523351907730e-03 1.786438806448131800e-04 -1.744202971458435059e-01 3.839021344878476550e-04
-3.008220810443162918e-03 1.683760638115927577e-04 -1.693767756223678589e-01 3.605152167153070496e-04
-2.903771121054887772e-03 1.587455190019682050e-04 -1.644497960805892944e-01 3.376953324809211566e-04
-2.803456503897905350e-03 1.497110642958432436e-04 -1.596371829509735107e-01 3.152200154006727694e-04
-2.707108389586210251e-03 1.412343262927606702e-04 -1.549367755651473999e-01 2.930779777823933952e-04
-2.614564495161175728e-03 1.332794199697673321e-04 -1.503463685512542725e-01 2.711579931693024474e-04
-2.525669289752840996e-03 1.258128322660923004e-04 -1.458639353513717651e-01 2.493602612763229083e-04
-2.440273761749267578e-03 1.188032765639945865e-04 -1.414874941110610962e-01 2.274038714995271575e-04
-2.358236582949757576e-03 1.122215835493989289e-04 -1.372149586677551270e-01 2.049739342898993020e-04
-2.279419684782624245e-03 1.060403810697607696e-04 -1.330444365739822388e-01 1.819409046091047017e-04
-2.203693147748708725e-03 1.002342178253456950e-04 -1.289739459753036499e-01 1.579125042445230236e-04
-2.130931708961725235e-03 9.477926505496725440e-05 -1.250016689300537109e-01 1.323912513433941364e-04
-2.061014994978904724e-03 8.965324377641081810e-05 -1.211257129907608032e-01 1.047926832720704832e-04
-1.993828220292925835e-03 8.483538113068789244e-05 -1.173442974686622620e-01 7.429226521448005635e-05
-1.929260906763374805e-03 8.030619937926530838e-05 -1.136556193232536316e-01 3.985325272669650325e-05
-1.867207814939320087e-03 7.604754500789567828e-05 -1.100579351186752319e-01 0.000000000000000000e+00
diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index 065220d7..bac34b83 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -1,14 +1,25 @@
+#!/usr/bin/env python3
+
+import os, sys
+
 import numpy as np
-import os, glob, h5py, sys
+from scipy.interpolate import splrep
+from scipy.integrate import solve_ivp
+
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib as mpl
 import matplotlib.pyplot as plt
 
 import pyharm
+import pyharm.grmhd.bondi as bondi
+import pyharm.plots.plot_dumps as pplt
 
+# Check that the computed Bondi solution matches
+# the analytic Bondi solution in rho,u and the
+# ODE results in dP
 
-if __name__=='__main__':
+if __name__ == '__main__':
     outputdir = './'
     kharmadir = '../../'
 
@@ -22,39 +33,45 @@
     fit = np.zeros([len(RES), NVAR])
 
     for r, res in enumerate(RES):
-            
-        # load analytic result
-        fpath = os.path.join(os.curdir,'bondi_viscous_{}_default'.format(res), 'bondi_analytic_{}.txt'.format(res))
-        rho_analytic, uu_analytic, dP_analytic = np.loadtxt(fpath, usecols=(0,1,3), unpack=True)
+
+        # Load dump for parameters
+        dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res), cache_conn=True)
+
+        # Compute analytic reference
+        mdot, rc, gam = dump['bondi']['mdot'], dump['bondi']['rs'], dump['gam']
+        eta, tau = dump['emhd']['eta'], dump['emhd']['tau']
+        state = bondi.get_bondi_fluid_state(mdot, rc, gam, dump.grid)
+        state.params['eta'] = eta
+        state.params['tau'] = tau
+        dP_check = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau)
         
         # load code data
         dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res))
-        
-        params    = dump.params
-        rho       = np.squeeze(dump['RHO'])
-        uu        = np.squeeze(dump['UU'])
-        dP_tilde  = np.squeeze(dump['prims'][8,Ellipsis])
-
-        t   = dump['t']
-        gam = params['gam']
-        tau = params['tau']
-        eta = params['eta']
-        higher_order_terms = params['higher_order_terms']		
-
-    # compute dP
-        if higher_order_terms=="true":
+
+        rho, uu, dP_tilde = dump['RHO'], dump['UU'], dump['dP']
+        #rho, uu = dump['RHO'], dump['UU']
+
+        # compute dP
+        if dump['emhd']['higher_order_terms'] == "true":
             print("Res: "+str(res)+"; higher order terms enabled")
-            P        = (gam - 1.) * uu
-            Theta    = P / rho
+            Theta    = (dump['gam'] - 1.) * uu / rho
             nu_emhd  = eta / rho
             dP       = dP_tilde * np.sqrt(nu_emhd * rho * Theta / tau)
         else:
             dP = dP_tilde
-        
+
+        # Plot
+        fig = plt.figure(figsize=(6,6))
+        ax = fig.add_subplot(1,1,1)
+        pplt.plot_diff_xz(ax, dump, state, 'rho')
+        plt.legend()
+        fig.savefig("compare_rho_{}.png".format(res))
+        plt.close(fig)
+
         # compute L1 norm
-        L1[r,0] = np.mean(np.fabs(rho - rho_analytic[:,None]))
-        L1[r,1] = np.mean(np.fabs(uu  - uu_analytic[:,None]))
-        L1[r,2] = np.mean(np.fabs(dP  - dP_analytic[:,None])[1:-1])
+        L1[r,0] = np.mean(np.fabs(rho[:,0,0] - state['rho'][:,0,0]))
+        L1[r,1] = np.mean(np.fabs(uu[:,0,0]  - state['u'][:,0,0]))
+        L1[r,2] = np.mean(np.fabs(dP[:,0,0]  - dP_check)[1:-1])
 
     # MEASURE CONVERGENCE
     L1 = np.array(L1)
diff --git a/tests/bondi_viscous/run.sh b/tests/bondi_viscous/run.sh
index b8320ee6..0da6979b 100755
--- a/tests/bondi_viscous/run.sh
+++ b/tests/bondi_viscous/run.sh
@@ -13,7 +13,7 @@ conv_2d() {
     do
         # Four blocks
         half=$(( $res / 2 ))
-        $BASE/run.sh -i $BASE/pars/bondi_viscous.par debug/verbose=1 \
+        $BASE/run.sh -i $BASE/pars/bondi_viscous.par debug/verbose=1 parthenon/time/tlim=400 \
             parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
             parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
             b_field/implicit=false $2 >log_${1}_${res}.txt 2>&1
@@ -22,9 +22,7 @@ conv_2d() {
         mv bondi.out0.final.phdf emhd_2d_${res}_end_${1}.phdf
     done
     check_code=0
-    # pyharm-convert --double *.phdf
     python check.py $ALL_RES $1 2d || check_code=$?
-    # rm -r *.phdf
     rm -r *.xdmf
     rm -r *.out0*
     if [[ $check_code != 0 ]]; then
@@ -35,7 +33,7 @@ conv_2d() {
     fi
 }
 
-ALL_RES="64,128,256"
+ALL_RES="8,16,32,64"
 conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "in 2D, WENO5"
 
 exit $exit_code
diff --git a/tests/conducting_atmosphere/check.py b/tests/conducting_atmosphere/check.py
index 6f758167..655e0489 100644
--- a/tests/conducting_atmosphere/check.py
+++ b/tests/conducting_atmosphere/check.py
@@ -52,7 +52,20 @@
             q        = q_tilde * np.sqrt(chi_emhd * rho * Theta**2 / tau)
         else:
             q = q_tilde
-        
+
+        fig = plt.figure(figsize=(8,8))
+        plt.plot(np.mean(q, axis=-1))
+        plt.plot(q_analytic)
+        plt.savefig("compare_{}.png".format(res))
+
+        fig = plt.figure(figsize=(8,8))
+        plt.plot(np.mean(q - q_analytic[:,None], axis=-1))
+        plt.savefig("diff_{}.png".format(res))
+
+        # fig, ax = plt.subplots(1,1,figsize=(8,8))
+        # pplt.plot_xy(q - q_analytic[:,None], axis=-1))
+        # plt.savefig("diff_{}.png".format(res))
+
         # compute L1 norm
         # compute L1 norm
         L1[r,0] = np.mean(np.fabs(rho - rho_analytic[:,None]))
diff --git a/tests/conducting_atmosphere/conducting_atmosphere.par b/tests/conducting_atmosphere/conducting_atmosphere.par
new file mode 100644
index 00000000..523b5fc5
--- /dev/null
+++ b/tests/conducting_atmosphere/conducting_atmosphere.par
@@ -0,0 +1,97 @@
+# Hydrostatic conducting atmosphere
+# Try to maintain the ODE solution that represnts hydrostatic equilibrium
+# Checks the geometrical terms
+# IMPORTANT: This test is different from the other tests in its initialization
+#            It reads in ".txt" files that correspond to the ODE solution (set input to "ODE" in <conducting_atmosphere>)
+#            Run it with a single MPI task
+
+<parthenon/job>
+problem_id = conducting_atmosphere
+
+<parthenon/mesh>
+refinement = none
+numlevel   = 1
+nx1 = 256
+nx2 = 256
+nx3 = 1
+
+<parthenon/meshblock>
+nx1 = 256
+nx2 = 256
+nx3 = 1
+
+
+<coordinates>
+base      = ks
+transform = mks
+a         = 0.0
+hslope    = 1.0
+r_in      = 200.
+r_out     = 300.
+
+<boundaries>
+inner_x1 = dirichlet
+outer_x1 = dirichlet
+check_inflow_inner_x1 = false
+check_inflow_outer_x1 = false
+
+<parthenon/time>
+tlim       = 400.
+
+<driver>
+type = imex
+
+<GRMHD>
+implicit       = true
+cfl            = 0.9
+gamma          = 1.333333
+reconstruction = weno5
+
+<b_field>
+implicit        = false
+initial_cleanup = false
+
+<implicit>
+max_nonlinear_iter  = 3
+rootfind_tol        = 1.e-20
+jacobian_delta      = 4.e-8
+linesearch          = true
+max_linesearch_iter = 3
+linesearch_eps      = 1.e-4
+
+# IMPORTANT: This block must be present and values filled in all EGRMHD simulations
+<emhd>
+on                 = true
+higher_order_terms = true
+feedback           = true
+stability_limits   = false
+
+conduction = true
+viscosity  = false
+
+closure_type = kappa_eta
+tau   = 10.
+kappa = 0.1
+eta   = 0.0
+
+<conducting_atmosphere>
+input = ODE
+
+<floors>
+disable_floors = true
+
+<debug>
+verbose = 1
+flag_verbose = 2
+extra_checks = 1
+
+<parthenon/output0>
+file_type = hdf5
+dt = 10
+single_precision_output = false
+variables = prims, solve_norm, solve_fail
+ghost_zones = true
+
+<parthenon/output1>
+file_type = hst
+dt = 100
diff --git a/tests/conducting_atmosphere/run.sh b/tests/conducting_atmosphere/run.sh
index ae70088b..964d4c3e 100755
--- a/tests/conducting_atmosphere/run.sh
+++ b/tests/conducting_atmosphere/run.sh
@@ -14,6 +14,7 @@ conv_2d() {
     do
         cp conducting_atmosphere_${res}_default/atmosphere_soln_*.txt .
         $BASE/run.sh -n 1 -i $BASE/pars/conducting_atmosphere.par debug/verbose=1 \
+            parthenon/time/tlim=200 parthenon/output0/dt=1000000 \
             parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
             parthenon/meshblock/nx1=$res parthenon/meshblock/nx2=$res parthenon/meshblock/nx3=1 \
             $2 >log_${1}_${res}.txt 2>&1
@@ -34,5 +35,4 @@ conv_2d() {
 }
 
 ALL_RES="64,128,256,512"
-#ALL_RES="64,128"
 conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "in 2D, WENO5"
diff --git a/tests/emhdshock/emhdshock.par b/tests/emhdshock/emhdshock.par
new file mode 100644
index 00000000..26e191b0
--- /dev/null
+++ b/tests/emhdshock/emhdshock.par
@@ -0,0 +1,94 @@
+# EMHD Shock problem
+# Try to maintain the BVP solution to a discontuinity
+# Checks the higher order terms implementation in flat space
+# IMPORTANT: This test is different from the other tests in its initialization
+#            It reads in ".txt" files that correspond to the BVP solution (set input to "BVP" in <emhdshock>)
+#            One, in principle, can run this problem with the usual ideal MHD jump conditions but this
+#            may not allow a quantitative check
+#            Run it with a single MPI task
+
+<parthenon/job>
+problem_id = emhdshock
+
+<parthenon/mesh>
+refinement = none
+numlevel   = 1
+
+nx1 = 1024
+x1min  = -0.5
+x1max  = 1.5
+ix1_bc = outflow
+ox1_bc = outflow
+
+nx2 = 1
+x2min  = 0.0
+x2max  = 1.0
+ix2_bc = periodic
+ox2_bc = periodic
+
+nx3 = 1
+x3min  = 0.0
+x3max  = 1.0
+ix3_bc = periodic
+ox3_bc = periodic
+
+<parthenon/meshblock>
+nx1 = 1024
+nx2 = 1
+nx3 = 1
+
+<coordinates>
+base      = cartesian_minkowski
+transform = null
+
+<parthenon/time>
+# "RK2" is the only option for implicit solver
+tlim       = 0.5
+integrator = rk2
+dt_min     = 1.e-6
+
+<GRMHD>
+cfl            = 0.25
+gamma          = 1.333333
+reconstruction = linear_mc
+
+<b_field>
+implicit        = true
+initial_cleanup = false
+
+# IMPORTANT: This block must be present and values filled in all EGRMHD simulations
+<emhd>
+on                 = true
+higher_order_terms = true
+
+closure_type       = soundspeed
+tau                = 0.1
+conduction_alpha   = 5.0
+viscosity_alpha    = 3.0
+
+<implicit>
+max_nonlinear_iter = 3
+rootfind_tol       = 1.e-20
+jacobian_delta     = 4.e-8
+
+<emhdshock>
+# The input can be the BVP solution or the ideal MHD Rankine-Hugoniot jump conditions
+input = BVP
+
+<floors>
+disable_floors = true
+
+<debug>
+verbose      = 1
+flag_verbose = 2
+extra_checks = 1
+
+<parthenon/output0>
+file_type               = hdf5
+dt                      = 0.05
+single_precision_output = false
+variables               = prims.rho, prims.u, prims.uvec, prims.B, prims.q, prims.dP
+
+<parthenon/output1>
+file_type = hst
+dt        = 0.1
\ No newline at end of file
diff --git a/tests/mhdmodes/check.py b/tests/mhdmodes/check.py
index 5b7ec02d..0dfffc21 100644
--- a/tests/mhdmodes/check.py
+++ b/tests/mhdmodes/check.py
@@ -60,19 +60,19 @@
         dvar[6] = 0.0977545707307
         dvar[7] = 0.0977545707307
     if "alfven" in SHORT:
-        dvar[3] =  -0.339683110243
-        dvar[4] =  0.339683110243
-        dvar[6] =  0.620173672946
-        dvar[7] =  -0.620173672946
+        dvar[3] = -0.339683110243
+        dvar[4] = 0.339683110243
+        dvar[6] = 0.620173672946
+        dvar[7] = -0.620173672946
     if "fast" in SHORT:
-        dvar[0]  =  0.481846076323
-        dvar[1]    =  0.642461435098
-        dvar[2]   =  -0.0832240462505
-        dvar[3]   =  -0.224080007379
-        dvar[4]   =  -0.224080007379
-        dvar[5]   =  0.406380545676
-        dvar[6]   =  -0.203190272838
-        dvar[7]   =  -0.203190272838
+        dvar[0] = 0.481846076323
+        dvar[1] = 0.642461435098
+        dvar[2] = -0.0832240462505
+        dvar[3] = -0.224080007379
+        dvar[4] = -0.224080007379
+        dvar[5] = 0.406380545676
+        dvar[6] = -0.203190272838
+        dvar[7] = -0.203190272838
 else:
     # EIGENMODES: 2D
     # We only *convergence check* dir = 3 i.e. X1/X2 plane runs
@@ -113,9 +113,12 @@
     dvar_code.append(dump['U1'] - var0[2])
     dvar_code.append(dump['U2'] - var0[3])
     dvar_code.append(dump['U3'] - var0[4])
-    dvar_code.append(dump['B1'] - var0[5])
-    dvar_code.append(dump['B2'] - var0[6])
-    dvar_code.append(dump['B3'] - var0[7])
+    try:
+        dvar_code.append(dump['B1'] - var0[5])
+        dvar_code.append(dump['B2'] - var0[6])
+        dvar_code.append(dump['B3'] - var0[7])
+    except IOError:
+        NVAR = 5
 
     dvar_sol = []
     L1.append([])
@@ -155,7 +158,7 @@
 plt.xscale('log', base=2); plt.yscale('log')
 plt.xlim([RES[0]/np.sqrt(2.), RES[-1]*np.sqrt(2.)])
 plt.xlabel('N'); plt.ylabel('L1')
-plt.title("MHD mode test convergence, {}".format(LONG))
+plt.title("{}".format(LONG))
 plt.legend(loc=1)
 plt.savefig("convergence_modes_{}_{}.png".format(DIM,SHORT))
 
diff --git a/tests/mhdmodes/run.sh b/tests/mhdmodes/run.sh
index 0b35f925..5426905e 100755
--- a/tests/mhdmodes/run.sh
+++ b/tests/mhdmodes/run.sh
@@ -2,18 +2,24 @@
 
 BASE=../..
 
-# Most of the point of this one is exercising all 3D of transport
-# TODO restore 2D test, use for codepath equivalence stuff (faster).
+# This test confirms that all of the many transport options in KHARMA
+# can converge when modeling each of the basic linearized modes:
+# slow, fast, and alfven waves
+
+# It tests:
+# 1. different reconstructions WENO vs linear
+# 2. different drivers, simple, KHARMA, & ImEx
+# 3. different B field transports, Flux-CT and Face-CT
 
 exit_code=0
 
 conv_3d() {
-    ALL_RES="8,16,24,32,48,64"
-    for res in 8 16 24 32 48 64
+    IFS=',' read -ra RES_LIST <<< "$ALL_RES"
+    for res in "${RES_LIST[@]}"
     do
       # Eight blocks
       half=$(( $res / 2 ))
-      $BASE/run.sh -i $BASE/pars/mhdmodes.par debug/verbose=2 \
+      $BASE/run.sh -i $BASE/pars/tests/mhdmodes.par debug/verbose=2 mhdmodes/dir=0 \
                       parthenon/output0/single_precision_output=false parthenon/output0/dt=100. \
                       parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=$res \
                       parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=$half \
@@ -31,12 +37,12 @@ conv_3d() {
     fi
 }
 conv_2d() {
-    ALL_RES="16,24,32,48,64,96,128,256"
-    for res in 16 24 32 48 64 96 128 256
+    IFS=',' read -ra RES_LIST <<< "$ALL_RES"
+    for res in "${RES_LIST[@]}"
     do
       # Four blocks
       half=$(( $res / 2 ))
-      $BASE/run.sh -i $BASE/pars/mhdmodes.par debug/verbose=1 mhdmodes/dir=3 \
+      $BASE/run.sh -i $BASE/pars/tests/mhdmodes.par debug/verbose=2 mhdmodes/dir=3 \
                       parthenon/output0/single_precision_output=false parthenon/output0/dt=100. \
                       parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
                       parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
@@ -55,30 +61,49 @@ conv_2d() {
 }
 
 # Normal MHD modes, 2D, defaults
+ALL_RES="16,24,32,48,64"
 conv_2d slow mhdmodes/nmode=1 "slow mode in 2D"
 conv_2d alfven mhdmodes/nmode=2 "Alfven mode in 2D"
 conv_2d fast mhdmodes/nmode=3 "fast mode in 2D"
 
 # Entropy mode as reconstruction demo
-#conv_2d entropy_nob "mhdmodes/nmode=0 b_field/solver=none" "entropy mode in 2D, no B field" # TODO init currently requires B
-conv_3d entropy mhdmodes/nmode=0 "entropy mode in 3D"
-conv_3d entropy_mc "mhdmodes/nmode=0 GRMHD/reconstruction=linear_mc" "entropy mode in 3D, linear/MC reconstruction"
-conv_3d entropy_vl "mhdmodes/nmode=0 GRMHD/reconstruction=linear_vl" "entropy mode in 3D, linear/VL reconstruction"
+conv_2d entropy_nob "mhdmodes/nmode=0 b_field/solver=none" "entropy mode in 2D, no B field"
+conv_2d entropy mhdmodes/nmode=0 "entropy mode in 3D, WENO reconstruction"
+conv_2d entropy_mc "mhdmodes/nmode=0 driver/reconstruction=linear_mc" "entropy mode in 2D, linear/MC reconstruction"
+#conv_2d entropy_vl "mhdmodes/nmode=0 driver/reconstruction=linear_vl" "entropy mode in 2D, linear/VL reconstruction"
+conv_2d entropy_donor "mhdmodes/nmode=0 driver/reconstruction=donor_cell" "entropy mode in 2D, Donor Cell reconstruction"
 
+# KHARMA driver
+conv_2d slow_kharma   "mhdmodes/nmode=1 driver/type=kharma" "slow mode in 2D, KHARMA driver"
+conv_2d alfven_kharma "mhdmodes/nmode=2 driver/type=kharma" "Alfven mode in 2D, KHARMA driver"
+conv_2d fast_kharma   "mhdmodes/nmode=3 driver/type=kharma" "fast mode in 2D, KHARMA driver"
 # ImEx driver
-conv_2d slow_imex   "mhdmodes/nmode=1 driver/type=imex" "slow mode in 3D, ImEx explicit"
-conv_2d alfven_imex "mhdmodes/nmode=2 driver/type=imex" "Alfven mode in 3D, ImEx explicit"
-conv_2d fast_imex   "mhdmodes/nmode=3 driver/type=imex" "fast mode in 3D, ImEx explicit"
+conv_2d slow_imex   "mhdmodes/nmode=1 driver/type=imex" "slow mode in 2D, ImEx explicit"
+conv_2d alfven_imex "mhdmodes/nmode=2 driver/type=imex" "Alfven mode in 2D, ImEx explicit"
+conv_2d fast_imex   "mhdmodes/nmode=3 driver/type=imex" "fast mode in 2D, ImEx explicit"
 # B field totally explicit
 conv_2d slow_imex_semi   "mhdmodes/nmode=1 driver/type=imex GRMHD/implicit=true b_field/implicit=false" "slow mode 3D, ImEx semi-implicit"
 conv_2d alfven_imex_semi "mhdmodes/nmode=2 driver/type=imex GRMHD/implicit=true b_field/implicit=false" "Alfven mode 3D, ImEx semi-implicit"
 conv_2d fast_imex_semi   "mhdmodes/nmode=3 driver/type=imex GRMHD/implicit=true b_field/implicit=false" "fast mode 3D, ImEx semi-implicit"
-# All variables semi-implicit
-conv_2d slow_imex_im   "mhdmodes/nmode=1 driver/type=imex GRMHD/implicit=true b_field/implicit=true b_field/kill_on_large_divb=false" "slow mode 3D, ImEx implicit"
-conv_2d alfven_imex_im "mhdmodes/nmode=2 driver/type=imex GRMHD/implicit=true b_field/implicit=true b_field/kill_on_large_divb=false" "Alfven mode 3D, ImEx implicit"
-conv_2d fast_imex_im   "mhdmodes/nmode=3 driver/type=imex GRMHD/implicit=true b_field/implicit=true b_field/kill_on_large_divb=false" "fast mode 3D, ImEx implicit"
+
+# KHARMA driver
+conv_2d slow_kharma_ct   "mhdmodes/nmode=1 driver/type=kharma b_field/solver=face_ct" "slow mode in 2D, KHARMA driver w/face CT"
+conv_2d alfven_kharma_ct "mhdmodes/nmode=2 driver/type=kharma b_field/solver=face_ct" "Alfven mode in 2D, KHARMA driver w/face CT"
+conv_2d fast_kharma_ct   "mhdmodes/nmode=3 driver/type=kharma b_field/solver=face_ct" "fast mode in 2D, KHARMA driver w/face CT"
+# ImEx driver
+conv_2d slow_imex_ct   "mhdmodes/nmode=1 driver/type=imex b_field/solver=face_ct" "slow mode in 2D, ImEx explicit w/face CT"
+conv_2d alfven_imex_ct "mhdmodes/nmode=2 driver/type=imex b_field/solver=face_ct" "Alfven mode in 2D, ImEx explicit w/face CT"
+conv_2d fast_imex_ct   "mhdmodes/nmode=3 driver/type=imex b_field/solver=face_ct" "fast mode in 2D, ImEx explicit w/face CT"
+
+
+# simple driver, high res
+ALL_RES="16,24,32,48,64,96,128,192,256"
+conv_2d slow_highres   "mhdmodes/nmode=1 driver/type=imex" "slow mode in 2D, simple driver"
+conv_2d alfven_highres "mhdmodes/nmode=2 driver/type=imex" "Alfven mode in 2D, simple driver"
+conv_2d fast_highres   "mhdmodes/nmode=3 driver/type=imex" "fast mode in 2D, simple driver"
 
 # 3D versions, basics only
+ALL_RES="16,24,32"
 conv_3d slow "mhdmodes/nmode=1 mhdmodes/dir=3" "slow mode in 3D"
 conv_3d alfven "mhdmodes/nmode=2 mhdmodes/dir=3" "Alfven mode in 3D"
 conv_3d fast "mhdmodes/nmode=3 mhdmodes/dir=3" "fast mode in 3D"
diff --git a/tests/noh/check.py b/tests/noh/check.py
index bf2021d9..bb33fa88 100644
--- a/tests/noh/check.py
+++ b/tests/noh/check.py
@@ -50,7 +50,7 @@
     powerfit = np.polyfit(np.log(resolutions), np.log(l1_norm), 1)[0]
     print("Power fit: {} {}".format(powerfit, l1_norm))
     # These bounds were chosen heuristically
-    if powerfit < -1.9 and powerfit > -2.1:
+    if powerfit < -0.85 and powerfit > -1.15:
         fail = 0
     else:
         fail = 1
diff --git a/tests/noh/run.sh b/tests/noh/run.sh
index 3bc841d5..ba7a6ad5 100755
--- a/tests/noh/run.sh
+++ b/tests/noh/run.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-# Bash script to run 1D (Noh) shock test
+# Bash script to run 1D Noh shock test
 
 # Set paths
 KHARMADIR=../..
@@ -8,11 +8,12 @@ KHARMADIR=../..
 exit_code=0
 
 noh_test() {
-    ALL_RES="64,128,256,512,1024,2048"
+    ALL_RES="128,256,512,1024,2048"
     for res in 64 128 256 512 1024 2048
     do
         eighth=$(($res / 8))
-        $KHARMADIR/run.sh -i $KHARMADIR/pars/noh.par debug/verbose=1 \ #parthenon/output0/dt=1000 \
+        $KHARMADIR/run.sh -i $KHARMADIR/pars/noh.par debug/verbose=1 parthenon/output0/dt=1000 \
+                            electrons/gamma_e=1.666667 \
                             parthenon/mesh/nx1=$res parthenon/meshblock/nx1=$eighth \
                             >log_noh_${res}.txt 2>&1
 

From 1303b9b1efafb3db9516e03da196d37e0a1a7c59 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 3 Oct 2023 15:16:14 -0600
Subject: [PATCH 09/47] Fix propagating B3 in 2D with face CT, pass mhdmodes

---
 kharma/b_ct/b_ct.cpp  | 60 ++++++++++++++++++++++++-------------------
 scripts/ci/cpu.yml    | 20 ++++++---------
 tests/mhdmodes/run.sh |  3 ++-
 3 files changed, 44 insertions(+), 39 deletions(-)

diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index 02659f1c..85f08854 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -215,11 +215,12 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
     const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, 0, 0);
     const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::interior, -1, 1);
     const IndexRange block = IndexRange{0, emf_pack.GetDim(5)-1};
+    const int kd = ndim > 2 ? 1 : 0;
+    const int jd = ndim > 1 ? 1 : 0;
+    const int id = ndim > 0 ? 1 : 0;
 
     auto pmb0 = md->GetBlockData(0)->GetBlockPointer().get();
 
-    std::string scheme = pmesh->packages.Get("B_CT")->Param<std::string>("ct_scheme");
-
     // Calculate circulation by averaging fluxes
     // This is the base of most other schemes, which make corrections
     // It is the entirety of B&S '99
@@ -237,13 +238,24 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
                 emf_pack(bl, E2, 0, k, j, i) =
                     0.25*(B_U(bl).flux(X3DIR, V1, k, j, i - 1) + B_U(bl).flux(X3DIR, V1, k, j, i)
                         - B_U(bl).flux(X1DIR, V3, k - 1, j, i) - B_U(bl).flux(X1DIR, V3, k, j, i));
+                emf_pack(bl, E3, 0, k, j, i) =
+                    0.25*(B_U(bl).flux(X1DIR, V2, k, j - 1, i) + B_U(bl).flux(X1DIR, V2, k, j, i)
+                        - B_U(bl).flux(X2DIR, V1, k, j, i - 1) - B_U(bl).flux(X2DIR, V1, k, j, i));
+            } else if (ndim > 1) {
+                emf_pack(bl, E1, 0, k, j, i) =  B_U(bl).flux(X2DIR, V3, k, j, i);
+                emf_pack(bl, E2, 0, k, j, i) = -B_U(bl).flux(X1DIR, V3, k, j, i);
+                emf_pack(bl, E3, 0, k, j, i) =
+                    0.25*(B_U(bl).flux(X1DIR, V2, k, j - 1, i) + B_U(bl).flux(X1DIR, V2, k, j, i)
+                        - B_U(bl).flux(X2DIR, V1, k, j, i - 1) - B_U(bl).flux(X2DIR, V1, k, j, i));
+            } else {
+                emf_pack(bl, E1, 0, k, j, i) = 0;
+                emf_pack(bl, E2, 0, k, j, i) = -B_U(bl).flux(X1DIR, V3, k, j, i);
+                emf_pack(bl, E3, 0, k, j, i) =  B_U(bl).flux(X1DIR, V2, k, j, i);
             }
-            emf_pack(bl, E3, 0, k, j, i) =
-                0.25*(B_U(bl).flux(X1DIR, V2, k, j - 1, i) + B_U(bl).flux(X1DIR, V2, k, j, i)
-                    - B_U(bl).flux(X2DIR, V1, k, j, i - 1) - B_U(bl).flux(X2DIR, V1, k, j, i));
         }
     );
 
+    std::string scheme = pmesh->packages.Get("B_CT")->Param<std::string>("ct_scheme");
     if (scheme == "bs99") {
         // Nothing more to do
     } else if (scheme == "gs05_0" || scheme == "gs05_c") {
@@ -268,17 +280,15 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
                     const auto& G = B_U.GetCoords(bl);
                     // Just subtract centered emf from twice the face version
                     // More stable for planar flows even without anything fancy
-                    if (ndim > 2) {
-                        emf_pack(bl, E1, 0, k, j, i) = 2 * emf_pack(bl, E1, 0, k, j, i)
-                            - 0.25*(emfc(bl, V1, k, j, i)     + emfc(bl, V1, k, j - 1, i)
-                                  + emfc(bl, V1, k, j - 1, i) + emfc(bl, V1, k - 1, j - 1, i));
-                        emf_pack(bl, E2, 0, k, j, i) = 2 * emf_pack(bl, E2, 0, k, j, i)
-                            - 0.25*(emfc(bl, V2, k, j, i)     + emfc(bl, V2, k, j, i - 1)
-                                  + emfc(bl, V2, k - 1, j, i) + emfc(bl, V2, k - 1, j, i - 1));
-                    }
+                    emf_pack(bl, E1, 0, k, j, i) = 2 * emf_pack(bl, E1, 0, k, j, i)
+                        - 0.25*(emfc(bl, V1, k, j, i)     + emfc(bl, V1, k, j - jd, i)
+                                + emfc(bl, V1, k, j - jd, i) + emfc(bl, V1, k - kd, j - jd, i));
+                    emf_pack(bl, E2, 0, k, j, i) = 2 * emf_pack(bl, E2, 0, k, j, i)
+                        - 0.25*(emfc(bl, V2, k, j, i)     + emfc(bl, V2, k, j, i - id)
+                                + emfc(bl, V2, k - kd, j, i) + emfc(bl, V2, k - kd, j, i - id));
                     emf_pack(bl, E3, 0, k, j, i) = 2 * emf_pack(bl, E3, 0, k, j, i)
-                        - 0.25*(emfc(bl, V3, k, j, i)     + emfc(bl, V3, k, j, i - 1)
-                              + emfc(bl, V3, k, j - 1, i) + emfc(bl, V3, k, j - 1, i - 1));
+                        - 0.25*(emfc(bl, V3, k, j, i)     + emfc(bl, V3, k, j, i - id)
+                              + emfc(bl, V3, k, j - jd, i) + emfc(bl, V3, k, j - jd, i - id));
                 }
             );
         } else if (scheme == "gs05_c") {
@@ -362,17 +372,15 @@ TaskStatus B_CT::AddSource(MeshData<Real> *md, MeshData<Real> *mdudt)
             dB_Uf_dt(bl, F2, 0, k, j, i) /= G.Volume<F2>(k, j, i);
         }
     );
-    if (ndim > 2) {
-        pmb0->par_for("B_CT_Circ_3", block.s, block.e, b1.ks, b1.ke, b.js, b.je, b.is, b.ie,
-            KOKKOS_LAMBDA (const int &bl, const int &k, const int &j, const int &i) {
-                const auto& G = dB_Uf_dt.GetCoords(bl);
-                dB_Uf_dt(bl, F3, 0, k, j, i) = (G.Volume<E2>(k, j, i + 1) * emf_pack(bl, E2, 0, k, j, i + 1)
-                                              - G.Volume<E2>(k, j, i)     * emf_pack(bl, E2, 0, k, j, i)
-                                              - G.Volume<E1>(k, j + 1, i) * emf_pack(bl, E1, 0, k, j + 1, i)
-                                              + G.Volume<E1>(k, j, i)     * emf_pack(bl, E1, 0, k, j, i)) / G.Volume<F3>(k, j, i);
-            }
-        );
-    }
+    pmb0->par_for("B_CT_Circ_3", block.s, block.e, b1.ks, b1.ke, b.js, b.je, b.is, b.ie,
+        KOKKOS_LAMBDA (const int &bl, const int &k, const int &j, const int &i) {
+            const auto& G = dB_Uf_dt.GetCoords(bl);
+            dB_Uf_dt(bl, F3, 0, k, j, i) = (G.Volume<E2>(k, j, i + 1) * emf_pack(bl, E2, 0, k, j, i + 1)
+                                            - G.Volume<E2>(k, j, i)     * emf_pack(bl, E2, 0, k, j, i)
+                                            - G.Volume<E1>(k, j + 1, i) * emf_pack(bl, E1, 0, k, j + 1, i)
+                                            + G.Volume<E1>(k, j, i)     * emf_pack(bl, E1, 0, k, j, i)) / G.Volume<F3>(k, j, i);
+        }
+    );
 
     // Explicitly zero polar faces
     // In spherical, zero B2 on X2 face regardless of boundary condition
diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index a5889edd..1f13629d 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -10,28 +10,24 @@ variables:
   OMPI_ALLOW_RUN_AS_ROOT: 1
   OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
   GIT_SUBMODULE_STRATEGY: recursive
+  MAMBA_ROOT_PREFIX: /mamba
 
 ### DEFAULT TEST BEHAVIOR ###
 default:
   tags:
     - public-kharma-runner
-  # Be default: install pyharm, then run test in cwd
+  # By default: install pyharm, then run test in cwd
   # For new tests, write one run.sh script which runs/verifies
   # interleaved, and prints a summary of results.
   before_script:
-    - dnf -y install hostname environment-modules git mpich fftw wget
+    - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - export PATH="$HOME/.local/bin:$PATH"
-    - wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-    - bash Miniforge3.sh -b -p "/home/conda"
-    - source "/home/conda/etc/profile.d/conda.sh"
-    - conda install h5py
-    - git clone https://github.com/AFD-Illinois/pyharm.git /home/pyharm
-    - conda activate
-    - cd /home/pyharm
-    - pip install --user .
-    - cd -
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+    - eval "$(./bin/micromamba shell hook -s posix)"
+    - micromamba create -y -f environment.yml
+    - micromamba activate pyharm
+    - ./install.sh
 
 # Tests can be executed in parallel
 stages:
diff --git a/tests/mhdmodes/run.sh b/tests/mhdmodes/run.sh
index 5426905e..b1647b97 100755
--- a/tests/mhdmodes/run.sh
+++ b/tests/mhdmodes/run.sh
@@ -71,7 +71,8 @@ conv_2d entropy_nob "mhdmodes/nmode=0 b_field/solver=none" "entropy mode in 2D,
 conv_2d entropy mhdmodes/nmode=0 "entropy mode in 3D, WENO reconstruction"
 conv_2d entropy_mc "mhdmodes/nmode=0 driver/reconstruction=linear_mc" "entropy mode in 2D, linear/MC reconstruction"
 #conv_2d entropy_vl "mhdmodes/nmode=0 driver/reconstruction=linear_vl" "entropy mode in 2D, linear/VL reconstruction"
-conv_2d entropy_donor "mhdmodes/nmode=0 driver/reconstruction=donor_cell" "entropy mode in 2D, Donor Cell reconstruction"
+# TODO doesn't converge?
+#conv_2d entropy_donor "mhdmodes/nmode=0 driver/reconstruction=donor_cell" "entropy mode in 2D, Donor Cell reconstruction"
 
 # KHARMA driver
 conv_2d slow_kharma   "mhdmodes/nmode=1 driver/type=kharma" "slow mode in 2D, KHARMA driver"

From c817f1907b3aa39c0d53eee405a9798a4724b3ae Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 3 Oct 2023 17:01:58 -0600
Subject: [PATCH 10/47] Pass bondi problem

Fix run script, fix enabling flag_verbose in imex
Support running in bare KS coordinates via new trivial transform
(needed the check a little looser, turns out KS are not good coords)
---
 kharma/coordinates/coordinate_embedding.hpp   |  6 +++-
 kharma/coordinates/coordinate_systems.hpp     | 28 +++++++++++++++-
 kharma/implicit/implicit.cpp                  | 13 ++++++--
 pars/bondi/bondi.par                          | 16 ++--------
 tests/bondi/check.py                          |  2 +-
 tests/bondi/run.sh                            | 20 ++++++++----
 .../bondi_analytic_32.txt                     | 32 -------------------
 7 files changed, 59 insertions(+), 58 deletions(-)
 delete mode 100644 tests/bondi_viscous/bondi_viscous_32_default/bondi_analytic_32.txt

diff --git a/kharma/coordinates/coordinate_embedding.hpp b/kharma/coordinates/coordinate_embedding.hpp
index bec773fb..eb0e6b08 100644
--- a/kharma/coordinates/coordinate_embedding.hpp
+++ b/kharma/coordinates/coordinate_embedding.hpp
@@ -153,7 +153,11 @@ class CoordinateEmbedding {
             bool spherical = is_spherical();
 
             if (transform_str == "null" || transform_str == "none") {
-                transform.emplace<NullTransform>(NullTransform());
+                if (spherical) {
+                    transform.emplace<SphNullTransform>(SphNullTransform());
+                } else {
+                    transform.emplace<NullTransform>(NullTransform());
+                }
             } else if (transform_str == "exponential" || transform_str == "exp" || transform_str == "eks") {
                 if (!spherical) throw std::invalid_argument("Transform is for spherical coordinates!");
                 transform.emplace<ExponentialTransform>(ExponentialTransform());
diff --git a/kharma/coordinates/coordinate_systems.hpp b/kharma/coordinates/coordinate_systems.hpp
index b0361618..28d7717c 100644
--- a/kharma/coordinates/coordinate_systems.hpp
+++ b/kharma/coordinates/coordinate_systems.hpp
@@ -373,6 +373,32 @@ class NullTransform {
             DLOOP2 dXdx[mu][nu] = (mu == nu);
         }
 };
+// This only exists separately to define startx & stopx. Could fall back on base coords for these?
+class SphNullTransform {
+    public:
+        static constexpr char name[] = "SphNullTransform";
+        static constexpr GReal startx[3] = {-1, 0., 0.};
+        static constexpr GReal stopx[3] = {-1, M_PI, 2*M_PI};
+        // Coordinate transformations
+        // Any coordinate value protections (th < 0, th > pi, phi > 2pi) should be in the base system
+        KOKKOS_INLINE_FUNCTION void coord_to_embed(const GReal Xnative[GR_DIM], GReal Xembed[GR_DIM]) const
+        {
+            DLOOP1 Xembed[mu] = Xnative[mu];
+        }
+        KOKKOS_INLINE_FUNCTION void coord_to_native(const GReal Xembed[GR_DIM], GReal Xnative[GR_DIM]) const
+        {
+            DLOOP1 Xnative[mu] = Xembed[mu];
+        }
+        // Tangent space transformation matrices
+        KOKKOS_INLINE_FUNCTION void dxdX(const GReal X[GR_DIM], Real dxdX[GR_DIM][GR_DIM]) const
+        {
+            DLOOP2 dxdX[mu][nu] = (mu == nu);
+        }
+        KOKKOS_INLINE_FUNCTION void dXdx(const GReal X[GR_DIM], Real dXdx[GR_DIM][GR_DIM]) const
+        {
+            DLOOP2 dXdx[mu][nu] = (mu == nu);
+        }
+};
 
 /**
  * Just exponentiate the radial coordinate
@@ -644,4 +670,4 @@ class FunkyTransform {
 // These act as a wannabe "interface" or "parent class" with the exception that access requires "mpark::visit"
 // See coordinate_embedding.hpp
 using SomeBaseCoords = mpark::variant<SphMinkowskiCoords, CartMinkowskiCoords, SphBLCoords, SphKSCoords, SphBLExtG, SphKSExtG>;
-using SomeTransform = mpark::variant<NullTransform, ExponentialTransform, SuperExponentialTransform, ModifyTransform, FunkyTransform>;
+using SomeTransform = mpark::variant<NullTransform, SphNullTransform, ExponentialTransform, SuperExponentialTransform, ModifyTransform, FunkyTransform>;
diff --git a/kharma/implicit/implicit.cpp b/kharma/implicit/implicit.cpp
index f0ddd5d4..1b7ee4b6 100644
--- a/kharma/implicit/implicit.cpp
+++ b/kharma/implicit/implicit.cpp
@@ -131,6 +131,10 @@ std::shared_ptr<KHARMAPackage> Implicit::Initialize(ParameterInput *pin, std::sh
         pkg->AddField("residual", m);
     }
 
+    // The major call, to Step(), is done manually from the ImEx driver
+    // But, we just register the diagnostics function to print out solver failures
+    pkg->PostStepDiagnosticsMesh = Implicit::PostStepDiagnostics;
+
     return pkg;
 }
 
@@ -601,9 +605,11 @@ TaskStatus Implicit::Step(MeshData<Real> *md_full_step_init, MeshData<Real> *md_
         EndFlag();
     }
 
-    if (flag_verbose > 0) {
-        Reductions::CheckFlagReduceAndPrintHits(md_solver, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2);
-    }
+    // if (flag_verbose > 0) {
+    //     // Start the reduction as soon as we have the data
+    //     // Dangerous, so commented
+    //     Reductions::StartFlagReduce(md_solver, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2);
+    // }
 
     EndFlag();
     return TaskStatus::complete;
@@ -620,6 +626,7 @@ TaskStatus Implicit::PostStepDiagnostics(const SimTime& tm, MeshData<Real> *md)
 
     // Debugging/diagnostic info about implicit solver
     if (flag_verbose > 0) {
+        Reductions::StartFlagReduce(md, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2);
         Reductions::CheckFlagReduceAndPrintHits(md, "solve_fail", Implicit::status_names, IndexDomain::interior, false, 2);
     }
 
diff --git a/pars/bondi/bondi.par b/pars/bondi/bondi.par
index 67b22a36..ddcf77b6 100644
--- a/pars/bondi/bondi.par
+++ b/pars/bondi/bondi.par
@@ -31,7 +31,7 @@ hslope = 0.3
 r_in = 3.0
 r_out = 30.0
 # If using "Funky" MKS later, where is "startx1"?
-fmks_zero_point = 0.0
+fmks_zero_point = 1.0
 
 <parthenon/time>
 tlim = 50.0
@@ -51,12 +51,6 @@ rs = 8.0
 <floors>
 # Disable floors
 disable_floors = true
-# If using B field, enable w/:
-rho_min_geom = 1e-6
-u_min_geom = 1e-8
-bsq_over_rho_max = 100
-u_over_rho_max = 100
-gamma_max = 10
 
 <boundaries>
 # We'll be adding material, and that's okay
@@ -66,12 +60,6 @@ check_inflow_outer_x1 = false
 # No field
 type = none
 solver = none
-# To add magnetic field
-#type = monopole
-#b10 = 1
-# Or
-#type = vertical
-#bz = 0.1
 
 <debug>
 verbose = 0
@@ -83,7 +71,7 @@ file_type = hdf5
 dt = 5.0
 single_precision_output = true
 # Fields not present are silently ignored
-variables = prims.rho, prims.u, prims.uvec, prims.B, pflag
+variables = prims.rho, prims.u, prims.uvec, pflag
 
 <parthenon/output1>
 file_type = hst
diff --git a/tests/bondi/check.py b/tests/bondi/check.py
index 898359ae..a34c3aab 100644
--- a/tests/bondi/check.py
+++ b/tests/bondi/check.py
@@ -52,7 +52,7 @@
     L1[var] = np.array(L1[var])
     powerfit = np.polyfit(np.log(RES), np.log(L1[var]), 1)[0]
     print("Powerfit: {} L1: {}".format(powerfit, L1[var]))
-    if powerfit < -2.2 or powerfit > -1.9:
+    if powerfit < -2.2 or powerfit > -1.85:
         fail = 1
 
 # MAKE PLOTS
diff --git a/tests/bondi/run.sh b/tests/bondi/run.sh
index ff75ed2d..3f21749a 100755
--- a/tests/bondi/run.sh
+++ b/tests/bondi/run.sh
@@ -5,12 +5,12 @@ BASE=../..
 exit_code=0
 
 conv_2d() {
-    ALL_RES="16,32,48,64"
-    for res in 16 32 48 64
+    IFS=',' read -ra RES_LIST <<< "$ALL_RES"
+    for res in "${RES_LIST[@]}"
     do
       # Four blocks
       half=$(( $res / 2 ))
-      $BASE/run.sh -i $BASE/pars/bondi.par debug/verbose=1 debug/flag_verbose=2 parthenon/time/tlim=50 \
+      $BASE/run.sh -i $BASE/pars/bondi/bondi.par debug/verbose=1 debug/flag_verbose=2 parthenon/time/tlim=50 \
                                            parthenon/output0/dt=1000 parthenon/output0/single_precision_output=false \
                                            parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
                                            parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
@@ -28,16 +28,20 @@ conv_2d() {
     fi
 }
 
+# Test boundaries
+ALL_RES="16,24,32,48,64"
 conv_2d dirichlet "boundaries/inner_x1=dirichlet boundaries/outer_x1=dirichlet" "in 2D, Dirichlet boundaries"
 
 # Test coordinates
-#conv_2d fmks coordinates/transform=fmks "in 2D, FMKS coordinates"
 conv_2d mks coordinates/transform=mks "in 2D, MKS coordinates"
 conv_2d eks coordinates/transform=eks "in 2D, EKS coordinates"
-# TODO broken
-#conv_2d ks coordinates/transform=null "in 2D, KS coordinates"
+# Some coordinate systems do better/worse than 2o at low res
+ALL_RES="48,64,96,128"
+conv_2d fmks coordinates/transform=fmks "in 2D, FMKS coordinates"
+conv_2d ks coordinates/transform=null "in 2D, KS coordinates"
 
 # Recon
+ALL_RES="16,24,32,48,64"
 conv_2d linear_mc GRMHD/reconstruction=linear_mc "in 2D, linear recon with MC limiter"
 conv_2d linear_vl GRMHD/reconstruction=linear_vl "in 2D, linear recon with VL limiter"
 
@@ -45,4 +49,8 @@ conv_2d linear_vl GRMHD/reconstruction=linear_vl "in 2D, linear recon with VL li
 conv_2d imex driver/type=imex "in 2D, with Imex driver"
 conv_2d imex_im "driver/type=imex GRMHD/implicit=true" "in 2D, semi-implicit stepping"
 
+# TODO magnetized?
+
+# TODO 3D, esp magnetized
+
 exit $exit_code
diff --git a/tests/bondi_viscous/bondi_viscous_32_default/bondi_analytic_32.txt b/tests/bondi_viscous/bondi_viscous_32_default/bondi_analytic_32.txt
deleted file mode 100644
index d8e2a9c2..00000000
--- a/tests/bondi_viscous/bondi_viscous_32_default/bondi_analytic_32.txt
+++ /dev/null
@@ -1,32 +0,0 @@
-2.584501169621944427e-02 6.068188231438398361e-03 -6.999706029891967773e-01 4.522162550684973893e-03
-2.356654405593872070e-02 5.203045438975095749e-03 -6.652408838272094727e-01 4.140434014011852570e-03
-2.149801887571811676e-02 4.464387428015470505e-03 -6.319671869277954102e-01 3.789233857803978815e-03
-1.961943879723548889e-02 3.833322320133447647e-03 -6.001013517379760742e-01 3.466035327603429750e-03
-1.791276969015598297e-02 3.293838584795594215e-03 -5.695956349372863770e-01 3.168579558245918169e-03
-1.636174879968166351e-02 2.832352416589856148e-03 -5.404032468795776367e-01 2.894997972590632879e-03
-1.495170872658491135e-02 2.437338000163435936e-03 -5.124777555465698242e-01 2.643221787914404146e-03
-1.366940420120954514e-02 2.099006203934550285e-03 -4.857742488384246826e-01 2.411604044596365147e-03
-1.250288169831037521e-02 1.809038803912699223e-03 -4.602482616901397705e-01 2.198377945378686849e-03
-1.144134253263473511e-02 1.560364384204149246e-03 -4.358564615249633789e-01 2.002143248357593670e-03
-1.047503110021352768e-02 1.346965902484953403e-03 -4.125564098358154297e-01 1.821718323887453698e-03
-9.595127776265144348e-03 1.163721084594726562e-03 -3.903068006038665771e-01 1.655920184890402862e-03
-8.793655782938003540e-03 1.006267266348004341e-03 -3.690673708915710449e-01 1.503540855322907160e-03
-8.063399232923984528e-03 8.708859095349907875e-04 -3.487989008426666260e-01 1.363545946702775496e-03
-7.397830486297607422e-03 7.544056279584765434e-04 -3.294633328914642334e-01 1.234987096634110760e-03
-6.791035179048776627e-03 6.541201728396117687e-04 -3.110238611698150635e-01 1.116773817419731670e-03
-6.237659603357315063e-03 5.677193985320627689e-04 -2.934447228908538818e-01 1.008369874502772559e-03
-5.732852034270763397e-03 4.932292504236102104e-04 -2.766912281513214111e-01 9.088907239085136256e-04
-5.272216163575649261e-03 4.289627831894904375e-04 -2.607300579547882080e-01 8.173766547032038435e-04
-4.851764533668756485e-03 3.734769416041672230e-04 -2.455290406942367554e-01 7.334003942844100716e-04
-4.467881284654140472e-03 3.255369665566831827e-04 -2.310569882392883301e-01 6.563852240762084326e-04
-4.117285367101430893e-03 2.840855740942060947e-04 -2.172840982675552368e-01 5.854909152618705589e-04
-3.796998877078294754e-03 2.482170821167528629e-04 -2.041816562414169312e-01 5.201248927715958637e-04
-3.504319349303841591e-03 2.171552332583814859e-04 -1.917218714952468872e-01 4.596615136618002577e-04
-3.236790420487523079e-03 1.902341609820723534e-04 -1.798784136772155762e-01 4.033137860565213931e-04
-2.992182737216353416e-03 1.668826007517054677e-04 -1.686256676912307739e-01 3.502789494691153460e-04
-2.768469508737325668e-03 1.466100511606782675e-04 -1.579393297433853149e-01 2.995213350777409657e-04
-2.563808113336563110e-03 1.289951469516381621e-04 -1.477960050106048584e-01 2.498668730583249718e-04
-2.376524033024907112e-03 1.136757491622120142e-04 -1.381733268499374390e-01 1.991955526768476121e-04
-2.205094322562217712e-03 1.003404613584280014e-04 -1.290498524904251099e-01 1.446416184657461858e-04
-2.048132708296179771e-03 8.872123726177960634e-05 -1.204050555825233459e-01 8.137099178784023786e-05
-1.904378994368016720e-03 7.858742901589721441e-05 -1.122193336486816406e-01 0.000000000000000000e+00

From c5d183f5aebe6f68ee51f49bd6229b7fa276dcda Mon Sep 17 00:00:00 2001
From: Ben Prather <bprather@lanl.gov>
Date: Wed, 4 Oct 2023 08:36:07 -0600
Subject: [PATCH 11/47] Pass aniso_conduction, emhdmodes

* Restores EMHD terms to stress-energy tensor
    (no, I do not remember why they were removed in this branch...)
* Run script touch-ups for bondi_viscous but dP still not converging
---
 kharma/flux/flux_functions.hpp      |  8 +++-----
 kharma/flux/get_flux.hpp            |  1 +
 pars/emhd/bondi_viscous.par         |  9 ++++-----
 tests/anisotropic_conduction/run.sh |  2 +-
 tests/bondi_viscous/check.py        | 10 +++++-----
 tests/bondi_viscous/run.sh          |  4 ++--
 tests/emhdmodes/run.sh              |  2 +-
 7 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/kharma/flux/flux_functions.hpp b/kharma/flux/flux_functions.hpp
index b1de46ea..6c6d577a 100644
--- a/kharma/flux/flux_functions.hpp
+++ b/kharma/flux/flux_functions.hpp
@@ -73,7 +73,7 @@ KOKKOS_INLINE_FUNCTION void calc_tensor(const Local& P, const VarMap& m_p, const
         // GRMHD stress-energy tensor w/ first index up, second index down
         GRMHD::calc_tensor(P(m_p.RHO), P(m_p.UU), (gam - 1) * P(m_p.UU), D, dir, T);
     } else {
-        // GRHD stress-energy tensor w/ first index up, second index down
+        // GRHD stress-energy tensor
         GRHD::calc_tensor(P(m_p.RHO), P(m_p.UU), (gam - 1) * P(m_p.UU), D, dir, T);
     }
 }
@@ -125,8 +125,7 @@ KOKKOS_INLINE_FUNCTION void prim_to_flux(const GRCoordinates& G, const Local& P,
 
     // Stress-energy tensor
     Real T[GR_DIM];
-    //calc_tensor(P, m_p, D, emhd_params, gam, dir, T);
-    GRMHD::calc_tensor(P(m_p.RHO), P(m_p.UU), (gam - 1) * P(m_p.UU), D, dir, T);
+    calc_tensor(P, m_p, D, emhd_params, gam, dir, T);
     flux(m_u.UU) = T[0] * gdet + flux(m_u.RHO);
     flux(m_u.U1) = T[1] * gdet;
     flux(m_u.U2) = T[2] * gdet;
@@ -190,8 +189,7 @@ KOKKOS_INLINE_FUNCTION void prim_to_flux(const GRCoordinates& G, const Global& P
     flux(m_u.RHO, k, j, i) = P(m_p.RHO, k, j, i) * D.ucon[dir] * gdet;
 
     Real T[GR_DIM];
-    //calc_tensor(P, m_p, D, emhd_params, gam, k, j, i, dir, T);
-    GRMHD::calc_tensor(P(m_p.RHO, k, j, i), P(m_p.UU, k, j, i), (gam - 1) * P(m_p.UU, k, j, i), D, dir, T);
+    calc_tensor(P, m_p, D, emhd_params, gam, k, j, i, dir, T);
     flux(m_u.UU, k, j, i) = T[0] * gdet + flux(m_u.RHO, k, j, i);
     flux(m_u.U1, k, j, i) = T[1] * gdet;
     flux(m_u.U2, k, j, i) = T[2] * gdet;
diff --git a/kharma/flux/get_flux.hpp b/kharma/flux/get_flux.hpp
index fc77b63b..2c3bad88 100644
--- a/kharma/flux/get_flux.hpp
+++ b/kharma/flux/get_flux.hpp
@@ -340,6 +340,7 @@ inline TaskStatus GetFlux(MeshData<Real> *md)
     EndFlag();
 
     // Save the face velocities for upwinding/CT later
+    // TODO only for certain GS'05
     if (packages.AllPackages().count("B_CT")) {
         Flag("GetFlux_"+std::to_string(dir)+"_store_vel");
         const auto& vl_all = md->PackVariables(std::vector<std::string>{"Flux.vl"});
diff --git a/pars/emhd/bondi_viscous.par b/pars/emhd/bondi_viscous.par
index f7b15d23..16b0fead 100644
--- a/pars/emhd/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -31,8 +31,7 @@ implicit       = true
 
 <b_field>
 type            = monopole_cube
-implicit        = false
-initial_cleanup = false
+B10             = 1.
 
 <implicit>
 min_nonlinear_iter  = 1
@@ -65,8 +64,8 @@ rs   = 8.0
 disable_floors = true
 
 <boundaries>
-outer_x1 = dirichlet
-inner_x1 = dirichlet
+#outer_x1 = dirichlet
+#inner_x1 = dirichlet
 check_inflow_outer_x1 = false
 #check_inflow_inner_x1 = false
 
@@ -79,7 +78,7 @@ extra_checks = 1
 file_type               = hdf5
 dt                      = 100.0
 single_precision_output = false
-ghost_zones             = true
+ghost_zones             = false
 variables               = prims, solve_norm, solve_fail
 
 <parthenon/output1>
diff --git a/tests/anisotropic_conduction/run.sh b/tests/anisotropic_conduction/run.sh
index 8e641822..a3bc836d 100755
--- a/tests/anisotropic_conduction/run.sh
+++ b/tests/anisotropic_conduction/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
 
-../../run.sh -i ../../pars/anisotropic_conduction.par
+../../run.sh -i ../../pars/emhd/anisotropic_conduction.par parthenon/time/tlim=5
 
 python make_plots.py .
diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index bac34b83..c5a0095e 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -28,7 +28,7 @@
     RES   = [int(r) for r in sys.argv[1].split(",")]
     LONG  = sys.argv[2]
     SHORT = sys.argv[3]
-    
+
     L1  = np.zeros([len(RES), NVAR])
     fit = np.zeros([len(RES), NVAR])
 
@@ -38,13 +38,13 @@
         dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res), cache_conn=True)
 
         # Compute analytic reference
-        mdot, rc, gam = dump['bondi']['mdot'], dump['bondi']['rs'], dump['gam']
-        eta, tau = dump['emhd']['eta'], dump['emhd']['tau']
+        mdot, rc, gam = dump['bondi/mdot'], dump['bondi/rs'], dump['gam']
+        eta, tau = dump['emhd/eta'], dump['emhd/tau']
         state = bondi.get_bondi_fluid_state(mdot, rc, gam, dump.grid)
         state.params['eta'] = eta
         state.params['tau'] = tau
         dP_check = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau)
-        
+
         # load code data
         dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res))
 
@@ -52,7 +52,7 @@
         #rho, uu = dump['RHO'], dump['UU']
 
         # compute dP
-        if dump['emhd']['higher_order_terms'] == "true":
+        if dump['emhd/higher_order_terms'] == "true":
             print("Res: "+str(res)+"; higher order terms enabled")
             Theta    = (dump['gam'] - 1.) * uu / rho
             nu_emhd  = eta / rho
diff --git a/tests/bondi_viscous/run.sh b/tests/bondi_viscous/run.sh
index 0da6979b..ad7a8722 100755
--- a/tests/bondi_viscous/run.sh
+++ b/tests/bondi_viscous/run.sh
@@ -13,7 +13,7 @@ conv_2d() {
     do
         # Four blocks
         half=$(( $res / 2 ))
-        $BASE/run.sh -i $BASE/pars/bondi_viscous.par debug/verbose=1 parthenon/time/tlim=400 \
+        $BASE/run.sh -i $BASE/pars/emhd/bondi_viscous.par debug/verbose=1 parthenon/time/tlim=400 \
             parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
             parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
             b_field/implicit=false $2 >log_${1}_${res}.txt 2>&1
@@ -34,6 +34,6 @@ conv_2d() {
 }
 
 ALL_RES="8,16,32,64"
-conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "in 2D, WENO5"
+conv_2d emhd2d_weno driver/reconstruction=weno5 "in 2D, WENO5"
 
 exit $exit_code
diff --git a/tests/emhdmodes/run.sh b/tests/emhdmodes/run.sh
index 231c54c8..cd5b6854 100755
--- a/tests/emhdmodes/run.sh
+++ b/tests/emhdmodes/run.sh
@@ -13,7 +13,7 @@ conv_2d() {
     do
       # Four blocks
       half=$(( $res / 2 ))
-      $BASE/run.sh -i $BASE/pars/emhdmodes.par debug/verbose=1 \
+      $BASE/run.sh -i $BASE/pars/emhd/emhdmodes.par debug/verbose=1 \
                       parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
                       parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
                       $2 >log_${1}_${res}.txt 2>&1

From 4229a12c008c39159c22c2fba166e1e50f0e78e7 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprather@lanl.gov>
Date: Wed, 4 Oct 2023 08:36:31 -0600
Subject: [PATCH 12/47] Add back an old experimental option to cleanup

---
 kharma/b_cleanup/b_cleanup.cpp | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/kharma/b_cleanup/b_cleanup.cpp b/kharma/b_cleanup/b_cleanup.cpp
index 3d47e557..9d38ae13 100644
--- a/kharma/b_cleanup/b_cleanup.cpp
+++ b/kharma/b_cleanup/b_cleanup.cpp
@@ -87,6 +87,8 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
     params.Add("warn_without_convergence", warn_without_convergence);
     bool always_solve = pin->GetOrAddBoolean("b_cleanup", "always_solve", false);
     params.Add("always_solve", always_solve);
+    bool use_normalized_divb = pin->GetOrAddBoolean("b_cleanup", "use_normalized_divb", false);
+    params.Add("use_normalized_divb", use_normalized_divb);
 
     // Finally, initialize the solver
     // Translate parameters
@@ -208,6 +210,7 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
     auto always_solve = pkg->Param<bool>("always_solve");
     auto solver = pkg->Param<BiCGStabSolver<int>>("solver");
     auto verbose = pmesh->packages.Get("Globals")->Param<int>("verbose");
+    auto use_normalized = pkg->Param<bool>("use_normalized_divb");
 
     if (MPIRank0() && verbose > 0) {
         std::cout << "Cleaning divB to relative tolerance " << rel_tolerance << std::endl;
@@ -232,6 +235,21 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
     // and syncs ghost zones
     KHARMADriver::SyncAllBounds(md);
     B_FluxCT::CalcDivB(md.get(), "divB_RHS");
+    if (use_normalized) {
+        // Normalize divB by local metric determinant for fairer weighting of errors
+        // Note that laplacian operator will also have to be normalized ofc
+        auto divb_rhs = md->PackVariables(std::vector<std::string>{"divB_RHS"});
+        auto pmb0 = md->GetBlockData(0)->GetBlockPointer();
+        const IndexRange ib = md->GetBoundsI(IndexDomain::entire);
+        const IndexRange jb = md->GetBoundsJ(IndexDomain::entire);
+        const IndexRange kb = md->GetBoundsK(IndexDomain::entire);
+        pmb0->par_for("normalize_divB", 0, divb_rhs.GetDim(5)-1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+            KOKKOS_LAMBDA (const int& b, const int &k, const int &j, const int &i) {
+                const auto& G = divb_rhs.GetCoords(b);
+                divb_rhs(b, 0, k, j, i) /= G.gdet(Loci::corner, j, i);
+            }
+        );
+    }
     KHARMADriver::SyncAllBounds(md);
 
     // Add a solver container and associated MeshData
@@ -300,6 +318,9 @@ TaskStatus B_Cleanup::ApplyP(MeshData<Real> *msolve, MeshData<Real> *md)
 
 TaskStatus B_Cleanup::CornerLaplacian(MeshData<Real>* md, const std::string& p_var, MeshData<Real>* md_again, const std::string& lap_var)
 {
+    auto pkg = md->GetMeshPointer()->packages.Get("B_Cleanup");
+    const auto use_normalized = pkg->Param<bool>("use_normalized_divb");
+
     // Cover ghost cells; maximize since both ops have stencil >1
     const IndexRange ib = md->GetBoundsI(IndexDomain::entire);
     const IndexRange jb = md->GetBoundsJ(IndexDomain::entire);
@@ -339,6 +360,9 @@ TaskStatus B_Cleanup::CornerLaplacian(MeshData<Real>* md, const std::string& p_v
             const auto& G = lap.GetCoords(b);
             // This is the inverse diagonal element of a fictional a_ij Laplacian operator
             lap(b, 0, k, j, i) = B_FluxCT::corner_div(G, dB, b, k, j, i, ndim > 2);
+            if (use_normalized) {
+                lap(b, 0, k, j, i) /= G.gdet(Loci::corner, j, i);
+            }
         }
     );
 

From c4588cfa87410d79e8a801e664879555b99815e0 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 5 Oct 2023 12:27:42 -0600
Subject: [PATCH 13/47] Test updates & restarting

Restarts now record cons by default, GRMHD prims only
Restarting is now binary-similar @5 steps
Also add back strict 'set -euo pipefail' to kill tests on nonzero
returns, and fix some more test scripts
---
 kharma/b_flux_ct/b_flux_ct.cpp      | 22 +++++++
 kharma/b_flux_ct/b_flux_ct.hpp      |  8 +--
 kharma/driver/kharma_driver.cpp     |  8 +--
 kharma/grmhd/grmhd.cpp              |  3 +
 kharma/prob/post_initialize.cpp     | 54 ++++++++++++-----
 pars/bondi/bondi_1d.par             | 90 +++++++++++++++++++++++++++++
 tests/anisotropic_conduction/run.sh |  1 +
 tests/bondi/run.sh                  |  1 +
 tests/bondi_viscous/run.sh          |  2 +-
 tests/bz_monopole/run.sh            |  2 +-
 tests/conducting_atmosphere/run.sh  |  2 +-
 tests/emhdmodes/run.sh              |  2 +-
 tests/emhdshock/run.sh              |  4 +-
 tests/mhdmodes/run.sh               |  1 +
 tests/multizone/run.sh              | 19 +++---
 tests/noh/run.sh                    |  1 +
 tests/regrid/run.sh                 | 11 ++--
 tests/reinit/run.sh                 | 12 ++--
 tests/resize/run.sh                 | 11 +++-
 tests/restart/run.sh                | 12 +++-
 tests/tilt_init/run.sh              |  1 +
 tests/torus_sanity/mad_test.par     | 71 +++++++++++++++++++++++
 tests/torus_sanity/run.sh           |  3 +-
 23 files changed, 286 insertions(+), 55 deletions(-)
 create mode 100644 pars/bondi/bondi_1d.par
 create mode 100644 tests/torus_sanity/mad_test.par

diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index d4b4dd63..3be20c15 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -206,6 +206,28 @@ void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     );
 }
 
+void MeshPtoU(MeshData<Real> *md, IndexDomain domain, bool coarse)
+{
+    auto pmb0 = md->GetBlockData(0)->GetBlockPointer();
+
+    const auto& B_U = md->PackVariables(std::vector<std::string>{"cons.B"});
+    const auto& B_P = md->PackVariables(std::vector<std::string>{"prims.B"});
+
+    auto bounds = coarse ? pmb0->c_cellbounds : pmb0->cellbounds;
+    IndexRange ib = bounds.GetBoundsI(domain);
+    IndexRange jb = bounds.GetBoundsJ(domain);
+    IndexRange kb = bounds.GetBoundsK(domain);
+    IndexRange vec = IndexRange{0, B_U.GetDim(4)-1};
+    IndexRange block = IndexRange{0, B_U.GetDim(5)-1};
+
+    pmb0->par_for("UtoP_B", block.s, block.e, vec.s, vec.e, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA (const int& b, const int &mu, const int &k, const int &j, const int &i) {
+            const auto& G = B_U.GetCoords(b);
+            // Update the primitive B-fields
+            B_U(b, mu, k, j, i) = B_P(b, mu, k, j, i) * G.gdet(Loci::center, j, i);
+        }
+    );
+}
 void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
diff --git a/kharma/b_flux_ct/b_flux_ct.hpp b/kharma/b_flux_ct/b_flux_ct.hpp
index 2fc4c461..7de3f6c9 100644
--- a/kharma/b_flux_ct/b_flux_ct.hpp
+++ b/kharma/b_flux_ct/b_flux_ct.hpp
@@ -68,9 +68,10 @@ void MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
  * Reverse of the above.  Only used alone during initialization.
- * Generally, use Flux::BlockPtoU
+ * Generally, use Flux::BlockPtoU/Flux::MeshPtoU
  */
 void BlockPtoU(MeshBlockData<Real> *md, IndexDomain domain, bool coarse=false);
+void MeshPtoU(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
  * All flux corrections required by this package
@@ -90,7 +91,6 @@ void FixBoundaryFlux(MeshData<Real> *md, IndexDomain domain, bool coarse);
  * Alternate B field fix for X1 boundary, keeps zero divergence while permitting flux
  * through the boundary, at the cost of a short non-local solve.
  */
-// added by Hyerin
 TaskStatus FixX1Flux(MeshData<Real> *md);
 
 /**
@@ -205,14 +205,14 @@ KOKKOS_INLINE_FUNCTION void center_grad(const GRCoordinates& G, const Global& P,
                                           double& B1, double& B2, double& B3)
 {
     const double norm = (do_3D) ? 0.25 : 0.5;
-    // 2D divergence, averaging to corners
+    // 2D gradient, averaging to centers
     double term1 =  P(b, 0, k, j+1, i+1) + P(b, 0, k, j, i+1)
                   - P(b, 0, k, j+1, i)   - P(b, 0, k, j, i);
     double term2 =  P(b, 0, k, j+1, i+1) + P(b, 0, k, j+1, i)
                   - P(b, 0, k, j, i+1)   - P(b, 0, k, j, i);
     double term3 = 0.;
     if (do_3D) {
-        // Average to corners in 3D, add 3rd flux
+        // Average to centers in 3D, add 3rd flux
         term1 += P(b, 0, k+1, j+1, i+1) + P(b, 0, k+1, j, i+1)
                - P(b, 0, k+1, j+1, i)   - P(b, 0, k+1, j, i);
         term2 += P(b, 0, k+1, j+1, i+1) + P(b, 0, k+1, j+1, i)
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 5c7b30b8..64a67c30 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -144,12 +144,12 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     params.Add("sync_prims", sync_prims);
     if (sync_prims) {
         // If we're not in AMR, we can sync primitive variables directly
-        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::Restart, Metadata::GetUserFlag("Primitive")});
-        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::WithFluxes, Metadata::Conserved});
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::WithFluxes, Metadata::Conserved});
     } else {
         // If we're in AMR or using the KHARMA driver anyway, sync conserved vars
-        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::Restart, Metadata::GetUserFlag("Primitive")});
-        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
     }
 
     return pkg;
diff --git a/kharma/grmhd/grmhd.cpp b/kharma/grmhd/grmhd.cpp
index 153cc5a4..e1d7746a 100644
--- a/kharma/grmhd/grmhd.cpp
+++ b/kharma/grmhd/grmhd.cpp
@@ -141,6 +141,9 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
     flags_cons.insert(flags_cons.end(), flags_grmhd.begin(), flags_grmhd.end());
 
+    // We must additionally save the primtive variables as the "seed" for the next U->P solve
+    flags_prim.push_back(Metadata::Restart);
+
     // We must additionally fill ghost zones of primitive variables in GRMHD, to seed the solver
     // Only necessary to add here if syncing conserved vars
     // Note some startup behavior relies on having the GRHD prims marked for syncing,
diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp
index 8dae599c..3a9833d0 100644
--- a/kharma/prob/post_initialize.cpp
+++ b/kharma/prob/post_initialize.cpp
@@ -69,6 +69,8 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
 
     auto& pkgs = pmesh->packages.AllPackages();
 
+    auto prob_name = pin->GetString("parthenon/job", "problem_id");
+
     // Magnetic field operations
     if (pin->GetString("b_field", "solver") != "none") {
         // If we need to seed a field based on the problem's fluid initialization...
@@ -83,25 +85,11 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
 
             // If we're doing a torus problem or explicitly ask for it,
             // normalize the magnetic field according to the max density
-            bool is_torus = pin->GetString("parthenon/job", "problem_id") == "torus";
+            bool is_torus = prob_name == "torus";
             if (pin->GetOrAddBoolean("b_field", "norm", is_torus)) {
                 NormalizeBField(md.get(), pin);
             }
         }
-
-        // Regardless, if evolving a field we should print max(divB)
-        // divB is not stencil-1 and we may not have run the above.
-        // If we did, we still need another sync, so it works out
-        KBoundaries::FreezeDirichlet(md);
-        KHARMADriver::SyncAllBounds(md);
-
-        if (pkgs.count("B_FluxCT")) {
-            B_FluxCT::PrintGlobalMaxDivB(md.get());
-        } else if (pkgs.count("B_CT")) {
-            B_CT::PrintGlobalMaxDivB(md.get());
-        } else if (pkgs.count("B_CD")) {
-            //B_CD::PrintGlobalMaxDivB(md.get());
-        }
     }
 
     // Add any hotspots *after* we've seeded fields,
@@ -119,6 +107,40 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
         // Parthenon restores all parameters (global vars) when restarting,
         // but KHARMA needs a few (currently one) reset instead
         KHARMA::ResetGlobals(pin, pmesh);
+
+        // We only record the conserved magnetic field in KHARMA restarts,
+        // but we record primitive field in iharm3d restarts
+        bool iharm3d_restart = prob_name == "resize_restart";
+        if (!iharm3d_restart) {
+            if (pkgs.count("B_FluxCT")) {
+                B_FluxCT::MeshUtoP(md.get(), IndexDomain::entire);
+            } else if (pkgs.count("B_CT")) {
+                B_CT::MeshUtoP(md.get(), IndexDomain::entire);
+            }
+        } else {
+            if (pkgs.count("B_FluxCT")) {
+                B_FluxCT::MeshPtoU(md.get(), IndexDomain::entire);
+            } else if (pkgs.count("B_CT")) {
+                // TODO this is only true if not cleaning, amend when cleaning supports B_CT
+                throw std::runtime_error("Cannot restart face-centered field from iharm3d!");
+            }
+        }
+    }
+
+    if (pin->GetString("b_field", "solver") != "none") {
+        // Regardless of how we initialized, if evolving a field we should print max(divB)
+        // divB is not stencil-1, and we may or may not have initialized or read it
+        // Either way, we still need another sync, so it works out
+        KBoundaries::FreezeDirichlet(md);
+        KHARMADriver::SyncAllBounds(md);
+
+        if (pkgs.count("B_FluxCT")) {
+            B_FluxCT::PrintGlobalMaxDivB(md.get());
+        } else if (pkgs.count("B_CT")) {
+            B_CT::PrintGlobalMaxDivB(md.get());
+        } else if (pkgs.count("B_CD")) {
+            //B_CD::PrintGlobalMaxDivB(md.get());
+        }
     }
 
     // Clean the B field, generally for resizing/restarting
@@ -131,7 +153,7 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
             pouts->MakeOutputs(pmesh, pin, &tm, SignalHandler::OutputSignal::now);
         }
 
-        // This does its own MPI syncs
+        // Cleanup is applied to conserved variables
         B_Cleanup::CleanupDivergence(md);
     }
 
diff --git a/pars/bondi/bondi_1d.par b/pars/bondi/bondi_1d.par
new file mode 100644
index 00000000..c42434d0
--- /dev/null
+++ b/pars/bondi/bondi_1d.par
@@ -0,0 +1,90 @@
+# Bondi flow problem
+# Model a spherically symmetric, unmagnetized inflow
+# Uses more MeshBlocks than necessary, for debugging
+
+<parthenon/job>
+problem_id = bondi
+
+<parthenon/mesh>
+# Full mesh size, no refinement
+# Don't bother with xN boundaries for spherical coordinate systems
+# KHARMA will automatically place ~5 zones inside the EH
+nx1 = 128
+nx2 = 1
+nx3 = 1
+
+<parthenon/meshblock>
+nx1 = 128
+nx2 = 1
+nx3 = 1
+
+<coordinates>
+# Spherical Kerr-Schild coords
+base = spherical_ks
+# MKS of Gammie '03
+transform = mks
+# BH spin
+a = 0.0
+# MKS parameter
+hslope = 0.3
+# Radial domain in r_g
+r_in = 3.0
+r_out = 30.0
+# If using "Funky" MKS later, where is "startx1"?
+fmks_zero_point = 0.0
+
+<parthenon/time>
+tlim = 50.0
+
+<GRMHD>
+cfl = 0.9
+gamma = 1.666667
+reconstruction = weno5
+
+<bondi>
+# Bondi problem parameters:
+# density scaling/accretion rate
+mdot = 1.0
+# Sonic point
+rs = 8.0
+
+<floors>
+# Disable floors
+disable_floors = true
+# If using B field, enable w/:
+rho_min_geom = 1e-6
+u_min_geom = 1e-8
+bsq_over_rho_max = 100
+u_over_rho_max = 100
+gamma_max = 10
+
+<boundaries>
+# We'll be adding material, and that's okay
+check_inflow_outer_x1 = false
+
+<b_field>
+# No field
+type = none
+solver = none
+# To add magnetic field
+#type = monopole
+#B10 = 1
+# Or
+#type = vertical
+#bz = 0.1
+
+<debug>
+verbose = 0
+flag_verbose = 0
+extra_checks = 1
+
+<parthenon/output0>
+file_type = hdf5
+dt = 5.0
+single_precision_output = true
+# Fields not present are silently ignored
+variables = prims.rho, prims.u, prims.uvec, prims.B, pflag
+
+<parthenon/output1>
+file_type = hst
+dt = 0.1
diff --git a/tests/anisotropic_conduction/run.sh b/tests/anisotropic_conduction/run.sh
index a3bc836d..a5682232 100755
--- a/tests/anisotropic_conduction/run.sh
+++ b/tests/anisotropic_conduction/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 ../../run.sh -i ../../pars/emhd/anisotropic_conduction.par parthenon/time/tlim=5
 
diff --git a/tests/bondi/run.sh b/tests/bondi/run.sh
index 3f21749a..77cdaa9a 100755
--- a/tests/bondi/run.sh
+++ b/tests/bondi/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/bondi_viscous/run.sh b/tests/bondi_viscous/run.sh
index ad7a8722..8c0f882a 100755
--- a/tests/bondi_viscous/run.sh
+++ b/tests/bondi_viscous/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-#set -euo pipefail
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/bz_monopole/run.sh b/tests/bz_monopole/run.sh
index ad9ff9de..2854c763 100755
--- a/tests/bz_monopole/run.sh
+++ b/tests/bz_monopole/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-#set -euo pipefail
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/conducting_atmosphere/run.sh b/tests/conducting_atmosphere/run.sh
index 964d4c3e..bb62569c 100755
--- a/tests/conducting_atmosphere/run.sh
+++ b/tests/conducting_atmosphere/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# set -euo pipefail
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/emhdmodes/run.sh b/tests/emhdmodes/run.sh
index cd5b6854..0b37a067 100755
--- a/tests/emhdmodes/run.sh
+++ b/tests/emhdmodes/run.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-#set -euo pipefail
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/emhdshock/run.sh b/tests/emhdshock/run.sh
index 120201c7..9ba0aa89 100755
--- a/tests/emhdshock/run.sh
+++ b/tests/emhdshock/run.sh
@@ -1,7 +1,7 @@
 #!/bin/bash
 set -euo pipefail
 
-BASE=~/kharma
+BASE=../..
 
 # Extended MHD shock test convergence to exercise higher order terms
 # We'll use just 1 MPI rank to circumvent the somewhat annoying BVP initialization
@@ -10,7 +10,7 @@ conv_1d() {
     for res in 256 512 1024 2048
     do
         cp shock_soln_${res}_default/shock_soln_*.txt ./
-        $BASE/run.sh -n 1 -i $BASE/pars/emhdshock.par debug/verbose=1 \
+        $BASE/run.sh -n 1 -i ./emhdshock.par debug/verbose=1 \
                       parthenon/mesh/nx1=$res parthenon/mesh/nx2=1 parthenon/mesh/nx3=1 \
                       parthenon/meshblock/nx1=$res parthenon/meshblock/nx2=1 parthenon/meshblock/nx3=1
         mv emhdshock.out0.00000.phdf emhd_1d_${res}_start.phdf
diff --git a/tests/mhdmodes/run.sh b/tests/mhdmodes/run.sh
index b1647b97..6864c337 100755
--- a/tests/mhdmodes/run.sh
+++ b/tests/mhdmodes/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 BASE=../..
 
diff --git a/tests/multizone/run.sh b/tests/multizone/run.sh
index 1500f82c..836131f9 100755
--- a/tests/multizone/run.sh
+++ b/tests/multizone/run.sh
@@ -1,7 +1,10 @@
-#!/bin/bash 
-# Hyerin (02/17/23) copied from Ben's code
+#!/bin/bash
+set -euo pipefail
 
-# Bash script testing b_clean
+# Test a "multizone" run, consisting of several runs in sequence
+# Adapted from script by Hyerin Cho (02/17/23)
+
+# TODO simplify for single test. Replace with invocation of run.py?
 
 # User specified values here
 KERR=false
@@ -41,10 +44,10 @@ do
   runtime=10
   echo "Running for: " $runtime
   log_u_over_rho=-5.2915149 # test same vacuum conditions as r_shell when (rs=1e2.5)
-  start_time=$(($start_time+$runtime))  
+  start_time=$(($start_time+$runtime))
 
   #parfilename="../../kharma/pars/bondi_multizone/bondi_multizone_$(printf %05d ${VAR}).par" # parameter file
-  
+
   # set problem type and cleanup
   if [ $VAR -eq 0 ]; then
     prob="bondi" #"torus" #
@@ -53,20 +56,20 @@ do
     prob="resize_restart_kharma"
     init_c=1
   fi
-  
+
   # set BH spin
   if [[ $KERR == "true" ]]; then
     spin=0.99
   else
     spin=0.0
   fi
-  
+
   # output time steps
   output0_dt=$((${runtime}/10))
   #output1_dt=$((${runtime}/20*10))
   output1_dt=$((${runtime}/5))
   output2_dt=$((${runtime}/10))
-  
+
   # dt, fname, fname_fill
   if [ $VAR -ne 0 ]; then
     # update dt from the previous run
diff --git a/tests/noh/run.sh b/tests/noh/run.sh
index ba7a6ad5..ebfe9fa7 100755
--- a/tests/noh/run.sh
+++ b/tests/noh/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 # Bash script to run 1D Noh shock test
 
diff --git a/tests/regrid/run.sh b/tests/regrid/run.sh
index 4748290d..13dd4f4c 100755
--- a/tests/regrid/run.sh
+++ b/tests/regrid/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 # Bash script testing a fresh Orszag-Tang vortex vs a version
 # re-gridded to 64^2 tiles in the middle of the run,
@@ -42,11 +43,11 @@ $KHARMADIR/run.sh -i ./resize_orszag_tang.par >log_resize.txt 2>&1
 # Check the final .rhdf file for sanity (i.e., divB small)
 check_code=0
 pyharm-check-basics resize_restart.out1.final.rhdf || check_code=$?
-if [[ $check_code != 0 ]]; then                                                                                                            
-    echo Resize test FAIL: $check_code                                                                                                     
-    exit_code=1                                                                                                                            
-else                                                                                                                                       
-    echo Resize test success                                                                                                               
+if [[ $check_code != 0 ]]; then
+    echo Resize test FAIL: $check_code
+    exit_code=1
+else
+    echo Resize test success
 fi
 
 exit $exit_code
diff --git a/tests/reinit/run.sh b/tests/reinit/run.sh
index e6852c3e..62ec790c 100755
--- a/tests/reinit/run.sh
+++ b/tests/reinit/run.sh
@@ -1,21 +1,23 @@
 #!/bin/bash
+set -euo pipefail
 
 # Bash script testing determinism of problem initialization and first steps
 
 # Set paths
 KHARMADIR=../..
 
-$KHARMADIR/run.sh -i $KHARMADIR/pars/sane.par debug/archive_parameters=false perturbation/u_jitter=0 parthenon/time/nlim=5 \
+$KHARMADIR/run.sh -i $KHARMADIR/pars/tori_3d/sane.par perturbation/u_jitter=0 parthenon/time/nlim=5 \
                     >log_reinit_1.txt 2>&1
 
 mv torus.out1.final.rhdf torus.out1.final.first.rhdf
 
 #$KHARMADIR/run.sh -r torus.out1.00000.rhdf parthenon/time/nlim=5
-$KHARMADIR/run.sh -i $KHARMADIR/pars/sane.par debug/archive_parameters=false perturbation/u_jitter=0 parthenon/time/nlim=5 \
-                    >log_reinit_1.txt 2>&1
+$KHARMADIR/run.sh -i $KHARMADIR/pars/tori_3d/sane.par perturbation/u_jitter=0 parthenon/time/nlim=5 \
+                    >log_reinit_2.txt 2>&1
 
 mv torus.out1.final.rhdf torus.out1.final.second.rhdf
 
-# This one's a clear case.  Binary or bust
+# This one's a clear case.  Binary or bust, even the input params
+# /Info includes walltime, which obvs can change
 h5diff --exclude-path=/Info torus.out1.final.first.rhdf torus.out1.final.second.rhdf
-# And that's the exit code.  One and done.
\ No newline at end of file
+# And that's the exit code.  One and done.
diff --git a/tests/resize/run.sh b/tests/resize/run.sh
index 5b965bd0..f62a31f6 100755
--- a/tests/resize/run.sh
+++ b/tests/resize/run.sh
@@ -1,19 +1,24 @@
 #!/bin/bash
+set -euo pipefail
 
 # Bash script testing starting a simulation, then resizing it up
 
 # Set paths
 KHARMADIR=../..
 
-$KHARMADIR/run.sh -i $KHARMADIR/pars/sane.par parthenon/time/nlim=5 >log_resize_1.txt 2>&1
+# This at least stirs up the field slightly vs initialization
+$KHARMADIR/run.sh -i $KHARMADIR/pars/tori_3d/sane.par parthenon/time/nlim=5 >log_resize_1.txt 2>&1
 
+# We can only resize/restart from iharm3d-format files
 pyharm convert --to_restart torus.out0.final.phdf
 
 sleep 1
 
-$KHARMADIR/run.sh -i ../../pars/resize_restart >log_resize_2.txt 2>&1
+$KHARMADIR/run.sh -i $KHARMADIR/pars/restarts/resize_restart.par resize_restart/fname=torus.out0.final.h5 \
+                  b_cleanup/always_solve=1 parthenon/time/nlim=5 \
+                  >log_resize_2.txt 2>&1
 
 mv torus.out0.final.phdf torus.out0.final.restart.phdf
 
 # Check divB on the re-meshed output
-pyharm-check-basics torus.out0.final.restart.phdf
+pyharm check-basics torus.out0.final.restart.phdf
diff --git a/tests/restart/run.sh b/tests/restart/run.sh
index e3a36f2c..e5ec3215 100755
--- a/tests/restart/run.sh
+++ b/tests/restart/run.sh
@@ -1,12 +1,13 @@
 #!/bin/bash
+set -euo pipefail
 
 # Bash script testing initialization vs restart of a torus problem
-# TODO this *really* should be binary now.
+# Require binary similarity after 5 steps
 
 # Set paths
 KHARMADIR=../..
 
-$KHARMADIR/run.sh -i $KHARMADIR/pars/sane.par parthenon/time/nlim=5 >log_restart_1.txt 2>&1
+$KHARMADIR/run.sh -i $KHARMADIR/pars/tori_3d/sane.par parthenon/time/nlim=5 >log_restart_1.txt 2>&1
 
 mv torus.out0.final.phdf torus.out0.final.init.phdf
 
@@ -17,4 +18,9 @@ $KHARMADIR/run.sh -r torus.out1.00000.rhdf parthenon/time/nlim=5 >log_restart_2.
 mv torus.out0.final.phdf torus.out0.final.restart.phdf
 
 # compare.py allows for small (5e-10) difference
-pyharm-diff torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
+#pyharm-diff torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
+# Compare binary
+h5diff --exclude-path=/Info \
+       --exclude-path=/Input \
+       --exclude-path=/divB \
+       torus.out0.final.init.phdf torus.out0.final.restart.phdf
diff --git a/tests/tilt_init/run.sh b/tests/tilt_init/run.sh
index bfc8b967..bb6ca62a 100755
--- a/tests/tilt_init/run.sh
+++ b/tests/tilt_init/run.sh
@@ -1,4 +1,5 @@
 #!/bin/bash
+set -euo pipefail
 
 # Run default tilted problem to 5 steps
 ../../run.sh -i ../../pars/mad_tilt.par parthenon/time/nlim=5 debug/verbose=1 \
diff --git a/tests/torus_sanity/mad_test.par b/tests/torus_sanity/mad_test.par
new file mode 100644
index 00000000..ce2c2898
--- /dev/null
+++ b/tests/torus_sanity/mad_test.par
@@ -0,0 +1,71 @@
+# MAD model for testing. Differences from mad.par:
+# 1. Smaller, smaller meshblocks
+# 2. No history or restart file output
+# 3. Output in double, include divB, exclude jcon
+# 4. Default to 10 steps
+
+<parthenon/job>
+problem_id = torus
+
+<parthenon/mesh>
+refinement = none
+numlevel = 1
+nx1 = 128
+nx2 = 64
+nx3 = 64
+
+<parthenon/meshblock>
+nx1 = 32
+nx2 = 32
+nx3 = 32
+
+<coordinates>
+base = spherical_ks
+transform = fmks
+r_out = 1000
+a = 0.9375
+hslope = 0.3
+mks_smooth = 0.5
+poly_xt = 0.82
+poly_alpha = 14.0
+
+<parthenon/time>
+tlim = 10000.0
+nlim = 10
+
+<GRMHD>
+cfl = 0.7
+gamma = 1.666667
+reconstruction = weno5
+
+<driver>
+type = imex
+two_sync = true
+
+<torus>
+rin = 20.0
+rmax = 41.0
+
+<perturbation>
+u_jitter = 0.1
+
+<b_field>
+type = mad
+beta_min = 100.
+
+<floors>
+rho_min_geom = 1e-6
+u_min_geom = 1e-8
+bsq_over_rho_max = 100
+u_over_rho_max = 2
+
+<debug>
+verbose = 1
+extra_checks = 1
+flag_verbose = 2
+
+<parthenon/output0>
+file_type = hdf5
+dt = 5.0
+single_precision_output = false
+variables = prims.rho, prims.u, prims.uvec, prims.B, cons.B, fflag, pflag, divB
diff --git a/tests/torus_sanity/run.sh b/tests/torus_sanity/run.sh
index e2212aa8..8f1131d2 100755
--- a/tests/torus_sanity/run.sh
+++ b/tests/torus_sanity/run.sh
@@ -1,11 +1,12 @@
 #!/bin/bash
+set -euo pipefail
 
 BASE=../..
 exit_code=0
 
 check_sanity() {
     # mad_test.par is basically only used for this, so common options are there.
-    $BASE/run.sh -i $BASE/pars/mad_test.par $2 >log_divb_${1}.txt 2>&1 #|| exit_code=$?
+    $BASE/run.sh -i ./mad_test.par $2 >log_divb_${1}.txt 2>&1 #|| exit_code=$?
 
     pyharm check-basics -d --allowed_divb=1e-10 torus.out0.final.phdf || exit_code=$?
 }

From 802bdb211636d7bb8c2b0b603a05356b56a935a0 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 5 Oct 2023 13:16:50 -0600
Subject: [PATCH 14/47] Fix B_Cleanup

As with the rest of KHARMA, B-field cleaning was ignoring the divergence
on corners which fell on the the polar faces.  This is because BiCGStab
wasn't applying any physical boundary conditions on phi during solving.

The easy way to get correct boundary conditions was to declare
the scalar field and RHS to be explicitly defined at corners/nodes, and
let Parthenon apply the default bounds for that case (as well as
applying our own boundaries to the intermediate dB when calculating the
Laplacian)

With the new boundaries, the "physical" domain for phi is now larger,
so bicgstab_solver had to be heavily modified. Since it's no longer
going to be upstreamed, I just forked it.
---
 kharma/b_cleanup/b_cleanup.cpp       | 135 +++---
 kharma/b_cleanup/bicgstab_solver.hpp | 676 +++++++++++++++++++++++++++
 kharma/boundaries/boundaries.cpp     |  11 +-
 kharma/prob/post_initialize.cpp      |   7 +
 pars/restarts/resize_restart.par     |  12 +-
 tests/resize/run.sh                  |  10 +-
 6 files changed, 785 insertions(+), 66 deletions(-)
 create mode 100644 kharma/b_cleanup/bicgstab_solver.hpp

diff --git a/kharma/b_cleanup/b_cleanup.cpp b/kharma/b_cleanup/b_cleanup.cpp
index 9d38ae13..9cca1a7b 100644
--- a/kharma/b_cleanup/b_cleanup.cpp
+++ b/kharma/b_cleanup/b_cleanup.cpp
@@ -38,6 +38,7 @@
 
 #include "boundaries.hpp"
 #include "decs.hpp"
+#include "domain.hpp"
 #include "kharma.hpp"
 #include "kharma_driver.hpp"
 #include "grmhd.hpp"
@@ -55,7 +56,8 @@ void B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md) {}
 #else
 
 #include <parthenon/parthenon.hpp>
-#include <solvers/bicgstab_solver.hpp>
+// This is now part of KHARMA, but builds on some stuff not in all Parthenon versions
+#include "bicgstab_solver.hpp"
 
 using namespace parthenon;
 using namespace parthenon::solvers;
@@ -71,11 +73,11 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
     // TODO also support face divB!!
 
     // Solver options
-    // Allow setting tolerance relative to starting value.  Off by default
-    Real rel_tolerance = pin->GetOrAddReal("b_cleanup", "rel_tolerance", 1.);
+    // Allow setting tolerance relative to starting value
+    // Parthenon's BiCGStab solver stops on abs || rel, so this disables rel
+    Real rel_tolerance = pin->GetOrAddReal("b_cleanup", "rel_tolerance", 1e-20);
     params.Add("rel_tolerance", rel_tolerance);
-    // TODO add an absolute tolerance to the Parthenon BiCGStab solver
-    Real abs_tolerance = pin->GetOrAddReal("b_cleanup", "abs_tolerance", 1e-11);
+    Real abs_tolerance = pin->GetOrAddReal("b_cleanup", "abs_tolerance", 1e-9);
     params.Add("abs_tolerance", abs_tolerance);
     int max_iterations = pin->GetOrAddInteger("b_cleanup", "max_iterations", 1e8);
     params.Add("max_iterations", max_iterations);
@@ -103,11 +105,12 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
     // Solution
     pkg->AddParam<std::string>("sol_name", "p");
     // RHS.  Must not just be "divB" as that field does not sync boundaries
-    pkg->AddParam<std::string>("rhs_name", "divB_RHS");
-    // Construct a solver. We don't need the template parameter, so we use 'int'
+    pkg->AddParam<std::string>("rhs_name", "RHS_divB");
+    // Construct a solver. We don't need the template parameter, so we use 'int'.
     // The flag "StartupOnly" marks solver variables not to be sync'd later,
     // even though they're also marked FillGhost
-    BiCGStabSolver<int> solver(pkg.get(), rel_tolerance, SparseMatrixAccessor(), {}, {Metadata::GetUserFlag("StartupOnly")});
+    BiCGStabSolver<int> solver(pkg.get(), rel_tolerance, abs_tolerance,
+                                SparseMatrixAccessor(), {}, {Metadata::GetUserFlag("StartupOnly")});
     // Set callback
     solver.user_MatVec = B_Cleanup::CornerLaplacian;
 
@@ -115,15 +118,19 @@ std::shared_ptr<KHARMAPackage> B_Cleanup::Initialize(ParameterInput *pin, std::s
 
     // FIELDS
     std::vector<int> s_vector({NVEC});
-    std::vector<MetadataFlag> cleanup_flags({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::GetUserFlag("StartupOnly")});
-    auto cleanup_flags_ghost = cleanup_flags;
-    cleanup_flags_ghost.push_back(Metadata::FillGhost);
+    std::vector<MetadataFlag> cleanup_flags({Metadata::Real, Metadata::Derived, Metadata::OneCopy,
+                                             Metadata::GetUserFlag("StartupOnly")});
+    auto cleanup_flags_node = cleanup_flags;
+    cleanup_flags_node.push_back(Metadata::FillGhost);
+    cleanup_flags_node.push_back(Metadata::Node);
+    auto cleanup_flags_cell = cleanup_flags;
+    cleanup_flags_cell.push_back(Metadata::Cell);
     // Scalar potential, solution to del^2 p = div B
-    pkg->AddField("p", Metadata(cleanup_flags_ghost));
+    pkg->AddField("p", Metadata(cleanup_flags_node));
     // Gradient of potential; temporary for gradient calc
-    pkg->AddField("dB", Metadata(cleanup_flags, s_vector));
+    pkg->AddField("dB", Metadata(cleanup_flags_cell, s_vector));
     // Field divergence as RHS, i.e. including boundary sync
-    pkg->AddField("divB_RHS", Metadata(cleanup_flags_ghost));
+    pkg->AddField("RHS_divB", Metadata(cleanup_flags_node));
 
 
     // Optionally take care of B field transport ourselves.  Inadvisable.
@@ -213,14 +220,15 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
     auto use_normalized = pkg->Param<bool>("use_normalized_divb");
 
     if (MPIRank0() && verbose > 0) {
-        std::cout << "Cleaning divB to relative tolerance " << rel_tolerance << std::endl;
+        std::cout << "Cleaning divB to absolute tolerance " << abs_tolerance <<
+                     " OR relative tolerance " << rel_tolerance << std::endl;
         if (warn_flag) std::cout << "Convergence failure will produce a warning." << std::endl;
         if (fail_flag) std::cout << "Convergence failure will produce an error." << std::endl;
     }
 
     // Calculate/print inital max divB exactly as we would during run
     const double divb_start = B_FluxCT::GlobalMaxDivB(md.get(), true);
-    if (divb_start < rel_tolerance && !always_solve) {
+    if ((divb_start < abs_tolerance  || divb_start < rel_tolerance) && !always_solve) {
         // If divB is "pretty good" and we allow not solving...
         if (MPIRank0())
             std::cout << "Magnetic field divergence of " << divb_start << " is below tolerance. Skipping B field cleanup." << std::endl;
@@ -230,38 +238,38 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
             std::cout << "Starting magnetic field divergence: " << divb_start << std::endl;
     }
 
+    // Add a solver container as a shallow copy on the default MeshData
+    // msolve is just a sub-set of vars we need from md, making MPI syncs etc faster
+    std::vector<std::string> names = KHARMA::GetVariableNames(&pmesh->packages, {Metadata::GetUserFlag("B_Cleanup"), Metadata::GetUserFlag("StartupOnly")});
+    auto &msolve = pmesh->mesh_data.AddShallow("solve", names);
+
     // Initialize the divB variable, which we'll be solving against.
     // This gets signed divB on all physical corners (total (N+1)^3)
-    // and syncs ghost zones
-    KHARMADriver::SyncAllBounds(md);
-    B_FluxCT::CalcDivB(md.get(), "divB_RHS");
+    B_FluxCT::CalcDivB(md.get(), "RHS_divB"); // this fn draws from cons.B, which is not in msolve
     if (use_normalized) {
         // Normalize divB by local metric determinant for fairer weighting of errors
         // Note that laplacian operator will also have to be normalized ofc
-        auto divb_rhs = md->PackVariables(std::vector<std::string>{"divB_RHS"});
-        auto pmb0 = md->GetBlockData(0)->GetBlockPointer();
-        const IndexRange ib = md->GetBoundsI(IndexDomain::entire);
-        const IndexRange jb = md->GetBoundsJ(IndexDomain::entire);
-        const IndexRange kb = md->GetBoundsK(IndexDomain::entire);
+        auto divb_rhs = msolve->PackVariables(std::vector<std::string>{"RHS_divB"});
+        auto pmb0 = msolve->GetBlockData(0)->GetBlockPointer();
+        const IndexRange ib = msolve->GetBoundsI(IndexDomain::entire);
+        const IndexRange jb = msolve->GetBoundsJ(IndexDomain::entire);
+        const IndexRange kb = msolve->GetBoundsK(IndexDomain::entire);
         pmb0->par_for("normalize_divB", 0, divb_rhs.GetDim(5)-1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
             KOKKOS_LAMBDA (const int& b, const int &k, const int &j, const int &i) {
                 const auto& G = divb_rhs.GetCoords(b);
-                divb_rhs(b, 0, k, j, i) /= G.gdet(Loci::corner, j, i);
+                divb_rhs(b, NN, 0, k, j, i) /= G.gdet(Loci::corner, j, i);
             }
         );
     }
-    KHARMADriver::SyncAllBounds(md);
-
-    // Add a solver container and associated MeshData
-    std::vector<std::string> names = KHARMA::GetVariableNames(&pmesh->packages, {Metadata::GetUserFlag("B_Cleanup"), Metadata::GetUserFlag("StartupOnly")});
-    auto &msolve = pmesh->mesh_data.Add("solve", names);
+    // make sure divB_RHS is sync'd
+    KHARMADriver::SyncAllBounds(msolve);
 
     // Create a TaskCollection of just the solve,
     // execute it to perform BiCGStab iteration
     TaskID t_none(0);
     TaskCollection tc;
     auto tr = tc.AddRegion(1);
-    auto t_solve_step = solver.CreateTaskList(t_none, 0, tr, md, msolve);
+    auto t_solve_step = solver.CreateTaskList(t_none, 0, tr, msolve, msolve);
     while (!tr.Execute());
     // Make sure solution's ghost zones are sync'd
     KHARMADriver::SyncAllBounds(msolve);
@@ -271,12 +279,10 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
         std::cout << "Applying magnetic field correction" << std::endl;
     }
     // Update the (conserved) magnetic field on physical zones using our solution
-    B_Cleanup::ApplyP(msolve.get(), md.get());
-
-    // Synchronize to update ghost zones
+    B_Cleanup::ApplyP(md.get(), md.get());
+    // Synchronize to update cons.B's ghost zones
     KHARMADriver::SyncAllBounds(md);
-
-    // Make sure primitive B reflects solution
+    // Make sure prims.B reflects solution
     B_FluxCT::MeshUtoP(md.get(), IndexDomain::entire, false);
 
     // Recalculate divB max for one last check
@@ -291,9 +297,7 @@ TaskStatus B_Cleanup::CleanupDivergence(std::shared_ptr<MeshData<Real>>& md)
 TaskStatus B_Cleanup::ApplyP(MeshData<Real> *msolve, MeshData<Real> *md)
 {
     // Apply on physical zones only, we'll be syncing/updating ghosts
-    const IndexRange ib = md->GetBoundsI(IndexDomain::interior);
-    const IndexRange jb = md->GetBoundsJ(IndexDomain::interior);
-    const IndexRange kb = md->GetBoundsK(IndexDomain::interior);
+    const IndexRange3 b = KDomain::GetRange(msolve, IndexDomain::interior, 0, 1);
     auto pmb0 = md->GetBlockData(0)->GetBlockPointer();
 
     auto P = msolve->PackVariables(std::vector<std::string>{"p"});
@@ -302,7 +306,7 @@ TaskStatus B_Cleanup::ApplyP(MeshData<Real> *msolve, MeshData<Real> *md)
     const int ndim = P.GetNdim();
 
     // dB = grad(p), defined at cell centers, subtract to make field divergence-free
-    pmb0->par_for("gradient_P", 0, P.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+    pmb0->par_for("gradient_P", 0, P.GetDim(5) - 1, b.ks, b.ke, b.js, b.je, b.is, b.ie,
         KOKKOS_LAMBDA (const int& b, const int &k, const int &j, const int &i) {
             const auto& G = P.GetCoords(b);
             double b1, b2, b3;
@@ -321,10 +325,10 @@ TaskStatus B_Cleanup::CornerLaplacian(MeshData<Real>* md, const std::string& p_v
     auto pkg = md->GetMeshPointer()->packages.Get("B_Cleanup");
     const auto use_normalized = pkg->Param<bool>("use_normalized_divb");
 
-    // Cover ghost cells; maximize since both ops have stencil >1
-    const IndexRange ib = md->GetBoundsI(IndexDomain::entire);
-    const IndexRange jb = md->GetBoundsJ(IndexDomain::entire);
-    const IndexRange kb = md->GetBoundsK(IndexDomain::entire);
+    // Updating interior is easier to follow -- BiCGStab will sync
+    const IndexRange ib = md->GetBoundsI(IndexDomain::interior);
+    const IndexRange jb = md->GetBoundsJ(IndexDomain::interior);
+    const IndexRange kb = md->GetBoundsK(IndexDomain::interior);
     auto pmb0 = md->GetBlockData(0)->GetBlockPointer();
 
     auto P = md->PackVariables(std::vector<std::string>{p_var});
@@ -333,16 +337,17 @@ TaskStatus B_Cleanup::CornerLaplacian(MeshData<Real>* md, const std::string& p_v
 
     const int ndim = P.GetNdim();
 
-    const IndexRange ib_l = IndexRange{ib.s, ib.e-1};
-    const IndexRange jb_l = (ndim > 1) ? IndexRange{jb.s, jb.e-1} : jb;
-    const IndexRange kb_l = (ndim > 2) ? IndexRange{kb.s, kb.e-1} : kb;
-    const IndexRange ib_r = IndexRange{ib.s+1, ib.e-1};
-    const IndexRange jb_r = (ndim > 1) ? IndexRange{jb.s+1, jb.e-1} : jb;
-    const IndexRange kb_r = (ndim > 2) ? IndexRange{kb.s+1, kb.e-1} : kb;
+    // P is defined on cell corners.  We need enough to take
+    // grad -> center, then div -> corner, so one extra in each direction
+    const IndexRange ib_l = IndexRange{ib.s-1, ib.e+1};
+    const IndexRange jb_l = (ndim > 1) ? IndexRange{jb.s-1, jb.e+1} : jb;
+    const IndexRange kb_l = (ndim > 2) ? IndexRange{kb.s-1, kb.e+1} : kb;
+    // The div computes corner i,j,k, so needs to be [0,N+1] to cover all physical corners
+    const IndexRange ib_r = IndexRange{ib.s, ib.e+1};
+    const IndexRange jb_r = (ndim > 1) ? IndexRange{jb.s, jb.e+1} : jb;
+    const IndexRange kb_r = (ndim > 2) ? IndexRange{kb.s, kb.e+1} : kb;
 
     // dB = grad(p), defined at cell centers
-    // Need a halo one zone *left*, as corner_div will read that.
-    // Therefore B's ghosts need to be up to date!
     pmb0->par_for("gradient_P", 0, P.GetDim(5) - 1, kb_l.s, kb_l.e, jb_l.s, jb_l.e, ib_l.s, ib_l.e,
         KOKKOS_LAMBDA (const int& b, const int &k, const int &j, const int &i) {
             const auto& G = P.GetCoords(b);
@@ -354,6 +359,32 @@ TaskStatus B_Cleanup::CornerLaplacian(MeshData<Real>* md, const std::string& p_v
         }
     );
 
+    // Replace ghost zone calculations with strict boundary conditions
+    // Only necessary in j so far, but there's no reason it shouldn't be done in i,k
+    for (int i=0; i < md->GetMeshPointer()->GetNumMeshBlocksThisRank(); i++) {
+        auto rc = md->GetBlockData(i);
+        auto pmb = rc->GetBlockPointer();
+        auto dB_block = rc->PackVariables(std::vector<std::string>{"dB"});
+        if (pmb->boundary_flag[BoundaryFace::inner_x2] == BoundaryFlag::user) {
+            pmb->par_for("dB_boundary", kb_l.s, kb_l.e, ib_l.s, ib_l.e,
+                KOKKOS_LAMBDA (const int &k, const int &i) {
+                    dB_block(V1, k, jb.s-1, i) = dB_block(V1, k, jb.s, i);
+                    dB_block(V2, k, jb.s-1, i) = -dB_block(V2, k, jb.s, i);
+                    dB_block(V3, k, jb.s-1, i) = dB_block(V3, k, jb.s, i);
+                }
+            );
+        }
+        if (pmb->boundary_flag[BoundaryFace::outer_x2] == BoundaryFlag::user) {
+            pmb->par_for("dB_boundary", kb_l.s, kb_l.e, ib_l.s, ib_l.e,
+                KOKKOS_LAMBDA (const int &k, const int &i) {
+                    dB_block(V1, k, jb.e+1, i) = dB_block(V1, k, jb.e, i);
+                    dB_block(V2, k, jb.e+1, i) = -dB_block(V2, k, jb.e, i);
+                    dB_block(V3, k, jb.e+1, i) = dB_block(V3, k, jb.e, i);
+                }
+            );
+        }
+    }
+
     // lap = div(dB), defined at cell corners
     pmb0->par_for("laplacian_dB", 0, lap.GetDim(5) - 1, kb_r.s, kb_r.e, jb_r.s, jb_r.e, ib_r.s, ib_r.e,
         KOKKOS_LAMBDA (const int& b, const int &k, const int &j, const int &i) {
diff --git a/kharma/b_cleanup/bicgstab_solver.hpp b/kharma/b_cleanup/bicgstab_solver.hpp
new file mode 100644
index 00000000..dc4fe559
--- /dev/null
+++ b/kharma/b_cleanup/bicgstab_solver.hpp
@@ -0,0 +1,676 @@
+//========================================================================================
+// (C) (or copyright) 2022. Triad National Security, LLC. All rights reserved.
+//
+// This program was produced under U.S. Government contract 89233218CNA000001 for Los
+// Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
+// for the U.S. Department of Energy/National Nuclear Security Administration. All rights
+// in the program are reserved by Triad National Security, LLC, and the U.S. Department
+// of Energy/National Nuclear Security Administration. The Government is granted for
+// itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
+// license in this material to reproduce, prepare derivative works, distribute copies to
+// the public, perform publicly and display publicly, and to permit others to do so.
+//========================================================================================
+#ifndef SOLVERS_BICGSTAB_SOLVER_HPP_
+#define SOLVERS_BICGSTAB_SOLVER_HPP_
+
+#include <memory>
+#include <string>
+#include <vector>
+
+#include "mesh/mesh.hpp"
+#include "interface/mesh_data.hpp"
+#include "interface/meshblock_data.hpp"
+#include "interface/state_descriptor.hpp"
+#include "kokkos_abstraction.hpp"
+#include "solvers/solver_utils.hpp"
+#include "tasks/task_id.hpp"
+#include "tasks/task_list.hpp"
+
+namespace parthenon {
+
+namespace solvers {
+
+struct BiCGStabCounter {
+  static int global_num_bicgstab_solvers;
+};
+
+template <typename SPType>
+class BiCGStabSolver : BiCGStabCounter {
+ public:
+  BiCGStabSolver() = default;
+  BiCGStabSolver(StateDescriptor *pkg, const Real rel_error_tol_in,
+                 const Real abs_error_tol_in, const SparseMatrixAccessor &sp,
+                 const std::vector<std::string> &aux_vars = {},
+                 std::vector<MetadataFlag> user_flags={})
+      : rel_error_tol(rel_error_tol_in), abs_error_tol(abs_error_tol_in),
+        sp_accessor(sp), max_iters(pkg->Param<int>("bicgstab_max_iterations")),
+        check_interval(pkg->Param<int>("bicgstab_check_interval")),
+        fail_flag(pkg->Param<bool>("bicgstab_abort_on_fail")),
+        warn_flag(pkg->Param<bool>("bicgstab_warn_on_fail")), aux_vars(aux_vars) {
+    Init(pkg, user_flags);
+  }
+  std::vector<std::string> SolverState() const {
+    std::vector<std::string> vars{spm_name, rhs_name, res, res0, vk, pk, tk, temp};
+    vars.insert(vars.end(), aux_vars.begin(), aux_vars.end());
+    return vars;
+  }
+  std::string label() const {
+    std::string lab;
+    for (const auto &s : SolverState())
+      lab += s;
+    return lab;
+  }
+
+  TaskID CreateTaskList(const TaskID &begin, const int i, TaskRegion &tr,
+                        std::shared_ptr<MeshData<Real>> md,
+                        std::shared_ptr<MeshData<Real>> mout) {
+    auto &solver = tr[i].AddIteration(solver_name);
+    solver.SetMaxIterations(max_iters);
+    solver.SetCheckInterval(check_interval);
+    solver.SetFailWithMaxIterations(fail_flag);
+    solver.SetWarnWithMaxIterations(warn_flag);
+    return CreateTaskList(begin, i, tr, solver, md, mout);
+  }
+
+  using FMatVec = std::function<TaskStatus(MeshData<Real> *, const std::string &,
+                                           MeshData<Real> *, const std::string &)>;
+  using FScale = std::function<TaskStatus(MeshData<Real> *, const std::string &)>;
+  FMatVec user_MatVec;
+  FMatVec user_pre_fluxcor;
+  FMatVec user_precomm_MatVec;
+  FScale user_precomm_scale;
+  FScale user_postcomm_scale;
+
+  std::vector<std::string> aux_vars;
+
+ private:
+  void Init(StateDescriptor *pkg, std::vector<MetadataFlag> user_flags) {
+    // create vectors used internally by the solver
+    spm_name = pkg->Param<std::string>("spm_name");
+    sol_name = pkg->Param<std::string>("sol_name");
+    rhs_name = pkg->Param<std::string>("rhs_name");
+
+    const std::string bicg_id(std::to_string(global_num_bicgstab_solvers));
+    solver_name = "internal_bicgstab_" + bicg_id;
+
+    res0 = "res_0" + bicg_id;
+    std::vector<MetadataFlag> base_flags({Metadata::Node, Metadata::OneCopy});
+    base_flags.insert(base_flags.end(), user_flags.begin(), user_flags.end());
+    auto meta = Metadata(base_flags);
+    pkg->AddField(res0, meta);
+
+    vk = "vk" + bicg_id;
+    tk = "tk" + bicg_id;
+    auto flux_flags = base_flags;
+    flux_flags.push_back(Metadata::WithFluxes);
+    meta = Metadata(flux_flags);
+    pkg->AddField(vk, meta);
+    pkg->AddField(tk, meta);
+
+    res = "res" + bicg_id;
+    pk = "pk" + bicg_id;
+    temp = "temp" + bicg_id;
+    auto ghost_flags = base_flags;
+    ghost_flags.push_back(Metadata::FillGhost);
+    meta = Metadata(ghost_flags);
+    pkg->AddField(pk, meta);
+    pkg->AddField(res, meta);
+    pkg->AddField(temp, meta);
+
+    global_num_bicgstab_solvers++;
+  }
+
+  TaskID CreateTaskList(const TaskID &begin, const int i, TaskRegion &tr,
+                        IterativeTasks &solver, std::shared_ptr<MeshData<Real>> md,
+                        std::shared_ptr<MeshData<Real>> mout) {
+    using Solver_t = BiCGStabSolver<SPType>;
+    using MD_t = MeshData<Real>;
+    TaskID none(0);
+    TaskList &tl = tr[i];
+    RegionCounter reg(solver_name);
+
+    // initialize some shared state
+    bicgstab_cntr = 0;
+    global_res0.val = 0.0;
+    global_res.val = 0.0;
+    rhoi.val = 0.0;
+    r0_dot_vk.val = 0.0;
+    t_dot_s.val = 0.0;
+    t_dot_t.val = 0.0;
+
+    auto MatVec = [this](auto &task_list, const TaskID &init_depend,
+                         std::shared_ptr<MeshData<Real>> &spmd,
+                         const std::string &name_in, const std::string &name_out) {
+      auto precom = init_depend;
+      auto vec_name = name_in;
+      if (this->user_precomm_MatVec) {
+        precom = task_list.AddTask(init_depend, this->user_precomm_MatVec, spmd.get(),
+                                   name_in, spmd.get(), this->temp);
+        vec_name = this->temp;
+      }
+      auto precom2 = precom;
+      if (this->user_precomm_scale) {
+        precom2 =
+            task_list.AddTask(precom, this->user_precomm_scale, spmd.get(), vec_name);
+      }
+
+      // TODO(BSP) this is AddBoundaryExchangeTasks, would use that except it's not
+      // templated for special iterative lists
+      auto dependency = precom2;
+      auto &tl = task_list;
+      auto &md = spmd;
+      const auto any = BoundaryType::any;
+      auto send = tl.AddTask(dependency, SendBoundBufs<any>, md);
+      auto recv = tl.AddTask(dependency, ReceiveBoundBufs<any>, md);
+      auto set = tl.AddTask(recv, SetBounds<any>, md);
+
+      auto pro = set;
+      if (md->GetMeshPointer()->multilevel) {
+        auto cbound = tl.AddTask(set, ApplyBoundaryConditionsOnCoarseOrFineMD, md, true);
+        pro = tl.AddTask(cbound, ProlongateBounds<any>, md);
+      }
+      auto fbound = tl.AddTask(pro, ApplyBoundaryConditionsOnCoarseOrFineMD, md, false);
+      auto boundaries = fbound;
+
+      auto postcomm = boundaries;
+      if (this->user_postcomm_scale) {
+        postcomm =
+            task_list.AddTask(boundaries, this->user_postcomm_scale, spmd.get(), vec_name);
+      }
+
+      auto update_rhs = postcomm;
+      if (this->user_MatVec) {
+        auto preflx = boundaries;
+        if (this->user_pre_fluxcor) {
+          auto calc_flx = task_list.AddTask(boundaries, this->user_pre_fluxcor, spmd.get(),
+                                            vec_name, spmd.get(), name_out);
+          auto send_flx =
+              task_list.AddTask(calc_flx, parthenon::LoadAndSendFluxCorrections, spmd);
+          auto recv_flx =
+              task_list.AddTask(calc_flx, parthenon::ReceiveFluxCorrections, spmd);
+          preflx = task_list.AddTask(recv_flx, parthenon::SetFluxCorrections, spmd);
+        }
+        update_rhs = task_list.AddTask(preflx, this->user_MatVec, spmd.get(), vec_name,
+                                       spmd.get(), name_out);
+      } else {
+        update_rhs = task_list.AddTask(boundaries, &Solver_t::MatVec<MD_t>, this, spmd.get(),
+                                       name_in, name_out);
+      }
+      return update_rhs;
+    };
+
+    auto get_init = MatVec(tl, begin, md, rhs_name, vk);
+
+    auto init_bicgstab = tl.AddTask(get_init, &Solver_t::InitializeBiCGStab<MD_t>, this,
+                                    md.get(), mout.get(), &global_res0.val);
+    tr.AddRegionalDependencies(reg.ID(), i, init_bicgstab);
+    // global reduction for initial residual
+    auto start_global_res0 =
+        (i == 0 ? tl.AddTask(init_bicgstab, &AllReduce<Real>::StartReduce, &global_res0,
+                             MPI_SUM)
+                : init_bicgstab);
+    auto finish_global_res0 =
+        tl.AddTask(start_global_res0, &AllReduce<Real>::CheckReduce, &global_res0);
+    tr.AddRegionalDependencies(reg.ID(), i, finish_global_res0);
+
+    // 1. \hat{r}_0 \cdot r_{i-1}
+    auto get_rhoi = solver.AddTask(init_bicgstab, &Solver_t::DotProduct<MD_t>, this,
+                                   md.get(), res0, res, &rhoi.val);
+    tr.AddRegionalDependencies(reg.ID(), i, get_rhoi);
+    auto start_global_rhoi =
+        (i == 0 ? solver.AddTask(get_rhoi, &AllReduce<Real>::StartReduce, &rhoi, MPI_SUM)
+                : get_rhoi);
+    auto finish_global_rhoi =
+        solver.AddTask(start_global_rhoi, &AllReduce<Real>::CheckReduce, &rhoi);
+
+    // 2. \beta = (rho_i/rho_{i-1}) (\alpha / \omega_{i-1})
+    // 3. p_i = r_{i-1} + \beta (p_{i-1} - \omega_{i-1} v_{i-1})
+    auto update_pk =
+        solver.AddTask(finish_global_rhoi, &Solver_t::Compute_pk<MD_t>, this, md.get());
+
+    // 4. v = A p
+    auto get_v = MatVec(solver, update_pk, md, pk, vk);
+
+    // 5. alpha = rho_i / (\hat{r}_0 \cdot v_i) [Actually just calculate \hat{r}_0 \cdot
+    // v_i]
+    auto get_r0dotv = solver.AddTask(get_v, &Solver_t::DotProduct<MD_t>, this, md.get(),
+                                     res0, vk, &r0_dot_vk.val);
+    tr.AddRegionalDependencies(reg.ID(), i, get_r0dotv);
+    auto start_global_r0dotv =
+        (i == 0 ? solver.AddTask(get_r0dotv, &AllReduce<Real>::StartReduce, &r0_dot_vk,
+                                 MPI_SUM)
+                : get_r0dotv);
+    auto finish_global_r0dotv =
+        solver.AddTask(start_global_r0dotv, &AllReduce<Real>::CheckReduce, &r0_dot_vk);
+    // alpha is actually updated in this next task
+
+    // 6. h = x_{i-1} + alpha p [Really updates x_i]
+    // 7. check for convergence [Not actually done]
+    // 8. s = r_{i-1} - alpha v
+    auto get_s = solver.AddTask(finish_global_r0dotv, &Solver_t::Update_h_and_s<MD_t>,
+                                this, md.get(), mout.get());
+
+    // 9. t = A s
+    auto get_t = MatVec(solver, get_s, md, res, tk);
+
+    // 10. omega = (t \cdot s) / (t \cdot t)
+    auto get_tdots = solver.AddTask(get_t, &Solver_t::OmegaDotProd<MD_t>, this, md.get(),
+                                    &t_dot_s.val, &t_dot_t.val);
+    tr.AddRegionalDependencies(reg.ID(), i, get_tdots);
+    auto start_global_tdots =
+        (i == 0
+             ? solver.AddTask(get_tdots, &AllReduce<Real>::StartReduce, &t_dot_s, MPI_SUM)
+             : get_tdots);
+    auto finish_global_tdots =
+        solver.AddTask(start_global_tdots, &AllReduce<Real>::CheckReduce, &t_dot_s);
+    auto start_global_tdott =
+        (i == 0
+             ? solver.AddTask(get_tdots, &AllReduce<Real>::StartReduce, &t_dot_t, MPI_SUM)
+             : get_tdots);
+    auto finish_global_tdott =
+        solver.AddTask(start_global_tdott, &AllReduce<Real>::CheckReduce, &t_dot_t);
+    // omega is actually updated in this next task
+
+    // 11. update x and residual
+    auto update_x = solver.AddTask(finish_global_tdots | finish_global_tdott,
+                                   &Solver_t::Update_x_res<MD_t>, this, md.get(),
+                                   mout.get(), &global_res.val);
+    tr.AddRegionalDependencies(reg.ID(), i, update_x);
+    auto start_global_res =
+        (i == 0 ? solver.AddTask(update_x, &AllReduce<Real>::StartReduce, &global_res,
+                                 MPI_SUM)
+                : update_x);
+    auto finish_global_res =
+        solver.AddTask(start_global_res, &AllReduce<Real>::CheckReduce, &global_res);
+
+    // 12. check for convergence
+    auto check = solver.SetCompletionTask(finish_global_res, &Solver_t::CheckConvergence,
+                                          this, i, true);
+    tr.AddGlobalDependencies(reg.ID(), i, check);
+
+    return check;
+  }
+
+ public:
+  template <typename T>
+  TaskStatus InitializeBiCGStab(T *u, T *du, Real *gres0) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    std::vector<std::string> vars({res, res0, vk, pk, rhs_name});
+    const auto &v = u->PackVariables(vars, imap);
+    const int ires = imap[res].first;
+    const int ires0 = imap[res0].first;
+    const int ivk = imap[vk].first;
+    const int ipk = imap[pk].first;
+    const int irhs = imap[rhs_name].first;
+
+    const auto &dv = du->PackVariables(std::vector<std::string>({sol_name}));
+
+    rhoi_old = 1.0;
+    alpha_old = 1.0;
+    omega_old = 1.0;
+    Real err(0);
+    const Real fac0 = 0.0;
+    const Real fac = 0.0;
+    par_reduce(
+        loop_pattern_mdrange_tag, "initialize bicgstab", DevExecSpace(), 0,
+        v.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lerr) {
+          // initialize guess for solution
+          dv(b, 0, k, j, i) = fac * v(b, irhs, k, j, i);
+
+          v(b, ires, k, j, i) = v(b, irhs, k, j, i) - fac * v(b, ivk, k, j, i);
+          v(b, ires0, k, j, i) = v(b, irhs, k, j, i) - fac0 * v(b, ivk, k, j, i);
+
+          v(b, ivk, k, j, i) = 0.0;
+          v(b, ipk, k, j, i) = 0.0;
+
+          lerr += v(b, irhs, k, j, i) * v(b, irhs, k, j, i);
+        },
+        Kokkos::Sum<Real>(err));
+    *gres0 += err;
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus update_r(T *u) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    std::vector<std::string> vars({res, rhs_name});
+    const auto &v = u->PackVariables(vars, imap);
+    const int ires = imap[res].first;
+    const int irhs = imap[rhs_name].first;
+
+    par_for(
+        loop_pattern_mdrange_tag, "initialize bicgstab", DevExecSpace(), 0,
+        v.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          v(b, ires, k, j, i) = v(b, irhs, k, j, i) - v(b, ires, k, j, i);
+        });
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus DotProduct(T *u, const std::string &vec1, const std::string &vec2,
+                        Real *reduce_sum) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    auto &v = u->PackVariables(std::vector<std::string>({vec1, vec2}));
+
+    Real gsum(0);
+    par_reduce(
+        loop_pattern_mdrange_tag, "DotProduct", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s,
+        kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) {
+          lsum += v(b, 0, k, j, i) * v(b, 1, k, j, i);
+        },
+        Kokkos::Sum<Real>(gsum));
+    *reduce_sum += gsum;
+    // printf("DotProduct: %s dot %s  = %e (%e)\n", vec1.c_str(), vec2.c_str(),
+    // *reduce_sum, gsum);
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus Compute_pk(T *u) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    auto &v = u->PackVariables(std::vector<std::string>({pk, res, vk, res0}), imap);
+    const int ipk = imap[pk].first;
+    const int ires = imap[res].first;
+    const int ires0 = imap[res0].first;
+    const int ivk = imap[vk].first;
+
+    const Real beta = (rhoi.val / rhoi_old) * (alpha_old / omega_old);
+    bool reset = false;
+    // if (std::abs(rhoi.val) < 1.e-8) {
+    //   // Reset
+    //   printf("Resetting (r_{i-1}, r_0) = %e res = %e \n", rhoi.val, res_old);
+    //   rhoi.val = res_old; // this should be the norm of the old residual, which we are
+    //   resetting to reset = true;
+    // }
+    // printf("Compute_pk: rho_i = %e rho_{i-1} = %e alpha_old = %e omega_old = %e beta =
+    // %e\n", rhoi.val, rhoi_old, alpha_old, omega_old, beta); rhoi_old = rhoi.val;
+    const Real w_o = omega_old;
+    par_for(
+        DEFAULT_LOOP_PATTERN, "compute pk", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s,
+        kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          v(b, ipk, k, j, i) = v(b, ires, k, j, i) +
+                               beta * (v(b, ipk, k, j, i) - w_o * v(b, ivk, k, j, i));
+          if (reset) {
+            v(b, ipk, k, j, i) = v(b, ires, k, j, i);
+            v(b, ires0, k, j, i) = v(b, ires, k, j, i);
+          }
+        });
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus MatVec(T *u, const std::string &in_vec, const std::string &out_vec) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    auto &v =
+        u->PackVariables(std::vector<std::string>({in_vec, out_vec, spm_name}), imap);
+    const int iin = imap[in_vec].first;
+    const int iout = imap[out_vec].first;
+    const int isp_lo = imap[spm_name].first;
+    const int isp_hi = imap[spm_name].second;
+    SparseMatrixAccessor &r_sp_accessor = sp_accessor;
+
+    par_for(
+        DEFAULT_LOOP_PATTERN, "MatVec", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s, kb.e,
+        jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          v(b, iout, k, j, i) =
+              r_sp_accessor.MatVec(v, isp_lo, isp_hi, v, iin, b, k, j, i);
+        });
+    // printf("MatVec: in_vec = %s out_vec = %s spm = %s\n", in_vec.c_str(),
+    // out_vec.c_str(), spm_name.c_str());
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus Update_h_and_s(T *u, T *du) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    auto &v = u->PackVariables(std::vector<std::string>({res, pk, vk}), imap);
+    auto &dv = du->PackVariables(std::vector<std::string>({sol_name}));
+    const int ires = imap[res].first;
+    const int ipk = imap[pk].first;
+    const int ivk = imap[vk].first;
+
+    Real alpha = rhoi.val / r0_dot_vk.val;
+    // printf("alpha = %e rho = %e (v, r_0) = %e\n", alpha, rhoi.val, r0_dot_vk.val);
+    if (std::abs(r0_dot_vk.val) < 1.e-200) alpha = 0.0;
+    par_for(
+        DEFAULT_LOOP_PATTERN, "Update_h", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s, kb.e,
+        jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          dv(b, 0, k, j, i) += alpha * v(b, ipk, k, j, i);
+          v(b, ires, k, j, i) -= alpha * v(b, ivk, k, j, i);
+        });
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus Update_h(T *u, T *du) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    auto &v = u->PackVariables(std::vector<std::string>({pk}));
+    auto &dv = du->PackVariables(std::vector<std::string>({sol_name}));
+    Real alpha = rhoi.val / r0_dot_vk.val;
+    // printf("Update_h: r0_dot_vk = %e rhoi = %e alpha = %e\n", r0_dot_vk.val, rhoi.val,
+    // alpha);
+    par_for(
+        DEFAULT_LOOP_PATTERN, "Update_h", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s, kb.e,
+        jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          dv(b, 0, k, j, i) += alpha * v(b, 0, k, j, i);
+        });
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus Update_s(T *u) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    auto &v = u->PackVariables(std::vector<std::string>({res, vk}), imap);
+    const int ires = imap[res].first;
+    const int ivk = imap[vk].first;
+    Real alpha = rhoi.val / r0_dot_vk.val;
+    // printf("Update_s: r0_dot_vk = %e rhoi = %e alpha = %e\n", r0_dot_vk.val, rhoi.val,
+    // alpha);
+    par_for(
+        DEFAULT_LOOP_PATTERN, "Update_s", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s, kb.e,
+        jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+          v(b, ires, k, j, i) -= alpha * v(b, ivk, k, j, i);
+        });
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus OmegaDotProd(T *u, Real *t_dot_s, Real *t_dot_t) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    auto &v = u->PackVariables(std::vector<std::string>({tk, res}));
+
+    // TODO(JCD): these should probably be merged
+    Real ts_sum(0);
+    par_reduce(
+        loop_pattern_mdrange_tag, "tk dot sk", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s,
+        kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) {
+          lsum += v(b, 0, k, j, i) * v(b, 1, k, j, i);
+        },
+        Kokkos::Sum<Real>(ts_sum));
+    *t_dot_s += ts_sum;
+
+    Real tt_sum(0);
+    par_reduce(
+        loop_pattern_mdrange_tag, "tk dot sk", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s,
+        kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lsum) {
+          lsum += v(b, 0, k, j, i) * v(b, 0, k, j, i);
+        },
+        Kokkos::Sum<Real>(tt_sum));
+    *t_dot_t += tt_sum;
+    // printf("OmegaDotProd: t_dot_s = %e (%e) t_dot_t = %e (%e)\n", *t_dot_s, ts_sum,
+    // *t_dot_t, tt_sum);
+    return TaskStatus::complete;
+  }
+
+  template <typename T>
+  TaskStatus Update_x_res(T *u, T *du, Real *gres) {
+    const auto &ibi = u->GetBoundsI(IndexDomain::interior);
+    const auto &jbi = u->GetBoundsJ(IndexDomain::interior);
+    const auto &kbi = u->GetBoundsK(IndexDomain::interior);
+    const int ndim = u->GetMeshPointer()->ndim;
+    const auto ib = IndexRange{ibi.s, ibi.e + (ndim > 0)};
+    const auto jb = IndexRange{jbi.s, jbi.e + (ndim > 1)};
+    const auto kb = IndexRange{kbi.s, kbi.e + (ndim > 2)};
+
+    PackIndexMap imap;
+    auto &v = u->PackVariables(std::vector<std::string>({res, tk}), imap);
+    const int ires = imap[res].first;
+    const int itk = imap[tk].first;
+    auto &dv = du->PackVariables(std::vector<std::string>({sol_name}));
+    Real omega = t_dot_s.val / t_dot_t.val;
+    if (std::abs(t_dot_t.val) < 1.e-200) omega = 0.0;
+    Real err(0);
+    par_reduce(
+        loop_pattern_mdrange_tag, "Update_x", DevExecSpace(), 0, v.GetDim(5) - 1, kb.s,
+        kb.e, jb.s, jb.e, ib.s, ib.e,
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &lerr) {
+          dv(b, 0, k, j, i) += omega * v(b, ires, k, j, i);
+          v(b, ires, k, j, i) -= omega * v(b, itk, k, j, i);
+          lerr += v(b, ires, k, j, i) * v(b, ires, k, j, i);
+        },
+        Kokkos::Sum<Real>(err));
+    *gres += err;
+    return TaskStatus::complete;
+  }
+
+  TaskStatus CheckConvergence(const int &i, bool report) {
+    if (i != 0) return TaskStatus::complete;
+    bicgstab_cntr++;
+    global_res.val = std::sqrt(global_res.val);
+    if (bicgstab_cntr == 1) global_res0.val = std::sqrt(global_res0.val);
+
+    // printf("rhoi: %e r0_dot_vk: %e t_dot_t: %e\n", rhoi.val, r0_dot_vk.val,
+    // t_dot_s.val);
+    //  Update global scalars
+    rhoi_old = rhoi.val;
+    alpha_old = rhoi.val / r0_dot_vk.val;
+    omega_old = t_dot_s.val / t_dot_t.val;
+    res_old = global_res.val;
+
+    bool converged = std::abs(global_res.val / global_res0.val) < rel_error_tol
+                    || std::abs(global_res.val) < abs_error_tol;
+
+    bool stop = bicgstab_cntr == max_iters;
+    if (std::abs(alpha_old) < 1.e-8 && std::abs(omega_old) < 1.e-8) stop = true;
+    if (bicgstab_cntr % check_interval == 0) {
+      if (Globals::my_rank == 0) {
+        std::cout << " its= " << bicgstab_cntr << " rho= " << rhoi_old
+                  << " alpha= " << alpha_old << " omega= " << omega_old
+                  << " relative-res: " << global_res.val / global_res0.val
+                  << " absolute-res: " << global_res.val
+                  << " absolute-res0: " << global_res0.val << " relerr-tol: " << rel_error_tol
+                  << " abserr-tol: " << abs_error_tol
+                  << std::endl;
+      }
+    }
+
+    global_res.val = 0.0;
+    rhoi.val = 0.0;
+    r0_dot_vk.val = 0.0;
+    t_dot_s.val = 0.0;
+    t_dot_t.val = 0.0;
+
+    return converged || stop ? TaskStatus::complete : TaskStatus::iterate;
+  }
+
+ private:
+  Real rel_error_tol, abs_error_tol;
+  SparseMatrixAccessor sp_accessor;
+  int max_iters, check_interval, bicgstab_cntr;
+  bool fail_flag, warn_flag;
+  std::string spm_name, sol_name, rhs_name, res, res0, vk, pk, tk, temp, solver_name;
+
+  Real rhoi_old, alpha_old, omega_old, res_old;
+
+  AllReduce<Real> global_res0;
+  AllReduce<Real> global_res;
+  AllReduce<Real> rhoi;
+  AllReduce<Real> r0_dot_vk;
+  AllReduce<Real> t_dot_s;
+  AllReduce<Real> t_dot_t;
+};
+
+} // namespace solvers
+
+} // namespace parthenon
+
+#endif // SOLVERS_BICGSTAB_SOLVER_HPP_
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index a256dd1f..cb529f32 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -34,6 +34,7 @@
 #include "boundaries.hpp"
 
 #include "decs.hpp"
+#include "domain.hpp"
 #include "kharma.hpp"
 #include "flux.hpp"
 #include "flux_functions.hpp"
@@ -251,9 +252,13 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
     pkg->KBoundaries[bface](rc, coarse);
     EndFlag();
 
-    // Exit immediately if we're syncing emf alone
-    // TODO can we check name?
-    if (rc->GetVariableVector().size() == 1) {
+    // This will now be called in 2 places we might not expect,
+    // where we still may want to control the physical bounds:
+    // 1. Syncing only the EMF during runs with CT
+    // 2. Syncing boundaries while solving for B field
+    // this generally guards against anytime we can't do the below
+    PackIndexMap prims_map;
+    if (GRMHD::PackMHDPrims(rc.get(), prims_map).GetDim(4) == 0) {
         EndFlag();
         return;
     }
diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp
index 3a9833d0..30e09004 100644
--- a/kharma/prob/post_initialize.cpp
+++ b/kharma/prob/post_initialize.cpp
@@ -155,6 +155,13 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
 
         // Cleanup is applied to conserved variables
         B_Cleanup::CleanupDivergence(md);
+
+        if (pin->GetOrAddBoolean("b_cleanup", "output_after_cleanup", false)) {
+            auto tm = SimTime(0., 0., 0, 0, 0, 0, 0.);
+            auto pouts = std::make_unique<Outputs>(pmesh, pin, &tm);
+            pouts->MakeOutputs(pmesh, pin, &tm, SignalHandler::OutputSignal::now);
+        }
+
     }
 
     // If PtoU was called before the B field was initialized or corrected,
diff --git a/pars/restarts/resize_restart.par b/pars/restarts/resize_restart.par
index efdecd7d..6938a583 100644
--- a/pars/restarts/resize_restart.par
+++ b/pars/restarts/resize_restart.par
@@ -40,12 +40,12 @@ use_dt = false
 skip_b_cleanup = false
 
 <b_cleanup>
-rel_tolerance = 1.
-abs_tolerance = 1.e-14
-check_interval = 100
-max_iterations = 1000000
-# See b_cleanup.cpp
-sor_factor = 20.3
+# Disable exiting on rel tolerance
+rel_tolerance = 1.e-20
+# This tolerance is quite small, increase if no convergence
+abs_tolerance = 1.e-9
+check_interval = 20
+max_iterations = 10000
 
 <floors>
 rho_min_geom = 1e-6
diff --git a/tests/resize/run.sh b/tests/resize/run.sh
index f62a31f6..07e237d8 100755
--- a/tests/resize/run.sh
+++ b/tests/resize/run.sh
@@ -15,10 +15,10 @@ pyharm convert --to_restart torus.out0.final.phdf
 sleep 1
 
 $KHARMADIR/run.sh -i $KHARMADIR/pars/restarts/resize_restart.par resize_restart/fname=torus.out0.final.h5 \
-                  b_cleanup/always_solve=1 parthenon/time/nlim=5 \
-                  >log_resize_2.txt 2>&1
-
-mv torus.out0.final.phdf torus.out0.final.restart.phdf
+                  b_cleanup/abs_tolerance=1e-7 b_cleanup/always_solve=1 parthenon/time/nlim=1 \
+                  parthenon/output0/single_precision_output=false >log_resize_2.txt 2>&1
 
 # Check divB on the re-meshed output
-pyharm check-basics torus.out0.final.restart.phdf
+# The tolerance is based on observed behavior with the high tolerance above
+# production sims should set abs_tolerance of significantly less
+pyharm check-basics --allowed_divb=2e-10 resize_restart.out0.00000.phdf

From 26cf7f1e87075d9e1e915e1dc21b2e0d40d919a5 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 5 Oct 2023 14:45:54 -0600
Subject: [PATCH 15/47] Fix some EMHD stuff

Not all flag names were updated "EMHD"->"EMHDVar", I suspect a merge
somewhere regressed that.
Also fix some more scripts
---
 kharma/emhd/emhd.cpp               | 10 ++++++----
 tests/bz_monopole/run.sh           |  6 +++---
 tests/conducting_atmosphere/run.sh |  4 ++--
 tests/emhdshock/check.py           | 14 +++++++-------
 tests/emhdshock/check.sh           |  7 +------
 5 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp
index 65ea482b..b16fd7d2 100644
--- a/kharma/emhd/emhd.cpp
+++ b/kharma/emhd/emhd.cpp
@@ -128,7 +128,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     // EMHD is supported only with imex driver and implicit evolution,
     // synchronizing primitive variables
     Metadata::AddUserFlag("EMHDVar"); // "EMHD" name now taken by Parthenon for general flag, we want this one specific
-    std::vector<MetadataFlag> emhd_flags = {Metadata::Cell, Metadata::GetUserFlag("Implicit"), Metadata::GetUserFlag("EMHD")};
+    std::vector<MetadataFlag> emhd_flags = {Metadata::Cell, Metadata::GetUserFlag("Implicit"), Metadata::GetUserFlag("EMHDVar")};
 
     auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
     flags_prim.insert(flags_prim.end(), emhd_flags.begin(), emhd_flags.end());
@@ -185,8 +185,9 @@ void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
 
+    // Get only relevant cons, but all prims as we need the Lorentz factor
     PackIndexMap prims_map, cons_map;
-    auto U_E = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("EMHD"), Metadata::Conserved}, cons_map);
+    auto U_E = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map);
     auto P = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("Primitive")}, prims_map);
     const VarMap m_p(prims_map, false), m_u(cons_map, true);
 
@@ -217,9 +218,10 @@ void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
 
+    // Get only relevant cons, but all prims as we need the Lorentz factor
     PackIndexMap prims_map, cons_map;
-    auto U_E = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map);
-    auto P = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("Primitive")}, prims_map);
+    auto U_E = rc->PackVariables({Metadata::GetUserFlag("EMHDVar"), Metadata::Conserved}, cons_map);
+    auto P = rc->PackVariables({Metadata::GetUserFlag("Primitive")}, prims_map);
     const VarMap m_p(prims_map, false), m_u(cons_map, true);
 
     const auto& G = pmb->coords;
diff --git a/tests/bz_monopole/run.sh b/tests/bz_monopole/run.sh
index 2854c763..3ec7bfab 100755
--- a/tests/bz_monopole/run.sh
+++ b/tests/bz_monopole/run.sh
@@ -6,13 +6,13 @@ BASE=../..
 exit_code=0
 
 # Full run to test stability to completion
-$BASE/run.sh -i $BASE/pars/bz_monopole.par debug/verbose=1 parthenon/output0/single_precision_output=false >log_bz_monopole_full.txt 2>&1 #|| exit_code=$?
+$BASE/run.sh -i $BASE/pars/tests/bz_monopole.par debug/verbose=1 parthenon/output0/single_precision_output=false >log_bz_monopole_full.txt 2>&1 || exit_code=$?
 
 # At *least* check divB
 pyharm-check-basics bz_monopole.out0.final.phdf || exit_code=$?
 
 # Take 1 step to look for early signs of non-fatal instabilities
-$BASE/run.sh -i $BASE/pars/bz_monopole.par parthenon/time/nlim=1 parthenon/output0/dt=0.0 parthenon/output0/single_precision_output=false >log_bz_monopole_step.txt 2>&1 #|| exit_code=$?
+$BASE/run.sh -i $BASE/pars/tests/bz_monopole.par parthenon/time/nlim=1 parthenon/output0/dt=0.0 parthenon/output0/single_precision_output=false >log_bz_monopole_step.txt 2>&1 #|| exit_code=$?
 
-# Check is for plots only!
+# This just makes plots, it doesn't check anything
 python ./check.py
diff --git a/tests/conducting_atmosphere/run.sh b/tests/conducting_atmosphere/run.sh
index bb62569c..1f8b82f8 100755
--- a/tests/conducting_atmosphere/run.sh
+++ b/tests/conducting_atmosphere/run.sh
@@ -13,7 +13,7 @@ conv_2d() {
     for res in "${RES_LIST[@]}"
     do
         cp conducting_atmosphere_${res}_default/atmosphere_soln_*.txt .
-        $BASE/run.sh -n 1 -i $BASE/pars/conducting_atmosphere.par debug/verbose=1 \
+        $BASE/run.sh -n 1 -i ./conducting_atmosphere.par debug/verbose=1 \
             parthenon/time/tlim=200 parthenon/output0/dt=1000000 \
             parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
             parthenon/meshblock/nx1=$res parthenon/meshblock/nx2=$res parthenon/meshblock/nx3=1 \
@@ -35,4 +35,4 @@ conv_2d() {
 }
 
 ALL_RES="64,128,256,512"
-conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "in 2D, WENO5"
+conv_2d emhd2d_weno driver/reconstruction=weno5 "in 2D, WENO5"
diff --git a/tests/emhdshock/check.py b/tests/emhdshock/check.py
index 57678fcb..c557d6ad 100644
--- a/tests/emhdshock/check.py
+++ b/tests/emhdshock/check.py
@@ -9,7 +9,7 @@
 
 if __name__=='__main__':
 	outputdir = './'
-	kharmadir = '/home/vdhruv2/kharma'
+	kharmadir = '../../'
 	RES = [int(r) for r in sys.argv[1].split(",")]
 	
 	CONDUCTION = 1
@@ -25,13 +25,13 @@
 	for r, res in enumerate(RES):
 
 		# load analytic result
-		rho_analytic	 = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_rho.txt'))
-		u_analytic		 = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_u.txt'))
-		u1_analytic		 = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_u1.txt'))
+		rho_analytic	 = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_rho.txt'))
+		u_analytic		 = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_u.txt'))
+		u1_analytic		 = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_u1.txt'))
 		if CONDUCTION:
-			q_analytic   = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_q.txt'))
-		dP_analytic    = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_dP.txt'))
-		x_analytic     = np.loadtxt(os.path.join(kharmadir, 'kharma/prob/emhd/', 'shock_soln_{}_default'.format(res), 'shock_soln_xCoords.txt'))
+			q_analytic   = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_q.txt'))
+		dP_analytic    = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_dP.txt'))
+		x_analytic     = np.loadtxt(os.path.join(outputdir, 'shock_soln_{}_default'.format(res), 'shock_soln_xCoords.txt'))
 
 		# load code data
 		dfile = h5py.File('emhd_1d_{}_end.h5'.format(res), 'r')
diff --git a/tests/emhdshock/check.sh b/tests/emhdshock/check.sh
index b62e1ae7..fbfa821d 100755
--- a/tests/emhdshock/check.sh
+++ b/tests/emhdshock/check.sh
@@ -2,16 +2,11 @@
 
 # Run checks against analytic result for specified tests
 
-. /home/vdhruv2/anaconda3/etc/profile.d/conda.sh
-conda activate pyharm
-
 # Very small amplitude by default, preserve double precision
-~/pyHARM/scripts/pyharm-convert --double *.phdf
+pyharm convert --double *.phdf
 
 RES1D="256,512,1024,2048"
 
-conda activate base
-
 fail=0
 
 python3 check.py $RES1D "EMHD shock" emhd1d || fail=1

From 61de2e2ff2d167027bb68937269ebd452815918b Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 11:27:08 -0500
Subject: [PATCH 16/47] CI: try to fix CPU build

---
 machines/bp.sh     | 3 ++-
 scripts/ci/cpu.yml | 6 +++---
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/machines/bp.sh b/machines/bp.sh
index c746b042..47b52cd1 100644
--- a/machines/bp.sh
+++ b/machines/bp.sh
@@ -73,6 +73,7 @@ if [[ $HOST == "cinnabar"* ]]; then
   module purge # Handle modules inside this script
   HOST_ARCH="HSW" # This won't change
   DEVICE_ARCH="TURING75"
+  NPROC=56
 
   # Runtime
   MPI_NUM_PROCS=1
@@ -93,7 +94,7 @@ if [[ $HOST == "cinnabar"* ]]; then
       C_NATIVE="gcc"
       CXX_NATIVE="g++"
     else
-      module load nvhpc
+      module load nvhpc/23.7
       PREFIX_PATH="$HOME/libs/hdf5-nvhpc"
       C_NATIVE="nvc"
       CXX_NATIVE="nvc++"
diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 1f13629d..9d383895 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -38,15 +38,15 @@ stages:
 build:
   stage: build
   variables:
-    NPROC: 4
+    NPROC: 8
     HOST_ARCH: NATIVE
   before_script:
     - dnf -y groupinstall "Development Tools"
-    - dnf -y install hostname environment-modules cmake mpich-devel fftw-devel
+    - dnf -y install hostname environment-modules cmake mpich-devel hdf5-mpich-devel fftw-devel
     - source /etc/profile
     - module load mpi/mpich-x86_64
   script:
-    - ./make.sh clean hdf5
+    - ./make.sh clean
   artifacts:
     paths:
       - kharma.*

From 0a6585f9b6183ee9a65bfe226bd06aba03521e4f Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 13:52:41 -0500
Subject: [PATCH 17/47] CI+tests stuff

---
 scripts/ci/cpu.yml   | 8 +++++---
 tests/clean_tests.sh | 5 ++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 9d383895..b8d92246 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -3,7 +3,7 @@
 image: quay.io/centos/centos:stream9
 
 variables:
-  OMP_NUM_THREADS: 4
+  OMP_NUM_THREADS: 8
   OMP_PROC_BIND: "false"
   MPI_EXE: mpirun
   MPI_NUM_PROCS: 2
@@ -40,13 +40,15 @@ build:
   variables:
     NPROC: 8
     HOST_ARCH: NATIVE
+    C_NATIVE: gcc
+    CXX_NATIVE: g++
   before_script:
     - dnf -y groupinstall "Development Tools"
-    - dnf -y install hostname environment-modules cmake mpich-devel hdf5-mpich-devel fftw-devel
+    - dnf -y install hostname environment-modules cmake mpich-devel fftw-devel
     - source /etc/profile
     - module load mpi/mpich-x86_64
   script:
-    - ./make.sh clean
+    - ./make.sh clean hdf5
   artifacts:
     paths:
       - kharma.*
diff --git a/tests/clean_tests.sh b/tests/clean_tests.sh
index 1a6c541c..df0084e4 100755
--- a/tests/clean_tests.sh
+++ b/tests/clean_tests.sh
@@ -2,4 +2,7 @@
 # Cleans all temporary/gitignore files from tests
 
 TEST_DIR=$(dirname "$(readlink -f "$0")")
-rm -rf ${TEST_DIR}/*/*.{phdf,xdmf,rhdf,h5,hst,txt,png} ${TEST_DIR}/tilt_init/mks ${TEST_DIR}/*/frames_* ${TEST_DIR}/*/kharma_parsed_parameters*
+rm -rf ${TEST_DIR}/*/*.{phdf,xdmf,rhdf,h5,hst,txt,png} \
+       ${TEST_DIR}/tilt_init/mks \
+       ${TEST_DIR}/*/frames_* \
+       ${TEST_DIR}/*/kharma_parsed_parameters*

From c42db133b61775028f8fe6fad6e5520661e13084 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 14:27:05 -0500
Subject: [PATCH 18/47] CI: fix pyharm via mamba

---
 scripts/ci/cpu.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index b8d92246..ebf2ec9a 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -25,9 +25,8 @@ default:
     - module load mpi/mpich-x86_64
     - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
     - eval "$(./bin/micromamba shell hook -s posix)"
-    - micromamba create -y -f environment.yml
-    - micromamba activate pyharm
-    - ./install.sh
+    - git clone https://github.com/AFD-Illinois/pyharm.git /pyharm && cd /pyharm
+    - micromamba create -y -f environment.yml && micromamba activate pyharm && ./install.sh
 
 # Tests can be executed in parallel
 stages:

From 184f65b4835e5a8d5894ca3799ca71ffc31f3ea7 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 14:57:18 -0500
Subject: [PATCH 19/47] CI: pyharm install fixes

---
 scripts/ci/cpu.yml | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index ebf2ec9a..c5742bb2 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -25,8 +25,10 @@ default:
     - module load mpi/mpich-x86_64
     - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
     - eval "$(./bin/micromamba shell hook -s posix)"
-    - git clone https://github.com/AFD-Illinois/pyharm.git /pyharm && cd /pyharm
-    - micromamba create -y -f environment.yml && micromamba activate pyharm && ./install.sh
+    - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
+    - micromamba create -y -f /pyharm/environment.yml
+    - micromamba activate pyharm
+    - cd /pyharm && ./install.sh
 
 # Tests can be executed in parallel
 stages:

From 15b40265c5195db2750af64965c7faa27ad255f4 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 15:13:33 -0500
Subject: [PATCH 20/47] CI again

---
 scripts/ci/cpu.yml   |  1 +
 scripts/ci/nvhpc.yml | 14 ++++++--------
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index c5742bb2..d8b5781a 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -29,6 +29,7 @@ default:
     - micromamba create -y -f /pyharm/environment.yml
     - micromamba activate pyharm
     - cd /pyharm && ./install.sh
+    - cd -
 
 # Tests can be executed in parallel
 stages:
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 61a5be9c..9b49e58b 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -24,14 +24,12 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - wget -O Miniforge3.sh "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
-    - bash Miniforge3.sh -b -p "/home/conda"
-    - source "/home/conda/etc/profile.d/conda.sh"
-    - conda install h5py
-    - git clone https://github.com/AFD-Illinois/pyharm.git /home/pyharm
-    - conda activate
-    - cd /home/pyharm
-    - pip install --user .
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
+    - eval "$(./bin/micromamba shell hook -s posix)"
+    - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
+    - micromamba create -y -f /pyharm/environment.yml
+    - micromamba activate pyharm
+    - cd /pyharm && ./install.sh    
     - cd -
 
 # Tests can be executed in parallel,

From dd1a566d37ae3a535731b76829bbfca47b4e60ac Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 21:56:10 -0500
Subject: [PATCH 21/47] Rename AddMPIBoundarySync to AddBoundarySync

---
 kharma/driver/imex_step.cpp     |  4 ++--
 kharma/driver/kharma_driver.cpp | 37 +++++++++++++++++++--------------
 kharma/driver/kharma_driver.hpp |  2 +-
 kharma/driver/kharma_step.cpp   |  4 ++--
 kharma/driver/simple_step.cpp   |  2 +-
 5 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/kharma/driver/imex_step.cpp b/kharma/driver/imex_step.cpp
index ae74be89..b68a8d62 100644
--- a/kharma/driver/imex_step.cpp
+++ b/kharma/driver/imex_step.cpp
@@ -140,7 +140,7 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta
                 // Pull out a container of only EMF to synchronize
                 auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
                 auto t_emf_local = tl.AddTask(t_fluxes, B_CT::CalculateEMF, md_sub_step_init.get());
-                auto t_emf = KHARMADriver::AddMPIBoundarySync(t_emf_local, tl, md_emf_only);
+                auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_emf_only);
             }
             tl.AddTask(t_emf, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
             auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
@@ -250,7 +250,7 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta
         // but hasn't been tested to do so yet.
         auto t_floors = tl.AddTask(t_implicit, Packages::MeshApplyFloors, md_sub_step_final.get(), IndexDomain::interior);
 
-        KHARMADriver::AddMPIBoundarySync(t_floors, tl, md_sub_step_final);
+        KHARMADriver::AddBoundarySync(t_floors, tl, md_sub_step_final);
     }
 
     // Async Region: Any post-sync tasks.  Fixups, timestep & AMR tagging.
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 64a67c30..6e9a4fdc 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -81,12 +81,16 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     std::string flux = pin->GetOrAddString("driver", "flux", "llf");
     params.Add("use_hlle", (flux == "hlle"));
 
-    // Reconstruction scheme: plm, weno5, ppm...
-    // Allow an old parameter location
-    std::string grmhd_recon_option = pin->GetOrAddString("GRMHD", "reconstruction", "weno5");
-    std::string recon = pin->GetOrAddString("driver", "reconstruction", grmhd_recon_option);
+    // Reconstruction scheme.  TODO bunch more here, PPM esp...
+    std::vector<std::string> allowed_vals = {"donor_cell", "linear_mc", "weno5"};
+    std::string recon = pin->GetOrAddString("driver", "reconstruction", "weno5", allowed_vals);
     bool lower_edges = pin->GetOrAddBoolean("driver", "lower_edges", false);
     bool lower_poles = pin->GetOrAddBoolean("driver", "lower_poles", false);
+    if (lower_edges && lower_poles)
+        throw std::runtime_error("Cannot enable lowered reconstruction on edges and poles!");
+    if ((lower_edges || lower_poles) && recon != "weno5")
+        throw std::runtime_error("Lowered reconstructions can only be enabled with weno5!");
+
     int stencil = 0;
     if (recon == "donor_cell") {
         params.Add("recon", KReconstruction::Type::donor_cell);
@@ -97,21 +101,18 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     } else if (recon == "linear_mc") {
         params.Add("recon", KReconstruction::Type::linear_mc);
         stencil = 3;
-    } else if (recon == "weno5_lower_edges" || (recon == "weno5" && lower_edges)) {
+    } else if (recon == "weno5" && lower_edges) {
         params.Add("recon", KReconstruction::Type::weno5_lower_edges);
         stencil = 5;
-    } else if (recon == "weno5_lower_poles" || (recon == "weno5" && lower_poles)) {
+    } else if (recon == "weno5" && lower_poles) {
         params.Add("recon", KReconstruction::Type::weno5_lower_poles);
         stencil = 5;
     } else if (recon == "weno5") {
         params.Add("recon", KReconstruction::Type::weno5);
         stencil = 5;
-    } else {
-        std::cerr << "Reconstruction type not supported!  Supported reconstructions:" << std::endl;
-        std::cerr << "donor_cell, linear_mc, weno5, weno5_lower_edges, weno5_lower_poles (linear_vl coming back soon!)" << std::endl;
-        throw std::invalid_argument("Unsupported reconstruction algorithm!");
-    }
+    } // we only allow these options
     // Warn if using less than 3 ghost zones w/WENO etc, 2 w/Linear, etc.
+    // SMR/AMR independently requires an even number of zones, so we usually use 4
     if (Globals::nghost < (stencil/2 + 1)) {
         throw std::runtime_error("Not enough ghost zones for specified reconstruction!");
     }
@@ -136,12 +137,16 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     bool prims_are_fundamental = driver_type != DriverType::kharma;
     params.Add("prims_are_fundamental", prims_are_fundamental);
 
-    // Finally, we set default flags for primitive and conserved variables
-    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
+    // Which variables we *actually send* via Parthenon/MPI may differ, however.
+    // Prolongation/restriction should happen on conserved vars, so we must sync
+    // those in multilevel meshes.  If prims are funcamental but not sync'd,
+    // we "emulate" syncing them with PtoU/UtoP on boundaries
     bool sync_prims = prims_are_fundamental &&
                         (!pin->DoesParameterExist("parthenon/mesh", "numlevel") ||
                          pin->GetInteger("parthenon/mesh", "numlevel") == 1);
     params.Add("sync_prims", sync_prims);
+    // Finally, we set default flags for primitive and conserved variables
+    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
     if (sync_prims) {
         // If we're not in AMR, we can sync primitive variables directly
         params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
@@ -165,11 +170,11 @@ void KHARMADriver::AddFullSyncRegion(TaskCollection& tc, std::shared_ptr<MeshDat
     TaskRegion &bound_sync = tc.AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
         auto &tl = bound_sync[i];
-        AddMPIBoundarySync(t_none, tl, md_sync);
+        AddBoundarySync(t_none, tl, md_sync);
     }
 }
 
-TaskID KHARMADriver::AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &mc1)
+TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &mc1)
 {
     Flag("AddBoundarySync");
     auto t_start_sync = t_start;
@@ -240,7 +245,7 @@ TaskStatus KHARMADriver::SyncAllBounds(std::shared_ptr<MeshData<Real>> &md)
 
     TaskCollection tc;
     auto tr = tc.AddRegion(1);
-    AddMPIBoundarySync(t_none, tr[0], md);
+    AddBoundarySync(t_none, tr[0], md);
     while (!tr.Execute());
 
     EndFlag();
diff --git a/kharma/driver/kharma_driver.hpp b/kharma/driver/kharma_driver.hpp
index 18c943c5..5fe825c4 100644
--- a/kharma/driver/kharma_driver.hpp
+++ b/kharma/driver/kharma_driver.hpp
@@ -127,7 +127,7 @@ class KHARMADriver : public MultiStageDriver {
          * This sequence is used identically in several places, so it makes sense
          * to define once and use elsewhere.
          */
-        static TaskID AddMPIBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &md);
+        static TaskID AddBoundarySync(const TaskID t_start, TaskList &tl, std::shared_ptr<MeshData<Real>> &md);
 
         /**
          * Calculate the fluxes in each direction
diff --git a/kharma/driver/kharma_step.cpp b/kharma/driver/kharma_step.cpp
index 86730060..7b22c80b 100644
--- a/kharma/driver/kharma_step.cpp
+++ b/kharma/driver/kharma_step.cpp
@@ -161,7 +161,7 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
                 // Pull out a container of only EMF to synchronize
                 auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
                 auto t_emf_local = tl.AddTask(t_fluxes, B_CT::CalculateEMF, md_sub_step_init.get());
-                auto t_emf = KHARMADriver::AddMPIBoundarySync(t_emf_local, tl, md_emf_only);
+                auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_emf_only);
             }
             tl.AddTask(t_emf, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
             auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
@@ -221,7 +221,7 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
                                                 md_sub_step_init.get(), md_sub_step_final.get());
         }
 
-        KHARMADriver::AddMPIBoundarySync(t_copy_prims, tl, md_sync);
+        KHARMADriver::AddBoundarySync(t_copy_prims, tl, md_sync);
     }
 
     EndFlag();
diff --git a/kharma/driver/simple_step.cpp b/kharma/driver/simple_step.cpp
index ea30839e..a21a2a84 100644
--- a/kharma/driver/simple_step.cpp
+++ b/kharma/driver/simple_step.cpp
@@ -125,7 +125,7 @@ TaskCollection KHARMADriver::MakeSimpleTaskCollection(BlockList_t &blocks, int s
         auto t_floors = tl.AddTask(t_UtoP, Packages::MeshApplyFloors, md_sub_step_final.get(), IndexDomain::interior);
 
         // Boundary sync: neighbors must be available for FixUtoP below
-        KHARMADriver::AddMPIBoundarySync(t_floors, tl, md_sub_step_final);
+        KHARMADriver::AddBoundarySync(t_floors, tl, md_sub_step_final);
     }
 
     // Async Region: Any post-sync tasks.  Fixups, timestep & AMR tagging.

From 8ff938390a33e6d2eaf9c459f7b0e61e5b5c5e49 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 22:04:20 -0500
Subject: [PATCH 22/47] Get viscosity-only EMHD working again

---
 kharma/emhd/emhd.hpp           | 20 +++++++++++++++++---
 kharma/flux/flux.cpp           |  2 ++
 kharma/flux/flux_functions.hpp |  9 ++++-----
 kharma/flux/get_flux.hpp       |  9 +++++++--
 kharma/types.hpp               | 18 +++++++++++++++++-
 tests/bondi/run.sh             |  9 ++++-----
 tests/restart/run.sh           | 16 ++++++++--------
 7 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/kharma/emhd/emhd.hpp b/kharma/emhd/emhd.hpp
index ac043876..90b0505d 100644
--- a/kharma/emhd/emhd.hpp
+++ b/kharma/emhd/emhd.hpp
@@ -74,6 +74,16 @@ class EMHD_parameters {
         Real kappa;
         Real eta;
 
+        void print() const
+        {
+            printf("EMHD Parameters:\n");
+            printf("higher order: %d feedback: %d conduction: %d viscosity: %d\n",
+                    higher_order_terms, feedback, conduction, viscosity);
+            printf("kappa: %g eta: %g tau: %g conduction_a: %g viscosity_a: %g \n",
+                    kappa, eta, tau, conduction_alpha, viscosity_alpha);
+            // TODO closuretype
+        }
+
 };
 
 /**
@@ -109,7 +119,7 @@ void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse);
  */
 inline EMHD_parameters GetEMHDParameters(Packages_t& packages)
 {
-    EMHD::EMHD_parameters emhd_params_tmp;
+    EMHD::EMHD_parameters emhd_params_tmp = {0};
     if (packages.AllPackages().count("EMHD")) {
         emhd_params_tmp = packages.Get("EMHD")->Param<EMHD::EMHD_parameters>("emhd_params");
     }
@@ -257,7 +267,9 @@ KOKKOS_INLINE_FUNCTION void set_parameters(const GRCoordinates& G, const Local&
     FourVectors Dtmp;
     GRMHD::calc_4vecs(G, P, m_p, j, i, Loci::center, Dtmp);
     double bsq = m::max(dot(Dtmp.bcon, Dtmp.bcov), SMALL);
-    set_parameters(G, P(m_p.RHO), P(m_p.UU), P(m_p.Q), P(m_p.DP),
+    Real qtilde = (m_p.Q >= 0) ? P(m_p.Q) : 0.;
+    Real dPtilde = (m_p.DP >= 0) ? P(m_p.DP) : 0.;
+    set_parameters(G, P(m_p.RHO), P(m_p.UU), qtilde, dPtilde,
                     bsq, emhd_params, gam, j, i, tau, chi_e, nu_e);
 }
 
@@ -269,7 +281,9 @@ KOKKOS_INLINE_FUNCTION void set_parameters(const GRCoordinates& G, const Variabl
     FourVectors Dtmp;
     GRMHD::calc_4vecs(G, P, m_p, k, j, i, Loci::center, Dtmp);
     double bsq = m::max(dot(Dtmp.bcon, Dtmp.bcov), SMALL);
-    set_parameters(G, P(m_p.RHO, k, j, i), P(m_p.UU, k, j, i), P(m_p.Q, k, j, i), P(m_p.DP, k, j, i),
+    Real qtilde = (m_p.Q >= 0) ? P(m_p.Q, k, j, i) : 0.;
+    Real dPtilde = (m_p.DP >= 0) ? P(m_p.DP, k, j, i) : 0.;
+    set_parameters(G, P(m_p.RHO, k, j, i), P(m_p.UU, k, j, i), qtilde, dPtilde,
                     bsq, emhd_params, gam, j, i, tau, chi_e, nu_e);
 }
 
diff --git a/kharma/flux/flux.cpp b/kharma/flux/flux.cpp
index c00e54b0..da47ecb5 100644
--- a/kharma/flux/flux.cpp
+++ b/kharma/flux/flux.cpp
@@ -52,6 +52,8 @@ std::shared_ptr<KHARMAPackage> Flux::Initialize(ParameterInput *pin, std::shared
     // That's what this function is for.
     int nvar = KHARMA::PackDimension(packages.get(), Metadata::WithFluxes);
     std::vector<int> s_flux({nvar});
+    if (packages->Get("Globals")->Param<int>("verbose") > 2)
+        std::cout << "Allocating fluxes for " << nvar << " variables" << std::endl;
     // TODO optionally move all these to faces? Not important yet, & faces have no output, more memory
     std::vector<MetadataFlag> flags_flux = {Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy};
     Metadata m = Metadata(flags_flux, s_flux);
diff --git a/kharma/flux/flux_functions.hpp b/kharma/flux/flux_functions.hpp
index 6c6d577a..c128960c 100644
--- a/kharma/flux/flux_functions.hpp
+++ b/kharma/flux/flux_functions.hpp
@@ -58,9 +58,9 @@ KOKKOS_INLINE_FUNCTION void calc_tensor(const Local& P, const VarMap& m_p, const
     if (m_p.Q >= 0 || m_p.DP >= 0) {
         // Apply higher-order terms conversion if necessary
         Real qtilde = 0., dPtilde = 0.;
-        if (emhd_params.conduction)
+        if (m_p.Q >= 0)
             qtilde = P(m_p.Q);
-        if (emhd_params.viscosity)
+        if (m_p.DP >= 0)
             dPtilde = P(m_p.DP);
         const Real Theta = (gam - 1) * P(m_p.UU) / P(m_p.RHO);
         const Real cs2   = gam * (gam - 1) * P(m_p.UU) / (P(m_p.RHO) + gam * P(m_p.UU));
@@ -85,12 +85,11 @@ KOKKOS_INLINE_FUNCTION void calc_tensor(const Global& P, const VarMap& m_p, cons
                                         Real T[GR_DIM])
 {
     if (m_p.Q >= 0 || m_p.DP >= 0) {
-
         // Apply higher-order terms conversion if necessary
         Real qtilde = 0., dPtilde = 0.;
-        if (emhd_params.conduction)
+        if (m_p.Q >= 0)
             qtilde = P(m_p.Q, k, j, i);
-        if (emhd_params.viscosity)
+        if (m_p.DP >= 0)
             dPtilde = P(m_p.DP, k, j, i);
         const Real Theta = (gam - 1) * P(m_p.UU, k, j, i) / P(m_p.RHO, k, j, i);
         const Real cs2   = gam * (gam - 1) * P(m_p.UU, k, j, i) / (P(m_p.RHO, k, j, i) + gam * P(m_p.UU, k, j, i));
diff --git a/kharma/flux/get_flux.hpp b/kharma/flux/get_flux.hpp
index 2c3bad88..5f2edd77 100644
--- a/kharma/flux/get_flux.hpp
+++ b/kharma/flux/get_flux.hpp
@@ -124,8 +124,13 @@ inline TaskStatus GetFlux(MeshData<Real> *md)
     const int n1 = pmb0->cellbounds.ncellsi(IndexDomain::entire);
     const IndexRange block = IndexRange{0, cmax.GetDim(5) - 1};
     const int nvar = U_all.GetDim(4);
-    //std::cout << "Calculating fluxes for " << cmax.GetDim(5) << " blocks, "
-    //          << nvar << " variables (" << P_all.GetDim(4) << " primitives)" << std::endl;
+
+    if (globals.Get<int>("verbose") > 2) {
+        std::cout << "Calculating fluxes for " << cmax.GetDim(5) << " blocks, "
+                << nvar << " variables (" << P_all.GetDim(4) << " primitives)" << std::endl;
+        m_u.print(); m_p.print();
+        emhd_params.print();
+    }
 
     // Allocate scratch space
     const int scratch_level = 1; // 0 is actual scratch (tiny); 1 is HBM
diff --git a/kharma/types.hpp b/kharma/types.hpp
index 8670a44e..18e4a577 100644
--- a/kharma/types.hpp
+++ b/kharma/types.hpp
@@ -177,17 +177,33 @@ class VarMap {
             if (U1 >= 0) {
                 U2 = U1 + 1;
                 U3 = U1 + 2;
+            } else {
+                U2 = -1;
+                U3 = -1;
             }
             if (B1 >= 0) {
                 B2 = B1 + 1;
                 B3 = B1 + 2;
+            } else {
+                B2 = -1;
+                B3 = -1;
             }
             if (Bf1 >= 0) {
                 Bf2 = Bf1 + 1;
                 Bf3 = Bf1 + 2;
+            } else {
+                Bf2 = -1;
+                Bf3 = -1;
             }
         }
-        
+
+        void print() const
+        {
+            printf("VAR MAP:\n");
+            printf("prims: %d %d %d %d %d\n", RHO, UU, U1, U2, U3);
+            printf("B field cell: %d %d %d face: %d %d %d\n", B1, B2, B3, Bf1, Bf2, Bf3);
+            printf("EMHD q: %d dP: %d\n", Q, DP);
+        }
 };
 
 #if DEBUG
diff --git a/tests/bondi/run.sh b/tests/bondi/run.sh
index 77cdaa9a..edbdeafb 100755
--- a/tests/bondi/run.sh
+++ b/tests/bondi/run.sh
@@ -15,7 +15,7 @@ conv_2d() {
                                            parthenon/output0/dt=1000 parthenon/output0/single_precision_output=false \
                                            parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
                                            parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
-                                           $2 >log_${1}_${res}.txt 2>&1
+                                           $2 >log_${1}_${res}.txt 2>&1 || check_code=$?
         mv bondi.out0.00000.phdf bondi_2d_${res}_start_${1}.phdf
         mv bondi.out0.final.phdf bondi_2d_${res}_end_${1}.phdf
     done
@@ -44,14 +44,13 @@ conv_2d ks coordinates/transform=null "in 2D, KS coordinates"
 # Recon
 ALL_RES="16,24,32,48,64"
 conv_2d linear_mc GRMHD/reconstruction=linear_mc "in 2D, linear recon with MC limiter"
-conv_2d linear_vl GRMHD/reconstruction=linear_vl "in 2D, linear recon with VL limiter"
+# TODO reintroduce
+#conv_2d linear_vl GRMHD/reconstruction=linear_vl "in 2D, linear recon with VL limiter"
 
 # And the GRIM/classic driver
 conv_2d imex driver/type=imex "in 2D, with Imex driver"
 conv_2d imex_im "driver/type=imex GRMHD/implicit=true" "in 2D, semi-implicit stepping"
 
-# TODO magnetized?
-
-# TODO 3D, esp magnetized
+# TODO 3D, esp magnetized w/flux, face CT
 
 exit $exit_code
diff --git a/tests/restart/run.sh b/tests/restart/run.sh
index e5ec3215..66605a69 100755
--- a/tests/restart/run.sh
+++ b/tests/restart/run.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 # Bash script testing initialization vs restart of a torus problem
-# Require binary similarity after 5 steps
+# Require similarity to round-off after 5 steps
 
 # Set paths
 KHARMADIR=../..
@@ -17,10 +17,10 @@ $KHARMADIR/run.sh -r torus.out1.00000.rhdf parthenon/time/nlim=5 >log_restart_2.
 
 mv torus.out0.final.phdf torus.out0.final.restart.phdf
 
-# compare.py allows for small (5e-10) difference
-#pyharm-diff torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
-# Compare binary
-h5diff --exclude-path=/Info \
-       --exclude-path=/Input \
-       --exclude-path=/divB \
-       torus.out0.final.init.phdf torus.out0.final.restart.phdf
+# Compare to basic round-off
+pyharm diff --rel_tol 1e-15 torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
+# Compare binary. Sometimes works but not worth keeping always
+#h5diff --exclude-path=/Info \
+#       --exclude-path=/Input \
+#       --exclude-path=/divB \
+#       torus.out0.final.init.phdf torus.out0.final.restart.phdf

From 9b06281279f5c2ad986ef681d711523c884e975e Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Fri, 6 Oct 2023 23:01:57 -0500
Subject: [PATCH 23/47] Bump Kokkos to fix CUDA bug

---
 external/parthenon | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/external/parthenon b/external/parthenon
index 02898b68..1a0597f9 160000
--- a/external/parthenon
+++ b/external/parthenon
@@ -1 +1 @@
-Subproject commit 02898b683d2a33da5f7e912916e4ce367b733635
+Subproject commit 1a0597f99b3aceafc98ba1eb23e94663bfaeb57e

From ea701f99aff36f0c6e4204ca887c8e86b316d319 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Sat, 7 Oct 2023 01:12:08 -0500
Subject: [PATCH 24/47] Bunch of test script fixes

---
 pars/electrons/hubble.par       |  8 ++++----
 pars/electrons/noh.par          |  4 ++--
 tests/all_pars/run.sh           | 10 +++++++++-
 tests/emhdmodes/check.py        |  2 +-
 tests/hubble_flow/make_plots.py | 12 ++++++------
 tests/noh/check.py              | 26 +++++++++++++++-----------
 tests/noh/run.sh                |  6 +++---
 tests/run_all.sh                | 15 +++++++++++----
 tests/tilt_init/run.sh          |  2 +-
 9 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/pars/electrons/hubble.par b/pars/electrons/hubble.par
index b38e112f..55d94fab 100644
--- a/pars/electrons/hubble.par
+++ b/pars/electrons/hubble.par
@@ -32,6 +32,8 @@ nx2 = 1
 nx3 = 1
 
 <boundaries>
+inner_x1 = dirichlet
+outer_x1 = dirichlet
 check_inflow_inner_x1 = false
 check_inflow_outer_x1 = false
 
@@ -79,8 +81,6 @@ type = imex
 
 <parthenon/output0>
 file_type = hdf5
-# Once at the end
-ghost_zones = true
-dt = 1
-single_precision_output = false
+dt = 10
+single_precision_output = true
 variables = prims.rho, prims.u, prims.uvec, prims.Ktot, prims.Kel_Constant, fflag
diff --git a/pars/electrons/noh.par b/pars/electrons/noh.par
index 015b9057..2b9d2a3e 100644
--- a/pars/electrons/noh.par
+++ b/pars/electrons/noh.par
@@ -10,7 +10,7 @@ problem_id = noh
 refinement = none
 numlevel = 1
 
-nx1 = 2000
+nx1 = 1024
 x1min = 0.0
 x1max = 1.0
 
@@ -23,7 +23,7 @@ x3min = 0.0
 x3max = 1.0
 
 <parthenon/meshblock>
-nx1 = 2000
+nx1 = 1024
 nx2 = 1
 nx3 = 1
 
diff --git a/tests/all_pars/run.sh b/tests/all_pars/run.sh
index d2e654c9..ee9d480b 100755
--- a/tests/all_pars/run.sh
+++ b/tests/all_pars/run.sh
@@ -6,7 +6,15 @@ for folder in bondi electrons emhd shocks smr tests tori_2d tori_3d
 do
   for fil in ../../pars/$folder/*.par
   do
-    ../../run.sh -n 1 -i $fil parthenon/time/nlim=2
+    exit_code=0
+    par=$(basename $fil)
+    prob=${par%.*}
+    ../../run.sh -n 1 -i $fil parthenon/time/nlim=2 &>log_${prob}.txt || exit_code=$?
     rm -f *.{hst,phdf,rhdf,xdmf}
+    if [ $exit_code -ne 0 ]; then
+      echo $par FAIL
+    else
+      echo $par PASS
+    fi
   done
 done
diff --git a/tests/emhdmodes/check.py b/tests/emhdmodes/check.py
index 59991b57..7c577498 100644
--- a/tests/emhdmodes/check.py
+++ b/tests/emhdmodes/check.py
@@ -78,7 +78,7 @@
 
         var_numerical = dump['prims']
 
-        if higher_order_terms.lower() == "true":
+        if higher_order_terms:
             print("Higher order terms enabled")
             Theta = (gam - 1.) * dump['UU'] / dump['RHO']
             cs2   = gam * (gam - 1.) * dump['UU'] / (dump['RHO'] + (gam * dump['UU']) )
diff --git a/tests/hubble_flow/make_plots.py b/tests/hubble_flow/make_plots.py
index f0ab919e..4cda9061 100644
--- a/tests/hubble_flow/make_plots.py
+++ b/tests/hubble_flow/make_plots.py
@@ -17,24 +17,24 @@
 
 x = np.linspace(0.0, 1.0, 128)
 kap = (gam - 2) * (game - 1) / (game - 2) * u0 / rho0**game * (1 + v0 * t)**(game - 2)
-kap_dump = f['prims.Kel_Constant'][0,0,0,:,0]
+kap_dump = f['prims.Kel_Constant'][0,0,0,:]
 
 fig, ax = plt.subplots(2,2, figsize=(10,10))
-ax[0, 0].plot(x,f['prims.uvec'][0,0,0,:,0])
+ax[0, 0].plot(x,f['prims.uvec'][0,0,0,0,:])
 ax[0, 0].plot(x, v0*x / (1 + v0 * t))
 ax[0, 0].set_title("vx")
 
-ax[0, 1].plot(x,f['prims.rho'][0,0,0,:,0])
+ax[0, 1].plot(x,f['prims.rho'][0,0,0,:])
 ax[0, 1].plot(x, rho0 / (1 + v0 * t) * np.ones_like(x))
 ax[0, 1].set_title("rho")
 
-ax[1, 0].plot(x,f['prims.u'][0,0,0,:,0])
+ax[1, 0].plot(x,f['prims.u'][0,0,0,:])
 ax[1, 0].plot(x, ug0 / (1 + v0 * t)**2 * np.ones_like(x))
 ax[1, 0].set_title("u")
 
 kap = (gam - 2) * (game - 1) / (game - 2) * u0 / rho0**game * (1 + v0 * t)**(game - 2)
-ax[1, 1].plot(x, f['prims.Kel_Constant'][0,0,0,:,0])
+ax[1, 1].plot(x, f['prims.Kel_Constant'][0,0,0,:])
 ax[1, 1].plot(x, kap*np.ones_like(x))
 ax[1, 1].set_title("kappa_e")
 
-plt.savefig("hubble.png")
\ No newline at end of file
+plt.savefig("hubble.png")
diff --git a/tests/noh/check.py b/tests/noh/check.py
index bb33fa88..0566f9c5 100644
--- a/tests/noh/check.py
+++ b/tests/noh/check.py
@@ -1,9 +1,12 @@
 import numpy as np
 import os, sys, h5py
+
 import matplotlib
 matplotlib.use('Agg')
 import matplotlib.pyplot as plt
 
+import pyharm
+
 if __name__=='__main__':
     plotsdir = sys.argv[1]
     filesdir = sys.argv[2]
@@ -17,17 +20,18 @@
 
     # read data
     for r, resolution in enumerate(resolutions):
-        hfp = h5py.File(os.path.join(filesdir, 'noh.out0.final.res{:d}.h5'.format(resolution)))
-        gam = hfp['header/gam'][()]
-        gam_e = hfp['header/gamma_e'][()]
-        fel = hfp['header/fel_constant'][()]
-        rho = np.squeeze(hfp['prims'][Ellipsis,0][()])
-        uu = np.squeeze(hfp['prims'][Ellipsis,1][()])
-        kel = np.squeeze(hfp['prims'][Ellipsis,6][()])
-        startx1 = hfp['header/geom/startx1'][()]
-        dx1 = hfp['header/geom/dx1'][()]
-        n1 = hfp['header/n1'][()]
-        hfp.close()
+        #hfp = h5py.File(os.path.join(filesdir, 'noh.out0.final.res{:d}.h5'.format(resolution)))
+        hfp = pyharm.load_dump('noh.out0.final.res{:d}.phdf'.format(resolution))
+        gam = hfp['gam']
+        gam_e = hfp['gam_e']
+        fel = hfp['electrons/fel_constant']
+        rho = np.squeeze(hfp['rho'])
+        uu = np.squeeze(hfp['u'])
+        kel = np.squeeze(hfp['Kel_Constant'])
+        startx1 = hfp['startx1']
+        dx1 = hfp['dx1']
+        n1 = hfp['n1']
+        del hfp
 
         x1 = np.zeros(n1, dtype=float)
         for i in range(n1):
diff --git a/tests/noh/run.sh b/tests/noh/run.sh
index ebfe9fa7..f5a34a68 100755
--- a/tests/noh/run.sh
+++ b/tests/noh/run.sh
@@ -9,11 +9,11 @@ KHARMADIR=../..
 exit_code=0
 
 noh_test() {
-    ALL_RES="128,256,512,1024,2048"
-    for res in 64 128 256 512 1024 2048
+    ALL_RES="128,256,512,1024"
+    for res in 64 128 256 512 1024
     do
         eighth=$(($res / 8))
-        $KHARMADIR/run.sh -i $KHARMADIR/pars/noh.par debug/verbose=1 parthenon/output0/dt=1000 \
+        $KHARMADIR/run.sh -i $KHARMADIR/pars/electrons/noh.par debug/verbose=1 parthenon/output0/dt=1000 \
                             electrons/gamma_e=1.666667 \
                             parthenon/mesh/nx1=$res parthenon/meshblock/nx1=$eighth \
                             >log_noh_${res}.txt 2>&1
diff --git a/tests/run_all.sh b/tests/run_all.sh
index ea0da229..cc789804 100755
--- a/tests/run_all.sh
+++ b/tests/run_all.sh
@@ -2,10 +2,17 @@
 
 for dir in */
 do
-  cd $dir
+  prob=${dir%?}
+  cd $prob &>/dev/null
   if [ -f ./run.sh ]; then
-    echo "Running $dir"
-    ./run.sh
+    echo Running $prob
+    exit_code=0
+    ./run.sh >../log_${prob}.txt 2>&1 || exit_code=$?
+    if [ $exit_code -ne 0 ]; then
+      echo Test $prob FAIL
+    else
+      echo Test $prob PASS
+    fi
   fi
-  cd -
+  cd - &>/dev/null
 done
diff --git a/tests/tilt_init/run.sh b/tests/tilt_init/run.sh
index bb6ca62a..f7ccb04b 100755
--- a/tests/tilt_init/run.sh
+++ b/tests/tilt_init/run.sh
@@ -2,7 +2,7 @@
 set -euo pipefail
 
 # Run default tilted problem to 5 steps
-../../run.sh -i ../../pars/mad_tilt.par parthenon/time/nlim=5 debug/verbose=1 \
+../../run.sh -i ../../pars/tori_3d/mad_tilt.par parthenon/time/nlim=5 debug/verbose=1 \
                 parthenon/output0/single_precision_output=false \
                 >log_tilt_init.txt 2>&1
 

From de75557149e4e33b88140f30f1891cf3e49bb0f9 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Sat, 7 Oct 2023 09:43:41 -0500
Subject: [PATCH 25/47] Add some tests that are ready, fix artifacts

---
 scripts/ci/cpu.yml   | 9 ++++++++-
 scripts/ci/nvhpc.yml | 6 ++++++
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index d8b5781a..cdc93c6c 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -30,6 +30,12 @@ default:
     - micromamba activate pyharm
     - cd /pyharm && ./install.sh
     - cd -
+  # Always keep logs and plots.  Results should be printed to console!
+  artifacts:
+    when: always
+    paths:
+      - tests/*/*.png
+      - tests/*/*.txt
 
 # Tests can be executed in parallel
 stages:
@@ -64,4 +70,5 @@ tests:
     - ./run.sh
   parallel:
     matrix:
-      - TEST: [bondi, bondi_viscous, bz_monopole, emhdmodes, mhdmodes, noh, regrid, reinit, restart, tilt_init, torus_sanity]
+      - TEST: [all_pars, anisotropic_conduction, bondi, bondi_viscous, bz_monopole, \
+               emhdmodes, mhdmodes, noh, regrid, reinit, resize, restart, tilt_init, torus_sanity]
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 9b49e58b..0683c999 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -31,6 +31,12 @@ default:
     - micromamba activate pyharm
     - cd /pyharm && ./install.sh    
     - cd -
+  # Always keep logs and plots.  Results should be printed to console!
+  artifacts:
+    when: always
+    paths:
+      - tests/*/*.png
+      - tests/*/*.txt
 
 # Tests can be executed in parallel,
 # but be careful about GPU arch

From ff49d37f2f5e916c34c810eb635cd453e026f500 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Sat, 7 Oct 2023 10:28:33 -0500
Subject: [PATCH 26/47] CI fix

---
 scripts/ci/cpu.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index cdc93c6c..37735eca 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -70,5 +70,5 @@ tests:
     - ./run.sh
   parallel:
     matrix:
-      - TEST: [all_pars, anisotropic_conduction, bondi, bondi_viscous, bz_monopole, \
+      - TEST: [all_pars, anisotropic_conduction, bondi, bondi_viscous, bz_monopole,
                emhdmodes, mhdmodes, noh, regrid, reinit, resize, restart, tilt_init, torus_sanity]

From a8b177d581b516bf26b9c43aa677010c7de488a2 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Sat, 7 Oct 2023 12:02:17 -0500
Subject: [PATCH 27/47] Pass EMHDModes with Face CT. Clean up some other
 scripts.

---
 kharma/driver/imex_step.cpp       |  7 ++--
 pars/emhd/emhdmodes.par           |  8 ++--
 pars/tests/mhdmodes.par           |  5 +--
 tests/all_pars/run.sh             | 10 ++++-
 tests/bz_monopole/bz_monopole.par | 69 +++++++++++++++++++++++++++++++
 tests/bz_monopole/run.sh          |  6 ++-
 tests/emhdmodes/run.sh            | 12 +++---
 7 files changed, 98 insertions(+), 19 deletions(-)
 create mode 100644 tests/bz_monopole/bz_monopole.par

diff --git a/kharma/driver/imex_step.cpp b/kharma/driver/imex_step.cpp
index b68a8d62..967461d2 100644
--- a/kharma/driver/imex_step.cpp
+++ b/kharma/driver/imex_step.cpp
@@ -237,11 +237,12 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta
             auto t_implicit_step = tl.AddTask(t_copy_linesearch, Implicit::Step, md_full_step_init.get(), md_sub_step_init.get(), 
                                          md_flux_src.get(), md_linesearch.get(), md_solver.get(), integrator->beta[stage-1] * integrator->dt);
 
-            // Copy the entire solver state (everything defined on the grid, i.e. 'Cell') into the final state md_sub_step_final
+            // Copy the entire solver state (everything defined on the grid, incl. our new Face variables) into the final state md_sub_step_final
             // If we're entirely explicit, we just declare these equal
-            t_implicit = tl.AddTask(t_implicit_step, Copy<MeshData<Real>>, std::vector<MetadataFlag>({Metadata::Cell}),
+            auto t_implicit_c = tl.AddTask(t_implicit_step, Copy<MeshData<Real>>, std::vector<MetadataFlag>({Metadata::Cell}),
                                     md_solver.get(), md_sub_step_final.get());
-
+            t_implicit = tl.AddTask(t_implicit_step, WeightedSumDataFace, std::vector<MetadataFlag>({Metadata::Face}),
+                                    md_solver.get(), md_solver.get(), 1.0, 0.0, md_sub_step_final.get());
         }
 
         // Apply all floors & limits (GRMHD,EMHD,etc), but do *not* immediately correct UtoP failures with FixUtoP --
diff --git a/pars/emhd/emhdmodes.par b/pars/emhd/emhdmodes.par
index 68f20974..f50e35e5 100644
--- a/pars/emhd/emhdmodes.par
+++ b/pars/emhd/emhdmodes.par
@@ -39,9 +39,7 @@ transform = null
 <parthenon/time>
 tlim = 2.0
 nlim = -1
-# "RK2" is the only option for implicit solver
-integrator = rk2
-use_dt_light = true
+#use_dt_light = true
 
 <GRMHD>
 cfl = 0.9
@@ -73,8 +71,10 @@ use_qr              = true
 # General verbosity level:
 # 1: general archival info
 # 2: specific debugging logs
+# 3: egregious/temporary notes
 verbose = 0
 # Set to 1 to check each step for wavespeed of zero/NaN & exit
+# 2 enables some rarely seen stuff
 extra_checks = 1
 # Print summary of all flags hit during each step:
 # 1: Number of flags total
@@ -99,7 +99,7 @@ viscosity_alpha  = 1.0
 <parthenon/output0>
 file_type = hdf5
 # Output only final state
-dt = 100.0
+dt = 0.2
 # Output in double due to low amplitude
 single_precision_output = false
 variables = prims.rho, prims.u, prims.uvec, prims.B, prims.q, prims.dP, solve_norm, solve_fail
diff --git a/pars/tests/mhdmodes.par b/pars/tests/mhdmodes.par
index e8b7f5a7..87166e23 100644
--- a/pars/tests/mhdmodes.par
+++ b/pars/tests/mhdmodes.par
@@ -19,9 +19,8 @@ nmode = 1
 dir = 3
 
 # Size and parameters of the full mesh
-# KHARMA does not yet support AMR,
-# so all mesh declarations will have
-# the first two parameters
+# First two parameters are the defaults,
+# and specify no SMR/AMR
 <parthenon/mesh>
 refinement = none
 numlevel = 1
diff --git a/tests/all_pars/run.sh b/tests/all_pars/run.sh
index ee9d480b..3b066e1e 100755
--- a/tests/all_pars/run.sh
+++ b/tests/all_pars/run.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 set -euo pipefail
 
+return_code=0
+
 # Skip testing the restarting & benchmark scripts
 for folder in bondi electrons emhd shocks smr tests tori_2d tori_3d
 do
@@ -12,9 +14,13 @@ do
     ../../run.sh -n 1 -i $fil parthenon/time/nlim=2 &>log_${prob}.txt || exit_code=$?
     rm -f *.{hst,phdf,rhdf,xdmf}
     if [ $exit_code -ne 0 ]; then
-      echo $par FAIL
+      printf "%-40s %s\n" $par FAIL
+      return_code=1
     else
-      echo $par PASS
+      printf "%-40s %s\n" $par PASS
     fi
   done
 done
+
+exit $return_code
+
diff --git a/tests/bz_monopole/bz_monopole.par b/tests/bz_monopole/bz_monopole.par
new file mode 100644
index 00000000..a0b26367
--- /dev/null
+++ b/tests/bz_monopole/bz_monopole.par
@@ -0,0 +1,69 @@
+# Monopole in vacuum
+# Specific parameters for integration test
+
+<parthenon/job>
+problem_id = bz_monopole
+
+<parthenon/mesh>
+refinement = none
+numlevel = 1
+nx1 = 128
+nx2 = 128
+nx3 = 1
+
+<parthenon/meshblock>
+nx1 = 64
+nx2 = 64
+nx3 = 1
+
+<coordinates>
+base = spherical_ks
+transform = fmks
+r_out = 100.
+a = 0.9375
+hslope = 0.3
+mks_smooth = 0.5
+poly_xt = 0.82
+poly_alpha = 14.0
+
+<parthenon/time>
+tlim = 100.0
+nlim = -1
+
+<debug>
+verbose = 1
+extra_checks = 1
+flag_verbose = 0
+
+<GRMHD>
+cfl = 0.7
+gamma = 1.444444
+reconstruction = linear_mc
+
+<b_field>
+type = bz_monopole
+norm = false
+
+<floors>
+bsq_over_rho_max = 100
+rho_min_geom = 1e-20
+u_min_geom = 1e-20
+gamma_max = 10
+
+<wind>
+on = false
+ne = 1.e-4
+Tp = 100
+u1 = 0.4
+power = 40
+
+<parthenon/output0>
+file_type = hdf5
+dt = 5.0
+single_precision_output = false
+variables = prims.rho, prims.u, prims.uvec, prims.B, cons.B, divB
+ghost_zones = true
+
+<parthenon/output1>
+file_type = hst
+dt = 0.1
diff --git a/tests/bz_monopole/run.sh b/tests/bz_monopole/run.sh
index 3ec7bfab..677410f2 100755
--- a/tests/bz_monopole/run.sh
+++ b/tests/bz_monopole/run.sh
@@ -6,13 +6,15 @@ BASE=../..
 exit_code=0
 
 # Full run to test stability to completion
-$BASE/run.sh -i $BASE/pars/tests/bz_monopole.par debug/verbose=1 parthenon/output0/single_precision_output=false >log_bz_monopole_full.txt 2>&1 || exit_code=$?
+$BASE/run.sh -i ./bz_monopole.par debug/verbose=1 >log_bz_monopole_full.txt 2>&1 || exit_code=$?
 
 # At *least* check divB
 pyharm-check-basics bz_monopole.out0.final.phdf || exit_code=$?
 
 # Take 1 step to look for early signs of non-fatal instabilities
-$BASE/run.sh -i $BASE/pars/tests/bz_monopole.par parthenon/time/nlim=1 parthenon/output0/dt=0.0 parthenon/output0/single_precision_output=false >log_bz_monopole_step.txt 2>&1 #|| exit_code=$?
+$BASE/run.sh -i ./bz_monopole.par parthenon/time/nlim=1 parthenon/output0/dt=0.0 >log_bz_monopole_step.txt 2>&1 || exit_code=$?
 
 # This just makes plots, it doesn't check anything
 python ./check.py
+
+exit $exit_code
diff --git a/tests/emhdmodes/run.sh b/tests/emhdmodes/run.sh
index 0b37a067..c96ae5f3 100755
--- a/tests/emhdmodes/run.sh
+++ b/tests/emhdmodes/run.sh
@@ -23,10 +23,10 @@ conv_2d() {
     check_code=0
     python check.py $ALL_RES "$3" $1 2d || check_code=$?
     if [[ $check_code != 0 ]]; then
-        echo EMHD modes test $3 FAIL: $check_code
+        echo $3 FAIL: $check_code
         exit_code=1
     else
-        echo EMHD modes test $3 success
+        echo $3 success
     fi
 }
 
@@ -34,10 +34,12 @@ conv_2d() {
 # Just one default mode
 ALL_RES="32,64,128"
 conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "EMHD mode in 2D, WENO5"
-ALL_RES="16,32,64,128,256"
-conv_2d emhd2d_mc GRMHD/reconstruction=linear_mc "EMHD mode in 2D, linear/MC reconstruction"
-
 # Test that higher-order terms don't mess anything up
 conv_2d emhd2d_higher_order emhd/higher_order_terms=true "EMHD mode in 2D, higher order terms enabled"
+# Test we can use imex/EMHD and face CT
+conv_2d emhd2d_face_ct b_field/solver=face_ct "EMHD mode in 2D w/Face CT"
+
+ALL_RES="16,32,64,128,256"
+conv_2d emhd2d_mc GRMHD/reconstruction=linear_mc "EMHD mode in 2D, linear/MC reconstruction"
 
 exit $exit_code

From aea831f8734bff95ef2e1085df3bc5765b0db167 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Sat, 7 Oct 2023 13:01:55 -0500
Subject: [PATCH 28/47] Tone down CI core usage a touch, use abs paths

---
 scripts/ci/cpu.yml   | 10 +++++-----
 scripts/ci/nvhpc.yml |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 37735eca..635a99d6 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -3,7 +3,7 @@
 image: quay.io/centos/centos:stream9
 
 variables:
-  OMP_NUM_THREADS: 8
+  OMP_NUM_THREADS: 6
   OMP_PROC_BIND: "false"
   MPI_EXE: mpirun
   MPI_NUM_PROCS: 2
@@ -23,8 +23,8 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
-    - eval "$(./bin/micromamba shell hook -s posix)"
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj /mamba/micromamba
+    - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
     - micromamba activate pyharm
@@ -46,8 +46,8 @@ stages:
 build:
   stage: build
   variables:
-    NPROC: 8
-    HOST_ARCH: NATIVE
+    NPROC: 12
+    HOST_ARCH: HSW
     C_NATIVE: gcc
     CXX_NATIVE: g++
   before_script:
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 0683c999..5d30a628 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -24,8 +24,8 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
-    - eval "$(./bin/micromamba shell hook -s posix)"
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj /mamba/micromamba
+    - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
     - micromamba activate pyharm

From a3f43eb19cf4a76b66e18a38bbc80650135cf64d Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 17:05:45 -0500
Subject: [PATCH 29/47] Fix the boundaries of EMHD problems

All MHD variables in bondi_viscous now converge as expected, and
boundaries are applied to dP as expected. Source term seems to be
much, much too large for some reason.

Also Vbump Kokkos to fix a CUDA segfault (again?)
---
 external/parthenon               |  2 +-
 kharma/b_ct/b_ct.cpp             |  7 ++++--
 kharma/b_flux_ct/b_flux_ct.cpp   |  8 +++++++
 kharma/boundaries/boundaries.cpp | 37 ++++++++++++++++++++++++++++---
 kharma/driver/imex_step.cpp      |  2 +-
 kharma/emhd/emhd.cpp             |  2 ++
 kharma/flux/flux.cpp             | 13 ++++++++---
 tests/bondi_viscous/check.py     | 38 +++++++++++++++++++++++---------
 8 files changed, 88 insertions(+), 21 deletions(-)

diff --git a/external/parthenon b/external/parthenon
index 1a0597f9..72a97564 160000
--- a/external/parthenon
+++ b/external/parthenon
@@ -1 +1 @@
-Subproject commit 1a0597f99b3aceafc98ba1eb23e94663bfaeb57e
+Subproject commit 72a975647e5548fee643952a52f12a249fc2b325
diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index 85f08854..2243487b 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -73,7 +73,7 @@ std::shared_ptr<KHARMAPackage> B_CT::Initialize(ParameterInput *pin, std::shared
     params.Add("ct_scheme", ct_scheme);
     // Use the default Parthenon prolongation operator, rather than the divergence-preserving one
     // This relies entirely on the EMF communication for preserving the divergence
-    bool lazy_prolongation = pin->GetOrAddBoolean("b_field", "lazy_prolongation", true);
+    bool lazy_prolongation = pin->GetOrAddBoolean("b_field", "lazy_prolongation", false);
     // Need to preserve divergence if you refine/derefine during sim i.e. AMR
     if (lazy_prolongation && pin->GetString("parthenon/mesh", "refinement") == "adaptive")
         throw std::runtime_error("Cannot use non-preserving prolongation in AMR!");
@@ -83,6 +83,7 @@ std::shared_ptr<KHARMAPackage> B_CT::Initialize(ParameterInput *pin, std::shared
     // Flags for B fields on faces.
     // We don't mark these as "Primitive" and "Conserved" else they'd be bundled
     // with all the cell vars in a bunch of places we don't want
+    // Also note we *always* sync B field conserved var
     std::vector<MetadataFlag> flags_prim_f = {Metadata::Real, Metadata::Face, Metadata::Derived,
                                             Metadata::GetUserFlag("Explicit")};
     std::vector<MetadataFlag> flags_cons_f = {Metadata::Real, Metadata::Face, Metadata::Independent,
@@ -172,6 +173,8 @@ void B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     auto B_U = rc->PackVariables(std::vector<std::string>{"cons.B"});
     auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
     const auto& G = pmb->coords;
+    // Return if we're not syncing U & P at all (e.g. edges)
+    if (B_Uf.GetDim(4) == 0) return;
 
     // TODO get rid of prims on faces probably
 
@@ -213,7 +216,7 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
 
     // Figure out indices
     const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, 0, 0);
-    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::interior, -1, 1);
+    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::interior, -1, 2);
     const IndexRange block = IndexRange{0, emf_pack.GetDim(5)-1};
     const int kd = ndim > 2 ? 1 : 0;
     const int jd = ndim > 1 ? 1 : 0;
diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index 3be20c15..c9ce22ba 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -109,6 +109,14 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
     flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
 
+    // Always sync B field conserved var, for standardization with B_CT
+    if (!flags_cons.count(Metadata::FillGhost)) {
+        flags_cons.push_back(Metadata::FillGhost);
+    }
+    if (flags_prims.count(Metadata::FillGhost)) {
+        flags_prims.erase(std::remove(flags_prims.begin(), flags_prims.end(), Metadata::FillGhost), flags_prims.end()); 
+    }
+
     auto m = Metadata(flags_prim, s_vector);
     pkg->AddField("prims.B", m);
     m = Metadata(flags_cons, s_vector);
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index cb529f32..af1bf345 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -128,6 +128,11 @@ std::shared_ptr<KHARMAPackage> KBoundaries::Initialize(ParameterInput *pin, std:
         bool zero_flux = pin->GetOrAddBoolean("boundaries", "zero_flux_" + bname, zero_polar_flux && bdir == X2DIR);
         params.Add("zero_flux_" + bname, zero_flux);
 
+        // Allow specifically dP to outflow in otherwise Dirichlet conditions
+        // Only used for viscous_bondi problem
+        bool outflow_EMHD = pin->GetOrAddBoolean("boundaries", "outflow_EMHD_" + bname, false);
+        params.Add("outflow_EMHD_" + bname, outflow_EMHD);
+
         // BOUNDARY TYPES
         // Get the boundary type we specified in kharma
         auto btype = pin->GetString("boundaries", bname);
@@ -248,6 +253,13 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
     const auto btype_name = params.Get<std::string>(bname);
     const auto bdir = BoundaryDirection(bface);
 
+    // If we're pretending to sync primitives, but applying physical bounds
+    // to conserved variables, make sure we're up to date
+    if (pmb->packages.Get<KHARMAPackage>("Driver")->Param<bool>("prims_are_fundamental") &&
+        params.Get<bool>("domain_bounds_on_conserved")) {
+        Flux::BlockPtoU_Send(rc.get(), domain, coarse);
+    }
+
     Flag("Apply "+bname+" boundary: "+btype_name);
     pkg->KBoundaries[bface](rc, coarse);
     EndFlag();
@@ -271,6 +283,26 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
         EndFlag();
     }
 
+    // Allow specifically dP to outflow in otherwise Dirichlet conditions
+    // Only used for viscous_bondi problem
+    // TODO make this more general?
+    if (params.Get<bool>("outflow_EMHD_" + bname)) {
+        Flag("OutflowEMHD_"+bname);
+        auto EMHDg = rc->PackVariables({Metadata::GetUserFlag("EMHDVar"), Metadata::FillGhost});
+        const auto &bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+        const auto &range = (bdir == 1) ? bounds.GetBoundsI(IndexDomain::interior)
+                                : (bdir == 2 ? bounds.GetBoundsJ(IndexDomain::interior)
+                                    : bounds.GetBoundsK(IndexDomain::interior));
+        const int ref = BoundaryIsInner(domain) ? range.s : range.e;
+        pmb->par_for_bndry(
+            "outflow_EMHD", IndexRange{0,EMHDg.GetDim(4)-1}, domain, CC, coarse,
+            KOKKOS_LAMBDA (const int &v, const int &k, const int &j, const int &i) {
+                EMHDg(v, k, j, i) = EMHDg(v, (bdir == 3) ? ref : k, (bdir == 2) ? ref : j, (bdir == 1) ? ref : i);
+            }
+        );
+        EndFlag();
+    }
+
     /*
     * KHARMA is very particular about corner boundaries.
     * In particular, we apply the outflow boundary over ALL X2 & X3.
@@ -320,7 +352,8 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
             Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
         }
     } else {
-        Packages::BlockUtoP(rc.get(), domain, coarse);
+        // These get applied the same way regardless of driver
+        Packages::BoundaryUtoP(rc.get(), domain, coarse);
     }
 
     EndFlag();
@@ -350,8 +383,6 @@ void KBoundaries::CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, In
 {
     Flag("CorrectBPrimitive");
     std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
-    const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
-
     auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
     // Return if no field to correct
     if (B_P.GetDim(4) == 0) return;
diff --git a/kharma/driver/imex_step.cpp b/kharma/driver/imex_step.cpp
index 967461d2..5f947823 100644
--- a/kharma/driver/imex_step.cpp
+++ b/kharma/driver/imex_step.cpp
@@ -84,7 +84,7 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta
         if (use_jcon) {
             pmesh->mesh_data.Add("preserve");
             // Above only copies on allocate -- ensure we copy every step
-            Copy<MeshData<Real>>({}, base.get(), pmesh->mesh_data.Get("preserve").get());
+            Copy<MeshData<Real>>({Metadata::Cell}, base.get(), pmesh->mesh_data.Get("preserve").get());
         }
         if (use_implicit) {
             // When solving, we need a temporary copy with any explicit updates,
diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp
index b16fd7d2..a2db56e5 100644
--- a/kharma/emhd/emhd.cpp
+++ b/kharma/emhd/emhd.cpp
@@ -167,6 +167,8 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
 
     // UtoP function specifically for boundary sync (KHARMA must sync cons for AMR) and output
     pkg->BoundaryUtoP = EMHD::BlockUtoP;
+    // If we wanted to apply the domian boundaries to primitive EMHD variables
+    //pkg->DomainBoundaryPtoU = EMHD::BlockPtoU;
 
     // Add all explicit source terms -- implicit terms are called from Implicit::Step
     pkg->AddSource = EMHD::AddSource;
diff --git a/kharma/flux/flux.cpp b/kharma/flux/flux.cpp
index da47ecb5..79c482d7 100644
--- a/kharma/flux/flux.cpp
+++ b/kharma/flux/flux.cpp
@@ -168,12 +168,19 @@ TaskStatus Flux::BlockPtoU_Send(MeshBlockData<Real> *rc, IndexDomain domain, boo
 
     const EMHD::EMHD_parameters& emhd_params = EMHD::GetEMHDParameters(pmb->packages);
 
-    // Pack variables
+    // Pack variables. We never want to run this on the B field
+    using FC = Metadata::FlagCollection;
+    auto cons_flags = FC(Metadata::Conserved, Metadata::Cell, Metadata::GetUserFlag("HD"));
+    if (pmb->packages.AllPackages().count("EMHD"))
+        cons_flags = cons_flags + FC(Metadata::Conserved, Metadata::Cell, Metadata::GetUserFlag("EMHDVar"));
     PackIndexMap prims_map, cons_map;
-    const auto& P = rc->PackVariables({Metadata::GetUserFlag("Primitive")}, prims_map);
-    const auto& U = rc->PackVariables({Metadata::Conserved}, cons_map);
+    const auto& P = rc->PackVariables({Metadata::GetUserFlag("Primitive"), Metadata::Cell}, prims_map);
+    const auto& U = rc->PackVariables(cons_flags, cons_map);
     const VarMap m_u(cons_map, true), m_p(prims_map, false);
 
+    // Return if we're not syncing U & P at all (e.g. edges)
+    if (P.GetDim(4) == 0) return TaskStatus::complete;
+
     auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
     IndexRange ib = bounds.GetBoundsI(domain);
     IndexRange jb = bounds.GetBoundsJ(domain);
diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index c5a0095e..baba6974 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -23,8 +23,8 @@
     outputdir = './'
     kharmadir = '../../'
 
-    NVAR  = 3
-    VARS  = ['rho', 'u', 'dP']
+    NVAR  = 4
+    VARS  = ['rho', 'u', 'dP', 'B']
     RES   = [int(r) for r in sys.argv[1].split(",")]
     LONG  = sys.argv[2]
     SHORT = sys.argv[3]
@@ -44,12 +44,15 @@
         state.params['eta'] = eta
         state.params['tau'] = tau
         dP_check = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau)
+        state.cache['dP'] = dP_check
 
         # load code data
         dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res))
 
-        rho, uu, dP_tilde = dump['RHO'], dump['UU'], dump['dP']
+        # TODO iterate on names here
+        #rho, uu, dP_tilde = dump['RHO'], dump['UU'], dump['dP']
         #rho, uu = dump['RHO'], dump['UU']
+        rho, uu, dP_tilde, B1 = dump['RHO'], dump['UU'], dump['dP'], dump['B1']
 
         # compute dP
         if dump['emhd/higher_order_terms'] == "true":
@@ -61,17 +64,30 @@
             dP = dP_tilde
 
         # Plot
-        fig = plt.figure(figsize=(6,6))
-        ax = fig.add_subplot(1,1,1)
-        pplt.plot_diff_xz(ax, dump, state, 'rho')
+        for var in ['rho', 'u', 'B1', 'dP']:
+            fig = plt.figure(figsize=(6,6))
+            ax = fig.add_subplot(1,1,1)
+            pplt.plot_diff_xz(ax, dump, state, var)
+            plt.legend()
+            fig.savefig("compare_{}_{}.png".format(var, res))
+            plt.close(fig)
+
+        r_start_ind = 1
+        radius = np.mean(dump.grid['r'][r_start_ind:], axis=(1,2))
+        plt.plot(radius, dP_check[r_start_ind:], label='dP ODE check')
+        plt.plot(radius, np.mean(dump['dP'][r_start_ind:], axis=(1,2)), label='dP0 ODE check')
+        plt.plot(radius, np.mean(state['ucon'][1][r_start_ind:], axis=(1,2)), label='ur')
+        #plt.plot(radius, np.mean(coeff[r_start_ind:], axis=(1,2)), label='coeff')
         plt.legend()
-        fig.savefig("compare_rho_{}.png".format(res))
-        plt.close(fig)
+        plt.savefig('dP_soln_new.png')
+        plt.close()
+
 
         # compute L1 norm
-        L1[r,0] = np.mean(np.fabs(rho[:,0,0] - state['rho'][:,0,0]))
-        L1[r,1] = np.mean(np.fabs(uu[:,0,0]  - state['u'][:,0,0]))
-        L1[r,2] = np.mean(np.fabs(dP[:,0,0]  - dP_check)[1:-1])
+        L1[r,0] = np.mean(np.fabs(rho - state['rho'])[1:-1])
+        L1[r,1] = np.mean(np.fabs(uu  - state['u']))
+        L1[r,2] = np.mean(np.fabs(dP  - dP_check)[1:-1])
+        L1[r,3] = np.mean(np.fabs(B1  - state['B1']))
 
     # MEASURE CONVERGENCE
     L1 = np.array(L1)

From 2a99504938f14b024fa216a3df906897e9906062 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 17:31:27 -0500
Subject: [PATCH 30/47] Fix compile w/new Flux_CT B flags

---
 kharma/b_flux_ct/b_flux_ct.cpp | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index c9ce22ba..c2169c82 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -110,11 +110,12 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
 
     // Always sync B field conserved var, for standardization with B_CT
-    if (!flags_cons.count(Metadata::FillGhost)) {
+    // god std::vector is verbose
+    if (std::find(flags_cons.begin(), flags_cons.end(), Metadata::FillGhost) == flags_cons.end()) {
         flags_cons.push_back(Metadata::FillGhost);
     }
-    if (flags_prims.count(Metadata::FillGhost)) {
-        flags_prims.erase(std::remove(flags_prims.begin(), flags_prims.end(), Metadata::FillGhost), flags_prims.end()); 
+    if (std::find(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost) != flags_prim.end()) {
+        flags_prim.erase(std::remove(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost), flags_prim.end());
     }
 
     auto m = Metadata(flags_prim, s_vector);

From 0e363b242ca417a52a9029e7b297156a9c493062 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 17:45:39 -0500
Subject: [PATCH 31/47] CI: tar fixes

---
 scripts/ci/cpu.yml   | 2 +-
 scripts/ci/nvhpc.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 635a99d6..341268bc 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -23,7 +23,7 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj /mamba/micromamba
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xOvj bin/micromamba > /mamba/micromamba
     - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 5d30a628..800e3cd2 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -24,7 +24,7 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj /mamba/micromamba
+    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xOvj bin/micromamba > /mamba/micromamba
     - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml

From 5619ebb42c5574d2032a189827fcd9949a022b45 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 17:59:45 -0500
Subject: [PATCH 32/47] CI: more mamba installation

---
 scripts/ci/cpu.yml   | 4 +++-
 scripts/ci/nvhpc.yml | 5 ++++-
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 341268bc..2330eff9 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -11,6 +11,7 @@ variables:
   OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
   GIT_SUBMODULE_STRATEGY: recursive
   MAMBA_ROOT_PREFIX: /mamba
+  MAMBA_URL: https://micro.mamba.pm/api/micromamba/linux-64/latest
 
 ### DEFAULT TEST BEHAVIOR ###
 default:
@@ -23,7 +24,8 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xOvj bin/micromamba > /mamba/micromamba
+    - mkdir $MAMBA_ROOT_PREFIX
+    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba > $MAMBA_ROOT_PREFIX/micromamba
     - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 800e3cd2..27eda9f1 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -14,6 +14,8 @@ variables:
   OMPI_ALLOW_RUN_AS_ROOT: 1
   OMPI_ALLOW_RUN_AS_ROOT_CONFIRM: 1
   GIT_SUBMODULE_STRATEGY: recursive
+  MAMBA_ROOT_PREFIX: /mamba
+  MAMBA_URL: https://micro.mamba.pm/api/micromamba/linux-64/latest
 
 ### DEFAULT TEST BEHAVIOR ###
 default:
@@ -24,7 +26,8 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xOvj bin/micromamba > /mamba/micromamba
+    - mkdir $MAMBA_ROOT_PREFIX
+    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba > $MAMBA_ROOT_PREFIX/micromamba
     - eval "$(/mamba/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml

From b92750d716ff0b041fec22adbbda02922d41b4c3 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 18:17:42 -0500
Subject: [PATCH 33/47] CI: sometimes installation scripts are that way for a
 reason

---
 scripts/ci/cpu.yml   | 5 ++---
 scripts/ci/nvhpc.yml | 5 ++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 2330eff9..4651440a 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -24,9 +24,8 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - mkdir $MAMBA_ROOT_PREFIX
-    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba > $MAMBA_ROOT_PREFIX/micromamba
-    - eval "$(/mamba/micromamba shell hook -s posix)"
+    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba
+    - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
     - micromamba activate pyharm
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 27eda9f1..8ea630de 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -26,9 +26,8 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - mkdir $MAMBA_ROOT_PREFIX
-    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba > $MAMBA_ROOT_PREFIX/micromamba
-    - eval "$(/mamba/micromamba shell hook -s posix)"
+    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba
+    - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
     - micromamba activate pyharm

From b62fd1111859143acb452c4c95d05ae800054226 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 18:24:57 -0500
Subject: [PATCH 34/47] Fix Noh test by parsing e- gamma correctly in checker

---
 pars/electrons/noh.par | 4 ++--
 tests/noh/check.py     | 8 +++++++-
 tests/noh/run.sh       | 2 --
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/pars/electrons/noh.par b/pars/electrons/noh.par
index 2b9d2a3e..17151db5 100644
--- a/pars/electrons/noh.par
+++ b/pars/electrons/noh.par
@@ -65,8 +65,8 @@ zero_ug = false
 set_tlim = true
 centered = false
 
-#<floors>
-#disable_floors = true
+<floors>
+disable_floors = true
 
 <driver>
 type = imex
diff --git a/tests/noh/check.py b/tests/noh/check.py
index 0566f9c5..08136cd3 100644
--- a/tests/noh/check.py
+++ b/tests/noh/check.py
@@ -39,7 +39,8 @@
 
         u_e = (kel * rho**gam_e)/(gam_e - 1.)
         ratio_analytical = np.where(rho > 1.5, \
-                                    fel/2. * (((gam + 1.)/(gam - 1.))**gam_e * (1. - gam/gam_e) + 1. + gam/gam_e) * ((gam**2 - 1.)/(gam_e**2 - 1.)), \
+                                    fel/2. * (((gam + 1.)/(gam - 1.))**gam_e * (1. - gam/gam_e) + 1. + gam/gam_e) \
+                                        * ((gam**2 - 1.)/(gam_e**2 - 1.)), \
                                     0.)
 
         plt.figure(figsize=(6,6))
@@ -48,6 +49,11 @@
         plt.legend()
         plt.savefig("noh_results_{}.png".format(resolution))
 
+        plt.figure(figsize=(6,6))
+        plt.plot(x1, rho, label="Computed")
+        plt.legend()
+        plt.savefig("noh_rho_{}.png".format(resolution))
+
         l1_norm.append(np.mean(abs(u_e/uu - ratio_analytical)))
     
     l1_norm = np.array(l1_norm)
diff --git a/tests/noh/run.sh b/tests/noh/run.sh
index f5a34a68..65887234 100755
--- a/tests/noh/run.sh
+++ b/tests/noh/run.sh
@@ -20,7 +20,6 @@ noh_test() {
 
         cp noh.out0.final.phdf noh.out0.final.res$res.phdf
     done
-    pyharm-convert *.phdf
     check_code=0
     python check.py . . $ALL_RES 1.666667 || check_code=$?
     if [[ $check_code != 0 ]]; then
@@ -29,7 +28,6 @@ noh_test() {
     else
         echo Noh shock test success
     fi
-    rm *.phdf
 }
 
 noh_test

From e4747ebc6dfe481f009a40cd1a0dd7d96ab4c09a Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 18:26:19 -0500
Subject: [PATCH 35/47] CI: sigh

---
 scripts/ci/cpu.yml   | 2 +-
 scripts/ci/nvhpc.yml | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 4651440a..df2abecf 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -24,7 +24,7 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba
+    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
     - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 8ea630de..589b9ed7 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -26,7 +26,7 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - curl -Ls $MAMBA_URL | tar -xOvj bin/micromamba
+    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
     - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml

From d7d61e79c420b1e9bff7d10d93ca4fed6b435dc7 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 19:54:33 -0500
Subject: [PATCH 36/47] Get rid of sync_prims

Supporting exchanging primitive vars only for ImEx driver in non-AMR
had become a source of bugs, incl. last commit.  Fix by simplifying.

ImEx driver needs to be able to sync conserved variables anyway for AMR,
so better to keep the same codepath even at the cost of the occasional
UtoP/PtoU call.
---
 kharma/b_flux_ct/b_flux_ct.cpp   |  9 ------
 kharma/boundaries/boundaries.cpp | 49 ++++----------------------------
 kharma/boundaries/boundaries.hpp |  5 ----
 kharma/driver/kharma_driver.cpp  | 30 +++++--------------
 kharma/grmhd/grmhd.cpp           |  6 ++--
 kharma/inverter/inverter.cpp     |  1 +
 6 files changed, 15 insertions(+), 85 deletions(-)

diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index c2169c82..3be20c15 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -109,15 +109,6 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
     flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
 
-    // Always sync B field conserved var, for standardization with B_CT
-    // god std::vector is verbose
-    if (std::find(flags_cons.begin(), flags_cons.end(), Metadata::FillGhost) == flags_cons.end()) {
-        flags_cons.push_back(Metadata::FillGhost);
-    }
-    if (std::find(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost) != flags_prim.end()) {
-        flags_prim.erase(std::remove(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost), flags_prim.end());
-    }
-
     auto m = Metadata(flags_prim, s_vector);
     pkg->AddField("prims.B", m);
     m = Metadata(flags_cons, s_vector);
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index af1bf345..d7f52799 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -335,24 +335,13 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
         }
     }
 
-    // If we applied the domain boundary to primitives (as we usually do)...
+    // CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
+    // So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
     if (!params.Get<bool>("domain_bounds_on_conserved")) {
-        bool sync_prims = rc->GetBlockPointer()->packages.Get("Driver")->Param<bool>("sync_prims");
-        // There are two modes of operation here:
-        if (sync_prims) {
-            // 1. ImEx w/o AMR:
-            //    PRIMITIVE variables (only) are marked FillGhost
-            //    So, run PtoU on EVERYTHING (and correct the B field)
-            CorrectBPrimitive(rc, domain, coarse);
-            Flux::BlockPtoU(rc.get(), domain, coarse);
-        } else {
-            // 2. Normal (KHARMA driver, ImEx w/AMR):
-            //    CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
-            //    So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
-            Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
-        }
+        // Only the GRMHD package defines a BoundaryPtoU
+        Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
     } else {
-        // These get applied the same way regardless of driver
+        // Or, apply the boundary to the conserved GRMHD variables, too!
         Packages::BoundaryUtoP(rc.get(), domain, coarse);
     }
 
@@ -379,34 +368,6 @@ void KBoundaries::CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDom
     );
 }
 
-void KBoundaries::CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse)
-{
-    Flag("CorrectBPrimitive");
-    std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
-    auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
-    // Return if no field to correct
-    if (B_P.GetDim(4) == 0) return;
-
-    const auto& G = pmb->coords;
-
-    const auto &bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
-    const int dir = BoundaryDirection(domain);
-    const auto &range = (dir == 1) ? bounds.GetBoundsI(IndexDomain::interior)
-                            : (dir == 2 ? bounds.GetBoundsJ(IndexDomain::interior)
-                                : bounds.GetBoundsK(IndexDomain::interior));
-    const int ref = BoundaryIsInner(domain) ? range.s : range.e;
-
-    pmb->par_for_bndry(
-        "Correct_B_P", IndexRange{0,NVEC-1}, domain, CC, coarse,
-        KOKKOS_LAMBDA (const int &v, const int &k, const int &j, const int &i) {
-            B_P(v, k, j, i) *= G.gdet(Loci::center, (dir == 2) ? ref : j, (dir == 1) ? ref : i)
-                                / G.gdet(Loci::center, j, i);
-        }
-    );
-
-    EndFlag();
-}
-
 TaskStatus KBoundaries::FixFlux(MeshData<Real> *md)
 {
     auto pmesh = md->GetMeshPointer();
diff --git a/kharma/boundaries/boundaries.hpp b/kharma/boundaries/boundaries.hpp
index dde70a60..fb910995 100644
--- a/kharma/boundaries/boundaries.hpp
+++ b/kharma/boundaries/boundaries.hpp
@@ -84,11 +84,6 @@ TaskStatus FixFlux(MeshData<Real> *rc);
  */
 void CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDomain domain, bool coarse);
 
-/**
- * Correct for geometry when applying primitive B field boundaries
- */
-void CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse);
-
 /**
  * Check for velocity toward the simulation domain in a zone, and eliminate it.
  */
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 6e9a4fdc..53d9382e 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -55,7 +55,7 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     // Driver options
     // The two current drivers are "kharma" or "imex", with the former being the usual KHARMA
     // driver (formerly HARM driver), and the latter supporting implicit stepping of some or all variables
-    // Mostly, packages should react to e.g. the "sync_prims" option rather than the driver name
+    // Mostly, packages should react to options rather than the driver name
     bool do_emhd = pin->GetOrAddBoolean("emhd", "on", false);
     std::string driver_type_s = pin->GetOrAddString("driver", "type", (do_emhd) ? "imex" : "kharma");
     DriverType driver_type;
@@ -137,25 +137,10 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     bool prims_are_fundamental = driver_type != DriverType::kharma;
     params.Add("prims_are_fundamental", prims_are_fundamental);
 
-    // Which variables we *actually send* via Parthenon/MPI may differ, however.
-    // Prolongation/restriction should happen on conserved vars, so we must sync
-    // those in multilevel meshes.  If prims are funcamental but not sync'd,
-    // we "emulate" syncing them with PtoU/UtoP on boundaries
-    bool sync_prims = prims_are_fundamental &&
-                        (!pin->DoesParameterExist("parthenon/mesh", "numlevel") ||
-                         pin->GetInteger("parthenon/mesh", "numlevel") == 1);
-    params.Add("sync_prims", sync_prims);
-    // Finally, we set default flags for primitive and conserved variables
-    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
-    if (sync_prims) {
-        // If we're not in AMR, we can sync primitive variables directly
-        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
-        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::WithFluxes, Metadata::Conserved});
-    } else {
-        // If we're in AMR or using the KHARMA driver anyway, sync conserved vars
-        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive")});
-        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
-    }
+    // Now that we're an AMR code, though, we always *sync* conserved variables
+    // This means "emulating" syncing primitives in some cases, by running PtoU -> sync -> UtoP
+    params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive")});
+    params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
 
     return pkg;
 }
@@ -188,8 +173,7 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
     // Note this has the side effect of filling U in some zones,
     // which must be replaced during e.g. startup code when primitive values should be truth
     bool prims_are_fundamental = params.Get<bool>("prims_are_fundamental");
-    bool sync_prims = params.Get<bool>("sync_prims");
-    if (prims_are_fundamental && !sync_prims) {
+    if (prims_are_fundamental) {
         TaskID t_all_ptou[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_ptou_final(0);
         int i_task = 0;
@@ -215,7 +199,7 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
     EndFlag();
 
     // If we're "syncing primitive variables" but just exchanged conserved variables (B, implicit, etc), we need to recover the prims
-    if (prims_are_fundamental && !sync_prims) {
+    if (prims_are_fundamental) {
         TaskID t_all_utop[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_utop_final(0);
         int i_task = 0;
diff --git a/kharma/grmhd/grmhd.cpp b/kharma/grmhd/grmhd.cpp
index e1d7746a..fedb683a 100644
--- a/kharma/grmhd/grmhd.cpp
+++ b/kharma/grmhd/grmhd.cpp
@@ -145,10 +145,8 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     flags_prim.push_back(Metadata::Restart);
 
     // We must additionally fill ghost zones of primitive variables in GRMHD, to seed the solver
-    // Only necessary to add here if syncing conserved vars
-    // Note some startup behavior relies on having the GRHD prims marked for syncing,
-    // so disable sync_utop_seed at your peril
-    if (!driver.Get<bool>("sync_prims") && pin->GetOrAddBoolean("GRMHD", "sync_utop_seed", true)) {
+    // Disabling this is not well tested regardless of how fancy the solver is, YMMV
+    if (pin->GetOrAddBoolean("GRMHD", "sync_utop_seed", true)) {
         flags_prim.push_back(Metadata::FillGhost);
     }
 
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index e538350c..b3281371 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -80,6 +80,7 @@ std::shared_ptr<KHARMAPackage> Inverter::Initialize(ParameterInput *pin, std::sh
     // This package is still loaded because fixes
     if (!implicit_grmhd) {
         pkg->BlockUtoP = Inverter::BlockUtoP;
+        pkg->BoundaryUtoP = Inverter::BlockUtoP;
     }
 
     pkg->PostStepDiagnosticsMesh = Inverter::PostStepDiagnostics;

From f1e1cb9d59f259b2c42670f2c8656da9dd8dacea Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 21:20:38 -0500
Subject: [PATCH 37/47] Repair some tests for always syncing cons

These bugs would have appeared more inscrutably when we ran w/AMR anyway
---
 kharma/electrons/electrons.cpp | 1 +
 kharma/inverter/inverter.cpp   | 7 +++++--
 tests/restart/run.sh           | 2 +-
 3 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/kharma/electrons/electrons.cpp b/kharma/electrons/electrons.cpp
index ff4b2dff..b6ac1c17 100644
--- a/kharma/electrons/electrons.cpp
+++ b/kharma/electrons/electrons.cpp
@@ -186,6 +186,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     }
 
     pkg->BlockUtoP = Electrons::BlockUtoP;
+    pkg->BoundaryUtoP = Electrons::BlockUtoP;
 
     return pkg;
 }
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index b3281371..a50ba646 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -103,7 +103,10 @@ inline void BlockPerformInversion(MeshBlockData<Real> *rc, IndexDomain domain, b
     auto P = GRMHD::PackHDPrims(rc, prims_map);
     const VarMap m_u(cons_map, true), m_p(prims_map, false);
 
-    GridScalar pflag = rc->Get("pflag").data;
+    auto pflag = rc->PackVariables(std::vector<std::string>{"pflag"});
+
+    if (U.GetDim(4) == 0 || pflag.GetDim(4) == 0)
+        return;
 
     const Real gam = pmb->packages.Get("GRMHD")->Param<Real>("gamma");
 
@@ -121,7 +124,7 @@ inline void BlockPerformInversion(MeshBlockData<Real> *rc, IndexDomain domain, b
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
             if (KDomain::inside(k, j, i, b)) {
                 // Run over all interior zones and any initialized ghosts
-                pflag(k, j, i) = static_cast<double>(Inverter::u_to_p<inverter>(G, U, m_u, gam, k, j, i, P, m_p, Loci::center));
+                pflag(0, k, j, i) = static_cast<double>(Inverter::u_to_p<inverter>(G, U, m_u, gam, k, j, i, P, m_p, Loci::center));
             }
         }
     );
diff --git a/tests/restart/run.sh b/tests/restart/run.sh
index 66605a69..966c516f 100755
--- a/tests/restart/run.sh
+++ b/tests/restart/run.sh
@@ -18,7 +18,7 @@ $KHARMADIR/run.sh -r torus.out1.00000.rhdf parthenon/time/nlim=5 >log_restart_2.
 mv torus.out0.final.phdf torus.out0.final.restart.phdf
 
 # Compare to basic round-off
-pyharm diff --rel_tol 1e-15 torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
+pyharm diff --rel_tol 1e-11 torus.out0.final.init.phdf torus.out0.final.restart.phdf -o compare_restart
 # Compare binary. Sometimes works but not worth keeping always
 #h5diff --exclude-path=/Info \
 #       --exclude-path=/Input \

From 868b033c7558119df3a71c2ee7a4341064bf5672 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Mon, 9 Oct 2023 22:33:19 -0500
Subject: [PATCH 38/47] Test fixes, but not the important ones

---
 kharma/b_ct/b_ct.hpp             |  4 ++++
 kharma/boundaries/boundaries.cpp |  7 ++++++-
 kharma/driver/imex_step.cpp      | 15 +++++++--------
 kharma/driver/kharma_step.cpp    | 12 ++++++------
 pars/emhd/bondi_viscous.par      | 13 ++++++++-----
 pars/smr/orszag_tang_refined.par |  1 -
 scripts/ci/cpu.yml               |  3 ++-
 scripts/ci/nvhpc.yml             |  4 ++--
 tests/emhdmodes/run.sh           |  2 +-
 9 files changed, 36 insertions(+), 25 deletions(-)

diff --git a/kharma/b_ct/b_ct.hpp b/kharma/b_ct/b_ct.hpp
index 319e455f..ab2b291d 100644
--- a/kharma/b_ct/b_ct.hpp
+++ b/kharma/b_ct/b_ct.hpp
@@ -306,6 +306,10 @@ struct ProlongateInternalOlivares {
             const int off_i = (DIM > 0) ? (elem%2)*(me == V2) + (elem/2)*(me == V3) + (me == V1) : 0;
             const int off_j = (DIM > 1) ? (elem%2)*(me == V3) + (elem/2)*(me == V1) + (me == V2) : 0;
             const int off_k = (DIM > 2) ? (elem%2)*(me == V1) + (elem/2)*(me == V2) + (me == V3) : 0;
+            if (((el == TE::F1) && (fi + off_i > ib.e)) ||
+                ((el == TE::F2) && (fj + off_j > jb.e)) ||
+                ((el == TE::F3) && (fk + off_k > kb.e)))
+                return;
 
             fine(me, l, m, n, fk+off_k, fj+off_j, fi+off_i) = (
                 // Average faces on either side of us in selected direction (diff), on each of the 4 sub-faces (off)
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index d7f52799..23a415c1 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -86,7 +86,12 @@ std::shared_ptr<KHARMAPackage> KBoundaries::Initialize(ParameterInput *pin, std:
     Metadata m_x1, m_x2, m_x3;
     {
         // We can't use GetVariablesByFlag yet, so ask the packages
-        int nvar = KHARMA::PackDimension(packages.get(), Metadata::FillGhost);
+        // These flags get anything that needs a physical boundary during the run
+        using FC = Metadata::FlagCollection;
+        FC ghost_vars = FC({Metadata::FillGhost, Metadata::Conserved})
+                    + FC({Metadata::FillGhost, Metadata::GetUserFlag("Primitive")})
+                    - FC({Metadata::GetUserFlag("StartupOnly")});
+        int nvar = KHARMA::PackDimension(packages.get(), ghost_vars);
 
         // We also don't know the mesh size, since it's not constructed.  We infer.
         const int ng = pin->GetInteger("parthenon/mesh", "nghost");
diff --git a/kharma/driver/imex_step.cpp b/kharma/driver/imex_step.cpp
index 5f947823..7fb9db74 100644
--- a/kharma/driver/imex_step.cpp
+++ b/kharma/driver/imex_step.cpp
@@ -132,26 +132,25 @@ TaskCollection KHARMADriver::MakeImExTaskCollection(BlockList_t &blocks, int sta
         auto t_fluxes = KHARMADriver::AddFluxCalculations(t_start_recv_bound, tl, recon, md_sub_step_init.get());
 
         // If we're in AMR, correct fluxes from neighbors
-        auto t_flux_bounds = t_fluxes;
+        auto t_emf = t_fluxes;
         if (pmesh->multilevel || use_b_ct) {
-            auto t_emf = t_fluxes;
-            // TODO this MPI sync should be bundled into fluxcorr
+            tl.AddTask(t_fluxes, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
+            auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
+            auto t_flux_bounds = tl.AddTask(t_recv_flux, parthenon::SetFluxCorrections, md_sub_step_init);
+            auto t_emf = t_flux_bounds;
             if (use_b_ct) {
                 // Pull out a container of only EMF to synchronize
                 auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
-                auto t_emf_local = tl.AddTask(t_fluxes, B_CT::CalculateEMF, md_sub_step_init.get());
+                auto t_emf_local = tl.AddTask(t_flux_bounds, B_CT::CalculateEMF, md_sub_step_init.get());
                 auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_emf_only);
             }
-            tl.AddTask(t_emf, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
-            auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
-            t_flux_bounds = tl.AddTask(t_recv_flux, parthenon::SetFluxCorrections, md_sub_step_init);
         }
 
         // Any package modifications to the fluxes.  e.g.:
         // 1. CT calculations for B field transport
         // 2. Zero fluxes through poles
         // etc 
-        auto t_fix_flux = tl.AddTask(t_flux_bounds, Packages::FixFlux, md_sub_step_init.get());
+        auto t_fix_flux = tl.AddTask(t_emf, Packages::FixFlux, md_sub_step_init.get());
 
         // Apply the fluxes to calculate a change in cell-centered values "md_flux_src"
         auto t_flux_div = tl.AddTask(t_fix_flux, Update::FluxDivergence<MeshData<Real>>, md_sub_step_init.get(), md_flux_src.get());
diff --git a/kharma/driver/kharma_step.cpp b/kharma/driver/kharma_step.cpp
index 7b22c80b..e5619dbd 100644
--- a/kharma/driver/kharma_step.cpp
+++ b/kharma/driver/kharma_step.cpp
@@ -154,25 +154,25 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
         auto t_fluxes = KHARMADriver::AddFluxCalculations(t_start_recv_flux, tl, recon, md_sub_step_init.get());
 
         // If we're in AMR, correct fluxes from neighbors
-        auto t_flux_bounds = t_fluxes;
+        auto t_emf = t_fluxes;
         if (pmesh->multilevel || use_b_ct) {
-            auto t_emf = t_fluxes;
+            tl.AddTask(t_fluxes, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
+            auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
+            auto t_flux_bounds = tl.AddTask(t_recv_flux, parthenon::SetFluxCorrections, md_sub_step_init);
+            auto t_emf = t_flux_bounds;
             if (use_b_ct) {
                 // Pull out a container of only EMF to synchronize
                 auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
                 auto t_emf_local = tl.AddTask(t_fluxes, B_CT::CalculateEMF, md_sub_step_init.get());
                 auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_emf_only);
             }
-            tl.AddTask(t_emf, parthenon::LoadAndSendFluxCorrections, md_sub_step_init);
-            auto t_recv_flux = tl.AddTask(t_fluxes, parthenon::ReceiveFluxCorrections, md_sub_step_init);
-            t_flux_bounds = tl.AddTask(t_recv_flux, parthenon::SetFluxCorrections, md_sub_step_init);
         }
 
         // Any package modifications to the fluxes.  e.g.:
         // 1. Flux-CT calculations for B field transport
         // 2. Zero fluxes through poles
         // etc 
-        auto t_fix_flux = tl.AddTask(t_flux_bounds, Packages::FixFlux, md_sub_step_init.get());
+        auto t_fix_flux = tl.AddTask(t_emf, Packages::FixFlux, md_sub_step_init.get());
 
         // Apply the fluxes to calculate a change in cell-centered values "md_flux_src"
         auto t_flux_div = tl.AddTask(t_fix_flux, Update::FluxDivergence<MeshData<Real>>, md_sub_step_init.get(), md_flux_src.get());
diff --git a/pars/emhd/bondi_viscous.par b/pars/emhd/bondi_viscous.par
index 16b0fead..efdd7c0a 100644
--- a/pars/emhd/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -64,10 +64,13 @@ rs   = 8.0
 disable_floors = true
 
 <boundaries>
-#outer_x1 = dirichlet
-#inner_x1 = dirichlet
+outer_x1 = dirichlet
+inner_x1 = dirichlet
 check_inflow_outer_x1 = false
-#check_inflow_inner_x1 = false
+check_inflow_inner_x1 = false
+# Force outflow bounds for EMHD vars
+outflow_EMHD_inner_x1 = true
+outflow_EMHD_outer_x1 = true
 
 <debug>
 verbose = 1
@@ -76,9 +79,9 @@ extra_checks = 1
 
 <parthenon/output0>
 file_type               = hdf5
-dt                      = 100.0
+dt                      = 10.0
 single_precision_output = false
-ghost_zones             = false
+ghost_zones             = true
 variables               = prims, solve_norm, solve_fail
 
 <parthenon/output1>
diff --git a/pars/smr/orszag_tang_refined.par b/pars/smr/orszag_tang_refined.par
index e6665b56..6765b1e7 100644
--- a/pars/smr/orszag_tang_refined.par
+++ b/pars/smr/orszag_tang_refined.par
@@ -41,7 +41,6 @@ integrator = rk2
 
 <driver>
 type = kharma
-nghost = 6
 
 <GRMHD>
 cfl = 0.9
diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index df2abecf..272dbbd7 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -24,7 +24,6 @@ default:
     - dnf -y install hostname environment-modules git mpich fftw bzip2
     - source /etc/profile
     - module load mpi/mpich-x86_64
-    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
     - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
@@ -56,12 +55,14 @@ build:
     - dnf -y install hostname environment-modules cmake mpich-devel fftw-devel
     - source /etc/profile
     - module load mpi/mpich-x86_64
+    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
   script:
     - ./make.sh clean hdf5
   artifacts:
     paths:
       - kharma.*
       - make_args
+      - bin/micromamba
 
 #Run all tests in parallel
 tests:
diff --git a/scripts/ci/nvhpc.yml b/scripts/ci/nvhpc.yml
index 589b9ed7..7cb2bc72 100644
--- a/scripts/ci/nvhpc.yml
+++ b/scripts/ci/nvhpc.yml
@@ -26,7 +26,6 @@ default:
   # interleaved, and prints a summary of results.
   before_script:
     - export PATH="$HOME/.local/bin:$PATH"
-    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
     - eval "$(bin/micromamba shell hook -s posix)"
     - git clone -b dev https://github.com/AFD-Illinois/pyharm.git /pyharm
     - micromamba create -y -f /pyharm/environment.yml
@@ -53,7 +52,7 @@ build:
     NPROC: 8
     HOST_ARCH: NATIVE
   before_script:
-    - echo "Skipping pyharm install in build."
+    - curl -Ls $MAMBA_URL | tar -xvj bin/micromamba
   script:
     - export PREFIX_PATH=$PWD/external/hdf5
     - ./make.sh clean cuda hdf5
@@ -61,6 +60,7 @@ build:
     paths:
       - kharma.*
       - make_args
+      - bin/micromamba
 
 #Run all tests in parallel
 tests:
diff --git a/tests/emhdmodes/run.sh b/tests/emhdmodes/run.sh
index c96ae5f3..668f51ef 100755
--- a/tests/emhdmodes/run.sh
+++ b/tests/emhdmodes/run.sh
@@ -33,7 +33,7 @@ conv_2d() {
 # 2D modes use small blocks, could pick up some problems at MPI ranks >> 1
 # Just one default mode
 ALL_RES="32,64,128"
-conv_2d emhd2d_weno GRMHD/reconstruction=weno5 "EMHD mode in 2D, WENO5"
+conv_2d emhd2d_weno driver/reconstruction=weno5 "EMHD mode in 2D, WENO5"
 # Test that higher-order terms don't mess anything up
 conv_2d emhd2d_higher_order emhd/higher_order_terms=true "EMHD mode in 2D, higher order terms enabled"
 # Test we can use imex/EMHD and face CT

From 76d346e210bc3252f028dd7c08928cbaaadcaa89 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 10 Oct 2023 10:04:50 -0500
Subject: [PATCH 39/47] Make ordering of GRMHD/EMHD ops consistent, drive-by
 fixes

---
 kharma/electrons/electrons.cpp  |  6 ++--
 kharma/emhd/emhd.cpp            |  4 +--
 kharma/inverter/inverter.cpp    |  3 +-
 kharma/kharma_package.cpp       | 51 +++++++++++++++++++++++++++------
 kharma/main.cpp                 |  2 ++
 kharma/prob/emhd/emhdmodes.hpp  | 12 +++-----
 kharma/prob/post_initialize.cpp |  2 --
 kharma/prob/problem.cpp         |  4 +--
 pars/emhd/bondi_viscous.par     |  2 +-
 9 files changed, 56 insertions(+), 30 deletions(-)

diff --git a/kharma/electrons/electrons.cpp b/kharma/electrons/electrons.cpp
index b6ac1c17..2ec96d61 100644
--- a/kharma/electrons/electrons.cpp
+++ b/kharma/electrons/electrons.cpp
@@ -263,11 +263,9 @@ void BlockPtoU(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     auto pmb = rc->GetBlockPointer();
 
     PackIndexMap prims_map, cons_map;
-    auto& P = rc->PackVariables({Metadata::GetUserFlag("Primitive")}, prims_map);
-    auto& U = rc->PackVariables({Metadata::Conserved}, cons_map);
+    auto& P = rc->PackVariables({Metadata::GetUserFlag("Primitive"), Metadata::Cell}, prims_map);
+    auto& U = rc->PackVariables({Metadata::Conserved, Metadata::Cell}, cons_map);
     const VarMap m_p(prims_map, false), m_u(cons_map, true);
-    // And then the local density
-    GridScalar rho_P = rc->Get("cons.rho").data;
 
     const auto& G = pmb->coords;
 
diff --git a/kharma/emhd/emhd.cpp b/kharma/emhd/emhd.cpp
index a2db56e5..c76272e5 100644
--- a/kharma/emhd/emhd.cpp
+++ b/kharma/emhd/emhd.cpp
@@ -181,8 +181,6 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     return pkg;
 }
 
-// TODO is relying on GRMHD P variables a mistake here?  They're available on physical boundaries at least,
-// maybe not internal?
 void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
@@ -193,6 +191,8 @@ void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
     auto P = rc->PackVariables(std::vector<MetadataFlag>{Metadata::GetUserFlag("Primitive")}, prims_map);
     const VarMap m_p(prims_map, false), m_u(cons_map, true);
 
+    if (U_E.GetDim(4) == 0) return;
+
     const auto& G = pmb->coords;
 
     auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index a50ba646..15e7f86f 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -76,8 +76,7 @@ std::shared_ptr<KHARMAPackage> Inverter::Initialize(ParameterInput *pin, std::sh
     }
     pkg->AddField("pflag", m);
 
-    // Don't operate if GRMHD variables are being evolved implicitly
-    // This package is still loaded because fixes
+    // Don't operate at the usual time if GRMHD variables are being evolved implicitly
     if (!implicit_grmhd) {
         pkg->BlockUtoP = Inverter::BlockUtoP;
         pkg->BoundaryUtoP = Inverter::BlockUtoP;
diff --git a/kharma/kharma_package.cpp b/kharma/kharma_package.cpp
index bb0b7aea..96099ef3 100644
--- a/kharma/kharma_package.cpp
+++ b/kharma/kharma_package.cpp
@@ -35,8 +35,10 @@
 
 #include "types.hpp"
 
-// TODO take & accumulate TaskStatus?  Useful for ::incomplete if we ever want to do that
-// TODO continue meshification until all is mesh
+// TODO clearly this needs a better concept of ordering.
+// probably this means something that returns an ordered list of packages
+// for the given operation, based on... declared dependencies?
+// it could also use full meshification & return codes
 
 TaskStatus Packages::FixFlux(MeshData<Real> *md)
 {
@@ -58,8 +60,8 @@ TaskStatus Packages::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool
     Flag("BlockUtoP");
     // Apply UtoP from B_CT first, as this fills cons.B at cell centers
     auto pmb = rc->GetBlockPointer();
-    auto pkgs = pmb->packages.AllPackages();
-    if (pkgs.count("B_CT")) {
+    auto kpackages = rc->GetBlockPointer()->packages.AllPackagesOfType<KHARMAPackage>();
+    if (kpackages.count("B_CT")) {
         KHARMAPackage *pkpackage = pmb->packages.Get<KHARMAPackage>("B_CT");
         if (pkpackage->BlockUtoP != nullptr) {
             Flag("BlockUtoP_B_CT");
@@ -67,9 +69,17 @@ TaskStatus Packages::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool
             EndFlag();
         }
     }
-    auto kpackages = rc->GetBlockPointer()->packages.AllPackagesOfType<KHARMAPackage>();
+    // Then GRMHD, as some packages require GRMHD prims in place for U->P
+    if (kpackages.count("Inverter")) {
+        KHARMAPackage *pkpackage = pmb->packages.Get<KHARMAPackage>("Inverter");
+        if (pkpackage->BlockUtoP != nullptr) {
+            Flag("BlockUtoP_Inverter");
+            pkpackage->BlockUtoP(rc, domain, coarse);
+            EndFlag();
+        }
+    }
     for (auto kpackage : kpackages) {
-        if (kpackage.second->BlockUtoP != nullptr && kpackage.first != "B_CT") {
+        if (kpackage.second->BlockUtoP != nullptr && kpackage.first != "B_CT" && kpackage.first != "Inverter") {
             Flag("BlockUtoP_"+kpackage.first);
             kpackage.second->BlockUtoP(rc, domain, coarse);
             EndFlag();
@@ -91,9 +101,18 @@ TaskStatus Packages::MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coars
 TaskStatus Packages::BoundaryUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     Flag("BoundaryUtoP");
+    auto pmb = rc->GetBlockPointer();
     auto kpackages = rc->GetBlockPointer()->packages.AllPackagesOfType<KHARMAPackage>();
+    if (kpackages.count("Inverter")) {
+        KHARMAPackage *pkpackage = pmb->packages.Get<KHARMAPackage>("Inverter");
+        if (pkpackage->BoundaryUtoP != nullptr) {
+            Flag("BoundaryUtoP_Inverter");
+            pkpackage->BoundaryUtoP(rc, domain, coarse);
+            EndFlag();
+        }
+    }
     for (auto kpackage : kpackages) {
-        if (kpackage.second->BoundaryUtoP != nullptr) {
+        if (kpackage.second->BoundaryUtoP != nullptr && kpackage.first != "Inverter") {
             Flag("BoundaryUtoP_"+kpackage.first);
             kpackage.second->BoundaryUtoP(rc, domain, coarse);
             EndFlag();
@@ -106,13 +125,27 @@ TaskStatus Packages::BoundaryUtoP(MeshBlockData<Real> *rc, IndexDomain domain, b
 TaskStatus Packages::BoundaryPtoUElseUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     Flag("DomainBoundaryLockstep");
+    auto pmb = rc->GetBlockPointer();
     auto kpackages = rc->GetBlockPointer()->packages.AllPackagesOfType<KHARMAPackage>();
+    // Some downstream UtoP rely on GRMHD prims, some cons
+    if (kpackages.count("GRMHD")) {
+        KHARMAPackage *pkpackage = pmb->packages.Get<KHARMAPackage>("GRMHD");
+        if (pkpackage->DomainBoundaryPtoU != nullptr) {
+            Flag("DomainBoundaryPtoU_GRMHD");
+            pkpackage->DomainBoundaryPtoU(rc, domain, coarse);
+            EndFlag();
+        } else if (pkpackage->BoundaryUtoP != nullptr) { // This won't be called
+            Flag("DomainBoundaryUtoP_GRMHD");
+            pkpackage->BoundaryUtoP(rc, domain, coarse);
+            EndFlag();
+        }
+    }
     for (auto kpackage : kpackages) {
-        if (kpackage.second->DomainBoundaryPtoU != nullptr) {
+        if (kpackage.second->DomainBoundaryPtoU != nullptr && kpackage.first != "GRMHD") {
             Flag("DomainBoundaryPtoU_"+kpackage.first);
             kpackage.second->DomainBoundaryPtoU(rc, domain, coarse);
             EndFlag();
-        } else if (kpackage.second->BoundaryUtoP != nullptr) {
+        } else if (kpackage.second->BoundaryUtoP != nullptr && kpackage.first != "GRMHD") {
             Flag("DomainBoundaryUtoP_"+kpackage.first);
             kpackage.second->BoundaryUtoP(rc, domain, coarse);
             EndFlag();
diff --git a/kharma/main.cpp b/kharma/main.cpp
index baf7f1a0..4d319fdc 100644
--- a/kharma/main.cpp
+++ b/kharma/main.cpp
@@ -205,6 +205,8 @@ int main(int argc, char *argv[])
     KHARMA::PostInitialize(pin, pmesh, is_restart);
     EndFlag();
 
+    // TODO output parsed parameters *here*, now we have everything including any problem configs for B field
+
     // Begin code block to ensure driver is cleaned up
     {
         if (MPIRank0()) {
diff --git a/kharma/prob/emhd/emhdmodes.hpp b/kharma/prob/emhd/emhdmodes.hpp
index 371575d2..ba0805a7 100644
--- a/kharma/prob/emhd/emhdmodes.hpp
+++ b/kharma/prob/emhd/emhdmodes.hpp
@@ -136,17 +136,13 @@ TaskStatus InitializeEMHDModes(std::shared_ptr<MeshBlockData<Real>>& rc, Paramet
                 // Zeros are q, dP, and bsq, only needed for torus closure
                 EMHD::set_parameters(G, rho(k, j, i), u(k, j, i), 0., 0., 0., emhd_params, gam, j, i, tau, chi_e, nu_e);
                 Real Theta = (gam - 1) * u(k, j, i) / rho(k, j, i);
-                Real q_tilde  = q(k, j, i); 
-                Real dP_tilde = dP(k, j, i);
-                if (emhd_params.higher_order_terms) {
-                    q_tilde  *= (chi_e != 0) ? m::sqrt(tau / (chi_e * rho(k, j, i) * Theta * Theta)) : 0.;
-                    dP_tilde *= (nu_e  != 0) ? m::sqrt(tau / (nu_e * rho(k, j, i) * Theta)) : 0.;
-                }
-                q(k, j, i) = q_tilde;
-                dP(k, j, i) = dP_tilde;
+                q(k, j, i)  *= (chi_e != 0) ? m::sqrt(tau / (chi_e * rho(k, j, i) * Theta * Theta)) : 0.;
+                dP(k, j, i) *= (nu_e  != 0) ? m::sqrt(tau / (nu_e * rho(k, j, i) * Theta)) : 0.;
             }
         }
     );
 
+    Flux::BlockPtoU(rc.get(), IndexDomain::interior, false);
+
     return TaskStatus::complete;
 }
diff --git a/kharma/prob/post_initialize.cpp b/kharma/prob/post_initialize.cpp
index 30e09004..16a007da 100644
--- a/kharma/prob/post_initialize.cpp
+++ b/kharma/prob/post_initialize.cpp
@@ -174,6 +174,4 @@ void KHARMA::PostInitialize(ParameterInput *pin, Mesh *pmesh, bool is_restart)
     KBoundaries::FreezeDirichlet(md);
     // This is the first sync if there is no B field
     KHARMADriver::SyncAllBounds(md);
-
-    // TODO output parsed parameters now we have *everything* including any problem configs for B field
 }
diff --git a/kharma/prob/problem.cpp b/kharma/prob/problem.cpp
index 01897edb..67ab0354 100644
--- a/kharma/prob/problem.cpp
+++ b/kharma/prob/problem.cpp
@@ -161,8 +161,8 @@ void KHARMA::ProblemGenerator(MeshBlock *pmb, ParameterInput *pin)
     // the magnetic field, which is added in PostInitialize, after all blocks
     // are filled with other variables (it can be related to density averages which
     // require correct ghost zones)
-    // ALL OTHER VARIABLES, however, must fill U if a magnetic field will depend on
-    // them in any way, as conserved variables are MPI-synchronized
+    // If the B field will depend on the conserved variables (for some reason?)
+    // they must be computed by the particular problem.
 
     EndFlag();
 }
diff --git a/pars/emhd/bondi_viscous.par b/pars/emhd/bondi_viscous.par
index efdd7c0a..8aa45d4a 100644
--- a/pars/emhd/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -81,7 +81,7 @@ extra_checks = 1
 file_type               = hdf5
 dt                      = 10.0
 single_precision_output = false
-ghost_zones             = true
+ghost_zones             = false
 variables               = prims, solve_norm, solve_fail
 
 <parthenon/output1>

From 20c2a11cd42e949ad49096769ca8dcd53f4eaed1 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 10 Oct 2023 09:12:31 -0600
Subject: [PATCH 40/47] Revert "Get rid of sync_prims"

This reverts commit d7d61e79c420b1e9bff7d10d93ca4fed6b435dc7.
---
 kharma/b_flux_ct/b_flux_ct.cpp   |  9 ++++++
 kharma/boundaries/boundaries.cpp | 49 ++++++++++++++++++++++++++++----
 kharma/boundaries/boundaries.hpp |  5 ++++
 kharma/driver/kharma_driver.cpp  | 30 ++++++++++++++-----
 kharma/grmhd/grmhd.cpp           |  6 ++--
 kharma/inverter/inverter.cpp     |  1 -
 6 files changed, 85 insertions(+), 15 deletions(-)

diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index 3be20c15..c2169c82 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -109,6 +109,15 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
     flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
 
+    // Always sync B field conserved var, for standardization with B_CT
+    // god std::vector is verbose
+    if (std::find(flags_cons.begin(), flags_cons.end(), Metadata::FillGhost) == flags_cons.end()) {
+        flags_cons.push_back(Metadata::FillGhost);
+    }
+    if (std::find(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost) != flags_prim.end()) {
+        flags_prim.erase(std::remove(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost), flags_prim.end());
+    }
+
     auto m = Metadata(flags_prim, s_vector);
     pkg->AddField("prims.B", m);
     m = Metadata(flags_cons, s_vector);
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index 23a415c1..9d5ffcd4 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -340,13 +340,24 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
         }
     }
 
-    // CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
-    // So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
+    // If we applied the domain boundary to primitives (as we usually do)...
     if (!params.Get<bool>("domain_bounds_on_conserved")) {
-        // Only the GRMHD package defines a BoundaryPtoU
-        Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
+        bool sync_prims = rc->GetBlockPointer()->packages.Get("Driver")->Param<bool>("sync_prims");
+        // There are two modes of operation here:
+        if (sync_prims) {
+            // 1. ImEx w/o AMR:
+            //    PRIMITIVE variables (only) are marked FillGhost
+            //    So, run PtoU on EVERYTHING (and correct the B field)
+            CorrectBPrimitive(rc, domain, coarse);
+            Flux::BlockPtoU(rc.get(), domain, coarse);
+        } else {
+            // 2. Normal (KHARMA driver, ImEx w/AMR):
+            //    CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
+            //    So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
+            Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
+        }
     } else {
-        // Or, apply the boundary to the conserved GRMHD variables, too!
+        // These get applied the same way regardless of driver
         Packages::BoundaryUtoP(rc.get(), domain, coarse);
     }
 
@@ -373,6 +384,34 @@ void KBoundaries::CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDom
     );
 }
 
+void KBoundaries::CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse)
+{
+    Flag("CorrectBPrimitive");
+    std::shared_ptr<MeshBlock> pmb = rc->GetBlockPointer();
+    auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
+    // Return if no field to correct
+    if (B_P.GetDim(4) == 0) return;
+
+    const auto& G = pmb->coords;
+
+    const auto &bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+    const int dir = BoundaryDirection(domain);
+    const auto &range = (dir == 1) ? bounds.GetBoundsI(IndexDomain::interior)
+                            : (dir == 2 ? bounds.GetBoundsJ(IndexDomain::interior)
+                                : bounds.GetBoundsK(IndexDomain::interior));
+    const int ref = BoundaryIsInner(domain) ? range.s : range.e;
+
+    pmb->par_for_bndry(
+        "Correct_B_P", IndexRange{0,NVEC-1}, domain, CC, coarse,
+        KOKKOS_LAMBDA (const int &v, const int &k, const int &j, const int &i) {
+            B_P(v, k, j, i) *= G.gdet(Loci::center, (dir == 2) ? ref : j, (dir == 1) ? ref : i)
+                                / G.gdet(Loci::center, j, i);
+        }
+    );
+
+    EndFlag();
+}
+
 TaskStatus KBoundaries::FixFlux(MeshData<Real> *md)
 {
     auto pmesh = md->GetMeshPointer();
diff --git a/kharma/boundaries/boundaries.hpp b/kharma/boundaries/boundaries.hpp
index fb910995..dde70a60 100644
--- a/kharma/boundaries/boundaries.hpp
+++ b/kharma/boundaries/boundaries.hpp
@@ -84,6 +84,11 @@ TaskStatus FixFlux(MeshData<Real> *rc);
  */
 void CheckInflow(std::shared_ptr<MeshBlockData<Real>> &rc, IndexDomain domain, bool coarse);
 
+/**
+ * Correct for geometry when applying primitive B field boundaries
+ */
+void CorrectBPrimitive(std::shared_ptr<MeshBlockData<Real>>& rc, IndexDomain domain, bool coarse);
+
 /**
  * Check for velocity toward the simulation domain in a zone, and eliminate it.
  */
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 53d9382e..6e9a4fdc 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -55,7 +55,7 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     // Driver options
     // The two current drivers are "kharma" or "imex", with the former being the usual KHARMA
     // driver (formerly HARM driver), and the latter supporting implicit stepping of some or all variables
-    // Mostly, packages should react to options rather than the driver name
+    // Mostly, packages should react to e.g. the "sync_prims" option rather than the driver name
     bool do_emhd = pin->GetOrAddBoolean("emhd", "on", false);
     std::string driver_type_s = pin->GetOrAddString("driver", "type", (do_emhd) ? "imex" : "kharma");
     DriverType driver_type;
@@ -137,10 +137,25 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     bool prims_are_fundamental = driver_type != DriverType::kharma;
     params.Add("prims_are_fundamental", prims_are_fundamental);
 
-    // Now that we're an AMR code, though, we always *sync* conserved variables
-    // This means "emulating" syncing primitives in some cases, by running PtoU -> sync -> UtoP
-    params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive")});
-    params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
+    // Which variables we *actually send* via Parthenon/MPI may differ, however.
+    // Prolongation/restriction should happen on conserved vars, so we must sync
+    // those in multilevel meshes.  If prims are funcamental but not sync'd,
+    // we "emulate" syncing them with PtoU/UtoP on boundaries
+    bool sync_prims = prims_are_fundamental &&
+                        (!pin->DoesParameterExist("parthenon/mesh", "numlevel") ||
+                         pin->GetInteger("parthenon/mesh", "numlevel") == 1);
+    params.Add("sync_prims", sync_prims);
+    // Finally, we set default flags for primitive and conserved variables
+    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
+    if (sync_prims) {
+        // If we're not in AMR, we can sync primitive variables directly
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::WithFluxes, Metadata::Conserved});
+    } else {
+        // If we're in AMR or using the KHARMA driver anyway, sync conserved vars
+        params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive")});
+        params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved});
+    }
 
     return pkg;
 }
@@ -173,7 +188,8 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
     // Note this has the side effect of filling U in some zones,
     // which must be replaced during e.g. startup code when primitive values should be truth
     bool prims_are_fundamental = params.Get<bool>("prims_are_fundamental");
-    if (prims_are_fundamental) {
+    bool sync_prims = params.Get<bool>("sync_prims");
+    if (prims_are_fundamental && !sync_prims) {
         TaskID t_all_ptou[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_ptou_final(0);
         int i_task = 0;
@@ -199,7 +215,7 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
     EndFlag();
 
     // If we're "syncing primitive variables" but just exchanged conserved variables (B, implicit, etc), we need to recover the prims
-    if (prims_are_fundamental) {
+    if (prims_are_fundamental && !sync_prims) {
         TaskID t_all_utop[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_utop_final(0);
         int i_task = 0;
diff --git a/kharma/grmhd/grmhd.cpp b/kharma/grmhd/grmhd.cpp
index fedb683a..e1d7746a 100644
--- a/kharma/grmhd/grmhd.cpp
+++ b/kharma/grmhd/grmhd.cpp
@@ -145,8 +145,10 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
     flags_prim.push_back(Metadata::Restart);
 
     // We must additionally fill ghost zones of primitive variables in GRMHD, to seed the solver
-    // Disabling this is not well tested regardless of how fancy the solver is, YMMV
-    if (pin->GetOrAddBoolean("GRMHD", "sync_utop_seed", true)) {
+    // Only necessary to add here if syncing conserved vars
+    // Note some startup behavior relies on having the GRHD prims marked for syncing,
+    // so disable sync_utop_seed at your peril
+    if (!driver.Get<bool>("sync_prims") && pin->GetOrAddBoolean("GRMHD", "sync_utop_seed", true)) {
         flags_prim.push_back(Metadata::FillGhost);
     }
 
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index 15e7f86f..d669ff64 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -79,7 +79,6 @@ std::shared_ptr<KHARMAPackage> Inverter::Initialize(ParameterInput *pin, std::sh
     // Don't operate at the usual time if GRMHD variables are being evolved implicitly
     if (!implicit_grmhd) {
         pkg->BlockUtoP = Inverter::BlockUtoP;
-        pkg->BoundaryUtoP = Inverter::BlockUtoP;
     }
 
     pkg->PostStepDiagnosticsMesh = Inverter::PostStepDiagnostics;

From 0541185765a21996e8c47f769c04d8c6ce68de2a Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Tue, 10 Oct 2023 17:23:42 -0600
Subject: [PATCH 41/47] Sync primitive variables if they're fundamental

A few commits back I removed sync_prims, reasoning conserved variables
should be prolongated/restricted, and could always be recovered from
each other.  Both points were wrong: primitive vars can be prolongated
and restricted on boundaries fine, though the latter is not ideal. And,
in EMHD, it is not straightforward to recover P from U, as this is
normally done inline with the computation of the next step's state.

This commit switches to syncing primitive variables (sync_prims) anytime
they're fundamental (ImEx and simple drivers, "prims_are_fundamental")
Note the "conserved" B field is *always* what is sync'd, regardless
of the other primitive or conserved variables.

It also avoids loading the inverter package if GRMHD is implicitly
evolved, and expands some computed domains related to B to work
at the last prolongation operator bug before AMR.
---
 kharma/b_ct/b_ct.cpp             | 36 +++++++++--------
 kharma/b_ct/b_ct.hpp             | 14 +++----
 kharma/b_flux_ct/b_flux_ct.cpp   | 24 ++++-------
 kharma/b_flux_ct/b_flux_ct.hpp   |  2 +-
 kharma/boundaries/boundaries.cpp | 52 ++++++++++++------------
 kharma/driver/kharma_driver.cpp  | 68 ++++++++++----------------------
 kharma/driver/kharma_step.cpp    |  6 +--
 kharma/flux/get_flux.hpp         |  2 +-
 kharma/inverter/inverter.cpp     | 16 ++++----
 kharma/kharma.cpp                |  7 ++--
 10 files changed, 96 insertions(+), 131 deletions(-)

diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index 2243487b..a6f6076a 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -46,6 +46,7 @@
 using namespace parthenon;
 using parthenon::refinement_ops::ProlongateSharedMinMod;
 using parthenon::refinement_ops::RestrictAverage;
+using parthenon::refinement_ops::ProlongateInternalAverage;
 
 std::shared_ptr<KHARMAPackage> B_CT::Initialize(ParameterInput *pin, std::shared_ptr<Packages_t>& packages)
 {
@@ -93,6 +94,8 @@ std::shared_ptr<KHARMAPackage> B_CT::Initialize(ParameterInput *pin, std::shared
     m = Metadata(flags_cons_f);
     if (!lazy_prolongation)
         m.RegisterRefinementOps<ProlongateSharedMinMod, RestrictAverage, ProlongateInternalOlivares>();
+    else
+        m.RegisterRefinementOps<ProlongateSharedMinMod, RestrictAverage, ProlongateInternalAverage>();
     pkg->AddField("cons.fB", m);
 
     // Cell-centered versions.  Needed for BS, not for other schemes.
@@ -164,7 +167,7 @@ TaskStatus B_CT::MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse)
     return TaskStatus::complete;
 }
 
-void B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
+TaskStatus B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
     const int ndim = pmb->pmy_mesh->ndim;
@@ -204,6 +207,8 @@ void B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
             B_U(v, k, j, i) = B_P(v, k, j, i) * G.gdet(Loci::center, j, i);
         }
     );
+
+    return TaskStatus::complete;
 }
 
 TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
@@ -215,12 +220,9 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
     auto& emf_pack = md->PackVariables(std::vector<std::string>{"B_CT.emf"});
 
     // Figure out indices
-    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, 0, 0);
-    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::interior, -1, 2);
+    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::entire, 0, 0);
+    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::entire, 1, 1);
     const IndexRange block = IndexRange{0, emf_pack.GetDim(5)-1};
-    const int kd = ndim > 2 ? 1 : 0;
-    const int jd = ndim > 1 ? 1 : 0;
-    const int id = ndim > 0 ? 1 : 0;
 
     auto pmb0 = md->GetBlockData(0)->GetBlockPointer().get();
 
@@ -269,8 +271,7 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
         auto& B_U = md->PackVariablesAndFluxes(std::vector<std::string>{"cons.B"});
         auto& B_P = md->PackVariables(std::vector<std::string>{"prims.B"});
         // emf in center == -v x B
-        const IndexRange3 bc = KDomain::GetRange(md, IndexDomain::entire);
-        pmb0->par_for("B_CT_emfc", block.s, block.e, bc.ks, bc.ke, bc.js, bc.je, bc.is, bc.ie,
+        pmb0->par_for("B_CT_emfc", block.s, block.e, b.ks, b.ke, b.js, b.je, b.is, b.ie,
             KOKKOS_LAMBDA (const int &bl, const int &k, const int &j, const int &i) {
                 VLOOP emfc(bl, v, k, j, i) = 0.;
                 VLOOP3 emfc(bl, x, k, j, i) -= antisym(v, w, x) * uvec(bl, v, k, j, i) * B_U(bl, w, k, j, i);
@@ -278,19 +279,22 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
         );
 
         if (scheme == "gs05_0") {
+            const int kd = ndim > 2 ? 1 : 0;
+            const int jd = ndim > 1 ? 1 : 0;
+            const int id = ndim > 0 ? 1 : 0;
             pmb0->par_for("B_CT_emf_GS05_0", block.s, block.e, b1.ks, b1.ke, b1.js, b1.je, b1.is, b1.ie,
                 KOKKOS_LAMBDA (const int &bl, const int &k, const int &j, const int &i) {
                     const auto& G = B_U.GetCoords(bl);
                     // Just subtract centered emf from twice the face version
                     // More stable for planar flows even without anything fancy
                     emf_pack(bl, E1, 0, k, j, i) = 2 * emf_pack(bl, E1, 0, k, j, i)
-                        - 0.25*(emfc(bl, V1, k, j, i)     + emfc(bl, V1, k, j - jd, i)
-                                + emfc(bl, V1, k, j - jd, i) + emfc(bl, V1, k - kd, j - jd, i));
+                        - 0.25*(emfc(bl, V1, k, j, i)      + emfc(bl, V1, k, j - jd, i)
+                              + emfc(bl, V1, k, j - jd, i) + emfc(bl, V1, k - kd, j - jd, i));
                     emf_pack(bl, E2, 0, k, j, i) = 2 * emf_pack(bl, E2, 0, k, j, i)
-                        - 0.25*(emfc(bl, V2, k, j, i)     + emfc(bl, V2, k, j, i - id)
-                                + emfc(bl, V2, k - kd, j, i) + emfc(bl, V2, k - kd, j, i - id));
+                        - 0.25*(emfc(bl, V2, k, j, i)      + emfc(bl, V2, k, j, i - id)
+                              + emfc(bl, V2, k - kd, j, i) + emfc(bl, V2, k - kd, j, i - id));
                     emf_pack(bl, E3, 0, k, j, i) = 2 * emf_pack(bl, E3, 0, k, j, i)
-                        - 0.25*(emfc(bl, V3, k, j, i)     + emfc(bl, V3, k, j, i - id)
+                        - 0.25*(emfc(bl, V3, k, j, i)      + emfc(bl, V3, k, j, i - id)
                               + emfc(bl, V3, k, j - jd, i) + emfc(bl, V3, k, j - jd, i - id));
                 }
             );
@@ -301,7 +305,6 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
             pmb0->par_for("B_CT_emf_GS05_c", block.s, block.e, b1.ks, b1.ke, b1.js, b1.je, b1.is, b1.ie,
                 KOKKOS_LAMBDA (const int &bl, const int &k, const int &j, const int &i) {
                     const auto& G = B_U.GetCoords(bl);
-
                     // "simple" flux + upwinding method, Stone & Gardiner '09 but also in Stone+08 etc.
                     // Upwinded differences take in order (1-indexed):
                     // 1. EMF component direction to calculate
@@ -309,6 +312,7 @@ TaskStatus B_CT::CalculateEMF(MeshData<Real> *md)
                     // 3. Direction of upwinding
                     // ...then zone number...
                     // and finally, a boolean indicating a leftward (e.g., i-3/4) vs rightward (i-1/4) position
+                    // TODO(BSP) This doesn't properly support 2D. Yell when it's chosen?
                     if (ndim > 2) {
                         emf_pack(bl, E1, 0, k, j, i) +=
                               0.25*(upwind_diff(B_U(bl), emfc(bl), uvecf(bl), 1, 3, 2, k, j, i, false)
@@ -344,8 +348,8 @@ TaskStatus B_CT::AddSource(MeshData<Real> *md, MeshData<Real> *mdudt)
     auto& emf_pack = md->PackVariables(std::vector<std::string>{"B_CT.emf"});
 
     // Figure out indices
-    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, 0, 0);
-    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::interior, 0, 1);
+    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::entire, 0, 0);
+    const IndexRange3 b1 = KDomain::GetRange(md, IndexDomain::entire, 0, 1);
     const IndexRange block = IndexRange{0, emf_pack.GetDim(5)-1};
 
     auto pmb0 = md->GetBlockData(0)->GetBlockPointer().get();
diff --git a/kharma/b_ct/b_ct.hpp b/kharma/b_ct/b_ct.hpp
index ab2b291d..b9db5649 100644
--- a/kharma/b_ct/b_ct.hpp
+++ b/kharma/b_ct/b_ct.hpp
@@ -64,7 +64,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
  * input: Conserved B = sqrt(-gdet) * B^i
  * output: Primitive B = B^i
  */
-void BlockUtoP(MeshBlockData<Real> *mbd, IndexDomain domain, bool coarse=false);
+TaskStatus BlockUtoP(MeshBlockData<Real> *mbd, IndexDomain domain, bool coarse=false);
 TaskStatus MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
@@ -283,9 +283,10 @@ struct ProlongateInternalOlivares {
         const int fk = (DIM > 2) ? (k - ckb.s) * 2 + kb.s : kb.s;
 
         // Coefficients selecting a particular formula (see Olivares et al. 2019)
-        // TODO options here. This corresponds to Cunningham, but we could have:
-        // 1. differences of squares of zone dimesnions (Toth)
-        // 2. heuristic based on flux difference of top vs bottom halves (Olivares)
+        // TODO options here. There are 3 presented:
+        // 1. Zeros (Cunningham)
+        // 2. differences of squares of zone dimesnions (Toth)
+        // 3. heuristic based on flux difference of top vs bottom halves (Olivares)
         // constexpr Real a[3] = {0., 0., 0.};
         const Real a[3] = {(SQR(coords.Dxc<2>(fj)) - SQR(coords.Dxc<3>(fk))) / (SQR(coords.Dxc<2>(fj)) + SQR(coords.Dxc<3>(fk))),
                            (SQR(coords.Dxc<3>(fk)) - SQR(coords.Dxc<1>(fi))) / (SQR(coords.Dxc<3>(fk)) + SQR(coords.Dxc<1>(fi))),
@@ -322,11 +323,6 @@ struct ProlongateInternalOlivares {
                      + coeff[elem][2]*F<third,me,-1,DIM>(fine, coords, l, m, n, fk, fj, fi)
                      + coeff[elem][3]*F<third,me,next,DIM>(fine, coords, l, m, n, fk, fj, fi))
                 ) / coords.Volume<el>(fk+off_k, fj+off_j, fi+off_i);
-            //printf("%d %d\n", fi, fj);
-            // if (fi == 56 && fj == 70)
-            //     printf("I used dir %d offset %d %d %d, %d %d %d\n", me+1,
-            //         off_k-diff_k, off_j-diff_j, off_i-diff_i,
-            //         off_k+diff_k, off_j+diff_j, off_i+diff_i);
         }
     }
 };
diff --git a/kharma/b_flux_ct/b_flux_ct.cpp b/kharma/b_flux_ct/b_flux_ct.cpp
index c2169c82..11f14e3a 100644
--- a/kharma/b_flux_ct/b_flux_ct.cpp
+++ b/kharma/b_flux_ct/b_flux_ct.cpp
@@ -101,22 +101,11 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
                                               : Metadata::GetUserFlag("Explicit");
 
     // Flags for B fields
-    std::vector<MetadataFlag> flags_b = {Metadata::Cell, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
-
-    // "primitive" B field is field, "conserved" is flux
-    auto flags_prim = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("prim_flags");
-    flags_prim.insert(flags_prim.end(), flags_b.begin(), flags_b.end());
-    auto flags_cons = packages->Get("Driver")->Param<std::vector<MetadataFlag>>("cons_flags");
-    flags_cons.insert(flags_cons.end(), flags_b.begin(), flags_b.end());
-
-    // Always sync B field conserved var, for standardization with B_CT
-    // god std::vector is verbose
-    if (std::find(flags_cons.begin(), flags_cons.end(), Metadata::FillGhost) == flags_cons.end()) {
-        flags_cons.push_back(Metadata::FillGhost);
-    }
-    if (std::find(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost) != flags_prim.end()) {
-        flags_prim.erase(std::remove(flags_prim.begin(), flags_prim.end(), Metadata::FillGhost), flags_prim.end());
-    }
+    // We always mark conserved B to be sync'd for consistency, since it's strictly required for B_CT/AMR
+    std::vector<MetadataFlag> flags_prim = {Metadata::Real, Metadata::Derived, Metadata::GetUserFlag("Primitive"),
+                                            Metadata::Cell, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
+    std::vector<MetadataFlag> flags_cons = {Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::FillGhost, Metadata::WithFluxes, Metadata::Conserved,
+                                            Metadata::Cell, Metadata::GetUserFlag("MHD"), areWeImplicit, Metadata::Vector};
 
     auto m = Metadata(flags_prim, s_vector);
     pkg->AddField("prims.B", m);
@@ -192,7 +181,7 @@ void MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse)
         }
     );
 }
-void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
+TaskStatus BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
 {
     auto pmb = rc->GetBlockPointer();
 
@@ -213,6 +202,7 @@ void BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coarse)
             B_P(mu, k, j, i) = B_U(mu, k, j, i) / G.gdet(Loci::center, j, i);
         }
     );
+    return TaskStatus::complete;
 }
 
 void MeshPtoU(MeshData<Real> *md, IndexDomain domain, bool coarse)
diff --git a/kharma/b_flux_ct/b_flux_ct.hpp b/kharma/b_flux_ct/b_flux_ct.hpp
index 7de3f6c9..bb266fbc 100644
--- a/kharma/b_flux_ct/b_flux_ct.hpp
+++ b/kharma/b_flux_ct/b_flux_ct.hpp
@@ -63,7 +63,7 @@ std::shared_ptr<KHARMAPackage> Initialize(ParameterInput *pin, std::shared_ptr<P
  * input: Conserved B = sqrt(-gdet) * B^i
  * output: Primitive B = B^i
  */
-void BlockUtoP(MeshBlockData<Real> *md, IndexDomain domain, bool coarse=false);
+TaskStatus BlockUtoP(MeshBlockData<Real> *md, IndexDomain domain, bool coarse=false);
 void MeshUtoP(MeshData<Real> *md, IndexDomain domain, bool coarse=false);
 
 /**
diff --git a/kharma/boundaries/boundaries.cpp b/kharma/boundaries/boundaries.cpp
index 9d5ffcd4..87399e60 100644
--- a/kharma/boundaries/boundaries.cpp
+++ b/kharma/boundaries/boundaries.cpp
@@ -36,12 +36,15 @@
 #include "decs.hpp"
 #include "domain.hpp"
 #include "kharma.hpp"
-#include "flux.hpp"
 #include "flux_functions.hpp"
 #include "grmhd_functions.hpp"
 #include "pack.hpp"
 #include "types.hpp"
 
+#include "b_ct.hpp"
+#include "b_flux_ct.hpp"
+#include "flux.hpp"
+
 // Parthenon's boundaries
 #include <bvals/boundary_conditions.hpp>
 
@@ -258,13 +261,6 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
     const auto btype_name = params.Get<std::string>(bname);
     const auto bdir = BoundaryDirection(bface);
 
-    // If we're pretending to sync primitives, but applying physical bounds
-    // to conserved variables, make sure we're up to date
-    if (pmb->packages.Get<KHARMAPackage>("Driver")->Param<bool>("prims_are_fundamental") &&
-        params.Get<bool>("domain_bounds_on_conserved")) {
-        Flux::BlockPtoU_Send(rc.get(), domain, coarse);
-    }
-
     Flag("Apply "+bname+" boundary: "+btype_name);
     pkg->KBoundaries[bface](rc, coarse);
     EndFlag();
@@ -340,25 +336,31 @@ void KBoundaries::ApplyBoundary(std::shared_ptr<MeshBlockData<Real>> &rc, IndexD
         }
     }
 
-    // If we applied the domain boundary to primitives (as we usually do)...
-    if (!params.Get<bool>("domain_bounds_on_conserved")) {
-        bool sync_prims = rc->GetBlockPointer()->packages.Get("Driver")->Param<bool>("sync_prims");
-        // There are two modes of operation here:
-        if (sync_prims) {
-            // 1. ImEx w/o AMR:
-            //    PRIMITIVE variables (only) are marked FillGhost
-            //    So, run PtoU on EVERYTHING (and correct the B field)
-            CorrectBPrimitive(rc, domain, coarse);
-            Flux::BlockPtoU(rc.get(), domain, coarse);
-        } else {
-            // 2. Normal (KHARMA driver, ImEx w/AMR):
-            //    CONSERVED variables are marked FillGhost, plus FLUID PRIMITIVES.
-            //    So, run PtoU on FLUID, and UtoP on EVERYTHING ELSE
-            Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
+    bool sync_prims = rc->GetBlockPointer()->packages.Get("Driver")->Param<bool>("sync_prims");
+    // There are two modes of operation here:
+    if (sync_prims) {
+        // 1. Exchange/prolongate/restrict PRIMITIVE variables: (ImEx driver)
+        //    Primitive variables and conserved B field are marked FillGhost
+        //    Explicitly run UtoP on B field, then PtoU on everything
+        // TODO there should be a set of B field wrappers that dispatch this
+        auto pkgs = pmb->packages.AllPackages();
+        if (pkgs.count("B_FluxCT")) {
+            B_FluxCT::BlockUtoP(rc.get(), IndexDomain::entire);
+        } else if (pkgs.count("B_CT")) {
+            B_CT::BlockUtoP(rc.get(), IndexDomain::entire);
         }
+        Flux::BlockPtoU(rc.get(), domain, coarse);
     } else {
-        // These get applied the same way regardless of driver
-        Packages::BoundaryUtoP(rc.get(), domain, coarse);
+        // 2. Exchange/prolongate/restrict CONSERVED variables: (KHARMA driver, maybe ImEx+AMR)
+        //    Conserved variables are marked FillGhost, plus FLUID PRIMITIVES.
+        if (!params.Get<bool>("domain_bounds_on_conserved")) {
+            // To apply primitive boundaries to GRMHD, we run PtoU on that ONLY,
+            // and UtoP on EVERYTHING ELSE
+            Packages::BoundaryPtoUElseUtoP(rc.get(), domain, coarse);
+        } else {
+            // If we want to apply boundaries to conserved vars, just run UtoP on EVERYTHING
+            Packages::BoundaryUtoP(rc.get(), domain, coarse);
+        }
     }
 
     EndFlag();
diff --git a/kharma/driver/kharma_driver.cpp b/kharma/driver/kharma_driver.cpp
index 6e9a4fdc..b1c6e0f9 100644
--- a/kharma/driver/kharma_driver.cpp
+++ b/kharma/driver/kharma_driver.cpp
@@ -131,24 +131,13 @@ std::shared_ptr<KHARMAPackage> KHARMADriver::Initialize(ParameterInput *pin, std
     // but which should not be evolved (or more importantly, sync'd) during main stepping
     Metadata::AddUserFlag("StartupOnly");
 
-    // This marks whether we consider primitive or conserved state to be
-    // the ground truth when updating values in a step.
-    // Currently "imex" and "simple" drivers both update primitive vars
-    bool prims_are_fundamental = driver_type != DriverType::kharma;
-    params.Add("prims_are_fundamental", prims_are_fundamental);
-
-    // Which variables we *actually send* via Parthenon/MPI may differ, however.
-    // Prolongation/restriction should happen on conserved vars, so we must sync
-    // those in multilevel meshes.  If prims are funcamental but not sync'd,
-    // we "emulate" syncing them with PtoU/UtoP on boundaries
-    bool sync_prims = prims_are_fundamental &&
-                        (!pin->DoesParameterExist("parthenon/mesh", "numlevel") ||
-                         pin->GetInteger("parthenon/mesh", "numlevel") == 1);
+    // Synchronize primitive variables unless we're using the KHARMA driver that specifically doesn't
+    // This includes for AMR w/ImEx driver
+    // Note the "conserved" B field is always sync'd.  The "primitive" version only differs by sqrt(-g)
+    bool sync_prims = driver_type != DriverType::kharma;
     params.Add("sync_prims", sync_prims);
-    // Finally, we set default flags for primitive and conserved variables
-    // This first mode is only for simulations without AMR/SMR, as primitives shouldn't be prolongated
     if (sync_prims) {
-        // If we're not in AMR, we can sync primitive variables directly
+        // For ImEx/simple drivers, sync/prolongate/restrict primitive variables directly
         params.Add("prim_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Derived, Metadata::FillGhost, Metadata::GetUserFlag("Primitive")});
         params.Add("cons_flags", std::vector<MetadataFlag>{Metadata::Real, Metadata::Independent, Metadata::Restart, Metadata::WithFluxes, Metadata::Conserved});
     } else {
@@ -180,42 +169,23 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
     auto t_start_sync = t_start;
 
     // Pull the mesh pointer from mc1 so we can be a static method
-    auto &params = mc1->GetMeshPointer()->packages.Get("Driver")->AllParams();
-    bool multilevel = mc1->GetMeshPointer()->multilevel;
-
-    // If we're "syncing primitive variables" but must exchange conserved vars to prolong/restrict them,
-    // make sure to run P->U, then sync, then U->P
-    // Note this has the side effect of filling U in some zones,
-    // which must be replaced during e.g. startup code when primitive values should be truth
-    bool prims_are_fundamental = params.Get<bool>("prims_are_fundamental");
-    bool sync_prims = params.Get<bool>("sync_prims");
-    if (prims_are_fundamental && !sync_prims) {
-        TaskID t_all_ptou[mc1->NumBlocks() * BOUNDARY_NFACES];
-        TaskID t_ptou_final(0);
-        int i_task = 0;
-        for (int i_block = 0; i_block < mc1->NumBlocks(); i_block++) {
-            auto &rc = mc1->GetBlockData(i_block);
-            for (int i_bnd = 0; i_bnd < BOUNDARY_NFACES; i_bnd++) {
-                if (rc->GetBlockPointer()->boundary_flag[i_bnd] == BoundaryFlag::block ||
-                    rc->GetBlockPointer()->boundary_flag[i_bnd] == BoundaryFlag::periodic) {
-                    const auto bdomain = KBoundaries::BoundaryDomain((BoundaryFace) i_bnd);
-                    t_all_ptou[i_task] = tl.AddTask(t_start, Flux::BlockPtoU_Send, rc.get(), bdomain, false);
-                    t_ptou_final = t_ptou_final | t_all_ptou[i_task];
-                    i_task++;
-                }
-            }
-        }
-        t_start_sync = t_ptou_final;
-    }
+    auto pmesh = mc1->GetMeshPointer();
+    auto &params = pmesh->packages.Get("Driver")->AllParams();
+    bool multilevel = pmesh->multilevel;
+
+    // TODO PtoU for B field when sync_prims?
 
-    // The Parthenon exchange tasks include applying physical boundary conditions
+    // The Parthenon exchange tasks include applying physical boundary conditions now.
+    // We generally do not take advantage of this yet, but good to know when reasoning about initialization.
     Flag("ParthenonAddSync");
     auto t_sync_done = parthenon::AddBoundaryExchangeTasks(t_start_sync, tl, mc1, multilevel);
     auto t_bounds = t_sync_done;
     EndFlag();
 
-    // If we're "syncing primitive variables" but just exchanged conserved variables (B, implicit, etc), we need to recover the prims
-    if (prims_are_fundamental && !sync_prims) {
+    // We always just sync'd the "conserved" magnetic field
+    // Translate back to "primitive" (& cell-centered) field if that's what we'll be using
+    if (params.Get<bool>("sync_prims")) {
+        auto pkgs = pmesh->packages.AllPackages();
         TaskID t_all_utop[mc1->NumBlocks() * BOUNDARY_NFACES];
         TaskID t_utop_final(0);
         int i_task = 0;
@@ -225,7 +195,11 @@ TaskID KHARMADriver::AddBoundarySync(const TaskID t_start, TaskList &tl, std::sh
                 if (rc->GetBlockPointer()->boundary_flag[i_bnd] == BoundaryFlag::block ||
                     rc->GetBlockPointer()->boundary_flag[i_bnd] == BoundaryFlag::periodic) {
                     const auto bdomain = KBoundaries::BoundaryDomain((BoundaryFace) i_bnd);
-                    t_all_utop[i_task] = tl.AddTask(t_sync_done, Packages::BoundaryUtoP, rc.get(), bdomain, false);
+                    if (pkgs.count("B_FluxCT")) {
+                        t_all_utop[i_task] = tl.AddTask(t_sync_done, B_FluxCT::BlockUtoP, rc.get(), bdomain, false);
+                    } else if (pkgs.count("B_CT")) {
+                        t_all_utop[i_task] = tl.AddTask(t_sync_done, B_CT::BlockUtoP, rc.get(), bdomain, false);
+                    }
                     t_utop_final = t_utop_final | t_all_utop[i_task];
                     i_task++;
                 }
diff --git a/kharma/driver/kharma_step.cpp b/kharma/driver/kharma_step.cpp
index e5619dbd..9adb64ec 100644
--- a/kharma/driver/kharma_step.cpp
+++ b/kharma/driver/kharma_step.cpp
@@ -162,16 +162,16 @@ TaskCollection KHARMADriver::MakeDefaultTaskCollection(BlockList_t &blocks, int
             auto t_emf = t_flux_bounds;
             if (use_b_ct) {
                 // Pull out a container of only EMF to synchronize
-                auto &md_emf_only = pmesh->mesh_data.AddShallow("EMF", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
+                auto &md_b_ct = pmesh->mesh_data.AddShallow("B_CT", std::vector<std::string>{"B_CT.emf"}); // TODO this gets weird if we partition
                 auto t_emf_local = tl.AddTask(t_fluxes, B_CT::CalculateEMF, md_sub_step_init.get());
-                auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_emf_only);
+                auto t_emf = KHARMADriver::AddBoundarySync(t_emf_local, tl, md_b_ct);
             }
         }
 
         // Any package modifications to the fluxes.  e.g.:
         // 1. Flux-CT calculations for B field transport
         // 2. Zero fluxes through poles
-        // etc 
+        // etc
         auto t_fix_flux = tl.AddTask(t_emf, Packages::FixFlux, md_sub_step_init.get());
 
         // Apply the fluxes to calculate a change in cell-centered values "md_flux_src"
diff --git a/kharma/flux/get_flux.hpp b/kharma/flux/get_flux.hpp
index 5f2edd77..9f9da4f8 100644
--- a/kharma/flux/get_flux.hpp
+++ b/kharma/flux/get_flux.hpp
@@ -119,7 +119,7 @@ inline TaskStatus GetFlux(MeshData<Real> *md)
     const auto& Fr_all = md->PackVariables(std::vector<std::string>{"Flux.Fr"});
 
     // Get the domain size
-    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, -1, 1);
+    const IndexRange3 b = KDomain::GetRange(md, IndexDomain::interior, -1, 2);
     // Get other sizes we need
     const int n1 = pmb0->cellbounds.ncellsi(IndexDomain::entire);
     const IndexRange block = IndexRange{0, cmax.GetDim(5) - 1};
diff --git a/kharma/inverter/inverter.cpp b/kharma/inverter/inverter.cpp
index d669ff64..28919d35 100644
--- a/kharma/inverter/inverter.cpp
+++ b/kharma/inverter/inverter.cpp
@@ -64,22 +64,20 @@ std::shared_ptr<KHARMAPackage> Inverter::Initialize(ParameterInput *pin, std::sh
     // TODO add version attempting to recover from entropy, stuff like that
 
     // Flag denoting UtoP inversion failures
-    // Only needed if we're actually calling UtoP, but always allocated as it's retrieved often
-    // Needs boundary sync if treating primitive variables as fundamental
-    bool prims_are_fundamental = packages->Get("Driver")->Param<bool>("prims_are_fundamental");
-    bool implicit_grmhd = packages->Get("GRMHD")->Param<bool>("implicit");
+    // Needs boundary sync if treating primitive variables as fundamental, since we need to
+    // avoid failed neighbors when fixing.
+    bool sync_prims = packages->Get("Driver")->Param<bool>("sync_prims");
     Metadata m;
-    if (prims_are_fundamental && !implicit_grmhd) {
+    if (sync_prims) {
         m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy, Metadata::FillGhost});
     } else {
         m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy});
     }
     pkg->AddField("pflag", m);
 
-    // Don't operate at the usual time if GRMHD variables are being evolved implicitly
-    if (!implicit_grmhd) {
-        pkg->BlockUtoP = Inverter::BlockUtoP;
-    }
+    // We exist basically to do this
+    pkg->BlockUtoP = Inverter::BlockUtoP;
+    pkg->BoundaryUtoP = Inverter::BlockUtoP;
 
     pkg->PostStepDiagnosticsMesh = Inverter::PostStepDiagnostics;
 
diff --git a/kharma/kharma.cpp b/kharma/kharma.cpp
index 65748f00..fa03b9f0 100644
--- a/kharma/kharma.cpp
+++ b/kharma/kharma.cpp
@@ -301,8 +301,6 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr<ParameterInput> &pin)
     }
     // GRMHD needs globals to mark packages
     auto t_grmhd = tl.AddTask(t_globals | t_driver, KHARMA::AddPackage, packages, GRMHD::Initialize, pin.get());
-    // Inverter (TODO: split out fixups, then don't load this when GRMHD isn't loaded)
-    auto t_inverter = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Inverter::Initialize, pin.get());
     // Reductions, needed for most other packages
     auto t_reductions = tl.AddTask(t_none, KHARMA::AddPackage, packages, Reductions::Initialize, pin.get());
 
@@ -373,11 +371,14 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr<ParameterInput> &pin)
     KHARMA::AddPackage(packages, KBoundaries::Initialize, pin.get());
 
     // Load the implicit package last, and only if there are any variables which need implicit evolution
-    auto all_implicit = Metadata::FlagCollection(Metadata::GetUserFlag("Implicit"));
     int n_implicit = PackDimension(packages.get(), Metadata::GetUserFlag("Implicit"));
     if (n_implicit > 0) {
         KHARMA::AddPackage(packages, Implicit::Initialize, pin.get());
     }
+    // Only load the inverter if GRMHD isn't being evolved implicitly
+    if (PackDimension(packages.get(), {Metadata::GetUserFlag("Implicit"), Metadata::GetUserFlag("MHD")}) < 5) {
+        KHARMA::AddPackage(packages, Inverter::Initialize, pin.get());
+    }
 
 #if DEBUG
     // Carry the ParameterInput with us, for generating outputs whenever we want

From 4fc472fc9c9e553b88d2aad20e71641c815ddf71 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Wed, 11 Oct 2023 09:05:06 -0600
Subject: [PATCH 42/47] Fix the build

---
 kharma/b_ct/b_ct.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index a6f6076a..1627bf79 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -177,7 +177,7 @@ TaskStatus B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coa
     auto B_P = rc->PackVariables(std::vector<std::string>{"prims.B"});
     const auto& G = pmb->coords;
     // Return if we're not syncing U & P at all (e.g. edges)
-    if (B_Uf.GetDim(4) == 0) return;
+    if (B_Uf.GetDim(4) == 0) return TaskStatus::complete;
 
     // TODO get rid of prims on faces probably
 

From 218a875fbab08a0ed386b2ceaccd0e38a0b88634 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Wed, 11 Oct 2023 10:04:35 -0600
Subject: [PATCH 43/47] Fix floors so we don't need to load Inverter pkg

---
 kharma/b_ct/b_ct.cpp     |  2 +-
 kharma/floors/floors.cpp |  5 +++++
 kharma/kharma.cpp        | 31 ++++++++++++++++++-------------
 3 files changed, 24 insertions(+), 14 deletions(-)

diff --git a/kharma/b_ct/b_ct.cpp b/kharma/b_ct/b_ct.cpp
index 1627bf79..3751bd20 100644
--- a/kharma/b_ct/b_ct.cpp
+++ b/kharma/b_ct/b_ct.cpp
@@ -191,7 +191,7 @@ TaskStatus B_CT::BlockUtoP(MeshBlockData<Real> *rc, IndexDomain domain, bool coa
             B_Pf(F3, 0, k, j, i) = B_Uf(F3, 0, k, j, i) / G.gdet(Loci::face3, j, i);
         }
     );
-    // Average the primitive vals for zone centers (TODO right?)
+    // Average the primitive vals for zone centers
     const IndexRange3 bc = KDomain::GetRange(rc, domain, coarse);
     pmb->par_for("UtoP_B_center", bc.ks, bc.ke, bc.js, bc.je, bc.is, bc.ie,
         KOKKOS_LAMBDA (const int &k, const int &j, const int &i) {
diff --git a/kharma/floors/floors.cpp b/kharma/floors/floors.cpp
index 94ab5db8..e1a7ee76 100644
--- a/kharma/floors/floors.cpp
+++ b/kharma/floors/floors.cpp
@@ -138,6 +138,11 @@ std::shared_ptr<KHARMAPackage> Floors::Initialize(ParameterInput *pin, std::shar
     // Should switch these to "Integer" fields when Parthenon supports it
     Metadata m = Metadata({Metadata::Real, Metadata::Cell, Metadata::Derived, Metadata::OneCopy});
     pkg->AddField("fflag", m);
+    // When not using UtoP, we still need a dummy copy of pflag, too
+    // TODO we shouldn't require pflag
+    if (!packages->AllPackages().count("Inverter")) {
+        pkg->AddField("pflag", m);
+    }
 
     pkg->BlockApplyFloors = Floors::ApplyGRMHDFloors;
     pkg->PostStepDiagnosticsMesh = Floors::PostStepDiagnostics;
diff --git a/kharma/kharma.cpp b/kharma/kharma.cpp
index fa03b9f0..0f13eb8a 100644
--- a/kharma/kharma.cpp
+++ b/kharma/kharma.cpp
@@ -291,21 +291,28 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr<ParameterInput> &pin)
     auto t_globals = tl.AddTask(t_none, KHARMA::AddPackage, packages, KHARMA::InitializeGlobals, pin.get());
     // Neither will grid output, as any mesh will get GRCoordinates objects
     // FieldIsOutput actually just checks for substring match, so this matches any coords. variable
-    if (FieldIsOutput(pin.get(), "coords."))
+    if (FieldIsOutput(pin.get(), "coords.")) {
         auto t_coord_out = tl.AddTask(t_none, KHARMA::AddPackage, packages, CoordinateOutput::Initialize, pin.get());
+    }
     // Driver package is the foundation
     auto t_driver = tl.AddTask(t_none, KHARMA::AddPackage, packages, KHARMADriver::Initialize, pin.get());
-    // Floors package has no dependencies
-    if (!pin->GetOrAddBoolean("floors", "disable_floors", false)) {
-        auto t_floors = tl.AddTask(t_none, KHARMA::AddPackage, packages, Floors::Initialize, pin.get());
-    }
     // GRMHD needs globals to mark packages
     auto t_grmhd = tl.AddTask(t_globals | t_driver, KHARMA::AddPackage, packages, GRMHD::Initialize, pin.get());
+    // Only load the inverter if GRMHD/EMHD isn't being evolved implicitly
+    auto t_inverter = t_grmhd;
+    if (!pin->GetOrAddBoolean("GRMHD", "implicit", pin->GetOrAddBoolean("emhd", "on", false))) {
+        t_inverter = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Inverter::Initialize, pin.get());
+    }
+    // Floors package depends on having pflag
+    if (!pin->GetOrAddBoolean("floors", "disable_floors", false)) {
+        auto t_floors = tl.AddTask(t_inverter, KHARMA::AddPackage, packages, Floors::Initialize, pin.get());
+    }
     // Reductions, needed for most other packages
     auto t_reductions = tl.AddTask(t_none, KHARMA::AddPackage, packages, Reductions::Initialize, pin.get());
 
     // B field solvers, to ensure divB ~= 0.
     // Bunch of logic here: basically we want to load <=1 solver with an encoded order of preference:
+    // 0. Anything user-specified
     // 1. Prefer B_CT if AMR since it's compatible
     // 2. Prefer B_Flux_CT otherwise since it's well-tested
     auto t_b_field = t_none;
@@ -345,11 +352,11 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr<ParameterInput> &pin)
     if (pin->GetOrAddBoolean("electrons", "on", false)) {
         auto t_electrons = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Electrons::Initialize, pin.get());
     }
-    if (pin->GetOrAddBoolean("emhd", "on", false)) {
-        auto t_electrons = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, EMHD::Initialize, pin.get());
+    if (pin->GetBoolean("emhd", "on")) {
+        auto t_emhd = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, EMHD::Initialize, pin.get());
     }
     if (pin->GetOrAddBoolean("wind", "on", false)) {
-        auto t_electrons = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Wind::Initialize, pin.get());
+        auto t_wind = tl.AddTask(t_grmhd, KHARMA::AddPackage, packages, Wind::Initialize, pin.get());
     }
     // Enable calculating jcon iff it is in any list of outputs (and there's even B to calculate it).
     // Since it is never required to restart, this is the only time we'd write (hence, need) it
@@ -370,15 +377,13 @@ Packages_t KHARMA::ProcessPackages(std::unique_ptr<ParameterInput> &pin)
     // TODO avoid init if Parthenon will be handling all boundaries?
     KHARMA::AddPackage(packages, KBoundaries::Initialize, pin.get());
 
-    // Load the implicit package last, and only if there are any variables which need implicit evolution
+    // Load the implicit package last, if there are *any* variables that need implicit evolution
+    // This lets us just count by flag, rather than checking all the possible parameters that would
+    // trigger this
     int n_implicit = PackDimension(packages.get(), Metadata::GetUserFlag("Implicit"));
     if (n_implicit > 0) {
         KHARMA::AddPackage(packages, Implicit::Initialize, pin.get());
     }
-    // Only load the inverter if GRMHD isn't being evolved implicitly
-    if (PackDimension(packages.get(), {Metadata::GetUserFlag("Implicit"), Metadata::GetUserFlag("MHD")}) < 5) {
-        KHARMA::AddPackage(packages, Inverter::Initialize, pin.get());
-    }
 
 #if DEBUG
     // Carry the ParameterInput with us, for generating outputs whenever we want

From ac46a45adcb4bc7ae77ed5120596b9e26c4aab54 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Wed, 11 Oct 2023 17:13:28 -0600
Subject: [PATCH 44/47] Viscous Bondi test touchups, prototype SMR test

---
 .gitignore                    |   1 +
 pars/emhd/bondi_viscous.par   |   4 +-
 pars/smr/mhdmodes_refined.par |  90 ++++++++++++++++++
 tests/bondi_viscous/check.py  |  44 ++++-----
 tests/mhdmodes_smr/check.py   | 170 ++++++++++++++++++++++++++++++++++
 tests/mhdmodes_smr/run.sh     |  47 ++++++++++
 6 files changed, 327 insertions(+), 29 deletions(-)
 create mode 100644 pars/smr/mhdmodes_refined.par
 create mode 100644 tests/mhdmodes_smr/check.py
 create mode 100755 tests/mhdmodes_smr/run.sh

diff --git a/.gitignore b/.gitignore
index 8612874b..c2551928 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,6 +19,7 @@ logs/
 # Archival files
 kharma_parsed_*.par
 log_*.txt
+bondi_analytic_*.txt
 
 # Editor documents
 .project
diff --git a/pars/emhd/bondi_viscous.par b/pars/emhd/bondi_viscous.par
index 8aa45d4a..70d3b7ca 100644
--- a/pars/emhd/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -67,10 +67,10 @@ disable_floors = true
 outer_x1 = dirichlet
 inner_x1 = dirichlet
 check_inflow_outer_x1 = false
-check_inflow_inner_x1 = false
+check_inflow_inner_x1 = true
 # Force outflow bounds for EMHD vars
 outflow_EMHD_inner_x1 = true
-outflow_EMHD_outer_x1 = true
+outflow_EMHD_outer_x1 = false
 
 <debug>
 verbose = 1
diff --git a/pars/smr/mhdmodes_refined.par b/pars/smr/mhdmodes_refined.par
new file mode 100644
index 00000000..159e9f3f
--- /dev/null
+++ b/pars/smr/mhdmodes_refined.par
@@ -0,0 +1,90 @@
+# GRMHD Modes problem
+# Try to propagate several analytically-amenable linear modes
+# of the MHD equations
+# Heavily commented as a likely first problem for new users
+
+<parthenon/job>
+problem_id = mhdmodes
+
+<mhdmodes>
+nmode = 1
+dir = 3
+
+<parthenon/mesh>
+refinement = static
+numlevel = 2
+
+nx1 = 96
+x1min = 0.0
+x1max = 1.0
+ix1_bc = periodic
+ox1_bc = periodic
+
+nx2 = 96
+x2min = 0.0
+x2max = 1.0
+ix2_bc = periodic
+ox2_bc = periodic
+
+nx3 = 1
+x3min = 0.0
+x3max = 1.0
+ix3_bc = periodic
+ox3_bc = periodic
+
+# Size of a mesh block
+# # of meshblocks must be >= the number of MPI ranks,
+# however there may be multiple blocks per rank
+<parthenon/meshblock>
+nx1 = 32
+nx2 = 32
+nx3 = 1
+
+<parthenon/static_refinement0>
+x1min = 0.5
+x1max = 0.5
+x2min = 0.5
+x2max = 0.5
+level = 1
+
+<coordinates>
+base = cartesian_minkowski
+transform = null
+
+<parthenon/time>
+tlim = 5.0
+integrator = rk2
+dt_min = 0.0001
+
+<GRMHD>
+cfl = 0.9
+gamma = 1.333333
+implicit = false
+
+<b_field>
+solver = face_ct
+lazy_prolongation = true
+implicit = false
+
+<floors>
+disable_floors = true
+
+<debug>
+verbose = 0
+extra_checks = 1
+flag_verbose = 0
+
+<driver>
+type = kharma
+reconstruction = weno5
+
+<parthenon/output0>
+file_type = hdf5
+dt = 0.05
+single_precision_output = true
+variables = prims.rho, prims.u, prims.uvec, prims.B, divB
+
+<parthenon/output1>
+file_type = hst
+dt = 0.1
+
diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index baba6974..5e4e4245 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -46,48 +46,38 @@
         dP_check = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau)
         state.cache['dP'] = dP_check
 
-        # load code data
-        dump = pyharm.load_dump("emhd_2d_{}_end_emhd2d_weno.phdf".format(res))
-
-        # TODO iterate on names here
-        #rho, uu, dP_tilde = dump['RHO'], dump['UU'], dump['dP']
-        #rho, uu = dump['RHO'], dump['UU']
-        rho, uu, dP_tilde, B1 = dump['RHO'], dump['UU'], dump['dP'], dump['B1']
-
         # compute dP
-        if dump['emhd/higher_order_terms'] == "true":
+        if dump['emhd/higher_order_terms']:
             print("Res: "+str(res)+"; higher order terms enabled")
-            Theta    = (dump['gam'] - 1.) * uu / rho
-            nu_emhd  = eta / rho
-            dP       = dP_tilde * np.sqrt(nu_emhd * rho * Theta / tau)
+            Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
+            nu_emhd  = eta / dump['rho']
+            dP       = dump['dP'] * np.sqrt(nu_emhd * dump['rho'] * Theta / tau)
         else:
-            dP = dP_tilde
+            dP = dump['dP']
+            Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
+            nu_emhd  = eta / dump['rho']
+            dP_check /= np.sqrt(nu_emhd * dump['rho'] * Theta / tau)
 
         # Plot
-        for var in ['rho', 'u', 'B1', 'dP']:
+        for var in ['rho', 'u', 'B1']:
             fig = plt.figure(figsize=(6,6))
             ax = fig.add_subplot(1,1,1)
             pplt.plot_diff_xz(ax, dump, state, var)
-            plt.legend()
             fig.savefig("compare_{}_{}.png".format(var, res))
             plt.close(fig)
 
-        r_start_ind = 1
-        radius = np.mean(dump.grid['r'][r_start_ind:], axis=(1,2))
-        plt.plot(radius, dP_check[r_start_ind:], label='dP ODE check')
-        plt.plot(radius, np.mean(dump['dP'][r_start_ind:], axis=(1,2)), label='dP0 ODE check')
-        plt.plot(radius, np.mean(state['ucon'][1][r_start_ind:], axis=(1,2)), label='ur')
-        #plt.plot(radius, np.mean(coeff[r_start_ind:], axis=(1,2)), label='coeff')
+        radius = np.mean(dump.grid['r'], axis=(1,2))
+        plt.plot(radius, dP_check, label='dP ODE')
+        plt.plot(radius, np.mean(dP, axis=(1,2)), label='dP code')
         plt.legend()
-        plt.savefig('dP_soln_new.png')
+        plt.savefig('compare_dP_{}.png'.format(res))
         plt.close()
 
-
         # compute L1 norm
-        L1[r,0] = np.mean(np.fabs(rho - state['rho'])[1:-1])
-        L1[r,1] = np.mean(np.fabs(uu  - state['u']))
-        L1[r,2] = np.mean(np.fabs(dP  - dP_check)[1:-1])
-        L1[r,3] = np.mean(np.fabs(B1  - state['B1']))
+        L1[r,0] = np.mean(np.fabs(dump['rho'] - state['rho']))
+        L1[r,1] = np.mean(np.fabs(dump['u']  - state['u']))
+        L1[r,2] = np.mean(np.fabs(dP  - dP_check))
+        L1[r,3] = np.mean(np.fabs(dump['B1']  - state['B1']))
 
     # MEASURE CONVERGENCE
     L1 = np.array(L1)
diff --git a/tests/mhdmodes_smr/check.py b/tests/mhdmodes_smr/check.py
new file mode 100644
index 00000000..777313c6
--- /dev/null
+++ b/tests/mhdmodes_smr/check.py
@@ -0,0 +1,170 @@
+#!/usr/bin/env python
+
+# MHD linear modes convergence plots
+import os,sys
+from matplotlib.colors import LightSource
+import numpy as np
+import matplotlib.pyplot as plt
+
+import pyharm
+import pyharm.plots as pplt
+
+RES = [int(x) for x in sys.argv[1].split(",")]
+LONG = sys.argv[2]
+SHORT = sys.argv[3]
+if len(sys.argv) > 4:
+    DIM = sys.argv[4]
+else:
+    DIM = "3d"
+if len(sys.argv) > 5:
+    DIR = int(sys.argv[5])
+else:
+    DIR = 0
+
+print(DIR)
+
+NVAR = 8
+VARS = ['rho', 'u', 'u1', 'u2', 'u3', 'B1', 'B2', 'B3']
+
+amp = 1.e-4
+k1 = 2.*np.pi
+k2 = 2.*np.pi
+if DIM == "3d" and DIR == 0:
+    k3 = 2.*np.pi
+else:
+    k3 = 0
+var0 = np.zeros(NVAR)
+
+# Background
+var0[0] = 1.
+var0[1] = 1.
+# Magnetic field
+var0[5] = 1.
+var0[6] = 0.
+var0[7] = 0.
+
+L1 = []
+
+# EIGENMODES: 3D
+dvar = np.zeros(NVAR)
+if DIM == "3d" and DIR == 0:
+    if "entropy" in SHORT:
+        dvar[0] = 1.
+    if "slow" in SHORT:
+        dvar[0] = 0.556500332363
+        dvar[1] = 0.742000443151
+        dvar[2] = -0.282334999306
+        dvar[3] = 0.0367010491491
+        dvar[4] = 0.0367010491491
+        dvar[5] = -0.195509141461
+        dvar[6] = 0.0977545707307
+        dvar[7] = 0.0977545707307
+    if "alfven" in SHORT:
+        dvar[3] = -0.339683110243
+        dvar[4] = 0.339683110243
+        dvar[6] = 0.620173672946
+        dvar[7] = -0.620173672946
+    if "fast" in SHORT:
+        dvar[0] = 0.481846076323
+        dvar[1] = 0.642461435098
+        dvar[2] = -0.0832240462505
+        dvar[3] = -0.224080007379
+        dvar[4] = -0.224080007379
+        dvar[5] = 0.406380545676
+        dvar[6] = -0.203190272838
+        dvar[7] = -0.203190272838
+else:
+    # EIGENMODES: 2D
+    # We only *convergence check* dir = 3 i.e. X1/X2 plane runs
+    # Other directions are useful for diagnosis but won't fail if 3D runs don't
+    if "entropy" in SHORT:
+        dvar[0] = 1.
+    if "slow" in SHORT:
+        dvar[0] = 0.558104461559
+        dvar[1] = 0.744139282078
+        dvar[2] = -0.277124827421
+        dvar[3] = 0.0630348927707
+        dvar[5] = -0.164323721928
+        dvar[6] = 0.164323721928
+    if "alfven" in SHORT:
+        dvar[4] = 0.480384461415
+        dvar[7] = 0.877058019307
+    if "fast" in SHORT:
+        dvar[0] = 0.476395427447
+        dvar[1] = 0.635193903263
+        dvar[2] = -0.102965815319
+        dvar[3] = -0.316873207561
+        dvar[5] = 0.359559114174
+        dvar[6] = -0.359559114174
+
+dvar *= amp
+
+for m, res in enumerate(RES):
+    dump = pyharm.load_dump("mhd_{}_{}_{}_end.phdf".format(DIM, SHORT, res))
+
+    X1 = dump['x']
+    X2 = dump['y']
+    X3 = dump['z']
+
+    dvar_code = []
+    dvar_code.append(dump['RHO'] - var0[0])
+    dvar_code.append(dump['UU'] - var0[1])
+    dvar_code.append(dump['U1'] - var0[2])
+    dvar_code.append(dump['U2'] - var0[3])
+    dvar_code.append(dump['U3'] - var0[4])
+    try:
+        dvar_code.append(dump['B1'] - var0[5])
+        dvar_code.append(dump['B2'] - var0[6])
+        dvar_code.append(dump['B3'] - var0[7])
+    except IOError:
+        NVAR = 5
+
+    dvar_sol = []
+    L1.append([])
+    for k in range(NVAR):
+      dvar_sol.append(np.real(dvar[k])*np.cos(k1*X1 + k2*X2 + k3*X3))
+      L1[m].append(np.mean(np.fabs(dvar_code[k] - dvar_sol[k])))
+
+      fig = plt.figure(figsize=(5,5))
+      ax = fig.add_subplot(1,1,1)
+      pplt.plot_xz(ax, dump, dvar_code[k] - dvar_sol[k], native=True, window=[0,1,0,1])
+      plt.savefig("compare_{}_{}_{}_{}.png".format(VARS[k], DIM, SHORT, res))
+      
+
+# MEASURE CONVERGENCE
+L1 = np.array(L1)
+powerfits = [0.,]*NVAR
+fail = 0
+for k in range(NVAR):
+    if abs(dvar[k]) != 0.:
+        powerfits[k] = np.polyfit(np.log(RES), np.log(L1[:,k]), 1)[0]
+
+        print("Power fit {}: {} {}".format(VARS[k], powerfits[k], L1[:,k]))
+        # These bounds were chosen heuristically: fast u2/u3 converge fast
+        if powerfits[k] > -1.9 or ("entropy" not in SHORT and powerfits[k] < -2.1):
+            # Allow entropy wave to converge fast, otherwise everything is ~2
+            fail = 1
+
+# MAKE PLOTS
+fig = plt.figure(figsize=(5,5))
+
+ax = fig.add_subplot(1,1,1)
+for k in range(NVAR):
+    if abs(dvar[k]) != 0.:
+        ax.plot(RES, L1[:,k], marker='s', label=VARS[k])
+
+norm = L1[0,0]*RES[0]*RES[0]
+if norm < 1e-4:
+    norm = L1[0,3]*RES[0]*RES[0]
+xmin = RES[0]/2.
+xmax = RES[-1]*2.
+ax.plot([xmin, xmax], norm*np.asarray([xmin, xmax])**-2., color='k', linestyle='--', label='N^-2')
+
+plt.xscale('log', base=2); plt.yscale('log')
+plt.xlim([RES[0]/np.sqrt(2.), RES[-1]*np.sqrt(2.)])
+plt.xlabel('N'); plt.ylabel('L1')
+plt.title("{}".format(LONG))
+plt.legend(loc=1)
+plt.savefig("convergence_modes_{}_{}.png".format(DIM,SHORT))
+
+exit(fail)
diff --git a/tests/mhdmodes_smr/run.sh b/tests/mhdmodes_smr/run.sh
new file mode 100755
index 00000000..33b8e71b
--- /dev/null
+++ b/tests/mhdmodes_smr/run.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+set -euo pipefail
+
+BASE=../..
+
+# This test confirms that all of the many transport options in KHARMA
+# can converge when modeling each of the basic linearized modes:
+# slow, fast, and alfven waves
+
+# It tests:
+# 1. different reconstructions WENO vs linear
+# 2. different drivers, simple, KHARMA, & ImEx
+# 3. different B field transports, Flux-CT and Face-CT
+
+exit_code=0
+
+conv_2d() {
+    IFS=',' read -ra RES_LIST <<< "$ALL_RES"
+    for res in "${RES_LIST[@]}"
+    do
+      # 3x3 & refine center
+      block=$(($res / 3))
+      $BASE/run.sh -i $BASE/pars/smr/mhdmodes_refined.par debug/verbose=2 mhdmodes/dir=3 \
+                      parthenon/output0/single_precision_output=false parthenon/output0/dt=100. \
+                      parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
+                      parthenon/meshblock/nx1=$block parthenon/meshblock/nx2=$block parthenon/meshblock/nx3=1 \
+                      $2 >log_2d_${1}_${res}.txt 2>&1
+        mv mhdmodes.out0.00000.phdf mhd_2d_${1}_${res}_start.phdf
+        mv mhdmodes.out0.final.phdf mhd_2d_${1}_${res}_end.phdf
+    done
+    check_code=0
+    python check.py $ALL_RES "$3" $1  2d || check_code=$?
+    if [[ $check_code != 0 ]]; then
+        echo MHD modes test \"$3\" FAIL: $check_code
+        exit_code=1
+    else
+        echo MHD modes test \"$3\" success
+    fi
+}
+
+# Normal MHD modes, 2D, defaults
+ALL_RES="24,48,96,192"
+conv_2d slow mhdmodes/nmode=1 "slow mode in 2D"
+conv_2d alfven mhdmodes/nmode=2 "Alfven mode in 2D"
+conv_2d fast mhdmodes/nmode=3 "fast mode in 2D"
+
+exit $exit_code

From f774756f9683c7b2a53909efb20e2598bf145ab7 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Wed, 11 Oct 2023 18:20:42 -0600
Subject: [PATCH 45/47] Add & repair conducting atmo so it fails properly

---
 scripts/ci/cpu.yml                   | 2 +-
 tests/conducting_atmosphere/check.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/ci/cpu.yml b/scripts/ci/cpu.yml
index 272dbbd7..ca0299d5 100644
--- a/scripts/ci/cpu.yml
+++ b/scripts/ci/cpu.yml
@@ -72,5 +72,5 @@ tests:
     - ./run.sh
   parallel:
     matrix:
-      - TEST: [all_pars, anisotropic_conduction, bondi, bondi_viscous, bz_monopole,
+      - TEST: [all_pars, anisotropic_conduction, bondi, bondi_viscous, bz_monopole, conducting_atmosphere,
                emhdmodes, mhdmodes, noh, regrid, reinit, resize, restart, tilt_init, torus_sanity]
diff --git a/tests/conducting_atmosphere/check.py b/tests/conducting_atmosphere/check.py
index 655e0489..dd0e3f42 100644
--- a/tests/conducting_atmosphere/check.py
+++ b/tests/conducting_atmosphere/check.py
@@ -39,7 +39,7 @@
         
         t   = dfile['t'][()]
         gam = dfile['header/gam'][()]
-        higher_order_terms = dfile['header/higher_order_terms'][()].decode('UTF-8')
+        higher_order_terms = dfile['header/higher_order_terms']
 
         # compute q
         if higher_order_terms=="TRUE":

From 5eef03a75d2498c7c311f63885d7c9801c112e29 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 12 Oct 2023 11:56:31 -0500
Subject: [PATCH 46/47] Converge on last tests

Viscous Bondi:
use outflow conditions, solve ODE from outer edge value
(converges to fixed condition, but not at 2o, probably due to
limited runtime)

Conducting atmo:
actually return check result, converge by fixing detection of
higher-order terms.
---
 .gitignore                                    |   3 +-
 pars/emhd/bondi_viscous.par                   |   2 +-
 tests/bondi_viscous/check.py                  |  26 +-
 tests/bondi_viscous/check_ih3d.py             | 459 ++++++++++++++++++
 tests/bondi_viscous/run.sh                    |   2 +-
 tests/conducting_atmosphere/check.py          |   3 +-
 .../conducting_atmosphere.par                 |   2 +
 tests/conducting_atmosphere/run.sh            |   2 +
 8 files changed, 481 insertions(+), 18 deletions(-)
 create mode 100644 tests/bondi_viscous/check_ih3d.py

diff --git a/.gitignore b/.gitignore
index c2551928..2e87022a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,10 +16,11 @@ logs/
 *.rhdf
 *.xdmf
 *.hst
-# Archival files
+# Archival/test files
 kharma_parsed_*.par
 log_*.txt
 bondi_analytic_*.txt
+atmosphere_soln_*.txt
 
 # Editor documents
 .project
diff --git a/pars/emhd/bondi_viscous.par b/pars/emhd/bondi_viscous.par
index 70d3b7ca..6e2d1330 100644
--- a/pars/emhd/bondi_viscous.par
+++ b/pars/emhd/bondi_viscous.par
@@ -70,7 +70,7 @@ check_inflow_outer_x1 = false
 check_inflow_inner_x1 = true
 # Force outflow bounds for EMHD vars
 outflow_EMHD_inner_x1 = true
-outflow_EMHD_outer_x1 = false
+outflow_EMHD_outer_x1 = true
 
 <debug>
 verbose = 1
diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index 5e4e4245..0a905ff7 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -43,23 +43,22 @@
         state = bondi.get_bondi_fluid_state(mdot, rc, gam, dump.grid)
         state.params['eta'] = eta
         state.params['tau'] = tau
-        dP_check = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau)
-        state.cache['dP'] = dP_check
 
-        # compute dP
+        # compute dP either by adjusting dump to include higher-order terms,
+        # or the computed state to exclude them
         if dump['emhd/higher_order_terms']:
             print("Res: "+str(res)+"; higher order terms enabled")
             Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
-            nu_emhd  = eta / dump['rho']
-            dP       = dump['dP'] * np.sqrt(nu_emhd * dump['rho'] * Theta / tau)
+            # we're directly modifying the cache here. Inadvisable
+            dump.cache['dP'] = dump['dP'] * np.sqrt(eta * Theta / tau)
         else:
-            dP = dump['dP']
             Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
-            nu_emhd  = eta / dump['rho']
-            dP_check /= np.sqrt(nu_emhd * dump['rho'] * Theta / tau)
+            state.cache['dP'] /= np.sqrt(eta * Theta / tau)
+
+        state.cache['dP'] = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau, start=np.mean(dump['dP'][-1]))
 
         # Plot
-        for var in ['rho', 'u', 'B1']:
+        for var in ['rho', 'u', 'B1', 'dP']:
             fig = plt.figure(figsize=(6,6))
             ax = fig.add_subplot(1,1,1)
             pplt.plot_diff_xz(ax, dump, state, var)
@@ -67,16 +66,17 @@
             plt.close(fig)
 
         radius = np.mean(dump.grid['r'], axis=(1,2))
-        plt.plot(radius, dP_check, label='dP ODE')
-        plt.plot(radius, np.mean(dP, axis=(1,2)), label='dP code')
+        plt.plot(radius, state['dP'], label='dP ODE')
+        plt.plot(radius, np.mean(dump['dP'], axis=(1,2)), label='dP code')
+        plt.plot(radius, np.mean(dump['dP'], axis=(1,2)) - state['dP'], label='dP diff')
         plt.legend()
-        plt.savefig('compare_dP_{}.png'.format(res))
+        plt.savefig('compare_dP1d_{}.png'.format(res))
         plt.close()
 
         # compute L1 norm
         L1[r,0] = np.mean(np.fabs(dump['rho'] - state['rho']))
         L1[r,1] = np.mean(np.fabs(dump['u']  - state['u']))
-        L1[r,2] = np.mean(np.fabs(dP  - dP_check))
+        L1[r,2] = np.mean(np.fabs(np.mean(dump['dP'], axis=(1,2))  - state['dP'])[2:])
         L1[r,3] = np.mean(np.fabs(dump['B1']  - state['B1']))
 
     # MEASURE CONVERGENCE
diff --git a/tests/bondi_viscous/check_ih3d.py b/tests/bondi_viscous/check_ih3d.py
new file mode 100644
index 00000000..517999cd
--- /dev/null
+++ b/tests/bondi_viscous/check_ih3d.py
@@ -0,0 +1,459 @@
+import numpy as np
+import os, sys, h5py, glob
+from scipy import optimize
+from scipy.interpolate import splrep, splev
+from scipy.integrate import odeint, solve_ivp
+import matplotlib
+matplotlib.use('Agg')
+from matplotlib import pyplot as plt
+
+import pyharm
+import pyharm.io.gridfile as gridfile
+
+# Global dictionaries to store (i) fluid dump (ii) grid (iii) analytic solution data
+dump = {}
+grid = {}
+soln = {}
+
+############### GEOMETRY FUNCTIONS ###############
+# Compute gcov in BL from (r,th,phi) read from grid file
+def gcov_bl():
+    grid['gcov_bl'] = np.zeros_like(grid['gcov'])
+
+    DD = 1 - 2./grid['r'] + grid['a']**2/grid['r']**2
+    mu = 1 + grid['a']**2 * np.cos(grid['th'])**2 / grid['r']**2
+
+    grid['gcov_bl'][Ellipsis,0,0] = -(1 - 2./(grid['r'] * mu))
+    grid['gcov_bl'][Ellipsis,0,3] = -2 * grid['a'] * np.sin(grid['th'])**2 / (grid['r'] * mu)
+    grid['gcov_bl'][Ellipsis,3,0] = grid['gcov_bl'][Ellipsis,0,3]
+    grid['gcov_bl'][Ellipsis,1,1] = mu / DD
+    grid['gcov_bl'][Ellipsis,2,2] = grid['r']**2 * mu
+    grid['gcov_bl'][Ellipsis,3,3] = grid['r']**2 * np.sin(grid['th'])**2 * (1 + grid['a']**2/grid['r']**2 \
+                                    + 2 * grid['a']**2 * np.sin(grid['th'])**2 / (grid['r']**3 * mu))
+
+# Compute gcov in KS from (r,th,phi) read from grid file
+def gcov_ks():
+    grid['gcov_ks'] = np.zeros_like(grid['gcov'])
+    sigma = grid['r']**2 + (grid['a']**2 * np.cos(grid['th'])**2)
+    
+    grid['gcov_ks'][Ellipsis,0,0] = -1 + 2*grid['r']/sigma
+    grid['gcov_ks'][Ellipsis,0,1] = 2*grid['r']/sigma
+    grid['gcov_ks'][Ellipsis,0,3] = -(2*grid['a']*grid['r']*np.sin(grid['th'])**2)/sigma
+    grid['gcov_ks'][Ellipsis,1,0] = 2*grid['r']/sigma
+    grid['gcov_ks'][Ellipsis,1,1] = 1 + 2*grid['r']/sigma
+    grid['gcov_ks'][Ellipsis,1,3] = -grid['a']*np.sin(grid['th'])**2 * (1 + 2*grid['r']/sigma)
+    grid['gcov_ks'][Ellipsis,2,2] = sigma
+    grid['gcov_ks'][Ellipsis,3,0] = -(2*grid['a']*grid['r']*np.sin(grid['th'])**2)/sigma
+    grid['gcov_ks'][Ellipsis,3,1] = -grid['a']*np.sin(grid['th'])**2 * (1 + 2*grid['r']/sigma)
+    grid['gcov_ks'][Ellipsis,3,3] = np.sin(grid['th'])**2 * (sigma + grid['a']**2*np.sin(grid['th'])**2 * (1 + 2*grid['r']/sigma))
+
+# Compute gcov in KS from gcon_ks
+def gcon_ks():
+    grid['gcon_ks'] = np.linalg.inv(grid['gcov_ks'])
+
+# Compute transformation matrix from KS -> MKS / FMKS (for covariant indices)
+def dxdX_KS_to_FMKS():
+    dxdX = np.zeros((grid['n1'], grid['n2'], 4, 4), dtype=float)
+
+    if grid['metric'] == 'mks':
+        dxdX[Ellipsis,0,0] = dxdX[Ellipsis,3,3] = 1
+        dxdX[Ellipsis,1,1] = np.exp(grid['x1'])
+        dxdX[Ellipsis,2,2] = np.pi + (1 - grid['hslope']) * np.pi * np.cos(2 * np.pi * grid['x2'])
+    
+    else:
+        theta_g = (np.pi * grid['x2']) + ((1 - grid['hslope'])/2) * (np.sin(2*np.pi*grid['x2']))
+        theta_j = grid['D'] * (2*grid['x2'] - 1) * (1 + (((2 * grid['x2'] - 1) / grid['poly_xt'])**grid['poly_alpha']) / (1 + grid['poly_alpha'])) + np.pi/2
+        derv_theta_g = np.pi + (1 - grid['hslope']) * np.pi * np.cos(2 * np.pi * grid['x2'])
+        derv_theta_j = (2 * grid['poly_alpha'] * grid['D'] * (2 * grid['x2'] - 1)*((2 * grid['x2'] - 1) / grid['poly_xt'])**(grid['poly_alpha'] - 1)) / (grid['poly_xt'] * (grid['poly_alpha'] + 1)) + 2 * grid['D'] * (1 + (((2 * grid['x2'] - 1) / grid['poly_xt'])**grid['poly_alpha']) / (grid['poly_alpha'] + 1))
+        dxdX[Ellipsis,0,0] = dxdX[Ellipsis,3,3] = 1
+        dxdX[Ellipsis,1,1] = np.exp(grid['x1'])
+        dxdX[Ellipsis,2,1] = -grid['mks_smooth'] * np.exp(-grid['mks_smooth'] * grid['Dx1'][:,np.newaxis]) * (theta_j - theta_g)
+        dxdX[Ellipsis,2,2] = derv_theta_g + np.exp(-grid['mks_smooth'] * grid['Dx1'][:,np.newaxis]) * (derv_theta_j - derv_theta_g)
+
+    return dxdX
+
+# Compute transformation matrix from MKS / FMKS -> KS (for covariant indices)
+def dxdX_FMKS_to_KS():
+    return (np.linalg.inv(dxdX_KS_to_FMKS()))
+
+# Compute quantities manually from x^mu
+def bl_coords_from_x(grid_temp):
+    grid_temp['r']  = np.exp(grid_temp['x1'])
+    grid_temp['th'] = np.pi * grid_temp['x2'] + ((1 - grid['hslope'])/2.) * np.sin(2*np.pi*grid_temp['x2'])
+
+def gcov_ks_from_x(grid_temp):
+    bl_coords_from_x(grid_temp)
+
+    grid_temp['gcov_ks'] = np.zeros_like(grid['gcov'])
+    sigma = grid_temp['r']**2 + (grid_temp['a']**2 * np.cos(grid_temp['th'])**2)
+    
+    grid_temp['gcov_ks'][Ellipsis,0,0] = -1 + 2*grid_temp['r']/sigma
+    grid_temp['gcov_ks'][Ellipsis,0,1] = 2*grid_temp['r']/sigma
+    grid_temp['gcov_ks'][Ellipsis,0,3] = -(2*grid_temp['a']*grid_temp['r']*np.sin(grid_temp['th'])**2)/sigma
+    grid_temp['gcov_ks'][Ellipsis,1,0] = 2*grid_temp['r']/sigma
+    grid_temp['gcov_ks'][Ellipsis,1,1] = 1 + 2*grid_temp['r']/sigma
+    grid_temp['gcov_ks'][Ellipsis,1,3] = -grid_temp['a']*np.sin(grid_temp['th'])**2 * (1 + 2*grid_temp['r']/sigma)
+    grid_temp['gcov_ks'][Ellipsis,2,2] = sigma
+    grid_temp['gcov_ks'][Ellipsis,3,0] = -(2*grid_temp['a']*grid_temp['r']*np.sin(grid_temp['th'])**2)/sigma
+    grid_temp['gcov_ks'][Ellipsis,3,1] = -grid_temp['a']*np.sin(grid_temp['th'])**2 * (1 + 2*grid_temp['r']/sigma)
+    grid_temp['gcov_ks'][Ellipsis,3,3] = np.sin(grid_temp['th'])**2 * (sigma + grid_temp['a']**2*np.sin(grid_temp['th'])**2 * (1 + 2*grid_temp['r']/sigma))
+
+def dxdX_KS_to_MKS_from_x(grid_temp):
+    dxdX = np.zeros((grid['n1'], grid['n2'], 4, 4), dtype=float)
+
+    dxdX[Ellipsis,0,0] = dxdX[Ellipsis,3,3] = 1
+    dxdX[Ellipsis,1,1] = np.exp(grid_temp['x1'])
+    dxdX[Ellipsis,2,2] = np.pi + (1 - grid['hslope']) * np.pi * np.cos(2 * np.pi * grid_temp['x2'])
+
+    return dxdX
+
+def dxdX_MKS_to_KS_from_x(grid_temp):
+    dxdX = dxdX_KS_to_MKS_from_x(grid_temp)
+    return np.linalg.inv(dxdX)
+
+def gcov_from_x(grid_temp):
+    gcov_ks_from_x(grid_temp)
+    dxdX = dxdX_KS_to_MKS_from_x(grid_temp)
+
+    grid_temp['gcov'] = np.einsum('ijbn,ijmb->ijmn', dxdX, \
+                        np.einsum('ijam,ijab->ijmb', dxdX, grid_temp['gcov_ks']))
+
+    grid_temp['gcon'] = np.linalg.inv(grid_temp['gcov'])
+
+# Compute the Christoffel symbols in MKS/MMKS (like iharm3d/pyharm)
+def conn_func(sigma, alpha, beta):
+    delta = 1.e-5
+    conn = np.zeros((grid['n1'], grid['n2'], 4, 4, 4), dtype=float)
+    tmp  = np.zeros_like(conn)
+
+    x = np.zeros((grid['n1'], grid['n2'], 4), dtype=float)
+    x[Ellipsis,1] = grid['x1']
+    x[Ellipsis,2] = grid['x2']
+    x[Ellipsis,3] = grid['x3']
+
+    grid_h = {}; grid_h['a'] = grid['a']
+    grid_l = {}; grid_l['a'] = grid['a']
+
+    for mu in range(4):
+        xh = np.copy(x)
+        xl = np.copy(x)
+        xh[Ellipsis,mu] += delta
+        xl[Ellipsis,mu] -= delta
+
+        grid_h['x1'] = xh[Ellipsis,1]
+        grid_h['x2'] = xh[Ellipsis,2]
+        grid_l['x1'] = xl[Ellipsis,1]
+        grid_l['x2'] = xl[Ellipsis,2]
+
+        gcov_from_x(grid_h)
+        gcov_from_x(grid_l)
+
+        for lam in range(4):
+            for nu in range(4):
+                conn[Ellipsis,lam,nu,mu] = (grid_h['gcov'][Ellipsis,lam,nu] - grid_l['gcov'][Ellipsis,lam,nu]) \
+                                            / (xh[Ellipsis,mu] - xl[Ellipsis,mu])
+
+    for lam in range(4):
+        for nu in range(4):
+            for mu in range(4):
+                tmp[Ellipsis,lam,nu,mu] = 0.5 * (conn[Ellipsis,nu,lam,mu] + conn[Ellipsis,mu,lam,nu] \
+                - conn[Ellipsis,mu,nu,lam])
+
+    for lam in range(4):
+        for nu in range(4):
+            for mu in range(4):
+                conn[Ellipsis,lam,nu,mu] = 0
+                for kap in range(4):
+                    conn[Ellipsis,lam,nu,mu] += grid['gcon'][Ellipsis,lam,kap] * tmp[Ellipsis,kap,nu,mu]
+
+    return conn[Ellipsis,sigma,alpha,beta]
+
+
+
+############### READ DATA ###############
+# Read dump and/or grid file
+def load_data(dumpsdir, dumpno, read_grid=False):
+    dfile = pyharm.load_dump(dumpsdir+'/emhd_2d_8_end_emhd2d_weno.phdf')
+    dump['rc']    = dfile['rs']
+    dump['mdot']  = dfile['mdot']
+    dump['gam']   = dfile['gam']
+    dump['rEH']   = dfile['r_eh']
+
+    if read_grid:
+        gridfile.write_grid(dfile.grid, 'grid.h5')
+        gfile  = h5py.File(os.path.join(dumpsdir, 'grid.h5'), 'r')
+        grid['r']   = np.squeeze(gfile['r'])
+        grid['th']  = np.squeeze(gfile['th'])
+        grid['phi'] = np.squeeze(gfile['phi'])
+
+        grid['rEH_ind'] = np.argmin(np.fabs(grid['r'][:,0]-dump['rEH']) > 0.)
+        grid['n1']  = dfile['n1']
+        grid['n2']  = dfile['n2']
+        grid['n3']  = dfile['n3']
+        grid['dx1'] = dfile['dx1']
+        grid['dx2'] = dfile['dx2']
+
+        grid['x1'] = np.squeeze(gfile['X1'])
+        grid['x2'] = np.squeeze(gfile['X2'])
+        grid['x3'] = np.squeeze(gfile['X3'])
+
+        grid['metric'] = dfile['coordinates'].lower()
+        grid['gcov']   = np.squeeze(gfile['gcov'])
+        grid['gcon']   = np.squeeze(gfile['gcon'])
+        grid['gdet']   = np.squeeze(gfile['gdet'])
+        grid['lapse']  = np.squeeze(gfile['lapse'])
+
+        if grid['metric']=='mks' or grid['metric']=='mmks':
+            grid['a'] = dfile['a']
+            grid['rEH'] = dfile['r_eh']
+            grid['hslope'] = dfile['hslope']
+
+        if grid['metric']=='MMKS':
+            grid['mks_smooth'] = dfile['mks_smooth']
+            grid['poly_alpha'] = dfile['poly_alpha']
+            grid['poly_xt'] = dfile['poly_xt']
+            grid['D'] = (np.pi*grid['poly_xt']**grid['poly_alpha'])/(2*grid['poly_xt']**grid['poly_alpha']+(2/(1+grid['poly_alpha'])))
+
+        gfile.close()
+
+    del dfile
+
+
+
+############### COMPUTE ANALYTIC IDEAL BONDI SOLUTION ###############
+# Nonlinear expression to solve for T
+def T_func(T, r, C3, C4, N):
+    return (1 + (1 + N/2)*T)**2 * (1 - 2./r + (C4**2/(r**4 * T**N))) - C3
+
+# Obtain primitives for Bondi problem
+def get_prim():
+    N    = 2./ (dump['gam'] - 1)
+    rc   = dump['rc']
+    mdot = dump['mdot']
+    vc   = np.sqrt(1. / (2 * rc))
+    csc  = np.sqrt(vc**2 / (1 - 3*vc**2))
+    Tc   = 2*N*csc**2 / ((N + 2)*(2 - N*csc**2))
+    C4   = Tc**(N/2)*vc*rc**2
+    C3   = (1 + (1 + N/2)*Tc)**2 * (1 - 2./rc + vc**2)
+
+    # Root find T
+    T = np.zeros_like(grid['r'][:,0])
+    for index, r in enumerate(grid['r'][:,0]):
+        T0       = Tc
+        sol      = optimize.root(T_func, [T0], args=(r, C3, C4, N))
+        T[index] = sol.x[0]
+        if (sol.success!=True):
+            print("Not converged at r = {:.2f}", r)
+
+    # Compute remaining fluid variables
+    soln['T'] = T
+    soln['v'] = -C4 / (T**(N/2) * grid['r'][:,0]**2)
+    soln['K'] = (4*np.pi*C4 / mdot) ** (2./N)
+
+    soln['rho'] = soln['K']**(-N/2) * T**(N/2)
+    soln['u']   = (N/2) * soln['K']**(-N/2) * T**(N/2 + 1)
+
+    soln['mdot'] = mdot
+    soln['N']    = N
+    soln['rc']   = rc
+
+# Compute four vectors
+def compute_ub():
+
+    # We have u^r in BL. We need to convert this to ucon in MKS
+    # First compute u^t in BL
+    ucon_bl = np.zeros((grid['n1'], grid['n2'], 4), dtype=float)
+    AA = grid['gcov_bl'][Ellipsis,0,0]
+    BB = 2. * grid['gcov_bl'][Ellipsis,0,1]*soln['v'][:,None]
+    CC = 1. + grid['gcov_bl'][Ellipsis,1,1]*soln['v'][:,None]**2
+    
+    discr = BB*BB - 4.*AA*CC
+    ucon_bl[Ellipsis,0] = (-BB - np.sqrt(discr)) / (2.*AA)
+    ucon_bl[Ellipsis,1] = soln['v'][:,None]
+
+    # Convert ucon(Bl) to ucon(KS)
+    dxdX = np.zeros((grid['n1'], grid['n2'], 4, 4), dtype=float)
+    dxdX[Ellipsis,0,0] = dxdX[Ellipsis,1,1] = dxdX[Ellipsis,2,2] = dxdX[Ellipsis,3,3] = 1.
+    dxdX[Ellipsis,0,1] = 2*grid['r'] / (grid['r']**2 - 2.*grid['r'] + grid['a']**2)
+    dxdX[Ellipsis,3,1] = grid['a']/(grid['r']**2 - 2.*grid['r'] + grid['a']**2)
+
+    ucon_ks = np.zeros((grid['n1'], grid['n2'], 4), dtype=float)
+    for mu in range(4):
+        for nu in range(4):
+            ucon_ks[Ellipsis,mu] += dxdX[Ellipsis,mu,nu] * ucon_bl[Ellipsis,nu]
+
+    # Convert ucon(KS) to ucon(MKS/FMKS)
+    ucon_mks = np.zeros((grid['n1'], grid['n2'], 4), dtype=float)
+    dxdX = dxdX_FMKS_to_KS()
+    for mu in range(4):
+        for nu in range(4):
+            ucon_mks[Ellipsis,mu] += dxdX[Ellipsis,mu,nu] * ucon_ks[Ellipsis,nu]
+
+    ucov_mks = np.einsum('ijmn,ijn->ijm', grid['gcov'], ucon_mks)
+
+    # Compute velocity primitives
+    utilde = np.zeros((grid['n1'], grid['n2'], 3), dtype=float)
+
+    alpha = 1./np.sqrt(-grid['gcon'][Ellipsis,0,0])
+    beta  = np.zeros((grid['n1'], grid['n2'], 3), dtype=float)
+    beta[Ellipsis,0] = alpha * alpha * grid['gcon'][Ellipsis,0,1]
+    beta[Ellipsis,1] = alpha * alpha * grid['gcon'][Ellipsis,0,2]
+    beta[Ellipsis,2] = alpha * alpha * grid['gcon'][Ellipsis,0,3]
+    gamma = ucon_mks[Ellipsis,0] * alpha
+
+    utilde[Ellipsis,0] = ucon_mks[Ellipsis,1] + beta[Ellipsis,0]*gamma/alpha
+    utilde[Ellipsis,1] = ucon_mks[Ellipsis,2] + beta[Ellipsis,1]*gamma/alpha
+    utilde[Ellipsis,2] = ucon_mks[Ellipsis,3] + beta[Ellipsis,2]*gamma/alpha
+
+    # compute magnetic 4-vector
+    B = np.zeros((grid['n1'], grid['n2'], 3), dtype=float)
+    # radial magnetic field (B1 = 1/r^3)
+    B[Ellipsis,0] = 1. / grid['r']**3
+
+    gti    = grid['gcon'][Ellipsis,0,1:4]
+    gij    = grid['gcov'][Ellipsis,1:4,1:4]
+    beta_i = np.einsum('ijs,ij->ijs', gti, grid['lapse']**2)
+    qsq    = np.einsum('ijy,ijy->ij', np.einsum('ijxy,ijx->ijy', gij, utilde), utilde)
+    gamma  = np.sqrt(1 + qsq)
+    ui     = utilde - np.einsum('ijs,ij->ijs', beta_i, gamma/grid['lapse'])
+    ut     = gamma/grid['lapse']
+
+    bt = np.einsum('ijm,ijm->ij', np.einsum('ijsm,ijs->ijm', grid['gcov'][Ellipsis,1:4,:], B), ucon_mks)
+    bi = (B + np.einsum('ijs,ij->ijs', ucon_mks[Ellipsis,1:4], bt)) / ucon_mks[Ellipsis,0,None]
+    bcon_mks = np.append(bt[Ellipsis,None], bi, axis=2)
+    bcov_mks = np.einsum('ijmn,ijn->ijm', grid['gcov'], bcon_mks)
+
+    soln['ucon'] = ucon_mks[:,0,:]
+    soln['ucov'] = ucov_mks[:,0,:]
+    soln['bcon'] = bcon_mks[:,0,:]
+    soln['bcov'] = bcov_mks[:,0,:]
+    soln['bsq']  = np.einsum('im,im->i', soln['bcon'], soln['bcov'])
+
+
+
+############### ADDITIONAL FUNCTIONS FOR VISCOUS BONDI FLOW ###############
+# Compute Braginskii pressure anisotropy value
+def compute_dP0():
+    grid['dx'] = [grid['dx1'],grid['dx2']]
+
+    soln['tau'] = 30.
+    soln['eta'] = 0.01
+    nu_emhd     = soln['eta'] / soln['rho']
+    dP0         = np.zeros(grid['n1'], dtype=float)
+
+    # Compute derivatives of 4-velocity
+    ducovDx1 = np.zeros((grid['n1'], 4), dtype=float) # Represents d_x1(u_\mu)
+    delta = 1.e-5
+    x1    = grid['x1'][:,0]
+    x1h   = x1 + delta
+    x1l   = x1 - delta
+
+    ucovt_splrep = splrep(x1, soln['ucov'][:,0])
+    ucovr_splrep = splrep(x1, soln['ucov'][:,1])
+    ucovt_h = splev(x1h, ucovt_splrep) 
+    ucovt_l = splev(x1l, ucovt_splrep) 
+    ucovr_h = splev(x1h, ucovr_splrep) 
+    ucovr_l = splev(x1l, ucovr_splrep)
+
+    ducovDx1[:,0] = (ucovt_h - ucovt_l) / (x1h - x1l)
+    ducovDx1[:,1] = (ucovr_h - ucovr_l) / (x1h - x1l)
+
+    for mu in range(4):
+        for nu in range(4):
+            if mu == 1:
+                dP0 += 3*soln['rho']*nu_emhd * (soln['bcon'][:,mu]*soln['bcon'][:,nu] / soln['bsq']) \
+                        * ducovDx1[:,nu]
+                
+            gamma_term_1 = np.zeros((grid['n1'], grid['n2']), dtype=float)
+            for sigma in range(4):
+                gamma_term_1 += (3*soln['rho']*nu_emhd * (soln['bcon'][:,mu]*soln['bcon'][:,nu] / soln['bsq']))[:,None] \
+                                * (-conn_func(sigma, mu, nu) * soln['ucov'][:,None,sigma])
+
+            dP0 += np.mean(gamma_term_1, axis=1)
+
+        derv_term_2 = np.zeros((grid['n1'], grid['n2']), dtype=float)
+        if mu == 1:
+            for sigma in range(4):
+                derv_term_2 += (-soln['rho']*nu_emhd * ducovDx1[:,sigma])[:,None] \
+                                * grid['gcon'][Ellipsis,mu,sigma]
+
+        dP0 += np.mean(derv_term_2, axis=1)
+
+        gamma_term_2 = np.zeros((grid['n1'], grid['n2']), dtype=float)
+        for sigma in range(4):
+            for delta in range(4):
+                    gamma_term_2 += (soln['rho']*nu_emhd)[:,None] * (conn_func(sigma, mu, delta) * grid['gcon'][Ellipsis,mu,delta] * soln['ucov'][:,None,sigma])
+
+        dP0 += np.mean(gamma_term_2, axis=1)
+
+    # r_start = 3.0
+    # r_start_ind = np.argmin(np.fabs(grid['r'][:,0] - r_start))
+    # plt.semilogx(grid['r'][r_start_ind:,0], dP0[r_start_ind:])
+    # plt.savefig('dP0_analytic.png')
+    # plt.close()
+    
+    return dP0
+
+# Compute the coefficient of the second term on the RHS of the evolution equation of dP
+def compute_rhs_second_term():
+    nu_emhd = soln['eta'] / soln['rho']
+    P = soln['u'] * (dump['gam'] - 1.)
+
+    # compute derivative
+    delta = 1.e-5
+    x1    = grid['x1'][:,0]
+    x1h   = x1 + delta
+    x1l   = x1 - delta
+    expr  = np.log(soln['tau'] / (soln['rho'] * nu_emhd * P))
+    expr_splrep = splrep(x1, expr)
+    expr_h = splev(x1h, expr_splrep)
+    expr_l = splev(x1l, expr_splrep)
+
+    coeff  = 0.5 * (expr_h - expr_l) / (x1h - x1l)
+
+    return coeff
+
+# Return derivative d(dP)/dx1. Refer Equation (36) in grim paper
+def ddP_dX1(dP, x1, ur_splrep, dP0_splrep, coeff_splrep):
+    tau   = soln['tau']
+    ur    = splev(x1, ur_splrep)
+    dP0   = splev(x1, dP0_splrep)
+    coeff = splev(x1, coeff_splrep)
+
+    derivative = -((dP - dP0) / (tau * ur)) - (dP * coeff)
+    return derivative
+
+
+############### MAIN IS MAIN ###############
+if __name__=='__main__':
+    dumpsdir = '.'
+
+
+    load_data(dumpsdir, 0, True)
+    get_prim()
+    gcov_bl()
+    gcov_ks()
+    gcon_ks()
+    compute_ub()
+
+    dP0   = compute_dP0()
+    coeff = compute_rhs_second_term()
+
+    x1 = grid['x1'][:,0]
+    ur_splrep    = splrep(x1, soln['ucon'][:,1])
+    dP0_splrep   = splrep(x1, dP0)
+    coeff_splrep = splrep(x1, coeff)
+
+    solution = odeint(ddP_dX1, 0., x1[::-1], args=(ur_splrep, dP0_splrep, coeff_splrep))
+    np.savetxt('bondi_analytic_{}.txt'.format(grid['n1']), np.asarray([soln['rho'], soln['u'], soln['v'], solution[::-1,0]]).T)
+    
+    r_start = 3.0
+    r_start_ind = np.argmin(np.fabs(grid['r'][:,0] - r_start))
+    plt.plot(grid['r'][r_start_ind:,0], solution[::-1,0][r_start_ind:], label='dP ODE check')
+    plt.plot(grid['r'][r_start_ind:,0], dP0[r_start_ind:], label='dP0 ODE check')
+    plt.plot(grid['r'][r_start_ind:,0], soln['ucon'][:,1][r_start_ind:], label='ur')
+    #plt.plot(grid['r'][r_start_ind:,0], coeff[r_start_ind:], label='coeff')
+    plt.legend()
+    plt.savefig('dP_soln.png')
+    plt.close()
+    
\ No newline at end of file
diff --git a/tests/bondi_viscous/run.sh b/tests/bondi_viscous/run.sh
index 8c0f882a..a50a00bc 100755
--- a/tests/bondi_viscous/run.sh
+++ b/tests/bondi_viscous/run.sh
@@ -13,7 +13,7 @@ conv_2d() {
     do
         # Four blocks
         half=$(( $res / 2 ))
-        $BASE/run.sh -i $BASE/pars/emhd/bondi_viscous.par debug/verbose=1 parthenon/time/tlim=400 \
+        $BASE/run.sh -i $BASE/pars/emhd/bondi_viscous.par debug/verbose=1 \
             parthenon/mesh/nx1=$res parthenon/mesh/nx2=$res parthenon/mesh/nx3=1 \
             parthenon/meshblock/nx1=$half parthenon/meshblock/nx2=$half parthenon/meshblock/nx3=1 \
             b_field/implicit=false $2 >log_${1}_${res}.txt 2>&1
diff --git a/tests/conducting_atmosphere/check.py b/tests/conducting_atmosphere/check.py
index dd0e3f42..7352b830 100644
--- a/tests/conducting_atmosphere/check.py
+++ b/tests/conducting_atmosphere/check.py
@@ -39,10 +39,9 @@
         
         t   = dfile['t'][()]
         gam = dfile['header/gam'][()]
-        higher_order_terms = dfile['header/higher_order_terms']
 
         # compute q
-        if higher_order_terms=="TRUE":
+        if dfile['header/higher_order_terms']:
             print("Res: "+str(res)+"; higher order terms enabled")
             tau      = 10.
             kappa    = 0.1
diff --git a/tests/conducting_atmosphere/conducting_atmosphere.par b/tests/conducting_atmosphere/conducting_atmosphere.par
index 523b5fc5..ef23da7b 100644
--- a/tests/conducting_atmosphere/conducting_atmosphere.par
+++ b/tests/conducting_atmosphere/conducting_atmosphere.par
@@ -34,6 +34,8 @@ inner_x1 = dirichlet
 outer_x1 = dirichlet
 check_inflow_inner_x1 = false
 check_inflow_outer_x1 = false
+outflow_EMHD_inner_x1 = true
+outflow_EMHD_outer_x1 = true
 
 <parthenon/time>
 tlim       = 400.
diff --git a/tests/conducting_atmosphere/run.sh b/tests/conducting_atmosphere/run.sh
index 1f8b82f8..f1e801d9 100755
--- a/tests/conducting_atmosphere/run.sh
+++ b/tests/conducting_atmosphere/run.sh
@@ -36,3 +36,5 @@ conv_2d() {
 
 ALL_RES="64,128,256,512"
 conv_2d emhd2d_weno driver/reconstruction=weno5 "in 2D, WENO5"
+
+exit $exit_code

From 9750787c3ff9adff53d735c9d6c42287aa95ace6 Mon Sep 17 00:00:00 2001
From: Ben Prather <bprathr2@illinois.edu>
Date: Thu, 12 Oct 2023 12:05:58 -0500
Subject: [PATCH 47/47] Fix viscous Bondi test script for hypothetical case
 without higher order terms

---
 tests/bondi_viscous/check.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tests/bondi_viscous/check.py b/tests/bondi_viscous/check.py
index 0a905ff7..50419312 100644
--- a/tests/bondi_viscous/check.py
+++ b/tests/bondi_viscous/check.py
@@ -51,11 +51,11 @@
             Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
             # we're directly modifying the cache here. Inadvisable
             dump.cache['dP'] = dump['dP'] * np.sqrt(eta * Theta / tau)
+            state.cache['dP'] = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau, start=np.mean(dump['dP'][-1]))
         else:
             Theta    = (dump['gam'] - 1.) * dump['u'] / dump['rho']
-            state.cache['dP'] /= np.sqrt(eta * Theta / tau)
-
-        state.cache['dP'] = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau, start=np.mean(dump['dP'][-1]))
+            state.cache['dP'] = bondi.compute_dP(mdot, rc, gam, dump.grid, eta, tau, start=np.mean(dump['dP'][-1])) / \
+                                np.sqrt(eta * Theta / tau)
 
         # Plot
         for var in ['rho', 'u', 'B1', 'dP']: