From b5baf675379cddd745c1432e7a3a2fc6177c4489 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sun, 3 Oct 2021 13:54:15 +0200
Subject: [PATCH 01/42] Add simple anisotropic step function test

---
 .gitlab-ci.yml                                |   1 +
 tst/regression/CMakeLists.txt                 |   5 +-
 .../aniso_therm_cond_ring_conv/README.md      |   2 +-
 .../aniso_therm_cond_step/__init__.py         |   0
 .../aniso_therm_cond_step.py                  | 136 ++++++++++++++++++
 5 files changed, 142 insertions(+), 2 deletions(-)
 create mode 100644 tst/regression/test_suites/aniso_therm_cond_step/__init__.py
 create mode 100644 tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 57fb0a58..dcf7996e 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -115,4 +115,5 @@ cuda-regression:
       - build-cuda/tst/regression/outputs/cluster_hse/analytic_comparison.png
       - build-cuda/tst/regression/outputs/cluster_tabular_cooling/convergence.png
       - build-cuda/tst/regression/outputs/aniso_therm_cond_ring_conv/ring_convergence.png
+      - build-cuda/tst/regression/outputs/aniso_therm_cond_step/cond.png
       - build-cuda/tst/regression/outputs/field_loop/field_loop.png
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index 6e8bf180..6f1beedc 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -22,9 +22,12 @@ setup_test_serial("cluster_tabular_cooling" "--driver ${PROJECT_BINARY_DIR}/bin/
 
   setup_test_serial("aniso_therm_cond_ring_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
-  
+ 
   setup_test_serial("aniso_therm_cond_ring_multid" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
+ 
+  setup_test_serial("aniso_therm_cond_step" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 6" "convergence")
 
   setup_test_serial("field_loop" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/field_loop.in --num_steps 9" "convergence")
diff --git a/tst/regression/test_suites/aniso_therm_cond_ring_conv/README.md b/tst/regression/test_suites/aniso_therm_cond_ring_conv/README.md
index bfd8226d..8222148a 100644
--- a/tst/regression/test_suites/aniso_therm_cond_ring_conv/README.md
+++ b/tst/regression/test_suites/aniso_therm_cond_ring_conv/README.md
@@ -2,5 +2,5 @@
 
 Executes 2D ring diffusion problem following Sharma & Hammett (2007) and calculates convergence rate.
 Errors are calculated based on comparison to steady state solution.
-Convergence for this problem is not great, but matches other numbers reported in literate, e.g., Balsara, Tilley & Howk MNRAS (2008) doi:10.1111/j.1365-2966.2008.13085.x .
+Convergence for this problem is not great, but matches other numbers reported in literature, e.g., Balsara, Tilley & Howk MNRAS (2008) doi:10.1111/j.1365-2966.2008.13085.x .
 Also the minium temperature is checked to ensure that limiting is working (i.e., the temperature is nowhere below the initial background temperature).
diff --git a/tst/regression/test_suites/aniso_therm_cond_step/__init__.py b/tst/regression/test_suites/aniso_therm_cond_step/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py b/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
new file mode 100644
index 00000000..6e81e80a
--- /dev/null
+++ b/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
@@ -0,0 +1,136 @@
+
+#========================================================================================
+# AthenaPK - a performance portable block structured AMR MHD code
+# Copyright (c) 2020-2021, Athena Parthenon Collaboration. All rights reserved.
+# Licensed under the 3-clause BSD License, see LICENSE file for details
+#========================================================================================
+# (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved.
+#
+# This program was produced under U.S. Government contract 89233218CNA000001 for Los
+# Alamos National Laboratory (LANL), which is operated by Triad National Security, LLC
+# for the U.S. Department of Energy/National Nuclear Security Administration. All rights
+# in the program are reserved by Triad National Security, LLC, and the U.S. Department
+# of Energy/National Nuclear Security Administration. The Government is granted for
+# itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
+# license in this material to reproduce, prepare derivative works, distribute copies to
+# the public, perform publicly and display publicly, and to permit others to do so.
+#========================================================================================
+
+# Modules
+import math
+import numpy as np
+import matplotlib
+matplotlib.use('agg')
+import matplotlib.pylab as plt
+import sys
+import os
+import itertools
+import utils.test_case
+
+# To prevent littering up imported folders with .pyc files or __pycache__ folder
+sys.dont_write_bytecode = True
+
+res_cfgs = [50, 100]
+field_cfgs = [ "aligned", "perp", "angle" ]
+
+all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
+        
+def get_outname(all_cfg):
+    resolution, field_name = all_cfg  # one (res, field_cfg) entry of all_cfgs
+    return f"{resolution}_{field_name}"
+
+def get_B(field_cfg):
+    """Return (Bx, By) for field_cfg; raise ValueError for an unknown name."""
+    if field_cfg == "aligned":
+        Bx = 1.0
+        By = 0.0
+    elif field_cfg == "perp":
+        Bx = 0.0
+        By = 1.0
+    elif field_cfg == "angle":
+        Bx = 1/np.sqrt(2)
+        By = 1/np.sqrt(2)
+    else:
+        raise ValueError("Unknown field_cfg: %s" % field_cfg)
+    return Bx, By
+        
+
+class TestCase(utils.test_case.TestCaseAbs):
+    def Prepare(self,parameters, step):
+        """Set driver_cmd_line_args for the 1-based `step` (index into all_cfgs)."""
+        assert parameters.num_ranks <= 4, "Use <= 4 ranks for diffusion test."
+
+        res, field_cfg = all_cfgs[step - 1]
+
+        Bx , By = get_B(field_cfg)
+
+        outname = get_outname(all_cfgs[step - 1])
+
+        parameters.driver_cmd_line_args = [
+            'parthenon/mesh/nx1=%d' % res,
+            'parthenon/meshblock/nx1=25',
+            'parthenon/mesh/nx2=%d' % res,
+            'parthenon/meshblock/nx2=25',
+            'parthenon/mesh/nx3=1',
+            'parthenon/meshblock/nx3=1',
+            'problem/diffusion/Bx=%f' % Bx,
+            'problem/diffusion/By=%f' % By,
+            'problem/diffusion/iprob=0',
+            'parthenon/output0/id=%s' % outname,
+            'hydro/gamma=2.0'
+            ]
+
+        return parameters
+
+    def Analyse(self,parameters):
+        """Plot prim[:,4] along x for every run next to get_ref and save cond.png."""
+        sys.path.insert(1, parameters.parthenon_path + '/scripts/python/packages/parthenon_tools/parthenon_tools')
+
+        try:
+            import phdf
+        except ModuleNotFoundError:
+            print("Couldn't find module to read Parthenon hdf5 files.")
+            return False
+
+        num_rows = len(res_cfgs)
+        fig, p = plt.subplots(num_rows, 1,
+            sharex=True, sharey=True)
+        # One subplot row per resolution; all field configurations share that row.
+        for step in range(len(all_cfgs)):
+            outname = get_outname(all_cfgs[step])
+            data_filename = f"{parameters.output_path}/parthenon.{outname}.00001.phdf"
+            data_file = phdf.phdf(data_filename)
+            prim = data_file.Get("prim")
+            zz, yy,xx = data_file.GetVolumeLocations()
+            mask = yy == yy[0]
+            temp = prim[:,4][mask]
+            x = xx[mask]
+            res, field_cfg = all_cfgs[step]
+            row = res_cfgs.index(res)
+            p[row].plot(x,temp,'x',label=field_cfg)
+        # Reference curve: erf-based expression below, or a 10/12 step when b_x == 0.
+        def get_ref(x,
+                    u0 = 11.0,        # mean temp
+                    delta_u = 2.0,    # temp difference
+                    chi = 0.01,       # diffusivity coefficient
+                    t = 10,           # time
+                    b_x = 1.0         # magnetic field
+                ):
+            if b_x == 0:
+                return 10.0 if x < 0.0 else 12.0
+            else:
+                return u0 + delta_u/2*(math.erf((x + 0)/np.sqrt(4*chi*t*b_x**2)) -
+                                       math.erf((x - 1)/np.sqrt(4*chi*t*b_x**2)) -
+                                       math.erf((x + 1)/np.sqrt(4*chi*t*b_x**2)))
+        x = np.linspace(-1,1,200)
+        for field_cfg in field_cfgs:
+            Bx, By = get_B(field_cfg)
+            for i in range(num_rows):
+                y = [get_ref(x_, b_x = Bx) for x_ in x]
+                p[i].plot(x, y, '-', color='black', alpha=0.5)
+
+        # NOTE(review): only a figure is written; no numerical comparison is made here.
+        fig.savefig(os.path.join(parameters.output_path, "cond.png"),
+                    bbox_inches='tight')
+
+        return True

From e9ac8f6b72921234df366a8ec7d561cb46532224 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sun, 3 Oct 2021 14:45:08 +0200
Subject: [PATCH 02/42] Separate FillDerived and EstimateTimestep in driver in
 prep for STS list

---
 src/hydro/hydro_driver.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 2340586a..abe029e0 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -305,10 +305,15 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
     auto fill_derived =
         tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
+  }
 
-    if (stage == integrator->nstages) {
-      auto new_dt = tl.AddTask(
-          fill_derived, parthenon::Update::EstimateTimestep<MeshData<Real>>, mu0.get());
+  if (stage == integrator->nstages) {
+    TaskRegion &tr = tc.AddRegion(num_partitions);
+    for (int i = 0; i < num_partitions; i++) {
+      auto &tl = tr[i];
+      auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto new_dt = tl.AddTask(none, parthenon::Update::EstimateTimestep<MeshData<Real>>,
+                               mu0.get());
     }
   }
 

From 2b30fcadb4f57cad213acd5b33383cfa4e89dc18 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sun, 3 Oct 2021 20:44:47 +0200
Subject: [PATCH 03/42] Add diffflux parameter

---
 src/hydro/diffusion/conduction.cpp |  2 +-
 src/hydro/hydro.cpp                | 17 +++++++++++++++--
 src/main.hpp                       |  1 +
 3 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 1413dba4..28986507 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -33,7 +33,7 @@ Real ThermalDiffusivity::Get(const Real pres, const Real rho, const Real gradTma
     // In practice: fac * \rho * c_{s,isoth}^3 * (gradT / gradTmag)
     // where T is calculated based on p/rho in the code.
     // Thus, everything is in code units and no conversion is required.
-    // The \rho above is cancelled as we convert the condution above to a diffusvity here.
+    // The rho above cancels as we convert the conduction above to a diffusivity here.
     const Real chi_sat =
         0.34 * std::pow(pres / rho, 3.0 / 2.0) / (gradTmag + TINY_NUMBER);
     return std::min(chi_spitzer, chi_sat);
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 608bad4e..0b73faa0 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -437,10 +437,22 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
 
     } else if (conduction_str != "none") {
       PARTHENON_FAIL(
-          "AthenaPK unknown conduction method. Options are: spitzer, thermal_diff");
+          "AthenaPK unknown conduction method. Options are: none, spitzer, thermal_diff");
     }
     pkg->AddParam<>("conduction", conduction);
 
+    auto diffflux_str = pin->GetOrAddString("diffusion", "flux", "none");
+    auto diffflux = DiffFlux::none;
+    if (diffflux_str == "unsplit") {
+      diffflux = DiffFlux::unsplit;
+    } else if (diffflux_str == "rkl2") {
+      diffflux = DiffFlux::rkl2;
+    } else if (diffflux_str != "none") {
+      PARTHENON_FAIL("AthenaPK unknown method for diffusive fluxes. Options are: none, "
+                     "unsplit, rkl2");
+    }
+    pkg->AddParam<>("diffflux", diffflux);
+
     if (fluid == Fluid::euler) {
       AdiabaticHydroEOS eos(pfloor, dfloor, efloor, gamma);
       pkg->AddParam<>("eos", eos);
@@ -911,7 +923,8 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshData<Real>> &md) {
   }
 
   const auto &conduction = pkg->Param<Conduction>("conduction");
-  if (conduction != Conduction::none) {
+  const auto &diffflux = pkg->Param<DiffFlux>("diffflux");
+  if ((diffflux == DiffFlux::unsplit) && (conduction != Conduction::none)) {
     ThermalFluxAniso(md.get());
   }
 
diff --git a/src/main.hpp b/src/main.hpp
index d7e9a209..fe8dcc85 100644
--- a/src/main.hpp
+++ b/src/main.hpp
@@ -36,6 +36,7 @@ enum class Integrator { undefined, rk1, rk2, vl2, rk3 };
 enum class Fluid { undefined, euler, glmmhd };
 enum class Cooling { none, tabular };
 enum class Conduction { none, spitzer, thermal_diff };
+enum class DiffFlux { none, unsplit, rkl2 };
 
 enum class Hst { idx, ekin, emag, divb };
 

From db6325527499a59f6ddeee495fb628dd9a1ff609 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sun, 3 Oct 2021 23:44:28 +0200
Subject: [PATCH 04/42] Add RKL2 STS task list

---
 inputs/diffusion.in               |   1 +
 src/CMakeLists.txt                |   1 +
 src/hydro/diffusion/diffusion.hpp |   3 +
 src/hydro/hydro.cpp               |   5 +-
 src/hydro/hydro_driver.cpp        | 318 +++++++++++++++++++++++++++++-
 5 files changed, 316 insertions(+), 12 deletions(-)

diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index a1a3f723..3147ffa1 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -59,6 +59,7 @@ reconstruction = dc
 gamma = 2.0
 
 <diffusion>
+flux = unsplit
 conduction = thermal_diff
 thermal_diff_coeff_code = 0.01
 
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2066812d..1aa20356 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -7,6 +7,7 @@ add_executable(
         eos/adiabatic_glmmhd.cpp
         units.hpp
         eos/adiabatic_hydro.cpp
+        hydro/diffusion/diffusion.cpp
         hydro/diffusion/diffusion.hpp
         hydro/diffusion/conduction.cpp
         hydro/hydro_driver.cpp
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index 273464d7..69ab6e2d 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -88,4 +88,7 @@ Real EstimateConductionTimestep(MeshData<Real> *md);
 //! Calculate anisotropic thermal conduction
 void ThermalFluxAniso(MeshData<Real> *md);
 
+// Calculate all diffusion fluxes, i.e., update the .flux views in md
+TaskStatus CalcDiffFluxes(StateDescriptor *hydro_pkg, MeshData<Real> *md);
+
 #endif //  HYDRO_DIFFUSION_DIFFUSION_HPP_
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 0b73faa0..f9e24172 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -922,10 +922,9 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshData<Real>> &md) {
         });
   }
 
-  const auto &conduction = pkg->Param<Conduction>("conduction");
   const auto &diffflux = pkg->Param<DiffFlux>("diffflux");
-  if ((diffflux == DiffFlux::unsplit) && (conduction != Conduction::none)) {
-    ThermalFluxAniso(md.get());
+  if (diffflux == DiffFlux::unsplit) {
+    CalcDiffFluxes(pkg.get(), md.get());
   }
 
   return TaskStatus::complete;
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index abe029e0..d77b8c43 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -12,6 +12,7 @@
 
 // Parthenon headers
 #include "bvals/cc/bvals_cc_in_one.hpp"
+#include "diffusion/diffusion.hpp"
 #include "interface/update.hpp"
 #include "parthenon/driver.hpp"
 #include "parthenon/package.hpp"
@@ -77,10 +78,309 @@ TaskStatus CalculateGlobalMinDx(MeshData<Real> *md) {
   return TaskStatus::complete;
 }
 
+// Zeroes the X1 (and, for ndim >= 2 / 3, the X2 / X3) fluxes of all Independent vars.
+TaskStatus ResetFluxes(MeshData<Real> *md) {
+  auto pmb = md->GetBlockData(0)->GetBlockPointer();
+  IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
+  IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
+  IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
+
+  // In principle, we'd only need to pack Metadata::WithFluxes here, but we
+  // mirror the usage elsewhere in the code so that the packs are already cached.
+  std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
+  auto cons_pack = md->PackVariablesAndFluxes(flags_ind);
+
+  const int ndim = pmb->pmy_mesh->ndim;
+  // Using separate loops for each dim as the launch overhead should be hidden
+  // by enough work over the entire pack and it avoids conditionals in the kernels.
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "ResetFluxes X1", parthenon::DevExecSpace(), 0,
+      cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
+      ib.e + 1,
+      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+        auto &cons = cons_pack(b);
+        cons.flux(X1DIR, v, k, j, i) = 0.0;
+      });
+
+  if (ndim < 2) {
+    return TaskStatus::complete;
+  }
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "ResetFluxes X2", parthenon::DevExecSpace(), 0,
+      cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e + 1,
+      ib.s, ib.e,
+      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+        auto &cons = cons_pack(b);
+        cons.flux(X2DIR, v, k, j, i) = 0.0;
+      });
+
+  if (ndim < 3) {
+    return TaskStatus::complete;
+  }
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "ResetFluxes X3", parthenon::DevExecSpace(), 0,
+      cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e + 1, jb.s, jb.e,
+      ib.s, ib.e,
+      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+        auto &cons = cons_pack(b);
+        cons.flux(X3DIR, v, k, j, i) = 0.0;
+      });
+  return TaskStatus::complete;
+}
+
+// First RKL2 stage: Yjm1 = Y0 + mu_tilde_1 * tau * MY0 (i.e., Y_1) and Yjm2 = Y0.
+// s_rkl (number of stages) sets mu_tilde_1; tau is the step size multiplying MY0.
+TaskStatus RKL2StepFirst(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
+                         MeshData<Real> *md_Yjm2, MeshData<Real> *md_MY0, const int s_rkl,
+                         const Real tau) {
+  auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer();
+  IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
+  IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
+  IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
+
+  // Compute coefficients. Meyer+2014 eq. (18)
+  Real mu_tilde_1 = 4. / 3. /
+                    (static_cast<Real>(s_rkl) * static_cast<Real>(s_rkl) +
+                     static_cast<Real>(s_rkl) - 2.);
+
+  // In principle, we'd only need to pack Metadata::WithFluxes here, but
+  // choosing to mirror other use in the code so that the packs are already cached.
+  std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
+  auto Y0 = md_Y0->PackVariablesAndFluxes(flags_ind);
+  auto Yjm1 = md_Yjm1->PackVariablesAndFluxes(flags_ind);
+  auto Yjm2 = md_Yjm2->PackVariablesAndFluxes(flags_ind);
+  auto MY0 = md_MY0->PackVariablesAndFluxes(flags_ind);
+
+  // One fused kernel over all blocks, packed variables, and interior cells.
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "RKL first step", parthenon::DevExecSpace(), 0,
+      Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+        Yjm1(b, v, k, j, i) =
+            Y0(b, v, k, j, i) + mu_tilde_1 * tau * MY0(b, v, k, j, i); // Y_1
+        Yjm2(b, v, k, j, i) = Y0(b, v, k, j, i);                       // Y_0
+      });
+
+  return TaskStatus::complete;
+}
+
+// RKL2 stage j >= 2: Yj = mu_j * Yjm1 + nu_j * Yjm2 + (1 - mu_j - nu_j) * Y0 +
+// mu_tilde_j * tau * MYjm1 + gamma_tilde_j * tau * MY0; then Yjm2 <- Yjm1, Yjm1 <- Yj.
+TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
+                         MeshData<Real> *md_Yjm2, MeshData<Real> *md_MY0,
+                         MeshData<Real> *md_MYjm1, const Real mu_j, const Real nu_j,
+                         const Real mu_tilde_j, const Real gamma_tilde_j,
+                         const Real tau) {
+  auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer();
+  IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
+  IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
+  IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
+
+  // In principle, we'd only need to pack Metadata::WithFluxes here, but
+  // choosing to mirror other use in the code so that the packs are already cached.
+  std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
+  auto Y0 = md_Y0->PackVariablesAndFluxes(flags_ind);
+  auto Yjm1 = md_Yjm1->PackVariablesAndFluxes(flags_ind);
+  auto Yjm2 = md_Yjm2->PackVariablesAndFluxes(flags_ind);
+  auto MY0 = md_MY0->PackVariablesAndFluxes(flags_ind);
+  auto MYjm1 = md_MYjm1->PackVariablesAndFluxes(flags_ind);
+
+  // One fused kernel over all blocks, packed variables, and interior cells.
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "RKL other step", parthenon::DevExecSpace(), 0,
+      Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
+      KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
+        // First calc this step
+        const Real Yj = mu_j * Yjm1(b, v, k, j, i) + nu_j * Yjm2(b, v, k, j, i) +
+                        (1.0 - mu_j - nu_j) * Y0(b, v, k, j, i) +
+                        mu_tilde_j * tau * MYjm1(b, v, k, j, i) +
+                        gamma_tilde_j * tau * MY0(b, v, k, j, i);
+        // Then shuffle vars for next step
+        Yjm2(b, v, k, j, i) = Yjm1(b, v, k, j, i);
+        Yjm1(b, v, k, j, i) = Yj;
+      });
+
+  return TaskStatus::complete;
+}
+
+// Assumes that prim and cons are in sync initially.
+// Guarantees that prim and cons are in sync at the end.
+void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
+                 const Real tau, const int s_rkl) {
+  // Appends to *ptask_coll the regions of one RKL2 update: s_rkl stages with step tau.
+  auto hydro_pkg = blocks[0]->packages.Get("Hydro");
+
+  TaskID none(0);
+
+  TaskRegion &region_init = ptask_coll->AddRegion(blocks.size());
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &pmb = blocks[i];
+    auto &tl = region_init[i];
+    auto &u0 = pmb->meshblock_data.Get();
+    auto &u1 = pmb->meshblock_data.Get("u1");
+    // only need boundaries for Yjm1 (u1 here)
+    auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, u1.get(),
+                                 BoundaryCommSubset::all);
+
+    // Add extra registers. No-op for existing variables so it's safe to call every
+    // time.
+    // TODO(pgrete) this allocates all Variables, i.e., prim and cons vector, but only a
+    // subset is actually needed. Streamline to allocate only required vars.
+    pmb->meshblock_data.Add("MY0", u0);
+    pmb->meshblock_data.Add("Yjm2", u0);
+    pmb->meshblock_data.Add("MYjm1", u0);
+  }
+
+  const int num_partitions = pmesh->DefaultNumPartitions();
+  TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
+  for (int i = 0; i < num_partitions; i++) {
+    auto &tl = region_rkl2_step_init[i];
+    auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto &md_MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+    auto &md_Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &md_Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
+    // Reset flux arrays (not guaranteed to be zero)
+    auto reset_fluxes = tl.AddTask(none, ResetFluxes, mu0.get());
+
+    // Calculate the diffusive fluxes for Y0 (here u0) so that we can store the result
+    // as MY0 and reuse later (it is used in every substep).
+    auto hydro_diff_fluxes =
+        tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), mu0.get());
+
+    auto init_MY0 =
+        tl.AddTask(hydro_diff_fluxes, parthenon::Update::FluxDivergence<MeshData<Real>>,
+                   mu0.get(), md_MY0.get());
+
+    // Initialize Y0 and Y1 as the recursion relation needs data from the two
+    // preceding stages.
+    auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, mu0.get(), md_Yjm1.get(),
+                                      md_Yjm2.get(), md_MY0.get(), s_rkl, tau);
+
+    // update ghost cells of Y1 (as MY1 is calculated for each Y_j)
+    // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
+    auto send = tl.AddTask(rkl2_step_first,
+                           parthenon::cell_centered_bvars::SendBoundaryBuffers, md_Yjm1);
+    auto recv =
+        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md_Yjm1);
+    auto fill_from_bufs =
+        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md_Yjm1);
+  }
+
+  TaskRegion &region_clear_bnd = ptask_coll->AddRegion(blocks.size());
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &tl = region_clear_bnd[i];
+    auto &u1 = blocks[i]->meshblock_data.Get("u1");
+    auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
+                                       u1.get(), BoundaryCommSubset::all);
+  }
+  TaskRegion &region_cons_to_prim = ptask_coll->AddRegion(num_partitions);
+  for (int i = 0; i < num_partitions; i++) {
+    auto &tl = region_cons_to_prim[i];
+    auto &mu1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto fill_derived =
+        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu1.get());
+  }
+
+  // Compute coefficients. Meyer+2012 eq. (16)
+  Real b_j = 1. / 3.;
+  Real b_jm1 = 1. / 3.;
+  Real b_jm2 = 1. / 3.;
+  Real w1 = 4. / (static_cast<Real>(s_rkl) * static_cast<Real>(s_rkl) +
+                  static_cast<Real>(s_rkl) - 2.);
+  Real mu_j, nu_j, j, mu_tilde_j, gamma_tilde_j;
+
+  // RKL loop
+  for (int jj = 2; jj <= s_rkl; jj++) {
+    j = static_cast<Real>(jj);
+    b_j = (j * j + j - 2.0) / (2 * j * (j + 1.0));
+    mu_j = (2.0 * j - 1.0) / j * b_j / b_jm1;
+    nu_j = -(j - 1.0) / j * b_j / b_jm2;
+    mu_tilde_j = mu_j * w1;
+    gamma_tilde_j = -(1.0 - b_jm1) * mu_tilde_j; // -a_jm1*mu_tilde_j
+
+    TaskRegion &region_init_other = ptask_coll->AddRegion(blocks.size());
+    for (int i = 0; i < blocks.size(); i++) {
+      auto &pmb = blocks[i];
+      auto &tl = region_init_other[i];
+      auto &u0 = pmb->meshblock_data.Get();
+      auto &u1 = pmb->meshblock_data.Get("u1");
+      // only need boundaries for Yjm1 (u1 here)
+      auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, u1.get(),
+                                   BoundaryCommSubset::all);
+    }
+
+    TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
+    for (int i = 0; i < num_partitions; i++) {
+      auto &tl = region_rkl2_step_other[i];
+      auto &md_Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto &md_MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+      auto &md_Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &md_MYjm1 = pmesh->mesh_data.GetOrAdd("MYjm1", i);
+      auto &md_Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
+
+      // Reset flux arrays (not guaranteed to be zero)
+      auto reset_fluxes = tl.AddTask(none, ResetFluxes, md_Yjm1.get());
+
+      // Calculate the diffusive fluxes for Yjm1 (here u1)
+      auto hydro_diff_fluxes =
+          tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), md_Yjm1.get());
+      // Need to calc/stash flux div first, as Yjm1 is updated in the following task
+      auto calc_MYjm1 =
+          tl.AddTask(hydro_diff_fluxes, parthenon::Update::FluxDivergence<MeshData<Real>>,
+                     md_Yjm1.get(), md_MYjm1.get());
+
+      auto rkl2_step_other = tl.AddTask(
+          calc_MYjm1, RKL2StepOther, md_Y0.get(), md_Yjm1.get(), md_Yjm2.get(),
+          md_MY0.get(), md_MYjm1.get(), mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
+
+      // update ghost cells of Yjm1 (currently storing Yj)
+      // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
+      auto send = tl.AddTask(
+          rkl2_step_other, parthenon::cell_centered_bvars::SendBoundaryBuffers, md_Yjm1);
+      auto recv = tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers,
+                             md_Yjm1);
+      auto fill_from_bufs =
+          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md_Yjm1);
+    }
+    TaskRegion &region_clear_bnd_other = ptask_coll->AddRegion(blocks.size());
+    for (int i = 0; i < blocks.size(); i++) {
+      auto &tl = region_clear_bnd_other[i];
+      auto &u1 = blocks[i]->meshblock_data.Get("u1");
+      auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
+                                         u1.get(), BoundaryCommSubset::all);
+    }
+    TaskRegion &region_cons_to_prim_other = ptask_coll->AddRegion(num_partitions);
+    for (int i = 0; i < num_partitions; i++) {
+      auto &tl = region_cons_to_prim_other[i];
+      auto &mu1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto fill_derived =
+          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu1.get());
+    }
+
+    b_jm2 = b_jm1;
+    b_jm1 = b_j;
+  }
+
+  // copy final result back to u0
+  TaskRegion &region_copy_out = ptask_coll->AddRegion(blocks.size());
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &tl = region_copy_out[i];
+    auto &u0 = blocks[i]->meshblock_data.Get();
+    auto &u1 = blocks[i]->meshblock_data.Get("u1");
+    tl.AddTask(
+        none,
+        [](MeshBlockData<Real> *u0, MeshBlockData<Real> *u1) {
+          u0->Get("cons").data.DeepCopy(u1->Get("cons").data);
+          u0->Get("prim").data.DeepCopy(u1->Get("prim").data);
+          return TaskStatus::complete;
+        },
+        u0.get(), u1.get());
+  }
+}
+
 // See the advection.hpp declaration for a description of how this function gets called.
 TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
   TaskCollection tc;
-  const auto &stage_name = integrator->stage_name;
   auto hydro_pkg = blocks[0]->packages.Get("Hydro");
 
   TaskID none(0);
@@ -107,8 +407,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
   const int num_partitions = pmesh->DefaultNumPartitions();
 
   // Calculate hyperbolic divergence cleaning speed
-  // TODO(pgrete) Calculating mindx is only required after remeshing. Need to find a clean
-  // solution for this one-off global reduction.
+  // TODO(pgrete) Calculating mindx is only required after remeshing. Need to find a
+  // clean solution for this one-off global reduction.
   if (hydro_pkg->Param<bool>("calc_c_h") && (stage == 1)) {
     // need to make sure that there's only one region in order to MPI_reduce to work
     TaskRegion &single_task_region = tc.AddRegion(1);
@@ -123,10 +423,10 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
         hydro_pkg.get());
     // Adding one task for each partition. Not using a (new) single partition containing
     // all blocks here as this (default) split is also used for the following tasks and
-    // thus does not create an overhead (such as creating a new MeshBlockPack that is just
-    // used here). Given that all partitions are in one task list they'll be executed
-    // sequentially. Given that a par_reduce to a host var is blocking it's also save to
-    // store the variable in the Params for now.
+    // thus does not create an overhead (such as creating a new MeshBlockPack that is
+    // just used here). Given that all partitions are in one task list they'll be
+    // executed sequentially. Given that a par_reduce to a host var is blocking it's
+    // also save to store the variable in the Params for now.
     for (int i = 0; i < num_partitions; i++) {
       auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
       auto new_mindx = tl.AddTask(prev_task, CalculateGlobalMinDx, mu0.get());
@@ -218,8 +518,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     auto calc_flux = tl.AddTask(none, calc_flux_fun, mu0);
 
     // TODO(pgrete) figure out what to do about the sources from the first stage
-    // that are potentially disregarded when the (m)hd fluxes are corrected in the second
-    // stage.
+    // that are potentially disregarded when the (m)hd fluxes are corrected in the
+    // second stage.
     if (hydro_pkg->Param<bool>("first_order_flux_correct")) {
       auto *first_order_flux_correct_fun =
           hydro_pkg->Param<FirstOrderFluxCorrectFun_t *>("first_order_flux_correct_fun");

From a968fd82c2c0aaf598d7fae22b71d3867021c442 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Mon, 4 Oct 2021 00:57:53 +0200
Subject: [PATCH 05/42] Add calc of RKL2 stages

---
 src/hydro/diffusion/diffusion.cpp | 24 +++++++++
 src/hydro/hydro.cpp               |  6 ++-
 src/hydro/hydro_driver.cpp        | 89 ++++++++++++++++++++++++++++++-
 3 files changed, 117 insertions(+), 2 deletions(-)
 create mode 100644 src/hydro/diffusion/diffusion.cpp

diff --git a/src/hydro/diffusion/diffusion.cpp b/src/hydro/diffusion/diffusion.cpp
new file mode 100644
index 00000000..376dc6b2
--- /dev/null
+++ b/src/hydro/diffusion/diffusion.cpp
@@ -0,0 +1,24 @@
+//========================================================================================
+// AthenaPK - a performance portable block structured AMR astrophysical MHD code.
+// Copyright (c) 2021, Athena-Parthenon Collaboration. All rights reserved.
+// Licensed under the 3-clause BSD License, see LICENSE file for details
+//========================================================================================
+//! \file diffusion.cpp
+//! \brief
+
+// Parthenon headers
+#include <parthenon/package.hpp>
+
+// AthenaPK headers
+#include "../../main.hpp"
+#include "diffusion.hpp"
+
+using namespace parthenon::package::prelude;
+
+TaskStatus CalcDiffFluxes(StateDescriptor *hydro_pkg, MeshData<Real> *md) {
+  const auto &conduction = hydro_pkg->Param<Conduction>("conduction");
+  if (conduction != Conduction::none) {
+    ThermalFluxAniso(md);
+  }
+  return TaskStatus::complete;
+}
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index f9e24172..7051a310 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -447,6 +447,8 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       diffflux = DiffFlux::unsplit;
     } else if (diffflux_str == "rkl2") {
       diffflux = DiffFlux::rkl2;
+      pkg->AddParam<Real>("dt_diff", 0.0); // diffusive timestep constraint
+      pkg->AddParam<int>("s_rkl", 0);      // number of steps in RKL2 super timestep
     } else if (diffflux_str != "none") {
       PARTHENON_FAIL("AthenaPK unknown method for diffusive fluxes. Options are: none, "
                      "unsplit, rkl2");
@@ -675,7 +677,9 @@ Real EstimateTimestep(MeshData<Real> *md) {
     min_dt = std::min(min_dt, tabular_cooling.EstimateTimeStep(md));
   }
 
-  if (hydro_pkg->Param<Conduction>("conduction") != Conduction::none) {
+  // For RKL2 STS, the diffusive timestep is calculated separately in the driver
+  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::unsplit) &&
+      (hydro_pkg->Param<Conduction>("conduction") != Conduction::none)) {
     min_dt = std::min(min_dt, EstimateConductionTimestep(md));
   }
 
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index d77b8c43..cdae9d02 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -206,9 +206,10 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
 // Assumes that prim and cons are in sync initially.
 // Guarantees that prim and cons are in sync at the end.
 void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
-                 const Real tau, const int s_rkl) {
+                 const Real tau) {
 
   auto hydro_pkg = blocks[0]->packages.Get("Hydro");
+  const auto s_rkl = hydro_pkg->Param<int>("s_rkl");
 
   TaskID none(0);
 
@@ -462,8 +463,90 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
         hydro_pkg.get());
   }
 
+  // Calc number of stages for RKL2 STS
+  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::rkl2) && (stage == 1)) {
+    // need to make sure that there's only one region in order to MPI_reduce to work
+    TaskRegion &single_task_region = tc.AddRegion(1);
+    auto &tl = single_task_region[0];
+    // First globally reset mindt_diff
+    auto prev_task = tl.AddTask(
+        none,
+        [](StateDescriptor *hydro_pkg) {
+          hydro_pkg->UpdateParam("dt_diff", std::numeric_limits<Real>::max());
+          return TaskStatus::complete;
+        },
+        hydro_pkg.get());
+    // Adding one task for each partition. Not using a (new) single partition containing
+    // all blocks here as this (default) split is also used for the following tasks and
+    // thus does not create an overhead (such as creating a new MeshBlockPack that is
+    // just used here). Given that all partitions are in one task list they'll be
+    // executed sequentially. Given that a par_reduce to a host var is blocking it's
+    // also save to store the variable in the Params for now.
+    for (int i = 0; i < num_partitions; i++) {
+      auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto new_mindt = tl.AddTask(
+          prev_task,
+          [](StateDescriptor *hydro_pkg, MeshData<Real> *md) {
+            const auto min_dt_diff = hydro_pkg->Param<Real>("dt_diff");
+            auto new_min_dt_diff = EstimateConductionTimestep(md);
+            if (new_min_dt_diff < min_dt_diff) {
+              hydro_pkg->UpdateParam("dt_diff", new_min_dt_diff);
+            }
+            return TaskStatus::complete;
+          },
+          hydro_pkg.get(), mu0.get());
+      prev_task = new_mindt;
+    }
+    auto reduce_mindt = prev_task;
+#ifdef MPI_PARALLEL
+    reduce_mindt = tl.AddTask(
+        prev_task,
+        [](StateDescriptor *hydro_pkg) {
+          Real mins[1];
+          mins[0] = hydro_pkg->Param<Real>("dt_diff");
+          PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 1, MPI_PARTHENON_REAL,
+                                            MPI_MIN, MPI_COMM_WORLD));
+
+          hydro_pkg->UpdateParam("dt_diff", mins[0]);
+          return TaskStatus::complete;
+        },
+        hydro_pkg.get());
+#endif
+    // Finally calc number of stages
+    auto update_c_h = tl.AddTask(
+        reduce_mindt,
+        [](StateDescriptor *hydro_pkg, const Real tau) {
+          auto mindt_diff = hydro_pkg->Param<Real>("dt_diff");
+
+          // get number of RKL steps
+          // eq (21) using half hyperbolic timestep due to Strang split
+          int s_rkl =
+              static_cast<int>(0.5 * (std::sqrt(9.0 + 16.0 * tau / mindt_diff) - 1.0)) +
+              1;
+          // ensure odd number of stages
+          if (s_rkl % 2 == 0) s_rkl += 1;
+
+          if (parthenon::Globals::my_rank == 0) {
+            const auto ratio = 2.0 * tau / mindt_diff;
+            std::cout << "STS ratio: " << ratio << " Taking " << s_rkl << " steps."
+                      << std::endl;
+            if (ratio > 100.0) {
+              std::cout << "WARNING: ratio is > 100. Proceed at own risk." << std::endl;
+            }
+          }
+
+          hydro_pkg->UpdateParam("s_rkl", s_rkl);
+          return TaskStatus::complete;
+        },
+        hydro_pkg.get(), 0.5 * tm.dt);
+  }
+
   // First add split sources before the main time integration
   if (stage == 1) {
+    const auto &diffflux = hydro_pkg->Param<DiffFlux>("diffflux");
+    if (diffflux == DiffFlux::rkl2) {
+      AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
+    }
     TaskRegion &strang_init_region = tc.AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = strang_init_region[i];
@@ -606,6 +689,10 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     auto fill_derived =
         tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
   }
+  const auto &diffflux = hydro_pkg->Param<DiffFlux>("diffflux");
+  if (diffflux == DiffFlux::rkl2 && stage == integrator->nstages) {
+    AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
+  }
 
   if (stage == integrator->nstages) {
     TaskRegion &tr = tc.AddRegion(num_partitions);

From 0e0986b499bbb472bdf36eef1ef8d614f4d13c6e Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Mon, 4 Oct 2021 09:44:53 -0400
Subject: [PATCH 06/42] Remove unnecessary register for rkl2

---
 src/hydro/hydro_driver.cpp | 25 ++++++++++---------------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index cdae9d02..c0b22318 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -165,9 +165,8 @@ TaskStatus RKL2StepFirst(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
 }
 
 TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
-                         MeshData<Real> *md_Yjm2, MeshData<Real> *md_MY0,
-                         MeshData<Real> *md_MYjm1, const Real mu_j, const Real nu_j,
-                         const Real mu_tilde_j, const Real gamma_tilde_j,
+                         MeshData<Real> *md_Yjm2, MeshData<Real> *md_MY0, const Real mu_j,
+                         const Real nu_j, const Real mu_tilde_j, const Real gamma_tilde_j,
                          const Real tau) {
   auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer();
   IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
@@ -181,7 +180,6 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
   auto Yjm1 = md_Yjm1->PackVariablesAndFluxes(flags_ind);
   auto Yjm2 = md_Yjm2->PackVariablesAndFluxes(flags_ind);
   auto MY0 = md_MY0->PackVariablesAndFluxes(flags_ind);
-  auto MYjm1 = md_MYjm1->PackVariablesAndFluxes(flags_ind);
 
   const int ndim = pmb->pmy_mesh->ndim;
   // Using separate loops for each dim as the launch overhead should be hidden
@@ -191,9 +189,12 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
       Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
       KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
         // First calc this step
+        const auto &coords = Yjm1.coords(b);
+        const Real MYjm1 =
+            parthenon::Update::FluxDivHelper(v, k, j, i, ndim, coords, Yjm1(b));
         const Real Yj = mu_j * Yjm1(b, v, k, j, i) + nu_j * Yjm2(b, v, k, j, i) +
                         (1.0 - mu_j - nu_j) * Y0(b, v, k, j, i) +
-                        mu_tilde_j * tau * MYjm1(b, v, k, j, i) +
+                        mu_tilde_j * tau * MYjm1 +
                         gamma_tilde_j * tau * MY0(b, v, k, j, i);
         // Then shuffle vars for next step
         Yjm2(b, v, k, j, i) = Yjm1(b, v, k, j, i);
@@ -229,7 +230,6 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     // subset is actually needed. Streamline to allocate only required vars.
     pmb->meshblock_data.Add("MY0", u0);
     pmb->meshblock_data.Add("Yjm2", u0);
-    pmb->meshblock_data.Add("MYjm1", u0);
   }
 
   const int num_partitions = pmesh->DefaultNumPartitions();
@@ -316,7 +316,6 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
       auto &md_Y0 = pmesh->mesh_data.GetOrAdd("base", i);
       auto &md_MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
       auto &md_Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
-      auto &md_MYjm1 = pmesh->mesh_data.GetOrAdd("MYjm1", i);
       auto &md_Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       // Reset flux arrays (not guaranteed to be zero)
@@ -325,14 +324,10 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
       // Calculate the diffusive fluxes for Yjm1 (here u1)
       auto hydro_diff_fluxes =
           tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), md_Yjm1.get());
-      // Need to calc/stash flux div first, as Yjm1 is updated in the following task
-      auto calc_MYjm1 =
-          tl.AddTask(hydro_diff_fluxes, parthenon::Update::FluxDivergence<MeshData<Real>>,
-                     md_Yjm1.get(), md_MYjm1.get());
-
-      auto rkl2_step_other = tl.AddTask(
-          calc_MYjm1, RKL2StepOther, md_Y0.get(), md_Yjm1.get(), md_Yjm2.get(),
-          md_MY0.get(), md_MYjm1.get(), mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
+
+      auto rkl2_step_other = tl.AddTask(hydro_diff_fluxes, RKL2StepOther, md_Y0.get(),
+                                        md_Yjm1.get(), md_Yjm2.get(), md_MY0.get(), mu_j,
+                                        nu_j, mu_tilde_j, gamma_tilde_j, tau);
 
       // update ghost cells of Yjm1 (currently storing Yj)
       // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars

From 6da6766c3ce327f02ed002e07e1679f79a809878 Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Mon, 4 Oct 2021 09:59:08 -0400
Subject: [PATCH 07/42] Adopt STS RKL2 variable naming

---
 src/hydro/hydro_driver.cpp | 72 +++++++++++++++++++-------------------
 1 file changed, 36 insertions(+), 36 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index c0b22318..1cc5d353 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -236,50 +236,50 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_rkl2_step_init[i];
-    auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
-    auto &md_MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-    auto &md_Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
-    auto &md_Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
+    auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
     // Reset flux arrays (not guaranteed to be zero)
-    auto reset_fluxes = tl.AddTask(none, ResetFluxes, mu0.get());
+    auto reset_fluxes = tl.AddTask(none, ResetFluxes, Y0.get());
 
     // Calculate the diffusive fluxes for Y0 (here u0) so that we can store the result
     // as MY0 and reuse later (it is used in every subsetp).
     auto hydro_diff_fluxes =
-        tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), mu0.get());
+        tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Y0.get());
 
     auto init_MY0 =
         tl.AddTask(hydro_diff_fluxes, parthenon::Update::FluxDivergence<MeshData<Real>>,
-                   mu0.get(), md_MY0.get());
+                   Y0.get(), MY0.get());
 
     // Initialize Y0 and Y1 and the recursion relation needs data from the two
     // preceeding stages.
-    auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, mu0.get(), md_Yjm1.get(),
-                                      md_Yjm2.get(), md_MY0.get(), s_rkl, tau);
+    auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, Y0.get(), Yjm1.get(),
+                                      Yjm2.get(), MY0.get(), s_rkl, tau);
 
     // update ghost cells of Y1 (as MY1 is calculated for each Y_j)
     // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
     auto send = tl.AddTask(rkl2_step_first,
-                           parthenon::cell_centered_bvars::SendBoundaryBuffers, md_Yjm1);
+                           parthenon::cell_centered_bvars::SendBoundaryBuffers, Yjm1);
     auto recv =
-        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, md_Yjm1);
+        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, Yjm1);
     auto fill_from_bufs =
-        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md_Yjm1);
+        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, Yjm1);
   }
 
   TaskRegion &region_clear_bnd = ptask_coll->AddRegion(blocks.size());
   for (int i = 0; i < blocks.size(); i++) {
     auto &tl = region_clear_bnd[i];
-    auto &u1 = blocks[i]->meshblock_data.Get("u1");
+    auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
     auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                       u1.get(), BoundaryCommSubset::all);
+                                       Yjm1.get(), BoundaryCommSubset::all);
   }
   TaskRegion &region_cons_to_prim = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_cons_to_prim[i];
-    auto &mu1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
     auto fill_derived =
-        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu1.get());
+        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, Yjm1.get());
   }
 
   // Compute coefficients. Meyer+2012 eq. (16)
@@ -313,44 +313,44 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_rkl2_step_other[i];
-      auto &md_Y0 = pmesh->mesh_data.GetOrAdd("base", i);
-      auto &md_MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-      auto &md_Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
-      auto &md_Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
+      auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       // Reset flux arrays (not guaranteed to be zero)
-      auto reset_fluxes = tl.AddTask(none, ResetFluxes, md_Yjm1.get());
+      auto reset_fluxes = tl.AddTask(none, ResetFluxes, Yjm1.get());
 
       // Calculate the diffusive fluxes for Yjm1 (here u1)
       auto hydro_diff_fluxes =
-          tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), md_Yjm1.get());
+          tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Yjm1.get());
 
-      auto rkl2_step_other = tl.AddTask(hydro_diff_fluxes, RKL2StepOther, md_Y0.get(),
-                                        md_Yjm1.get(), md_Yjm2.get(), md_MY0.get(), mu_j,
-                                        nu_j, mu_tilde_j, gamma_tilde_j, tau);
+      auto rkl2_step_other =
+          tl.AddTask(hydro_diff_fluxes, RKL2StepOther, Y0.get(), Yjm1.get(), Yjm2.get(),
+                     MY0.get(), mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
 
       // update ghost cells of Yjm1 (currently storing Yj)
       // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
-      auto send = tl.AddTask(
-          rkl2_step_other, parthenon::cell_centered_bvars::SendBoundaryBuffers, md_Yjm1);
-      auto recv = tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers,
-                             md_Yjm1);
+      auto send = tl.AddTask(rkl2_step_other,
+                             parthenon::cell_centered_bvars::SendBoundaryBuffers, Yjm1);
+      auto recv =
+          tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, Yjm1);
       auto fill_from_bufs =
-          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, md_Yjm1);
+          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, Yjm1);
     }
     TaskRegion &region_clear_bnd_other = ptask_coll->AddRegion(blocks.size());
     for (int i = 0; i < blocks.size(); i++) {
       auto &tl = region_clear_bnd_other[i];
-      auto &u1 = blocks[i]->meshblock_data.Get("u1");
+      auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
       auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                         u1.get(), BoundaryCommSubset::all);
+                                         Yjm1.get(), BoundaryCommSubset::all);
     }
     TaskRegion &region_cons_to_prim_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_cons_to_prim_other[i];
-      auto &mu1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
       auto fill_derived =
-          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu1.get());
+          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, Yjm1.get());
     }
 
     b_jm2 = b_jm1;
@@ -362,7 +362,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   for (int i = 0; i < blocks.size(); i++) {
     auto &tl = region_copy_out[i];
     auto &u0 = blocks[i]->meshblock_data.Get();
-    auto &u1 = blocks[i]->meshblock_data.Get("u1");
+    auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
     tl.AddTask(
         none,
         [](MeshBlockData<Real> *u0, MeshBlockData<Real> *u1) {
@@ -370,7 +370,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
           u0->Get("prim").data.DeepCopy(u1->Get("prim").data);
           return TaskStatus::complete;
         },
-        u0.get(), u1.get());
+        u0.get(), Yjm1.get());
   }
 }
 

From d25abb31d5bff62f1b0e83b13ebffc00c6bb5729 Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Mon, 4 Oct 2021 09:59:08 -0400
Subject: [PATCH 08/42] Move calc of dt_diff into PreStep

---
 src/hydro/hydro.cpp        | 29 +++++++++++-
 src/hydro/hydro.hpp        |  2 +
 src/hydro/hydro_driver.cpp | 95 +++++++-------------------------------
 src/main.cpp               |  1 +
 4 files changed, 46 insertions(+), 81 deletions(-)

diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 7051a310..02563f9d 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -51,6 +51,30 @@ parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin) {
   return packages;
 }
 
+// Using this per cycle function to populate various variables in
+// Params that require global reduction *and* need to be set/known when
+// the task list is constructed (versus when the task list is being executed).
+// TODO(next person touching this function): If more/separate feature are required
+// please separate concerns.
+void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm) {
+  auto hydro_pkg = pmesh->block_list[0]->packages.Get("Hydro");
+  const auto num_partitions = pmesh->DefaultNumPartitions();
+
+  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::rkl2)) {
+    auto dt_diff = std::numeric_limits<Real>::max();
+    for (auto i = 0; i < num_partitions; i++) {
+      auto &md = pmesh->mesh_data.GetOrAdd("base", i);
+
+      dt_diff = std::min(dt_diff, EstimateConductionTimestep(md.get()));
+    }
+#ifdef MPI_PARALLEL
+    PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, &dt_diff, 1, MPI_PARTHENON_REAL,
+                                      MPI_MIN, MPI_COMM_WORLD));
+#endif
+    hydro_pkg->UpdateParam("dt_diff", dt_diff);
+  }
+}
+
 template <Hst hst, int idx = -1>
 Real HydroHst(MeshData<Real> *md) {
   auto hydro_pkg = md->GetBlockData(0)->GetBlockPointer()->packages.Get("Hydro");
@@ -447,12 +471,13 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       diffflux = DiffFlux::unsplit;
     } else if (diffflux_str == "rkl2") {
       diffflux = DiffFlux::rkl2;
-      pkg->AddParam<Real>("dt_diff", 0.0); // diffusive timestep constraint
-      pkg->AddParam<int>("s_rkl", 0);      // number of steps in RKL2 super timestep
     } else if (diffflux_str != "none") {
       PARTHENON_FAIL("AthenaPK unknown method for diffusive fluxes. Options are: none, "
                      "unsplit, rkl2");
     }
+    if (diffflux != DiffFlux::none) {
+      pkg->AddParam<Real>("dt_diff", 0.0); // diffusive timestep constraint
+    }
     pkg->AddParam<>("diffflux", diffflux);
 
     if (fluid == Fluid::euler) {
diff --git a/src/hydro/hydro.hpp b/src/hydro/hydro.hpp
index 0967d12e..ebc1b246 100644
--- a/src/hydro/hydro.hpp
+++ b/src/hydro/hydro.hpp
@@ -16,6 +16,8 @@ using namespace parthenon::package::prelude;
 namespace Hydro {
 
 parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin);
+void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin,
+                               const parthenon::SimTime &tm);
 std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin);
 
 template <Fluid fluid>
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 1cc5d353..390b7dd6 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -210,7 +210,22 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
                  const Real tau) {
 
   auto hydro_pkg = blocks[0]->packages.Get("Hydro");
-  const auto s_rkl = hydro_pkg->Param<int>("s_rkl");
+  auto mindt_diff = hydro_pkg->Param<Real>("dt_diff");
+
+  // get number of RKL steps
+  // eq (21) using half hyperbolic timestep due to Strang split
+  int s_rkl =
+      static_cast<int>(0.5 * (std::sqrt(9.0 + 16.0 * tau / mindt_diff) - 1.0)) + 1;
+  // ensure odd number of stages
+  if (s_rkl % 2 == 0) s_rkl += 1;
+
+  if (parthenon::Globals::my_rank == 0) {
+    const auto ratio = 2.0 * tau / mindt_diff;
+    std::cout << "STS ratio: " << ratio << " Taking " << s_rkl << " steps." << std::endl;
+    if (ratio > 100.0) {
+      std::cout << "WARNING: ratio is > 100. Proceed at own risk." << std::endl;
+    }
+  }
 
   TaskID none(0);
 
@@ -458,84 +473,6 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
         hydro_pkg.get());
   }
 
-  // Calc number of stages for RKL2 STS
-  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::rkl2) && (stage == 1)) {
-    // need to make sure that there's only one region in order to MPI_reduce to work
-    TaskRegion &single_task_region = tc.AddRegion(1);
-    auto &tl = single_task_region[0];
-    // First globally reset mindt_diff
-    auto prev_task = tl.AddTask(
-        none,
-        [](StateDescriptor *hydro_pkg) {
-          hydro_pkg->UpdateParam("dt_diff", std::numeric_limits<Real>::max());
-          return TaskStatus::complete;
-        },
-        hydro_pkg.get());
-    // Adding one task for each partition. Not using a (new) single partition containing
-    // all blocks here as this (default) split is also used for the following tasks and
-    // thus does not create an overhead (such as creating a new MeshBlockPack that is
-    // just used here). Given that all partitions are in one task list they'll be
-    // executed sequentially. Given that a par_reduce to a host var is blocking it's
-    // also save to store the variable in the Params for now.
-    for (int i = 0; i < num_partitions; i++) {
-      auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
-      auto new_mindt = tl.AddTask(
-          prev_task,
-          [](StateDescriptor *hydro_pkg, MeshData<Real> *md) {
-            const auto min_dt_diff = hydro_pkg->Param<Real>("dt_diff");
-            auto new_min_dt_diff = EstimateConductionTimestep(md);
-            if (new_min_dt_diff < min_dt_diff) {
-              hydro_pkg->UpdateParam("dt_diff", new_min_dt_diff);
-            }
-            return TaskStatus::complete;
-          },
-          hydro_pkg.get(), mu0.get());
-      prev_task = new_mindt;
-    }
-    auto reduce_mindt = prev_task;
-#ifdef MPI_PARALLEL
-    reduce_mindt = tl.AddTask(
-        prev_task,
-        [](StateDescriptor *hydro_pkg) {
-          Real mins[1];
-          mins[0] = hydro_pkg->Param<Real>("dt_diff");
-          PARTHENON_MPI_CHECK(MPI_Allreduce(MPI_IN_PLACE, mins, 1, MPI_PARTHENON_REAL,
-                                            MPI_MIN, MPI_COMM_WORLD));
-
-          hydro_pkg->UpdateParam("dt_diff", mins[0]);
-          return TaskStatus::complete;
-        },
-        hydro_pkg.get());
-#endif
-    // Finally calc number of stages
-    auto update_c_h = tl.AddTask(
-        reduce_mindt,
-        [](StateDescriptor *hydro_pkg, const Real tau) {
-          auto mindt_diff = hydro_pkg->Param<Real>("dt_diff");
-
-          // get number of RKL steps
-          // eq (21) using half hyperbolic timestep due to Strang split
-          int s_rkl =
-              static_cast<int>(0.5 * (std::sqrt(9.0 + 16.0 * tau / mindt_diff) - 1.0)) +
-              1;
-          // ensure odd number of stages
-          if (s_rkl % 2 == 0) s_rkl += 1;
-
-          if (parthenon::Globals::my_rank == 0) {
-            const auto ratio = 2.0 * tau / mindt_diff;
-            std::cout << "STS ratio: " << ratio << " Taking " << s_rkl << " steps."
-                      << std::endl;
-            if (ratio > 100.0) {
-              std::cout << "WARNING: ratio is > 100. Proceed at own risk." << std::endl;
-            }
-          }
-
-          hydro_pkg->UpdateParam("s_rkl", s_rkl);
-          return TaskStatus::complete;
-        },
-        hydro_pkg.get(), 0.5 * tm.dt);
-  }
-
   // First add split sources before the main time integration
   if (stage == 1) {
     const auto &diffflux = hydro_pkg->Param<DiffFlux>("diffflux");
diff --git a/src/main.cpp b/src/main.cpp
index e15213bf..ee2a05cf 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -41,6 +41,7 @@ int main(int argc, char *argv[]) {
 
   // Redefine defaults
   pman.app_input->ProcessPackages = Hydro::ProcessPackages;
+  pman.app_input->PreStepMeshUserWorkInLoop = Hydro::PreStepMeshUserWorkInLoop;
   const auto problem = pman.pinput->GetOrAddString("job", "problem_id", "unset");
   if (problem == "linear_wave") {
     pman.app_input->InitUserMeshData = linear_wave::InitUserMeshData;

From b4dfdd9b3ab5c042692bff05edfc0b2f3a438d5f Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 12:12:08 +0200
Subject: [PATCH 09/42] Make tlim an argument for diff step test

---
 .../aniso_therm_cond_step/aniso_therm_cond_step.py          | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py b/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
index 6e81e80a..e9a797ce 100644
--- a/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
+++ b/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
@@ -32,6 +32,7 @@
 
 res_cfgs = [50, 100]
 field_cfgs = [ "aligned", "perp", "angle" ]
+tlim = 10.0
 
 all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
         
@@ -77,7 +78,8 @@ def Prepare(self,parameters, step):
             'problem/diffusion/By=%f' % By,
             'problem/diffusion/iprob=0',
             'parthenon/output0/id=%s' % outname,
-            'hydro/gamma=2.0'
+            'hydro/gamma=2.0',
+            'parthenon/time/tlim=%f' % tlim
             ]
 
         return parameters
@@ -113,7 +115,7 @@ def get_ref(x,
                     u0 = 11.0,        # mean temp
                     delta_u = 2.0,    # temp difference
                     chi = 0.01,       # diffusivity coefficient
-                    t = 10,           # time
+                    t = tlim,         # time
                     b_x = 1.0         # magnetic field
                 ):
             if b_x == 0:

From 5f20625c7c71413a1970f426e2ba8fc6889a0291 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 17:28:52 +0200
Subject: [PATCH 10/42] Adjust RKL2 conv test to gaussian profile

---
 .gitlab-ci.yml                                |  2 +-
 inputs/diffusion.in                           |  3 +-
 src/pgen/diffusion.cpp                        | 20 +++++++++--
 tst/regression/CMakeLists.txt                 |  2 +-
 .../__init__.py                               |  0
 .../aniso_therm_cond_gauss_conv.py}           | 36 +++++++++----------
 6 files changed, 40 insertions(+), 23 deletions(-)
 rename tst/regression/test_suites/{aniso_therm_cond_step => aniso_therm_cond_gauss_conv}/__init__.py (100%)
 rename tst/regression/test_suites/{aniso_therm_cond_step/aniso_therm_cond_step.py => aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py} (83%)

diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index dcf7996e..4a7d5f35 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -115,5 +115,5 @@ cuda-regression:
       - build-cuda/tst/regression/outputs/cluster_hse/analytic_comparison.png
       - build-cuda/tst/regression/outputs/cluster_tabular_cooling/convergence.png
       - build-cuda/tst/regression/outputs/aniso_therm_cond_ring_conv/ring_convergence.png
-      - build-cuda/tst/regression/outputs/aniso_therm_cond_step/cond.png
+      - build-cuda/tst/regression/outputs/aniso_therm_cond_gauss_conv/cond.png
       - build-cuda/tst/regression/outputs/field_loop/field_loop.png
diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index 3147ffa1..3a6a3104 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -14,7 +14,8 @@ Bx = 1.0     # Bx for x1 step function (permutated for iprobs in other direction
 By = 0.0     # By for x1 step function (permutated for iprobs in other directions)
 
 #iprob = 10   # Diffusion of Gaussian profile in x1 direction
-sigma = 0.1  # standard deviation of Gaussian for iprob=10
+t0 = 0.5      # Temporal offset for initial Gaussian profile
+amp = 1e-6    # Amplitude of Gaussian profile
 
 iprob = 20   # ring diffusion in x1-x2 plane; 21 for x2-x3 plane; 22 for x3-x1 plane
 
diff --git a/src/pgen/diffusion.cpp b/src/pgen/diffusion.cpp
index c9434c18..b66d35e4 100644
--- a/src/pgen/diffusion.cpp
+++ b/src/pgen/diffusion.cpp
@@ -27,7 +27,15 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
   const auto By = pin->GetOrAddReal("problem/diffusion", "By", 0.0);
 
   const auto iprob = pin->GetInteger("problem/diffusion", "iprob");
-  const auto sigma = pin->GetOrAddReal("problem/diffusion", "sigma", 0.1);
+  Real t0 = 0.5;
+  Real diff_coeff = 0.0;
+  Real amp = 1e-6;
+  // Get parameters for Gaussian profile
+  if (iprob == 10) {
+    diff_coeff = pin->GetReal("diffusion", "thermal_diff_coeff_code");
+    t0 = pin->GetOrAddReal("problem/diffusion", "t0", t0);
+    amp = pin->GetOrAddReal("problem/diffusion", "amp", amp);
+  }
 
   auto &coords = pmb->coords;
 
@@ -64,7 +72,15 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
         } else if (iprob == 10) {
           u(IB1, k, j, i) = Bx;
           u(IB2, k, j, i) = By;
-          eint = 1 + std::exp(-SQR(coords.x1v(i) / sigma) / 2.0);
+          // Adjust for anisotropic thermal conduction.
+          // If there's no conduction for the setup (because the field is perp.),
+          // use 0.5 * diff_coeff (also in analysis) to prevent division by 0.
+          // Note, this setup is highly contrived and needs to be adjusted for isotropic
+          // conduction, other directions, and Bfield configs with |B| != 1
+          Real eff_diff_coeff = Bx == 0.0 ? diff_coeff * 0.5 : diff_coeff * Bx * Bx;
+          eint = 1 + amp / std::sqrt(4. * M_PI * eff_diff_coeff * t0) *
+                         std::exp(-(std::pow(coords.x1v(i), 2.)) /
+                                  (4. * eff_diff_coeff * t0));
           // Ring diffusion in x1-x2 plane
         } else if (iprob == 20) {
           const auto x = coords.x1v(i);
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index 6f1beedc..4eeabefc 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -26,7 +26,7 @@ setup_test_serial("cluster_tabular_cooling" "--driver ${PROJECT_BINARY_DIR}/bin/
   setup_test_serial("aniso_therm_cond_ring_multid" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
  
-  setup_test_serial("aniso_therm_cond_step" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
+  setup_test_serial("aniso_therm_cond_gauss_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 6" "convergence")
 
   setup_test_serial("field_loop" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
diff --git a/tst/regression/test_suites/aniso_therm_cond_step/__init__.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/__init__.py
similarity index 100%
rename from tst/regression/test_suites/aniso_therm_cond_step/__init__.py
rename to tst/regression/test_suites/aniso_therm_cond_gauss_conv/__init__.py
diff --git a/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
similarity index 83%
rename from tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
rename to tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index e9a797ce..a8b97bfa 100644
--- a/tst/regression/test_suites/aniso_therm_cond_step/aniso_therm_cond_step.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -32,7 +32,7 @@
 
 res_cfgs = [50, 100]
 field_cfgs = [ "aligned", "perp", "angle" ]
-tlim = 10.0
+tlim = 1.0
 
 all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
         
@@ -70,16 +70,22 @@ def Prepare(self,parameters, step):
         parameters.driver_cmd_line_args = [
             'parthenon/mesh/nx1=%d' % res,
             'parthenon/meshblock/nx1=25',
+            'parthenon/mesh/x1min=-6.0',
+            'parthenon/mesh/x1max=6.0',
             'parthenon/mesh/nx2=%d' % res,
             'parthenon/meshblock/nx2=25',
+            'parthenon/mesh/x2min=-6.0',
+            'parthenon/mesh/x2max=6.0',
             'parthenon/mesh/nx3=1',
             'parthenon/meshblock/nx3=1',
             'problem/diffusion/Bx=%f' % Bx,
             'problem/diffusion/By=%f' % By,
-            'problem/diffusion/iprob=0',
+            'problem/diffusion/iprob=10',
             'parthenon/output0/id=%s' % outname,
             'hydro/gamma=2.0',
-            'parthenon/time/tlim=%f' % tlim
+            'parthenon/time/tlim=%f' % tlim,
+            'diffusion/thermal_diff_coeff_code=0.25',
+            'diffusion/flux=rkl2'
             ]
 
         return parameters
@@ -111,25 +117,19 @@ def Analyse(self,parameters):
             row = res_cfgs.index(res)
             p[row].plot(x,temp,'x',label=field_cfg)
 
-        def get_ref(x,
-                    u0 = 11.0,        # mean temp
-                    delta_u = 2.0,    # temp difference
-                    chi = 0.01,       # diffusivity coefficient
-                    t = tlim,         # time
-                    b_x = 1.0         # magnetic field
-                ):
-            if b_x == 0:
-                return 10.0 if x < 0.0 else 12.0
-            else:
-                return u0 + delta_u/2*(math.erf((x + 0)/np.sqrt(4*chi*t*b_x**2)) -
-                                       math.erf((x - 1)/np.sqrt(4*chi*t*b_x**2)) -
-                                       math.erf((x + 1)/np.sqrt(4*chi*t*b_x**2)))
-        x = np.linspace(-1,1,200)
+        x = np.linspace(-6,6,400)
         for field_cfg in field_cfgs:
             Bx, By = get_B(field_cfg)
             for i in range(num_rows):
-                y = [get_ref(x_, b_x = Bx) for x_ in x]
+                eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
+                tlim_ = 0.0 if Bx == 0.0 else tlim
+                y = 1.0 + 1e-6 / (
+                    np.sqrt(4*np.pi*eff_diff_coeff * (0.5 + tlim_)) /
+                    np.exp(-x**2 / (4.0 * eff_diff_coeff* (0.5+tlim_))))
+
                 p[i].plot(x, y, '-', color='black', alpha=0.5)
+                p[i].grid()
+                p[i].legend()
 
 
         fig.savefig(os.path.join(parameters.output_path, "cond.png"),

From b75b4960f94afa5b27a774db0ddc19fb8fabf1d4 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 18:06:08 +0200
Subject: [PATCH 11/42] Add conv panel to conv plot

---
 .../aniso_therm_cond_gauss_conv.py            | 52 +++++++++++++------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index a8b97bfa..73aadf56 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -30,9 +30,9 @@
 # To prevent littering up imported folders with .pyc files or __pycache_ folder
 sys.dont_write_bytecode = True
 
-res_cfgs = [50, 100]
-field_cfgs = [ "aligned", "perp", "angle" ]
-tlim = 1.0
+res_cfgs = [256, 512]
+field_cfgs = [ "aligned", "angle", "perp" ]
+tlim = 2.0
 
 all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
         
@@ -69,13 +69,13 @@ def Prepare(self,parameters, step):
 
         parameters.driver_cmd_line_args = [
             'parthenon/mesh/nx1=%d' % res,
-            'parthenon/meshblock/nx1=25',
+            'parthenon/meshblock/nx1=64',
             'parthenon/mesh/x1min=-6.0',
             'parthenon/mesh/x1max=6.0',
-            'parthenon/mesh/nx2=%d' % res,
-            'parthenon/meshblock/nx2=25',
-            'parthenon/mesh/x2min=-6.0',
-            'parthenon/mesh/x2max=6.0',
+            'parthenon/mesh/nx2=32',
+            'parthenon/meshblock/nx2=32',
+            'parthenon/mesh/x2min=-1.0',
+            'parthenon/mesh/x2max=1.0',
             'parthenon/mesh/nx3=1',
             'parthenon/meshblock/nx3=1',
             'problem/diffusion/Bx=%f' % Bx,
@@ -100,10 +100,17 @@ def Analyse(self,parameters):
             print("Couldn't find module to read Parthenon hdf5 files.")
             return False
 
+        def get_ref(x, Bx):
+                eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
+                tlim_ = 0.0 if Bx == 0.0 else tlim
+                return 1.0 + 1e-6 / (
+                    np.sqrt(4*np.pi*eff_diff_coeff * (0.5 + tlim_)) /
+                    np.exp(-x**2 / (4.0 * eff_diff_coeff* (0.5+tlim_))))
+
         num_rows = len(res_cfgs)
-        fig, p = plt.subplots(num_rows, 1,
-            sharex=True, sharey=True)
+        fig, p = plt.subplots(num_rows + 1, 1)
 
+        l1_err = np.zeros((len(field_cfgs), len(res_cfgs)))
         for step in range(len(all_cfgs)):
             outname = get_outname(all_cfgs[step])
             data_filename = f"{parameters.output_path}/parthenon.{outname}.00001.phdf"
@@ -115,18 +122,29 @@ def Analyse(self,parameters):
             x = xx[mask]
             res, field_cfg = all_cfgs[step]
             row = res_cfgs.index(res)
-            p[row].plot(x,temp,'x',label=field_cfg)
+            p[row].plot(x,temp, label=field_cfg)
+
+            Bx, By = get_B(field_cfg)
+            temp_ref = get_ref(x, Bx)
+            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = np.average(np.abs(temp - temp_ref))
 
+        # Plot convergence
+        for i, field_cfg in enumerate(field_cfgs):
+            if field_cfg == "perp":
+                continue
+
+            est_conv = np.diff(np.log(l1_err[i,:])) / np.diff(np.log(res_cfgs))
+            p[-1].plot(res_cfgs, l1_err[i, :], label=field_cfg + ' conv: %.2f' % est_conv)
+        p[-1].set_xscale("log")
+        p[-1].set_yscale("log")
+        p[-1].legend()
+
+        # Plot reference lines
         x = np.linspace(-6,6,400)
         for field_cfg in field_cfgs:
             Bx, By = get_B(field_cfg)
             for i in range(num_rows):
-                eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
-                tlim_ = 0.0 if Bx == 0.0 else tlim
-                y = 1.0 + 1e-6 / (
-                    np.sqrt(4*np.pi*eff_diff_coeff * (0.5 + tlim_)) /
-                    np.exp(-x**2 / (4.0 * eff_diff_coeff* (0.5+tlim_))))
-
+                y = get_ref(x, Bx)
                 p[i].plot(x, y, '-', color='black', alpha=0.5)
                 p[i].grid()
                 p[i].legend()

From 3473da87d38060bcc0cc881e6fe19d428185150e Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 18:07:26 +0200
Subject: [PATCH 12/42] auto-format

---
 .../aniso_therm_cond_gauss_conv.py            | 111 ++++++++++--------
 1 file changed, 61 insertions(+), 50 deletions(-)

diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index 73aadf56..5b7b3ba6 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -1,9 +1,8 @@
-
-#========================================================================================
+# ========================================================================================
 # AthenaPK - a performance portable block structured AMR MHD code
 # Copyright (c) 2020-2021, Athena Parthenon Collaboration. All rights reserved.
 # Licensed under the 3-clause BSD License, see LICENSE file for details
-#========================================================================================
+# ========================================================================================
 # (C) (or copyright) 2020. Triad National Security, LLC. All rights reserved.
 #
 # This program was produced under U.S. Government contract 89233218CNA000001 for Los
@@ -14,13 +13,14 @@
 # itself and others acting on its behalf a nonexclusive, paid-up, irrevocable worldwide
 # license in this material to reproduce, prepare derivative works, distribute copies to
 # the public, perform publicly and display publicly, and to permit others to do so.
-#========================================================================================
+# ========================================================================================
 
 # Modules
 import math
 import numpy as np
 import matplotlib
-matplotlib.use('agg')
+
+matplotlib.use("agg")
 import matplotlib.pylab as plt
 import sys
 import os
@@ -31,15 +31,17 @@
 sys.dont_write_bytecode = True
 
 res_cfgs = [256, 512]
-field_cfgs = [ "aligned", "angle", "perp" ]
+field_cfgs = ["aligned", "angle", "perp"]
 tlim = 2.0
 
 all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
-        
+
+
 def get_outname(all_cfg):
     res, field_cfg = all_cfg
     return f"{res}_{field_cfg}"
 
+
 def get_B(field_cfg):
     if field_cfg == "aligned":
         Bx = 1.0
@@ -48,51 +50,55 @@ def get_B(field_cfg):
         Bx = 0.0
         By = 1.0
     elif field_cfg == "angle":
-        Bx = 1/np.sqrt(2)
-        By = 1/np.sqrt(2)
+        Bx = 1 / np.sqrt(2)
+        By = 1 / np.sqrt(2)
     else:
         raise "Unknown field_cfg: %s" % field_cfg
 
     return Bx, By
-        
+
 
 class TestCase(utils.test_case.TestCaseAbs):
-    def Prepare(self,parameters, step):
+    def Prepare(self, parameters, step):
 
         assert parameters.num_ranks <= 4, "Use <= 4 ranks for diffusion test."
 
         res, field_cfg = all_cfgs[step - 1]
 
-        Bx , By = get_B(field_cfg)
+        Bx, By = get_B(field_cfg)
 
         outname = get_outname(all_cfgs[step - 1])
 
         parameters.driver_cmd_line_args = [
-            'parthenon/mesh/nx1=%d' % res,
-            'parthenon/meshblock/nx1=64',
-            'parthenon/mesh/x1min=-6.0',
-            'parthenon/mesh/x1max=6.0',
-            'parthenon/mesh/nx2=32',
-            'parthenon/meshblock/nx2=32',
-            'parthenon/mesh/x2min=-1.0',
-            'parthenon/mesh/x2max=1.0',
-            'parthenon/mesh/nx3=1',
-            'parthenon/meshblock/nx3=1',
-            'problem/diffusion/Bx=%f' % Bx,
-            'problem/diffusion/By=%f' % By,
-            'problem/diffusion/iprob=10',
-            'parthenon/output0/id=%s' % outname,
-            'hydro/gamma=2.0',
-            'parthenon/time/tlim=%f' % tlim,
-            'diffusion/thermal_diff_coeff_code=0.25',
-            'diffusion/flux=rkl2'
-            ]
+            "parthenon/mesh/nx1=%d" % res,
+            "parthenon/meshblock/nx1=64",
+            "parthenon/mesh/x1min=-6.0",
+            "parthenon/mesh/x1max=6.0",
+            "parthenon/mesh/nx2=32",
+            "parthenon/meshblock/nx2=32",
+            "parthenon/mesh/x2min=-1.0",
+            "parthenon/mesh/x2max=1.0",
+            "parthenon/mesh/nx3=1",
+            "parthenon/meshblock/nx3=1",
+            "problem/diffusion/Bx=%f" % Bx,
+            "problem/diffusion/By=%f" % By,
+            "problem/diffusion/iprob=10",
+            "parthenon/output0/id=%s" % outname,
+            "hydro/gamma=2.0",
+            "parthenon/time/tlim=%f" % tlim,
+            "diffusion/thermal_diff_coeff_code=0.25",
+            "diffusion/flux=rkl2",
+        ]
 
         return parameters
 
-    def Analyse(self,parameters):
-        
-        sys.path.insert(1, parameters.parthenon_path + '/scripts/python/packages/parthenon_tools/parthenon_tools')
+    def Analyse(self, parameters):
+
+        sys.path.insert(
+            1,
+            parameters.parthenon_path
+            + "/scripts/python/packages/parthenon_tools/parthenon_tools",
+        )
 
         try:
             import phdf
@@ -101,11 +107,12 @@ def Analyse(self,parameters):
             return False
 
         def get_ref(x, Bx):
-                eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
-                tlim_ = 0.0 if Bx == 0.0 else tlim
-                return 1.0 + 1e-6 / (
-                    np.sqrt(4*np.pi*eff_diff_coeff * (0.5 + tlim_)) /
-                    np.exp(-x**2 / (4.0 * eff_diff_coeff* (0.5+tlim_))))
+            eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
+            tlim_ = 0.0 if Bx == 0.0 else tlim
+            return 1.0 + 1e-6 / (
+                np.sqrt(4 * np.pi * eff_diff_coeff * (0.5 + tlim_))
+                / np.exp(-(x ** 2) / (4.0 * eff_diff_coeff * (0.5 + tlim_)))
+            )
 
         num_rows = len(res_cfgs)
         fig, p = plt.subplots(num_rows + 1, 1)
@@ -116,41 +123,45 @@ def get_ref(x, Bx):
             data_filename = f"{parameters.output_path}/parthenon.{outname}.00001.phdf"
             data_file = phdf.phdf(data_filename)
             prim = data_file.Get("prim")
-            zz, yy,xx = data_file.GetVolumeLocations()
+            zz, yy, xx = data_file.GetVolumeLocations()
             mask = yy == yy[0]
-            temp = prim[:,4][mask]
+            temp = prim[:, 4][mask]
             x = xx[mask]
             res, field_cfg = all_cfgs[step]
             row = res_cfgs.index(res)
-            p[row].plot(x,temp, label=field_cfg)
+            p[row].plot(x, temp, label=field_cfg)
 
             Bx, By = get_B(field_cfg)
             temp_ref = get_ref(x, Bx)
-            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = np.average(np.abs(temp - temp_ref))
+            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = np.average(
+                np.abs(temp - temp_ref)
+            )
 
         # Plot convergence
         for i, field_cfg in enumerate(field_cfgs):
             if field_cfg == "perp":
                 continue
 
-            est_conv = np.diff(np.log(l1_err[i,:])) / np.diff(np.log(res_cfgs))
-            p[-1].plot(res_cfgs, l1_err[i, :], label=field_cfg + ' conv: %.2f' % est_conv)
+            est_conv = np.diff(np.log(l1_err[i, :])) / np.diff(np.log(res_cfgs))
+            p[-1].plot(
+                res_cfgs, l1_err[i, :], label=field_cfg + " conv: %.2f" % est_conv
+            )
         p[-1].set_xscale("log")
         p[-1].set_yscale("log")
         p[-1].legend()
 
         # Plot reference lines
-        x = np.linspace(-6,6,400)
+        x = np.linspace(-6, 6, 400)
         for field_cfg in field_cfgs:
             Bx, By = get_B(field_cfg)
             for i in range(num_rows):
                 y = get_ref(x, Bx)
-                p[i].plot(x, y, '-', color='black', alpha=0.5)
+                p[i].plot(x, y, "-", color="black", alpha=0.5)
                 p[i].grid()
                 p[i].legend()
 
-
-        fig.savefig(os.path.join(parameters.output_path, "cond.png"),
-                    bbox_inches='tight')
+        fig.savefig(
+            os.path.join(parameters.output_path, "cond.png"), bbox_inches="tight"
+        )
 
         return True

From 3d26c40294a928236f74fe7725a7de5535375846 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 19:48:41 +0200
Subject: [PATCH 13/42] rename diffusion integrator parameter

---
 inputs/diffusion.in                           |  2 +-
 src/hydro/hydro.cpp                           | 30 +++++++++----------
 src/hydro/hydro_driver.cpp                    |  8 ++---
 src/main.hpp                                  |  2 +-
 .../aniso_therm_cond_gauss_conv.py            |  2 +-
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index 3a6a3104..cd4196c3 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -60,7 +60,7 @@ reconstruction = dc
 gamma = 2.0
 
 <diffusion>
-flux = unsplit
+integrator = unsplit
 conduction = thermal_diff
 thermal_diff_coeff_code = 0.01
 
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 02563f9d..e3cc73ce 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -60,7 +60,7 @@ void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &
   auto hydro_pkg = pmesh->block_list[0]->packages.Get("Hydro");
   const auto num_partitions = pmesh->DefaultNumPartitions();
 
-  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::rkl2)) {
+  if ((hydro_pkg->Param<DiffInt>("diffint") == DiffInt::rkl2)) {
     auto dt_diff = std::numeric_limits<Real>::max();
     for (auto i = 0; i < num_partitions; i++) {
       auto &md = pmesh->mesh_data.GetOrAdd("base", i);
@@ -465,20 +465,20 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
     }
     pkg->AddParam<>("conduction", conduction);
 
-    auto diffflux_str = pin->GetOrAddString("diffusion", "flux", "none");
-    auto diffflux = DiffFlux::none;
-    if (diffflux_str == "unsplit") {
-      diffflux = DiffFlux::unsplit;
-    } else if (diffflux_str == "rkl2") {
-      diffflux = DiffFlux::rkl2;
-    } else if (diffflux_str != "none") {
-      PARTHENON_FAIL("AthenaPK unknown method for diffusive fluxes. Options are: none, "
-                     "unsplit, rkl2");
+    auto diffint_str = pin->GetOrAddString("diffusion", "integrator", "none");
+    auto diffint = DiffInt::none;
+    if (diffint_str == "unsplit") {
+      diffint = DiffInt::unsplit;
+    } else if (diffint_str == "rkl2") {
+      diffint = DiffInt::rkl2;
+    } else if (diffint_str != "none") {
+      PARTHENON_FAIL("AthenaPK unknown integration method for diffusion processes. "
+                     "Options are: none, unsplit, rkl2");
     }
-    if (diffflux != DiffFlux::none) {
+    if (diffint != DiffInt::none) {
       pkg->AddParam<Real>("dt_diff", 0.0); // diffusive timestep constraint
     }
-    pkg->AddParam<>("diffflux", diffflux);
+    pkg->AddParam<>("diffint", diffint);
 
     if (fluid == Fluid::euler) {
       AdiabaticHydroEOS eos(pfloor, dfloor, efloor, gamma);
@@ -703,7 +703,7 @@ Real EstimateTimestep(MeshData<Real> *md) {
   }
 
   // For RKL2 STS, the diffusive timestep is calculated separately in the driver
-  if ((hydro_pkg->Param<DiffFlux>("diffflux") == DiffFlux::unsplit) &&
+  if ((hydro_pkg->Param<DiffInt>("diffint") == DiffInt::unsplit) &&
       (hydro_pkg->Param<Conduction>("conduction") != Conduction::none)) {
     min_dt = std::min(min_dt, EstimateConductionTimestep(md));
   }
@@ -951,8 +951,8 @@ TaskStatus CalculateFluxes(std::shared_ptr<MeshData<Real>> &md) {
         });
   }
 
-  const auto &diffflux = pkg->Param<DiffFlux>("diffflux");
-  if (diffflux == DiffFlux::unsplit) {
+  const auto &diffint = pkg->Param<DiffInt>("diffint");
+  if (diffint == DiffInt::unsplit) {
     CalcDiffFluxes(pkg.get(), md.get());
   }
 
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 390b7dd6..19ccbcb5 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -475,8 +475,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
 
   // First add split sources before the main time integration
   if (stage == 1) {
-    const auto &diffflux = hydro_pkg->Param<DiffFlux>("diffflux");
-    if (diffflux == DiffFlux::rkl2) {
+    const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
+    if (diffint == DiffInt::rkl2) {
       AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
     }
     TaskRegion &strang_init_region = tc.AddRegion(num_partitions);
@@ -621,8 +621,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     auto fill_derived =
         tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
   }
-  const auto &diffflux = hydro_pkg->Param<DiffFlux>("diffflux");
-  if (diffflux == DiffFlux::rkl2 && stage == integrator->nstages) {
+  const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
+  if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
     AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
   }
 
diff --git a/src/main.hpp b/src/main.hpp
index fe8dcc85..67d46e07 100644
--- a/src/main.hpp
+++ b/src/main.hpp
@@ -36,7 +36,7 @@ enum class Integrator { undefined, rk1, rk2, vl2, rk3 };
 enum class Fluid { undefined, euler, glmmhd };
 enum class Cooling { none, tabular };
 enum class Conduction { none, spitzer, thermal_diff };
-enum class DiffFlux { none, unsplit, rkl2 };
+enum class DiffInt { none, unsplit, rkl2 };
 
 enum class Hst { idx, ekin, emag, divb };
 
diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index 5b7b3ba6..1682ff8f 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -87,7 +87,7 @@ def Prepare(self, parameters, step):
             "hydro/gamma=2.0",
             "parthenon/time/tlim=%f" % tlim,
             "diffusion/thermal_diff_coeff_code=0.25",
-            "diffusion/flux=rkl2",
+            "diffusion/integrator=rkl2",
         ]
 
         return parameters

From 61a9c93c72afbc4d6afda0f4f8a9330d94e16ebc Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 21:04:36 +0200
Subject: [PATCH 14/42] Add isotropic thermal conduction

---
 docs/input.md                      |  16 +-
 inputs/diffusion.in                |   3 +-
 src/hydro/diffusion/conduction.cpp | 240 +++++++++++++++++++++--------
 src/hydro/diffusion/diffusion.cpp  |   4 +-
 src/hydro/diffusion/diffusion.hpp  |  12 +-
 src/hydro/hydro.cpp                |  75 ++++++---
 src/main.hpp                       |   3 +-
 7 files changed, 250 insertions(+), 103 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index b25e2031..7fbcd1c9 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -50,7 +50,7 @@ Note, `ppm` and `wenoz` need at least three ghost zones (`parthenon/mesh/num_gho
 
 #### Diffusive processes
 
-##### Anisotropic thermal conduction (required MHD)
+##### Isotropic (hydro and MHD) and anisotropic (only MHD) thermal conduction
 In the presence of magnetic fields thermal conduction is becoming anisotropic with the flux along
 the local magnetic field direction typically being much stronger than the flux perpendicular to the magnetic field.
 
@@ -68,17 +68,23 @@ Moreover, they are implemented explicitly, i.e., they add a (potentially very re
 Finally, we employ limiters for calculating the temperature gradients following Sharma & Hammett (2007)[^SH07].
 This prevents unphysical conduction against the gradient, which may be introduced because the off-axis gradients are not centered on the interfaces.
 
-To enable conduction, set
+To enable thermal conduction, set
 
 Parameter: `conduction` (string)
 - `none` : No thermal conduction
+- `isotropic` : Isotropic thermal conduction
+- `anisotropic` : Anisotropic thermal conduction
+
+In addition, the coefficient (or diffusivity) needs to be set:
+
+Parameter: `conduction_coeff` (string)
 - `spitzer` : Anisotropic thermal conduction with a temperature dependent classic Spitzer thermal conductivity
   $`\kappa (T) = c_\kappa T^{5/2} \mathrm{erg/s/K/cm}`$ and
-  $`c_\kappa`$ being constant prefactor (set via `diffusion/spitzer_cond_in_erg_by_s_K_cm` with a default value of $`4.6\times10^{-7}`$). Note, as indicated by the units in the input parameter name, this kind of thermal conductivity requires a full set of units
+  $`c_\kappa`$ being constant prefactor (set via the additional `diffusion/spitzer_cond_in_erg_by_s_K_cm` parameter with a default value of $`4.6\times10^{-7}`$). Note, as indicated by the units in the input parameter name, this kind of thermal conductivity requires a full set of units
   to be defined for the simulation.
-- `thermal_diff` : Contrary to a temperature dependent conductivity, a simple thermal diffusivity can be used instead for which
+- `fixed` : Contrary to a temperature dependent conductivity, a simple thermal diffusivity can be used instead for which
 the conduction flux is $`\mathbf{F} = - \chi \rho \mathbf{\hat b} (\mathbf{\hat b \cdot \nabla \frac{p_\mathrm{th}}{\rho}})`$
-Here, the strength, $`\chi`$, is controlled via the `thermal_diff_coeff_code` parameter in code units.
+Here, the strength, $`\chi`$, is controlled via the additional `thermal_diff_coeff_code` parameter in code units.
 Given the dimensions of $`L^2/T`$ it is referred to a thermal diffusivity rather than thermal conductivity.
 
 [^SH07]:
diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index cd4196c3..afd9843d 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -61,7 +61,8 @@ gamma = 2.0
 
 <diffusion>
 integrator = unsplit
-conduction = thermal_diff
+conduction = anisotropic
+conduction_coeff = fixed
 thermal_diff_coeff_code = 0.01
 
 <parthenon/output0>
diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 28986507..377b0279 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -22,9 +22,9 @@ using namespace parthenon::package::prelude;
 
 KOKKOS_INLINE_FUNCTION
 Real ThermalDiffusivity::Get(const Real pres, const Real rho, const Real gradTmag) const {
-  if (conduction_ == Conduction::thermal_diff) {
+  if (conduction_coeff_type_ == ConductionCoeff::fixed) {
     return coeff_;
-  } else if (conduction_ == Conduction::spitzer) {
+  } else if (conduction_coeff_type_ == ConductionCoeff::spitzer) {
     const Real T = mbar_over_kb_ * pres / rho;
     const Real kappa = coeff_ * std::pow(T, 5. / 2.); // Full spitzer
     const Real chi_spitzer = kappa * mbar_over_kb_ / rho;
@@ -65,67 +65,168 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
   const auto gm1 = hydro_pkg->Param<Real>("AdiabaticIndex");
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
 
-  Kokkos::parallel_reduce(
-      "EstimateConductionTimestep",
-      Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
-          DevExecSpace(), {0, kb.s, jb.s, ib.s},
-          {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
-          {1, 1, 1, ib.e + 1 - ib.s}),
-      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &min_dt) {
+  if (thermal_diff.GetType() == Conduction::isotropic) {
+    // Isotropic thermal conduction currently only supports a fixed, uniform coefficient
+    // so it's safe to get it outside the kernel.
+    // TODO(pgrete): once mindx is properly calculated before this loop, we can get rid of
+    // it entirely.
+    const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
+    Kokkos::parallel_reduce(
+        "EstimateConductionTimestep (iso)",
+        Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
+            DevExecSpace(), {0, kb.s, jb.s, ib.s},
+            {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
+            {1, 1, 1, ib.e + 1 - ib.s}),
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &min_dt) {
+          const auto &coords = prim_pack.coords(b);
+          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
+                                    (thermal_diff_coeff + TINY_NUMBER));
+          if (ndim >= 2) {
+            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
+                                      (thermal_diff_coeff + TINY_NUMBER));
+          }
+          if (ndim >= 3) {
+            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
+                                      (thermal_diff_coeff + TINY_NUMBER));
+          }
+        },
+        Kokkos::Min<Real>(min_dt_cond));
+  } else if (thermal_diff.GetType() == Conduction::anisotropic) {
+    Kokkos::parallel_reduce(
+        "EstimateConductionTimestep (aniso)",
+        Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
+            DevExecSpace(), {0, kb.s, jb.s, ib.s},
+            {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
+            {1, 1, 1, ib.e + 1 - ib.s}),
+        KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &min_dt) {
+          const auto &coords = prim_pack.coords(b);
+          const auto &prim = prim_pack(b);
+          const auto &rho = prim(IDN, k, j, i);
+          const auto &p = prim(IPR, k, j, i);
+          const auto &Bx = prim(IB1, k, j, i);
+          const auto &By = prim(IB2, k, j, i);
+          const auto &Bz = prim(IB3, k, j, i);
+          const auto Bmag = sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
+
+          const auto dTdx = 0.5 *
+                            (prim(IPR, k, j, i + 1) / prim(IDN, k, j, i + 1) -
+                             prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1)) /
+                            coords.dx1v(i);
+
+          const auto dTdy = 0.5 *
+                            (prim(IPR, k, j + 1, i) / prim(IDN, k, j + 1, i) -
+                             prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i)) /
+                            coords.dx2v(j);
+
+          const auto dTdz = ndim >= 3
+                                ? 0.5 *
+                                      (prim(IPR, k + 1, j, i) / prim(IDN, k + 1, j, i) -
+                                       prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i)) /
+                                      coords.dx3v(k)
+                                : 0.0;
+          const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
+          auto thermal_diff_coeff = thermal_diff.Get(p, rho, gradTmag);
+
+          const auto denom = Bmag * gradTmag;
+          // if either Bmag or gradTmag are 0, no anisotropic thermal conduction
+          if (denom == 0.0) {
+            return;
+          }
+          const auto costheta = fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / denom;
+
+          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
+                                    (thermal_diff_coeff * fabs(Bx) / Bmag * costheta +
+                                     TINY_NUMBER));
+          if (ndim >= 2) {
+            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
+                                      (thermal_diff_coeff * fabs(By) / Bmag * costheta +
+                                       TINY_NUMBER));
+          }
+          if (ndim >= 3) {
+            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
+                                      (thermal_diff_coeff * fabs(Bz) / Bmag * costheta +
+                                       TINY_NUMBER));
+          }
+        },
+        Kokkos::Min<Real>(min_dt_cond));
+  }
+
+  return fac * min_dt_cond;
+}
+
+//---------------------------------------------------------------------------------------
+//! Calculate isotropic thermal conduction
+
+void ThermalFluxIso(MeshData<Real> *md) {
+  auto pmb = md->GetBlockData(0)->GetBlockPointer();
+  IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
+  IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
+  IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
+
+  std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
+  auto cons_pack = md->PackVariablesAndFluxes(flags_ind);
+  auto hydro_pkg = pmb->packages.Get("Hydro");
+
+  auto const &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});
+
+  const int ndim = pmb->pmy_mesh->ndim;
+
+  const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
+  // Isotropic thermal conduction currently only supports a fixed, uniform coefficient
+  // so it's safe to get it outside the kernel.
+  const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
+
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (iso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
+      ib.e + 1, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
+        auto &cons = cons_pack(b);
         const auto &prim = prim_pack(b);
-        const auto &rho = prim(IDN, k, j, i);
-        const auto &p = prim(IPR, k, j, i);
-        // TODO(pgrete) when we introduce isotropic thermal conduction a lot of the
-        // following machinery should be hidden behind conditionals
-        const auto &Bx = prim(IB1, k, j, i);
-        const auto &By = prim(IB2, k, j, i);
-        const auto &Bz = prim(IB3, k, j, i);
-        const auto Bmag = sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
-
-        const auto dTdx = 0.5 *
-                          (prim(IPR, k, j, i + 1) / prim(IDN, k, j, i + 1) -
-                           prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1)) /
-                          coords.dx1v(i);
-
-        const auto dTdy = 0.5 *
-                          (prim(IPR, k, j + 1, i) / prim(IDN, k, j + 1, i) -
-                           prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i)) /
-                          coords.dx2v(j);
-
-        const auto dTdz = ndim >= 3
-                              ? 0.5 *
-                                    (prim(IPR, k + 1, j, i) / prim(IDN, k + 1, j, i) -
-                                     prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i)) /
-                                    coords.dx3v(k)
-                              : 0.0;
-        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
-        auto thermal_diff_coeff = thermal_diff.Get(p, rho, gradTmag);
+        const auto T_i = prim(IPR, k, j, i) / prim(IDN, k, j, i);
+        const auto T_im1 = prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1);
+        const auto dTdx = (T_i - T_im1) / coords.Dx(parthenon::X1DIR, k, j, i);
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
+        cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdx;
+      });
 
-        const auto denom = Bmag * gradTmag;
-        // if either Bmag or gradTmag are 0, no anisotropic thermal conduction
-        if (denom == 0.0) {
-          return;
-        }
-        const auto costheta = fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / denom;
-
-        min_dt = fmin(
-            min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
-                        (thermal_diff_coeff * fabs(Bx) / Bmag * costheta + TINY_NUMBER));
-        if (ndim >= 2) {
-          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
-                                    (thermal_diff_coeff * fabs(By) / Bmag * costheta +
-                                     TINY_NUMBER));
-        }
-        if (ndim >= 3) {
-          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
-                                    (thermal_diff_coeff * fabs(Bz) / Bmag * costheta +
-                                     TINY_NUMBER));
-        }
-      },
-      Kokkos::Min<Real>(min_dt_cond));
+  if (ndim < 2) {
+    return;
+  }
+  /* Compute heat fluxes in 2-direction  --------------------------------------*/
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (iso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1,
+      ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+        const auto &coords = prim_pack.coords(b);
+        auto &cons = cons_pack(b);
+        const auto &prim = prim_pack(b);
 
-  return fac * min_dt_cond;
+        const auto T_j = prim(IPR, k, j, i) / prim(IDN, k, j, i);
+        const auto T_jm1 = prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i);
+        const auto dTdy = (T_j - T_jm1) / coords.Dx(parthenon::X2DIR, k, j, i);
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
+        cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdy;
+      });
+  /* Compute heat fluxes in 3-direction, 3D problem ONLY  ---------------------*/
+  if (ndim < 3) {
+    return;
+  }
+
+  parthenon::par_for(
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (iso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e,
+      ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+        const auto &coords = prim_pack.coords(b);
+        auto &cons = cons_pack(b);
+        const auto &prim = prim_pack(b);
+
+        const auto T_k = prim(IPR, k, j, i) / prim(IDN, k, j, i);
+        const auto T_km1 = prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i);
+        const auto dTdz = (T_k - T_km1) / coords.Dx(parthenon::X3DIR, k, j, i);
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
+        cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdz;
+      });
 }
 
 //---------------------------------------------------------------------------------------
@@ -148,9 +249,9 @@ void ThermalFluxAniso(MeshData<Real> *md) {
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
 
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes", parthenon::DevExecSpace(), 0,
-      cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e + 1,
-      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (aniso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
+      ib.e + 1, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
         auto &cons = cons_pack(b);
         const auto &prim = prim_pack(b);
@@ -207,11 +308,14 @@ void ThermalFluxAniso(MeshData<Real> *md) {
         cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_f * denf * (Bx * bDotGradT) / B02;
       });
 
+  if (ndim < 2) {
+    return;
+  }
   /* Compute heat fluxes in 2-direction  --------------------------------------*/
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes", parthenon::DevExecSpace(), 0,
-      cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1, ib.s, ib.e,
-      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (aniso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1,
+      ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
         auto &cons = cons_pack(b);
         const auto &prim = prim_pack(b);
@@ -274,9 +378,9 @@ void ThermalFluxAniso(MeshData<Real> *md) {
   }
 
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes", parthenon::DevExecSpace(), 0,
-      cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e, ib.s, ib.e,
-      KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (aniso)",
+      parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e,
+      ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
         auto &cons = cons_pack(b);
         const auto &prim = prim_pack(b);
diff --git a/src/hydro/diffusion/diffusion.cpp b/src/hydro/diffusion/diffusion.cpp
index 376dc6b2..3a2c4f04 100644
--- a/src/hydro/diffusion/diffusion.cpp
+++ b/src/hydro/diffusion/diffusion.cpp
@@ -17,7 +17,9 @@ using namespace parthenon::package::prelude;
 
 TaskStatus CalcDiffFluxes(StateDescriptor *hydro_pkg, MeshData<Real> *md) {
   const auto &conduction = hydro_pkg->Param<Conduction>("conduction");
-  if (conduction != Conduction::none) {
+  if (conduction == Conduction::isotropic) {
+    ThermalFluxIso(md);
+  } else if (conduction == Conduction::anisotropic) {
     ThermalFluxAniso(md);
   }
   return TaskStatus::complete;
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index 69ab6e2d..048dc400 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -71,20 +71,28 @@ struct ThermalDiffusivity {
  private:
   Real mbar_over_kb_;
   Conduction conduction_;
+  ConductionCoeff conduction_coeff_type_;
   // "free" coefficient/prefactor. Value depends on conduction is set in the constructor.
   Real coeff_;
 
  public:
   KOKKOS_INLINE_FUNCTION
-  ThermalDiffusivity(Conduction conduction, Real coeff, Real mbar_over_kb)
-      : coeff_(coeff), conduction_(conduction), mbar_over_kb_(mbar_over_kb) {}
+  ThermalDiffusivity(Conduction conduction, ConductionCoeff conduction_coeff_type,
+                     Real coeff, Real mbar_over_kb)
+      : conduction_(conduction), conduction_coeff_type_(conduction_coeff_type),
+        coeff_(coeff), mbar_over_kb_(mbar_over_kb) {}
 
   KOKKOS_INLINE_FUNCTION
   Real Get(const Real pres, const Real rho, const Real gradTmag) const;
+
+  KOKKOS_INLINE_FUNCTION
+  Conduction GetType() const { return conduction_; }
 };
 
 Real EstimateConductionTimestep(MeshData<Real> *md);
 
+//! Calculate isotropic thermal conduction
+void ThermalFluxIso(MeshData<Real> *md);
 //! Calculate anisotropic thermal conduction
 void ThermalFluxAniso(MeshData<Real> *md);
 
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index e3cc73ce..9330b1d6 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -435,33 +435,58 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
 
     auto conduction = Conduction::none;
     auto conduction_str = pin->GetOrAddString("diffusion", "conduction", "none");
-    if (conduction_str == "spitzer") {
-      if (!pkg->AllParams().hasKey("mbar_over_kb")) {
-        PARTHENON_FAIL("Spitzer thermal conduction requires units and gas composition. "
-                       "Please set a 'units' block and the 'hydro/He_mass_fraction' in "
-                       "the input file.");
-      }
-      conduction = Conduction::spitzer;
-
-      Real spitzer_coeff =
-          pin->GetOrAddReal("diffusion", "spitzer_cond_in_erg_by_s_K_cm", 4.6e-7);
-      // Convert to code units. No temp conversion as [T_phys] = [T_code].
-      auto units = pkg->Param<Units>("units");
-      spitzer_coeff *= units.erg() / (units.s() * units.cm());
-
-      auto mbar_over_kb = pkg->Param<Real>("mbar_over_kb");
-      auto thermal_diff = ThermalDiffusivity(conduction, spitzer_coeff, mbar_over_kb);
-      pkg->AddParam<>("thermal_diff", thermal_diff);
-
-    } else if (conduction_str == "thermal_diff") {
-      conduction = Conduction::thermal_diff;
-      Real thermal_diff_coeff_code = pin->GetReal("diffusion", "thermal_diff_coeff_code");
-      auto thermal_diff = ThermalDiffusivity(conduction, thermal_diff_coeff_code, 0.0);
-      pkg->AddParam<>("thermal_diff", thermal_diff);
-
+    if (conduction_str == "isotropic") {
+      conduction = Conduction::isotropic;
+    } else if (conduction_str == "anisotropic") {
+      conduction = Conduction::anisotropic;
     } else if (conduction_str != "none") {
       PARTHENON_FAIL(
-          "AthenaPK unknown conduction method. Options are: none, spitzer, thermal_diff");
+          "Unknown conduction method. Options are: none, isotropic, anisotropic");
+    }
+    // If conduction is enabled, process supported coefficients
+    if (conduction != Conduction::none) {
+      auto conduction_coeff_str =
+          pin->GetOrAddString("diffusion", "conduction_coeff", "none");
+      auto conduction_coeff = ConductionCoeff::none;
+      if (conduction_coeff_str == "spitzer") {
+        if (!pkg->AllParams().hasKey("mbar_over_kb")) {
+          PARTHENON_FAIL("Spitzer thermal conduction requires units and gas composition. "
+                         "Please set a 'units' block and the 'hydro/He_mass_fraction' in "
+                         "the input file.");
+        }
+        conduction_coeff = ConductionCoeff::spitzer;
+
+        Real spitzer_coeff =
+            pin->GetOrAddReal("diffusion", "spitzer_cond_in_erg_by_s_K_cm", 4.6e-7);
+        // Convert to code units. No temp conversion as [T_phys] = [T_code].
+        auto units = pkg->Param<Units>("units");
+        spitzer_coeff *= units.erg() / (units.s() * units.cm());
+
+        auto mbar_over_kb = pkg->Param<Real>("mbar_over_kb");
+        auto thermal_diff =
+            ThermalDiffusivity(conduction, conduction_coeff, spitzer_coeff, mbar_over_kb);
+        pkg->AddParam<>("thermal_diff", thermal_diff);
+
+      } else if (conduction_coeff_str == "fixed") {
+        conduction_coeff = ConductionCoeff::fixed;
+        Real thermal_diff_coeff_code =
+            pin->GetReal("diffusion", "thermal_diff_coeff_code");
+        auto thermal_diff = ThermalDiffusivity(conduction, conduction_coeff,
+                                               thermal_diff_coeff_code, 0.0);
+        pkg->AddParam<>("thermal_diff", thermal_diff);
+
+      } else {
+        PARTHENON_FAIL("Thermal conduction is enabled but no coefficient is set. Please "
+                       "set diffusion/conduction_coeff to either 'spitzer' or 'fixed'");
+      }
+
+      if (conduction == Conduction::isotropic &&
+          conduction_coeff != ConductionCoeff::fixed) {
+        PARTHENON_FAIL(
+            "Isotropic thermal conduction is currently only supported with a fixed "
+            "(spatially and temporally) conduction coefficient. Please get in contact if "
+            "you need varying coefficients (e.g., Spitzer) for isotropic conduction.")
+      }
     }
     pkg->AddParam<>("conduction", conduction);
 
diff --git a/src/main.hpp b/src/main.hpp
index 67d46e07..717e7ba6 100644
--- a/src/main.hpp
+++ b/src/main.hpp
@@ -35,7 +35,8 @@ enum class Reconstruction { undefined, dc, plm, ppm, wenoz };
 enum class Integrator { undefined, rk1, rk2, vl2, rk3 };
 enum class Fluid { undefined, euler, glmmhd };
 enum class Cooling { none, tabular };
-enum class Conduction { none, spitzer, thermal_diff };
+enum class Conduction { none, isotropic, anisotropic };
+enum class ConductionCoeff { none, fixed, spitzer};
 enum class DiffInt { none, unsplit, rkl2 };
 
 enum class Hst { idx, ekin, emag, divb };

From 647490da847ee5185f75fc8504d51b1f9e0f33dc Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 22:30:07 +0200
Subject: [PATCH 15/42] Add isotropic cond to conv test

---
 src/pgen/diffusion.cpp                        |  2 +-
 tst/regression/CMakeLists.txt                 |  2 +-
 .../aniso_therm_cond_gauss_conv.py            | 35 ++++++++++++-------
 3 files changed, 24 insertions(+), 15 deletions(-)

diff --git a/src/pgen/diffusion.cpp b/src/pgen/diffusion.cpp
index b66d35e4..95e0b111 100644
--- a/src/pgen/diffusion.cpp
+++ b/src/pgen/diffusion.cpp
@@ -77,7 +77,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
           // treat as 1 (also in analysis) to prevent division by 0.
           // Note, this is very constructed and needs to be updated/adjusted for isotropic
           // conduction, other directions, and Bfield configs with |B| != 1
-          Real eff_diff_coeff = Bx == 0.0 ? diff_coeff * 0.5 : diff_coeff * Bx * Bx;
+          Real eff_diff_coeff = Bx == 0.0 ? diff_coeff : diff_coeff * Bx * Bx;
           eint = 1 + amp / std::sqrt(4. * M_PI * eff_diff_coeff * t0) *
                          std::exp(-(std::pow(coords.x1v(i), 2.)) /
                                   (4. * eff_diff_coeff * t0));
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index 4eeabefc..3756c964 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -27,7 +27,7 @@ setup_test_serial("cluster_tabular_cooling" "--driver ${PROJECT_BINARY_DIR}/bin/
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
  
   setup_test_serial("aniso_therm_cond_gauss_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 6" "convergence")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 8" "convergence")
 
   setup_test_serial("field_loop" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/field_loop.in --num_steps 9" "convergence")
diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index 1682ff8f..b02e87de 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -30,8 +30,8 @@
 # To prevent littering up imported folders with .pyc files or __pycache_ folder
 sys.dont_write_bytecode = True
 
-res_cfgs = [256, 512]
-field_cfgs = ["aligned", "angle", "perp"]
+res_cfgs = [128, 256]
+field_cfgs = ["none", "aligned", "angle", "perp"]
 tlim = 2.0
 
 all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
@@ -52,6 +52,10 @@ def get_B(field_cfg):
     elif field_cfg == "angle":
         Bx = 1 / np.sqrt(2)
         By = 1 / np.sqrt(2)
+    # isotropic case
+    elif field_cfg == "none":
+        Bx = 0.0
+        By = 0.0
     else:
         raise "Unknown field_cfg: %s" % field_cfg
 
@@ -69,6 +73,11 @@ def Prepare(self, parameters, step):
 
         outname = get_outname(all_cfgs[step - 1])
 
+        if field_cfg == "none":
+            conduction = "isotropic"
+        else:
+            conduction = "anisotropic"
+
         parameters.driver_cmd_line_args = [
             "parthenon/mesh/nx1=%d" % res,
             "parthenon/meshblock/nx1=64",
@@ -86,8 +95,9 @@ def Prepare(self, parameters, step):
             "parthenon/output0/id=%s" % outname,
             "hydro/gamma=2.0",
             "parthenon/time/tlim=%f" % tlim,
+            "diffusion/conduction=%s" % conduction,
             "diffusion/thermal_diff_coeff_code=0.25",
-            "diffusion/integrator=rkl2",
+            "diffusion/integrator=unsplit",
         ]
 
         return parameters
@@ -106,9 +116,9 @@ def Analyse(self, parameters):
             print("Couldn't find module to read Parthenon hdf5 files.")
             return False
 
-        def get_ref(x, Bx):
-            eff_diff_coeff = 0.25 * 0.5 if Bx == 0.0 else 0.25 * Bx * Bx
-            tlim_ = 0.0 if Bx == 0.0 else tlim
+        def get_ref(x, Bx, field_cfg):
+            eff_diff_coeff = 0.25 if Bx == 0.0 else 0.25 * Bx * Bx
+            tlim_ = 0.0 if field_cfg == "perp" else tlim
             return 1.0 + 1e-6 / (
                 np.sqrt(4 * np.pi * eff_diff_coeff * (0.5 + tlim_))
                 / np.exp(-(x ** 2) / (4.0 * eff_diff_coeff * (0.5 + tlim_)))
@@ -129,13 +139,12 @@ def get_ref(x, Bx):
             x = xx[mask]
             res, field_cfg = all_cfgs[step]
             row = res_cfgs.index(res)
-            p[row].plot(x, temp, label=field_cfg)
 
             Bx, By = get_B(field_cfg)
-            temp_ref = get_ref(x, Bx)
-            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = np.average(
-                np.abs(temp - temp_ref)
-            )
+            temp_ref = get_ref(x, Bx, field_cfg)
+            l1 = np.average(np.abs(temp - temp_ref))
+            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = l1
+            p[row].plot(x, temp, label=field_cfg + " L$_1$=%.2g" % l1)
 
         # Plot convergence
         for i, field_cfg in enumerate(field_cfgs):
@@ -155,8 +164,8 @@ def get_ref(x, Bx):
         for field_cfg in field_cfgs:
             Bx, By = get_B(field_cfg)
             for i in range(num_rows):
-                y = get_ref(x, Bx)
-                p[i].plot(x, y, "-", color="black", alpha=0.5)
+                y = get_ref(x, Bx, field_cfg)
+                p[i].plot(x, y, "-", lw=0.5, color="black", alpha=0.8)
                 p[i].grid()
                 p[i].legend()
 

From 4e03d381a215760a27d34a27b57ff481a120a400 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Wed, 6 Oct 2021 22:40:53 +0200
Subject: [PATCH 16/42] Add RKL2 conv test

---
 tst/regression/CMakeLists.txt                 |  2 +-
 .../aniso_therm_cond_gauss_conv.py            | 61 +++++++++++--------
 2 files changed, 38 insertions(+), 25 deletions(-)

diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index 3756c964..0041a870 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -27,7 +27,7 @@ setup_test_serial("cluster_tabular_cooling" "--driver ${PROJECT_BINARY_DIR}/bin/
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
  
   setup_test_serial("aniso_therm_cond_gauss_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 8" "convergence")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 16" "convergence")
 
   setup_test_serial("field_loop" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/field_loop.in --num_steps 9" "convergence")
diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index b02e87de..867a273b 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -30,16 +30,17 @@
 # To prevent littering up imported folders with .pyc files or __pycache_ folder
 sys.dont_write_bytecode = True
 
+int_cfgs = ["unsplit", "rkl2"]
 res_cfgs = [128, 256]
 field_cfgs = ["none", "aligned", "angle", "perp"]
 tlim = 2.0
 
-all_cfgs = list(itertools.product(res_cfgs, field_cfgs))
+all_cfgs = list(itertools.product(res_cfgs, field_cfgs, int_cfgs))
 
 
 def get_outname(all_cfg):
-    res, field_cfg = all_cfg
-    return f"{res}_{field_cfg}"
+    res, field_cfg, int_cfg = all_cfg
+    return f"{res}_{field_cfg}_{int_cfg}"
 
 
 def get_B(field_cfg):
@@ -67,7 +68,7 @@ def Prepare(self, parameters, step):
 
         assert parameters.num_ranks <= 4, "Use <= 4 ranks for diffusion test."
 
-        res, field_cfg = all_cfgs[step - 1]
+        res, field_cfg, int_cfg = all_cfgs[step - 1]
 
         Bx, By = get_B(field_cfg)
 
@@ -97,7 +98,7 @@ def Prepare(self, parameters, step):
             "parthenon/time/tlim=%f" % tlim,
             "diffusion/conduction=%s" % conduction,
             "diffusion/thermal_diff_coeff_code=0.25",
-            "diffusion/integrator=unsplit",
+            "diffusion/integrator=%s" % int_cfg,
         ]
 
         return parameters
@@ -125,9 +126,10 @@ def get_ref(x, Bx, field_cfg):
             )
 
         num_rows = len(res_cfgs)
-        fig, p = plt.subplots(num_rows + 1, 1)
+        num_cols = len(int_cfgs)
+        fig, p = plt.subplots(num_rows + 1, 2, sharey="row", sharex="row")
 
-        l1_err = np.zeros((len(field_cfgs), len(res_cfgs)))
+        l1_err = np.zeros((len(field_cfgs), len(int_cfgs), len(res_cfgs)))
         for step in range(len(all_cfgs)):
             outname = get_outname(all_cfgs[step])
             data_filename = f"{parameters.output_path}/parthenon.{outname}.00001.phdf"
@@ -137,38 +139,49 @@ def get_ref(x, Bx, field_cfg):
             mask = yy == yy[0]
             temp = prim[:, 4][mask]
             x = xx[mask]
-            res, field_cfg = all_cfgs[step]
+            res, field_cfg, int_cfg = all_cfgs[step]
             row = res_cfgs.index(res)
+            col = int_cfgs.index(int_cfg)
 
             Bx, By = get_B(field_cfg)
             temp_ref = get_ref(x, Bx, field_cfg)
             l1 = np.average(np.abs(temp - temp_ref))
-            l1_err[field_cfgs.index(field_cfg), res_cfgs.index(res)] = l1
-            p[row].plot(x, temp, label=field_cfg + " L$_1$=%.2g" % l1)
+            l1_err[
+                field_cfgs.index(field_cfg),
+                int_cfgs.index(int_cfg),
+                res_cfgs.index(res),
+            ] = l1
+            p[row, col].plot(x, temp, label=field_cfg + " L$_1$=%.2g" % l1)
 
         # Plot convergence
         for i, field_cfg in enumerate(field_cfgs):
-            if field_cfg == "perp":
-                continue
-
-            est_conv = np.diff(np.log(l1_err[i, :])) / np.diff(np.log(res_cfgs))
-            p[-1].plot(
-                res_cfgs, l1_err[i, :], label=field_cfg + " conv: %.2f" % est_conv
-            )
-        p[-1].set_xscale("log")
-        p[-1].set_yscale("log")
-        p[-1].legend()
+            for j, int_cfg in enumerate(int_cfgs):
+                if field_cfg == "perp":
+                    continue
+
+                est_conv = np.diff(np.log(l1_err[i, j, :])) / np.diff(np.log(res_cfgs))
+                p[-1, j].plot(
+                    res_cfgs,
+                    l1_err[i, j, :],
+                    label=field_cfg + " conv: %.2f" % est_conv,
+                )
+        p[-1, 0].set_xscale("log")
+        p[-1, 0].set_yscale("log")
+        p[-1, 0].legend(fontsize=8)
+        p[-1, 1].legend(fontsize=8)
 
         # Plot reference lines
         x = np.linspace(-6, 6, 400)
         for field_cfg in field_cfgs:
             Bx, By = get_B(field_cfg)
             for i in range(num_rows):
-                y = get_ref(x, Bx, field_cfg)
-                p[i].plot(x, y, "-", lw=0.5, color="black", alpha=0.8)
-                p[i].grid()
-                p[i].legend()
+                for j in range(num_cols):
+                    y = get_ref(x, Bx, field_cfg)
+                    p[i, j].plot(x, y, "-", lw=0.5, color="black", alpha=0.8)
+                    p[i, j].grid()
+                    p[i, j].legend(fontsize=8)
 
+        fig.tight_layout()
         fig.savefig(
             os.path.join(parameters.output_path, "cond.png"), bbox_inches="tight"
         )

From a235952e118e430135a9f3bb7d34d3f679cfe33a Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Mon, 11 Oct 2021 09:59:08 -0400
Subject: [PATCH 17/42] Add new dt max ratio for rkl2 param

---
 external/parthenon  | 2 +-
 inputs/diffusion.in | 1 +
 src/hydro/hydro.cpp | 8 +++++++-
 src/hydro/hydro.hpp | 3 +--
 src/main.hpp        | 2 +-
 5 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/external/parthenon b/external/parthenon
index b0678c49..bba38aa4 160000
--- a/external/parthenon
+++ b/external/parthenon
@@ -1 +1 @@
-Subproject commit b0678c494d995952f62daa9fba062eb0839ef8af
+Subproject commit bba38aa41eac6d53388a5ba55c1e7461a0840b19
diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index afd9843d..fd5c09c7 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -64,6 +64,7 @@ integrator = unsplit
 conduction = anisotropic
 conduction_coeff = fixed
 thermal_diff_coeff_code = 0.01
+rkl2_max_dt_ratio = 200.0
 
 <parthenon/output0>
 file_type = hdf5
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 9330b1d6..3418349b 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -56,7 +56,7 @@ parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin) {
 // the task list is constructed (versus when the task list is being executed).
 // TODO(next person touching this function): If more/separate feature are required
 // please separate concerns.
-void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &tm) {
+void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {
   auto hydro_pkg = pmesh->block_list[0]->packages.Get("Hydro");
   const auto num_partitions = pmesh->DefaultNumPartitions();
 
@@ -72,6 +72,10 @@ void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, const SimTime &
                                       MPI_MIN, MPI_COMM_WORLD));
 #endif
     hydro_pkg->UpdateParam("dt_diff", dt_diff);
+    const auto max_dt_ratio = hydro_pkg->Param<Real>("rkl2_max_dt_ratio");
+    if (max_dt_ratio > 0.0 && tm.dt / dt_diff > max_dt_ratio) {
+      tm.dt = max_dt_ratio * dt_diff;
+    }
   }
 }
 
@@ -496,6 +500,8 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       diffint = DiffInt::unsplit;
     } else if (diffint_str == "rkl2") {
       diffint = DiffInt::rkl2;
+      auto rkl2_dt_ratio = pin->GetOrAddReal("diffusion", "rkl2_max_dt_ratio", -1.0);
+      pkg->AddParam<>("rkl2_max_dt_ratio", rkl2_dt_ratio);
     } else if (diffint_str != "none") {
       PARTHENON_FAIL("AthenaPK unknown integration method for diffusion processes. "
                      "Options are: none, unsplit, rkl2");
diff --git a/src/hydro/hydro.hpp b/src/hydro/hydro.hpp
index ebc1b246..d54e96b1 100644
--- a/src/hydro/hydro.hpp
+++ b/src/hydro/hydro.hpp
@@ -16,8 +16,7 @@ using namespace parthenon::package::prelude;
 namespace Hydro {
 
 parthenon::Packages_t ProcessPackages(std::unique_ptr<ParameterInput> &pin);
-void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin,
-                               const parthenon::SimTime &tm);
+void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, parthenon::SimTime &tm);
 std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin);
 
 template <Fluid fluid>
diff --git a/src/main.hpp b/src/main.hpp
index 717e7ba6..9cfea40c 100644
--- a/src/main.hpp
+++ b/src/main.hpp
@@ -36,7 +36,7 @@ enum class Integrator { undefined, rk1, rk2, vl2, rk3 };
 enum class Fluid { undefined, euler, glmmhd };
 enum class Cooling { none, tabular };
 enum class Conduction { none, isotropic, anisotropic };
-enum class ConductionCoeff { none, fixed, spitzer};
+enum class ConductionCoeff { none, fixed, spitzer };
 enum class DiffInt { none, unsplit, rkl2 };
 
 enum class Hst { idx, ekin, emag, divb };

From 991d353f00a1c3db02bad8bef7aa697414385c46 Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Mon, 11 Oct 2021 11:42:51 -0400
Subject: [PATCH 18/42] Add prolongation and fluxcorrect to RKL2 task list

---
 src/hydro/hydro_driver.cpp | 78 +++++++++++++++++++++++++++++---------
 1 file changed, 61 insertions(+), 17 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 19ccbcb5..2c72e868 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -222,8 +222,8 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   if (parthenon::Globals::my_rank == 0) {
     const auto ratio = 2.0 * tau / mindt_diff;
     std::cout << "STS ratio: " << ratio << " Taking " << s_rkl << " steps." << std::endl;
-    if (ratio > 100.0) {
-      std::cout << "WARNING: ratio is > 100. Proceed at own risk." << std::endl;
+    if (ratio > 200.1) {
+      std::cout << "WARNING: ratio is > 200. Proceed at own risk." << std::endl;
     }
   }
 
@@ -248,13 +248,10 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   }
 
   const int num_partitions = pmesh->DefaultNumPartitions();
-  TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
+  TaskRegion &region_calc_fluxes_step_init = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
-    auto &tl = region_rkl2_step_init[i];
+    auto &tl = region_calc_fluxes_step_init[i];
     auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
-    auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
-    auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
     // Reset flux arrays (not guaranteed to be zero)
     auto reset_fluxes = tl.AddTask(none, ResetFluxes, Y0.get());
 
@@ -262,10 +259,27 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     // as MY0 and reuse later (it is used in every subsetp).
     auto hydro_diff_fluxes =
         tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Y0.get());
+  }
 
-    auto init_MY0 =
-        tl.AddTask(hydro_diff_fluxes, parthenon::Update::FluxDivergence<MeshData<Real>>,
-                   Y0.get(), MY0.get());
+  TaskRegion &region_flux_correct_step_init = ptask_coll->AddRegion(blocks.size());
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &tl = region_flux_correct_step_init[i];
+    auto &Y0 = blocks[i]->meshblock_data.Get("base");
+    auto send_flux = tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, Y0.get());
+    auto recv_flux =
+        tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, Y0.get());
+  }
+
+  TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
+  for (int i = 0; i < num_partitions; i++) {
+    auto &tl = region_rkl2_step_init[i];
+    auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
+
+    auto init_MY0 = tl.AddTask(none, parthenon::Update::FluxDivergence<MeshData<Real>>,
+                               Y0.get(), MY0.get());
 
     // Initialize Y0 and Y1 and the recursion relation needs data from the two
     // preceeding stages.
@@ -288,6 +302,13 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
     auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
                                        Yjm1.get(), BoundaryCommSubset::all);
+    auto prolongBound = none;
+    if (pmesh->multilevel) {
+      prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, Yjm1);
+    }
+
+    // set physical boundaries
+    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, Yjm1);
   }
   TaskRegion &region_cons_to_prim = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
@@ -325,13 +346,10 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
                                    BoundaryCommSubset::all);
     }
 
-    TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
+    TaskRegion &region_calc_fluxes_step_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
-      auto &tl = region_rkl2_step_other[i];
-      auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
-      auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+      auto &tl = region_calc_fluxes_step_other[i];
       auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
-      auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       // Reset flux arrays (not guaranteed to be zero)
       auto reset_fluxes = tl.AddTask(none, ResetFluxes, Yjm1.get());
@@ -339,10 +357,29 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
       // Calculate the diffusive fluxes for Yjm1 (here u1)
       auto hydro_diff_fluxes =
           tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Yjm1.get());
+    }
+
+    TaskRegion &region_flux_correct_step_other = ptask_coll->AddRegion(blocks.size());
+    for (int i = 0; i < blocks.size(); i++) {
+      auto &tl = region_flux_correct_step_other[i];
+      auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
+      auto send_flux =
+          tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, Yjm1.get());
+      auto recv_flux =
+          tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, Yjm1.get());
+    }
+
+    TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
+    for (int i = 0; i < num_partitions; i++) {
+      auto &tl = region_rkl2_step_other[i];
+      auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
+      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       auto rkl2_step_other =
-          tl.AddTask(hydro_diff_fluxes, RKL2StepOther, Y0.get(), Yjm1.get(), Yjm2.get(),
-                     MY0.get(), mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
+          tl.AddTask(none, RKL2StepOther, Y0.get(), Yjm1.get(), Yjm2.get(), MY0.get(),
+                     mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
 
       // update ghost cells of Yjm1 (currently storing Yj)
       // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
@@ -359,6 +396,13 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
       auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
       auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
                                          Yjm1.get(), BoundaryCommSubset::all);
+      auto prolongBound = none;
+      if (pmesh->multilevel) {
+        prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, Yjm1);
+      }
+
+      // set physical boundaries
+      auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, Yjm1);
     }
     TaskRegion &region_cons_to_prim_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {

From 7fd557d41ca2e45fe1988d1df192920d3f520066 Mon Sep 17 00:00:00 2001
From: Philipp Grete <grete@pa.msu.edu>
Date: Tue, 12 Oct 2021 09:15:45 -0400
Subject: [PATCH 19/42] Use base container as active STS container (work around
 an AMR bug when using prolong/restrict with non-base containers)

---
 src/hydro/hydro_driver.cpp | 140 +++++++++++++++++++------------------
 1 file changed, 71 insertions(+), 69 deletions(-)

diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 2c72e868..971e7506 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -229,93 +229,110 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
 
   TaskID none(0);
 
+  // Store initial u0 in u1 as "base" will continusouly be updated but initial state Y0 is
+  // required for each stage.
+  TaskRegion &region_copy_out = ptask_coll->AddRegion(blocks.size());
+  for (int i = 0; i < blocks.size(); i++) {
+    auto &tl = region_copy_out[i];
+    auto &Y0 = blocks[i]->meshblock_data.Get("u1");
+    auto &base = blocks[i]->meshblock_data.Get();
+    tl.AddTask(
+        none,
+        [](MeshBlockData<Real> *dst, MeshBlockData<Real> *src) {
+          dst->Get("cons").data.DeepCopy(src->Get("cons").data);
+          dst->Get("prim").data.DeepCopy(src->Get("prim").data);
+          return TaskStatus::complete;
+        },
+        Y0.get(), base.get());
+  }
+
   TaskRegion &region_init = ptask_coll->AddRegion(blocks.size());
   for (int i = 0; i < blocks.size(); i++) {
     auto &pmb = blocks[i];
     auto &tl = region_init[i];
-    auto &u0 = pmb->meshblock_data.Get();
-    auto &u1 = pmb->meshblock_data.Get("u1");
-    // only need boundaries for Yjm1 (u1 here)
-    auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, u1.get(),
+    auto &base = pmb->meshblock_data.Get();
+    auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, base.get(),
                                  BoundaryCommSubset::all);
 
     // Add extra registers. No-op for existing variables so it's safe to call every
     // time.
     // TODO(pgrete) this allocates all Variables, i.e., prim and cons vector, but only a
     // subset is actually needed. Streamline to allocate only required vars.
-    pmb->meshblock_data.Add("MY0", u0);
-    pmb->meshblock_data.Add("Yjm2", u0);
+    pmb->meshblock_data.Add("MY0", base);
+    pmb->meshblock_data.Add("Yjm2", base);
   }
 
   const int num_partitions = pmesh->DefaultNumPartitions();
   TaskRegion &region_calc_fluxes_step_init = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_calc_fluxes_step_init[i];
-    auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto &base = pmesh->mesh_data.GetOrAdd("base", i);
     // Reset flux arrays (not guaranteed to be zero)
-    auto reset_fluxes = tl.AddTask(none, ResetFluxes, Y0.get());
+    auto reset_fluxes = tl.AddTask(none, ResetFluxes, base.get());
 
-    // Calculate the diffusive fluxes for Y0 (here u0) so that we can store the result
-    // as MY0 and reuse later (it is used in every subsetp).
+    // Calculate the diffusive fluxes for Y0 (here still "base" as nothing has been
+    // updated yet) so that we can store the result as MY0 and reuse later
+    // (in every subsetp).
     auto hydro_diff_fluxes =
-        tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Y0.get());
+        tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), base.get());
   }
 
   TaskRegion &region_flux_correct_step_init = ptask_coll->AddRegion(blocks.size());
   for (int i = 0; i < blocks.size(); i++) {
     auto &tl = region_flux_correct_step_init[i];
-    auto &Y0 = blocks[i]->meshblock_data.Get("base");
-    auto send_flux = tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, Y0.get());
+    auto &base = blocks[i]->meshblock_data.Get("base");
+    auto send_flux =
+        tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, base.get());
     auto recv_flux =
-        tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, Y0.get());
+        tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, base.get());
   }
 
   TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_rkl2_step_init[i];
-    auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+    auto &Y0 = pmesh->mesh_data.GetOrAdd("u1", i);
     auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &base = pmesh->mesh_data.GetOrAdd("base", i);
     auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
     auto init_MY0 = tl.AddTask(none, parthenon::Update::FluxDivergence<MeshData<Real>>,
-                               Y0.get(), MY0.get());
+                               base.get(), MY0.get());
 
-    // Initialize Y0 and Y1 and the recursion relation needs data from the two
-    // preceeding stages.
-    auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, Y0.get(), Yjm1.get(),
+    // Initialize Y0 and Y1 and the recursion relation starting with j = 2 needs data from
+    // the two preceeding stages.
+    auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, Y0.get(), base.get(),
                                       Yjm2.get(), MY0.get(), s_rkl, tau);
 
-    // update ghost cells of Y1 (as MY1 is calculated for each Y_j)
+    // Update ghost cells of Y1 (as MY1 is calculated for each Y_j)
     // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
     auto send = tl.AddTask(rkl2_step_first,
-                           parthenon::cell_centered_bvars::SendBoundaryBuffers, Yjm1);
+                           parthenon::cell_centered_bvars::SendBoundaryBuffers, base);
     auto recv =
-        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, Yjm1);
+        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, base);
     auto fill_from_bufs =
-        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, Yjm1);
+        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, base);
   }
 
   TaskRegion &region_clear_bnd = ptask_coll->AddRegion(blocks.size());
   for (int i = 0; i < blocks.size(); i++) {
     auto &tl = region_clear_bnd[i];
-    auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
+    auto &base = blocks[i]->meshblock_data.Get();
     auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                       Yjm1.get(), BoundaryCommSubset::all);
+                                       base.get(), BoundaryCommSubset::all);
     auto prolongBound = none;
     if (pmesh->multilevel) {
-      prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, Yjm1);
+      prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, base);
     }
 
     // set physical boundaries
-    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, Yjm1);
+    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, base);
   }
   TaskRegion &region_cons_to_prim = ptask_coll->AddRegion(num_partitions);
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_cons_to_prim[i];
-    auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+    auto &base = pmesh->mesh_data.GetOrAdd("base", i);
     auto fill_derived =
-        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, Yjm1.get());
+        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, base.get());
   }
 
   // Compute coefficients. Meyer+2012 eq. (16)
@@ -339,98 +356,83 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     for (int i = 0; i < blocks.size(); i++) {
       auto &pmb = blocks[i];
       auto &tl = region_init_other[i];
-      auto &u0 = pmb->meshblock_data.Get();
-      auto &u1 = pmb->meshblock_data.Get("u1");
-      // only need boundaries for Yjm1 (u1 here)
-      auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, u1.get(),
+      auto &base = pmb->meshblock_data.Get();
+      // Only need boundaries for base as it's the only "active" container exchanging
+      // data/fluxes with neighbors. All other containers are passive (i.e., data is only
+      // used but not exchanged).
+      auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, base.get(),
                                    BoundaryCommSubset::all);
     }
 
     TaskRegion &region_calc_fluxes_step_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_calc_fluxes_step_other[i];
-      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &base = pmesh->mesh_data.GetOrAdd("base", i);
 
       // Reset flux arrays (not guaranteed to be zero)
-      auto reset_fluxes = tl.AddTask(none, ResetFluxes, Yjm1.get());
+      auto reset_fluxes = tl.AddTask(none, ResetFluxes, base.get());
 
       // Calculate the diffusive fluxes for Yjm1 (here u1)
       auto hydro_diff_fluxes =
-          tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), Yjm1.get());
+          tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), base.get());
     }
 
     TaskRegion &region_flux_correct_step_other = ptask_coll->AddRegion(blocks.size());
     for (int i = 0; i < blocks.size(); i++) {
       auto &tl = region_flux_correct_step_other[i];
-      auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
+      auto &base = blocks[i]->meshblock_data.Get();
       auto send_flux =
-          tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, Yjm1.get());
+          tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, base.get());
       auto recv_flux =
-          tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, Yjm1.get());
+          tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, base.get());
     }
 
     TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_rkl2_step_other[i];
-      auto &Y0 = pmesh->mesh_data.GetOrAdd("base", i);
+      auto &Y0 = pmesh->mesh_data.GetOrAdd("u1", i);
       auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &base = pmesh->mesh_data.GetOrAdd("base", i);
       auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       auto rkl2_step_other =
-          tl.AddTask(none, RKL2StepOther, Y0.get(), Yjm1.get(), Yjm2.get(), MY0.get(),
+          tl.AddTask(none, RKL2StepOther, Y0.get(), base.get(), Yjm2.get(), MY0.get(),
                      mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
 
-      // update ghost cells of Yjm1 (currently storing Yj)
+      // update ghost cells of base (currently storing Yj)
       // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
       auto send = tl.AddTask(rkl2_step_other,
-                             parthenon::cell_centered_bvars::SendBoundaryBuffers, Yjm1);
+                             parthenon::cell_centered_bvars::SendBoundaryBuffers, base);
       auto recv =
-          tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, Yjm1);
+          tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, base);
       auto fill_from_bufs =
-          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, Yjm1);
+          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, base);
     }
     TaskRegion &region_clear_bnd_other = ptask_coll->AddRegion(blocks.size());
     for (int i = 0; i < blocks.size(); i++) {
       auto &tl = region_clear_bnd_other[i];
-      auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
+      auto &base = blocks[i]->meshblock_data.Get();
       auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                         Yjm1.get(), BoundaryCommSubset::all);
+                                         base.get(), BoundaryCommSubset::all);
       auto prolongBound = none;
       if (pmesh->multilevel) {
-        prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, Yjm1);
+        prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, base);
       }
 
       // set physical boundaries
-      auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, Yjm1);
+      auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, base);
     }
     TaskRegion &region_cons_to_prim_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_cons_to_prim_other[i];
-      auto &Yjm1 = pmesh->mesh_data.GetOrAdd("u1", i);
+      auto &base = pmesh->mesh_data.GetOrAdd("base", i);
       auto fill_derived =
-          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, Yjm1.get());
+          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, base.get());
     }
 
     b_jm2 = b_jm1;
     b_jm1 = b_j;
   }
-
-  // copy final result back to u0
-  TaskRegion &region_copy_out = ptask_coll->AddRegion(blocks.size());
-  for (int i = 0; i < blocks.size(); i++) {
-    auto &tl = region_copy_out[i];
-    auto &u0 = blocks[i]->meshblock_data.Get();
-    auto &Yjm1 = blocks[i]->meshblock_data.Get("u1");
-    tl.AddTask(
-        none,
-        [](MeshBlockData<Real> *u0, MeshBlockData<Real> *u1) {
-          u0->Get("cons").data.DeepCopy(u1->Get("cons").data);
-          u0->Get("prim").data.DeepCopy(u1->Get("prim").data);
-          return TaskStatus::complete;
-        },
-        u0.get(), Yjm1.get());
-  }
 }
 
 // See the advection.hpp declaration for a description of how this function gets called.

From 1f9cf686d25f2d1c963e48722f3b2ad54fab1b6b Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 8 Dec 2021 13:49:10 +0100
Subject: [PATCH 20/42] Add isotropic Spitzer thermal conduction timestep

---
 src/hydro/diffusion/conduction.cpp | 55 ++++++++++++++++++++----------
 src/hydro/diffusion/diffusion.hpp  |  3 ++
 2 files changed, 40 insertions(+), 18 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 377b0279..04455cbf 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -65,14 +65,13 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
   const auto gm1 = hydro_pkg->Param<Real>("AdiabaticIndex");
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
 
-  if (thermal_diff.GetType() == Conduction::isotropic) {
-    // Isotropic thermal conduction currently only supports a fixed, uniform coefficient
-    // so it's safe to get it outside the kernel.
+  if (thermal_diff.GetType() == Conduction::isotropic &&
+      thermal_diff.GetCoeffType() == ConductionCoeff::fixed) {
     // TODO(pgrete): once mindx is properly calculated before this loop, we can get rid of
     // it entirely.
     const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
     Kokkos::parallel_reduce(
-        "EstimateConductionTimestep (iso)",
+        "EstimateConductionTimestep (iso fixed)",
         Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
             DevExecSpace(), {0, kb.s, jb.s, ib.s},
             {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
@@ -91,9 +90,9 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           }
         },
         Kokkos::Min<Real>(min_dt_cond));
-  } else if (thermal_diff.GetType() == Conduction::anisotropic) {
+  } else {
     Kokkos::parallel_reduce(
-        "EstimateConductionTimestep (aniso)",
+        "EstimateConductionTimestep",
         Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
             DevExecSpace(), {0, kb.s, jb.s, ib.s},
             {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
@@ -103,20 +102,18 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           const auto &prim = prim_pack(b);
           const auto &rho = prim(IDN, k, j, i);
           const auto &p = prim(IPR, k, j, i);
-          const auto &Bx = prim(IB1, k, j, i);
-          const auto &By = prim(IB2, k, j, i);
-          const auto &Bz = prim(IB3, k, j, i);
-          const auto Bmag = sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
 
           const auto dTdx = 0.5 *
                             (prim(IPR, k, j, i + 1) / prim(IDN, k, j, i + 1) -
                              prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1)) /
                             coords.dx1v(i);
 
-          const auto dTdy = 0.5 *
-                            (prim(IPR, k, j + 1, i) / prim(IDN, k, j + 1, i) -
-                             prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i)) /
-                            coords.dx2v(j);
+          const auto dTdy = ndim >= 2
+                                ? 0.5 *
+                                      (prim(IPR, k, j + 1, i) / prim(IDN, k, j + 1, i) -
+                                       prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i)) /
+                                      coords.dx2v(j)
+                                : 0.0;
 
           const auto dTdz = ndim >= 3
                                 ? 0.5 *
@@ -125,14 +122,36 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
                                       coords.dx3v(k)
                                 : 0.0;
           const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
+
+          // No temperature gradient -> no thermal conduction-> no timestep restriction
+          if (gradTmag == 0.0) {
+            return;
+          }
           auto thermal_diff_coeff = thermal_diff.Get(p, rho, gradTmag);
 
-          const auto denom = Bmag * gradTmag;
-          // if either Bmag or gradTmag are 0, no anisotropic thermal conduction
-          if (denom == 0.0) {
+          if (thermal_diff.GetType() == Conduction::isotropic) {
+            min_dt = fmin(min_dt,
+                          SQR(coords.Dx(parthenon::X1DIR, k, j, i)) / thermal_diff_coeff);
+            if (ndim >= 2) {
+              min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
+                                        thermal_diff_coeff);
+            }
+            if (ndim >= 3) {
+              min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
+                                        thermal_diff_coeff);
+            }
+            return;
+          }
+          const auto &Bx = prim(IB1, k, j, i);
+          const auto &By = prim(IB2, k, j, i);
+          const auto &Bz = prim(IB3, k, j, i);
+          const auto Bmag = sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
+          // Need to have some local field for anisotropic conduction
+          if (Bmag == 0.0) {
             return;
           }
-          const auto costheta = fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / denom;
+          const auto costheta =
+              fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / (Bmag * gradTmag);
 
           min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
                                     (thermal_diff_coeff * fabs(Bx) / Bmag * costheta +
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index 048dc400..fda0b4c3 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -87,6 +87,9 @@ struct ThermalDiffusivity {
 
   KOKKOS_INLINE_FUNCTION
   Conduction GetType() const { return conduction_; }
+
+  KOKKOS_INLINE_FUNCTION
+  ConductionCoeff GetCoeffType() const { return conduction_coeff_type_; }
 };
 
 Real EstimateConductionTimestep(MeshData<Real> *md);

From 1b9a7976a2b1e97a5fe48ec74f479853181241c4 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 8 Dec 2021 16:26:40 +0100
Subject: [PATCH 21/42] Calc isotropic, non-const thermal diff

---
 src/hydro/diffusion/conduction.cpp | 95 +++++++++++++++++++-----------
 src/hydro/diffusion/diffusion.cpp  | 13 ++--
 src/hydro/diffusion/diffusion.hpp  |  8 +--
 src/hydro/hydro.cpp                | 11 +---
 4 files changed, 75 insertions(+), 52 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 04455cbf..1b92e027 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -17,6 +17,7 @@
 // AthenaPK headers
 #include "../../main.hpp"
 #include "diffusion.hpp"
+#include "utils/error_checking.hpp"
 
 using namespace parthenon::package::prelude;
 
@@ -92,7 +93,7 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
         Kokkos::Min<Real>(min_dt_cond));
   } else {
     Kokkos::parallel_reduce(
-        "EstimateConductionTimestep",
+        "EstimateConductionTimestep (general)",
         Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
             DevExecSpace(), {0, kb.s, jb.s, ib.s},
             {prim_pack.GetDim(5), kb.e + 1, jb.e + 1, ib.e + 1},
@@ -174,9 +175,9 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
 }
 
 //---------------------------------------------------------------------------------------
-//! Calculate isotropic thermal conduction
+//! Calculate isotropic thermal conduction with fixed coefficient
 
-void ThermalFluxIso(MeshData<Real> *md) {
+void ThermalFluxIsoFixed(MeshData<Real> *md) {
   auto pmb = md->GetBlockData(0)->GetBlockPointer();
   IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
   IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
@@ -191,8 +192,7 @@ void ThermalFluxIso(MeshData<Real> *md) {
   const int ndim = pmb->pmy_mesh->ndim;
 
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
-  // Isotropic thermal conduction currently only supports a fixed, uniform coefficient
-  // so it's safe to get it outside the kernel.
+  // Using fixed and uniform coefficient so it's safe to get it outside the kernel.
   const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
 
   parthenon::par_for(
@@ -249,9 +249,10 @@ void ThermalFluxIso(MeshData<Real> *md) {
 }
 
 //---------------------------------------------------------------------------------------
-//! Calculate anisotropic thermal conduction
+//! Calculate thermal conduction, general case, i.e., anisotropic and/or with varying
+//! (incl. saturated) coefficient
 
-void ThermalFluxAniso(MeshData<Real> *md) {
+void ThermalFluxGeneral(MeshData<Real> *md) {
   auto pmb = md->GetBlockData(0)->GetBlockPointer();
   IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
   IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
@@ -268,7 +269,7 @@ void ThermalFluxAniso(MeshData<Real> *md) {
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
 
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (aniso)",
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (general)",
       parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
       ib.e + 1, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
@@ -277,7 +278,6 @@ void ThermalFluxAniso(MeshData<Real> *md) {
 
         // Variables only required in 3D case
         Real dTdz = 0.0;
-        Real Bz = 0.0;
 
         // clang-format off
         /* Monotonized temperature difference dT/dy */
@@ -303,7 +303,6 @@ void ThermalFluxAniso(MeshData<Real> *md) {
                                 prim(IPR, k    , j, i - 1) / prim(IDN, k    , j, i - 1) -
                                 prim(IPR, k - 1, j, i - 1) / prim(IDN, k - 1, j, i - 1)) /
                  coords.Dx(parthenon::X3DIR, k, j, i);
-          Bz = 0.5 * (prim(IB3, k, j, i - 1) + prim(IB3, k, j, i));
         }
         // clang-format on
 
@@ -311,20 +310,30 @@ void ThermalFluxAniso(MeshData<Real> *md) {
         const auto T_im1 = prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1);
         const auto dTdx = (T_i - T_im1) / coords.Dx(parthenon::X1DIR, k, j, i);
 
-        // Calc interface values
-        const auto Bx = 0.5 * (prim(IB1, k, j, i - 1) + prim(IB1, k, j, i));
-        const auto By = 0.5 * (prim(IB2, k, j, i - 1) + prim(IB2, k, j, i));
-        auto B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-        B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-        const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+        Real flux_grad = 0.0;
+        if (thermal_diff.GetType() == Conduction::anisotropic) {
+          const auto Bx = 0.5 * (prim(IB1, k, j, i - 1) + prim(IB1, k, j, i));
+          const auto By = 0.5 * (prim(IB2, k, j, i - 1) + prim(IB2, k, j, i));
+          const auto Bz =
+              ndim >= 3 ? 0.5 * (prim(IB3, k, j, i - 1) + prim(IB3, k, j, i)) : 0.0;
+          auto B02 = SQR(Bx) + SQR(By) + SQR(Bz);
+          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
+          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+          flux_grad = (Bx * bDotGradT) / B02;
+        } else if (thermal_diff.GetType() == Conduction::isotropic) {
+          flux_grad = dTdx;
+        } else {
+          PARTHENON_FAIL("Unknown thermal diffusion flux.");
+        }
 
+        // Calc interface values
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
         const auto thermal_diff_f =
             0.5 *
             (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
              thermal_diff.Get(prim(IPR, k, j, i - 1), prim(IDN, k, j, i - 1), gradTmag));
-        cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_f * denf * (Bx * bDotGradT) / B02;
+        cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
 
   if (ndim < 2) {
@@ -332,7 +341,7 @@ void ThermalFluxAniso(MeshData<Real> *md) {
   }
   /* Compute heat fluxes in 2-direction  --------------------------------------*/
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (aniso)",
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (general)",
       parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1,
       ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
@@ -341,7 +350,6 @@ void ThermalFluxAniso(MeshData<Real> *md) {
 
         // Variables only required in 3D case
         Real dTdz = 0.0;
-        Real Bz = 0.0;
 
         // clang-format off
         /* Monotonized temperature difference dT/dx */
@@ -368,7 +376,6 @@ void ThermalFluxAniso(MeshData<Real> *md) {
                                 prim(IPR, k - 1, j - 1, i) / prim(IDN, k - 1, j - 1, i)) /
                  coords.Dx(parthenon::X3DIR, k, j, i);
 
-          Bz = 0.5 * (prim(IB3, k, j - 1, i) + prim(IB3, k, j, i));
         }
         // clang-format on
 
@@ -376,20 +383,30 @@ void ThermalFluxAniso(MeshData<Real> *md) {
         const auto T_jm1 = prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i);
         const auto dTdy = (T_j - T_jm1) / coords.Dx(parthenon::X2DIR, k, j, i);
 
-        // Calc interface values
-        const auto Bx = 0.5 * (prim(IB1, k, j - 1, i) + prim(IB1, k, j, i));
-        const auto By = 0.5 * (prim(IB2, k, j - 1, i) + prim(IB2, k, j, i));
-        Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-        B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-        const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+        Real flux_grad = 0.0;
+        if (thermal_diff.GetType() == Conduction::anisotropic) {
+          const auto Bx = 0.5 * (prim(IB1, k, j - 1, i) + prim(IB1, k, j, i));
+          const auto By = 0.5 * (prim(IB2, k, j - 1, i) + prim(IB2, k, j, i));
+          const auto Bz =
+              ndim >= 3 ? 0.5 * (prim(IB3, k, j - 1, i) + prim(IB3, k, j, i)) : 0.0;
+          Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
+          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
+          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+          flux_grad = (By * bDotGradT) / B02;
+        } else if (thermal_diff.GetType() == Conduction::isotropic) {
+          flux_grad = dTdy;
+        } else {
+          PARTHENON_FAIL("Unknown thermal diffusion flux.");
+        }
 
+        // Calc interface values
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
         const auto thermal_diff_f =
             0.5 *
             (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
              thermal_diff.Get(prim(IPR, k, j - 1, i), prim(IDN, k, j - 1, i), gradTmag));
-        cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_f * denf * (By * bDotGradT) / B02;
+        cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
   /* Compute heat fluxes in 3-direction, 3D problem ONLY  ---------------------*/
   if (ndim < 3) {
@@ -397,7 +414,7 @@ void ThermalFluxAniso(MeshData<Real> *md) {
   }
 
   parthenon::par_for(
-      DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (aniso)",
+      DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (general)",
       parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e,
       ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
         const auto &coords = prim_pack.coords(b);
@@ -434,12 +451,20 @@ void ThermalFluxAniso(MeshData<Real> *md) {
         const auto T_km1 = prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i);
         const auto dTdz = (T_k - T_km1) / coords.Dx(parthenon::X3DIR, k, j, i);
 
-        const auto Bx = 0.5 * (prim(IB1, k - 1, j, i) + prim(IB1, k, j, i));
-        const auto By = 0.5 * (prim(IB2, k - 1, j, i) + prim(IB2, k, j, i));
-        const auto Bz = 0.5 * (prim(IB3, k - 1, j, i) + prim(IB3, k, j, i));
-        Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-        B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-        const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+        Real flux_grad = 0.0;
+        if (thermal_diff.GetType() == Conduction::anisotropic) {
+          const auto Bx = 0.5 * (prim(IB1, k - 1, j, i) + prim(IB1, k, j, i));
+          const auto By = 0.5 * (prim(IB2, k - 1, j, i) + prim(IB2, k, j, i));
+          const auto Bz = 0.5 * (prim(IB3, k - 1, j, i) + prim(IB3, k, j, i));
+          Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
+          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
+          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
+          flux_grad = (Bz * bDotGradT) / B02;
+        } else if (thermal_diff.GetType() == Conduction::isotropic) {
+          flux_grad = dTdz;
+        } else {
+          PARTHENON_FAIL("Unknown thermal diffusion flux.");
+        }
 
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
@@ -448,6 +473,6 @@ void ThermalFluxAniso(MeshData<Real> *md) {
             (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
              thermal_diff.Get(prim(IPR, k - 1, j, i), prim(IDN, k - 1, j, i), gradTmag));
 
-        cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_f * denf * (Bz * bDotGradT) / B02;
+        cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
 }
diff --git a/src/hydro/diffusion/diffusion.cpp b/src/hydro/diffusion/diffusion.cpp
index 3a2c4f04..20617ca2 100644
--- a/src/hydro/diffusion/diffusion.cpp
+++ b/src/hydro/diffusion/diffusion.cpp
@@ -17,10 +17,15 @@ using namespace parthenon::package::prelude;
 
 TaskStatus CalcDiffFluxes(StateDescriptor *hydro_pkg, MeshData<Real> *md) {
   const auto &conduction = hydro_pkg->Param<Conduction>("conduction");
-  if (conduction == Conduction::isotropic) {
-    ThermalFluxIso(md);
-  } else if (conduction == Conduction::anisotropic) {
-    ThermalFluxAniso(md);
+  if (conduction != Conduction::none) {
+    const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
+
+    if (conduction == Conduction::isotropic &&
+        thermal_diff.GetCoeffType() == ConductionCoeff::fixed) {
+      ThermalFluxIsoFixed(md);
+    } else {
+      ThermalFluxGeneral(md);
+    }
   }
   return TaskStatus::complete;
 }
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index fda0b4c3..7dedfa0a 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -94,10 +94,10 @@ struct ThermalDiffusivity {
 
 Real EstimateConductionTimestep(MeshData<Real> *md);
 
-//! Calculate isotropic thermal conduction
-void ThermalFluxIso(MeshData<Real> *md);
-//! Calculate anisotropic thermal conduction
-void ThermalFluxAniso(MeshData<Real> *md);
+//! Calculate isotropic thermal conduction with fixed coefficient
+void ThermalFluxIsoFixed(MeshData<Real> *md);
+//! Calculate thermal conduction (general case incl. anisotropic and saturated)
+void ThermalFluxGeneral(MeshData<Real> *md);
 
 // Calculate all diffusion fluxes, i.e., update the .flux views in md
 TaskStatus CalcDiffFluxes(StateDescriptor *hydro_pkg, MeshData<Real> *md);
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 6784e7b9..8354623f 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -61,7 +61,8 @@ void PreStepMeshUserWorkInLoop(Mesh *pmesh, ParameterInput *pin, SimTime &tm) {
   auto hydro_pkg = pmesh->block_list[0]->packages.Get("Hydro");
   const auto num_partitions = pmesh->DefaultNumPartitions();
 
-  if ((hydro_pkg->Param<DiffInt>("diffint") == DiffInt::rkl2)) {
+  if ((hydro_pkg->Param<DiffInt>("diffint") == DiffInt::rkl2) &&
+      (hydro_pkg->Param<Conduction>("conduction") != Conduction::none)) {
     auto dt_diff = std::numeric_limits<Real>::max();
     for (auto i = 0; i < num_partitions; i++) {
       auto &md = pmesh->mesh_data.GetOrAdd("base", i);
@@ -486,14 +487,6 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         PARTHENON_FAIL("Thermal conduction is enabled but no coefficient is set. Please "
                        "set diffusion/conduction_coeff to either 'spitzer' or 'fixed'");
       }
-
-      if (conduction == Conduction::isotropic &&
-          conduction_coeff != ConductionCoeff::fixed) {
-        PARTHENON_FAIL(
-            "Isotropic thermal conduction is currently only supported with a fixed "
-            "(spatially and temporally) conduction coefficient. Please get in contact if "
-            "you need varying coefficients (e.g., Spitzer) for isotropic conduction.")
-      }
     }
     pkg->AddParam<>("conduction", conduction);
 

From c9f1dfbfbbdc11a90ab780f39ce1bfb52e65089c Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Tue, 12 Apr 2022 14:46:28 -0400
Subject: [PATCH 22/42] Fix calc of saturated heat flux

---
 src/hydro/diffusion/conduction.cpp | 28 ++++++++++++++++------------
 src/hydro/diffusion/diffusion.hpp  |  6 +++---
 src/hydro/hydro.cpp                | 10 ++++++----
 src/units.hpp                      |  2 ++
 4 files changed, 27 insertions(+), 19 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 1b92e027..0c57a69e 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -12,32 +12,36 @@
 //! \brief
 
 // Parthenon headers
+#include <cmath>
 #include <parthenon/package.hpp>
 
 // AthenaPK headers
 #include "../../main.hpp"
+#include "config.hpp"
 #include "diffusion.hpp"
 #include "utils/error_checking.hpp"
 
 using namespace parthenon::package::prelude;
 
+// Calculate the thermal *diffusivity*, \chi, in code units as the energy flux itself
+// is calculated from -\chi \rho \nabla (p/\rho).
+// To match the latter, note that |\nabla T| is expected to be calculated outside
+// using the gradient of p/\rho, i.e., not including the mbar_/kb_ conversion factor.
 KOKKOS_INLINE_FUNCTION
 Real ThermalDiffusivity::Get(const Real pres, const Real rho, const Real gradTmag) const {
   if (conduction_coeff_type_ == ConductionCoeff::fixed) {
     return coeff_;
   } else if (conduction_coeff_type_ == ConductionCoeff::spitzer) {
-    const Real T = mbar_over_kb_ * pres / rho;
-    const Real kappa = coeff_ * std::pow(T, 5. / 2.); // Full spitzer
-    const Real chi_spitzer = kappa * mbar_over_kb_ / rho;
-
-    // Saturated total flux: fac * \rho * c_{s,isoth}^3
-    // In practice: fac * \rho * c_{s,isoth}^3 * (gradT / gradTmag)
-    // where T is calculated based on p/rho in the code.
-    // Thus, everything is in code units and no conversion is required.
-    // The rho above is cancelled as we convert the conduction above to a diffusvity here.
-    const Real chi_sat =
-        0.34 * std::pow(pres / rho, 3.0 / 2.0) / (gradTmag + TINY_NUMBER);
-    return std::min(chi_spitzer, chi_sat);
+    const Real T_cgs = mbar_ / kb_ * pres / rho;
+    const Real kappa_spitzer = coeff_ * std::pow(T_cgs, 5. / 2.); // Full spitzer
+
+    const Real cs_e = std::sqrt(kb_ * T_cgs / me_); // electron isothermal speed of sound
+    // assuming neutral plasma so n_e = n = \rho / mbar
+    const Real kappa_sat =
+        0.34 * rho / mbar_ * kb_ * T_cgs * cs_e / (gradTmag + TINY_NUMBER);
+
+    // Convert conductivity to diffusivity
+    return std::min(kappa_spitzer, kappa_sat) * mbar_ / kb_ / rho;
 
   } else {
     return 0.0;
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index 7dedfa0a..2b75ac5d 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -69,7 +69,7 @@ KOKKOS_INLINE_FUNCTION Real lim4(const Real A, const Real B, const Real C, const
 
 struct ThermalDiffusivity {
  private:
-  Real mbar_over_kb_;
+  Real mbar_, me_, kb_;
   Conduction conduction_;
   ConductionCoeff conduction_coeff_type_;
   // "free" coefficient/prefactor. Value depends on conduction is set in the constructor.
@@ -78,9 +78,9 @@ struct ThermalDiffusivity {
  public:
   KOKKOS_INLINE_FUNCTION
   ThermalDiffusivity(Conduction conduction, ConductionCoeff conduction_coeff_type,
-                     Real coeff, Real mbar_over_kb)
+                     Real coeff, Real mbar, Real me, Real kb)
       : conduction_(conduction), conduction_coeff_type_(conduction_coeff_type),
-        coeff_(coeff), mbar_over_kb_(mbar_over_kb) {}
+        coeff_(coeff), mbar_(mbar), me_(me), kb_(kb) {}
 
   KOKKOS_INLINE_FUNCTION
   Real Get(const Real pres, const Real rho, const Real gradTmag) const;
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 8354623f..fabac16a 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -421,6 +421,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       const auto He_mass_fraction = pin->GetReal("hydro", "He_mass_fraction");
       const auto H_mass_fraction = 1.0 - He_mass_fraction;
       const auto mu = 1 / (He_mass_fraction * 3. / 4. + (1 - He_mass_fraction) * 2);
+      pkg->AddParam<>("mbar", mu * units.atomic_mass_unit());
       pkg->AddParam<>("mbar_over_kb",
                       mu * units.atomic_mass_unit() / units.k_boltzmann());
     }
@@ -457,7 +458,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
           pin->GetOrAddString("diffusion", "conduction_coeff", "none");
       auto conduction_coeff = ConductionCoeff::none;
       if (conduction_coeff_str == "spitzer") {
-        if (!pkg->AllParams().hasKey("mbar_over_kb")) {
+        if (!pkg->AllParams().hasKey("mbar")) {
           PARTHENON_FAIL("Spitzer thermal conduction requires units and gas composition. "
                          "Please set a 'units' block and the 'hydro/He_mass_fraction' in "
                          "the input file.");
@@ -470,9 +471,10 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         auto units = pkg->Param<Units>("units");
         spitzer_coeff *= units.erg() / (units.s() * units.cm());
 
-        auto mbar_over_kb = pkg->Param<Real>("mbar_over_kb");
+        auto mbar = pkg->Param<Real>("mbar");
         auto thermal_diff =
-            ThermalDiffusivity(conduction, conduction_coeff, spitzer_coeff, mbar_over_kb);
+            ThermalDiffusivity(conduction, conduction_coeff, spitzer_coeff, mbar,
+                               units.electron_mass(), units.k_boltzmann());
         pkg->AddParam<>("thermal_diff", thermal_diff);
 
       } else if (conduction_coeff_str == "fixed") {
@@ -480,7 +482,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         Real thermal_diff_coeff_code =
             pin->GetReal("diffusion", "thermal_diff_coeff_code");
         auto thermal_diff = ThermalDiffusivity(conduction, conduction_coeff,
-                                               thermal_diff_coeff_code, 0.0);
+                                               thermal_diff_coeff_code, 0.0, 0.0, 0.0);
         pkg->AddParam<>("thermal_diff", thermal_diff);
 
       } else {
diff --git a/src/units.hpp b/src/units.hpp
index c3666f6b..4e38eed3 100644
--- a/src/units.hpp
+++ b/src/units.hpp
@@ -30,6 +30,7 @@ class Units {
   static constexpr parthenon::Real dyne_cm2_cgs = 1.0;                     // dyne/cm^2
   static constexpr parthenon::Real msun_cgs = 1.98841586e+33;              // g
   static constexpr parthenon::Real atomic_mass_unit_cgs = 1.660538921e-24; // g
+  static constexpr parthenon::Real electron_mass_cgs = 9.1093837015e-28;   // g
   static constexpr parthenon::Real g_cm3_cgs = 1.0;                        // gcm**3
   static constexpr parthenon::Real erg_cgs = 1;                            // erg
   static constexpr parthenon::Real gauss_cgs = 1;                          // gauss
@@ -122,6 +123,7 @@ class Units {
   parthenon::Real atomic_mass_unit() const {
     return atomic_mass_unit_cgs / code_mass_cgs();
   }
+  parthenon::Real electron_mass() const { return electron_mass_cgs / code_mass_cgs(); }
   parthenon::Real erg() const { return erg_cgs / code_energy_cgs(); }
   parthenon::Real gauss() const { return gauss_cgs / code_magnetic_cgs(); }
   parthenon::Real microgauss() const { return microgauss_cgs / code_magnetic_cgs(); }

From 8599682b0ec01aa21c97a20f7133a746e0543be6 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Fri, 13 May 2022 16:41:26 +0200
Subject: [PATCH 23/42] Add LimO3 limiter

---
 README.md                                     |   2 +-
 docs/input.md                                 |   1 +
 src/hydro/hydro.cpp                           |   8 ++
 src/main.hpp                                  |   2 +-
 src/recon/limo3_simple.hpp                    | 109 ++++++++++++++++++
 tst/regression/CMakeLists.txt                 |   2 +-
 .../riemann_hydro/riemann_hydro.py            |   2 +
 7 files changed, 123 insertions(+), 3 deletions(-)
 create mode 100644 src/recon/limo3_simple.hpp

diff --git a/README.md b/README.md
index 070db2ae..0ff7f2bb 100644
--- a/README.md
+++ b/README.md
@@ -10,7 +10,7 @@ Neither other versions or nor using preinstalled Parthenon/Kokkos libraries have
 Current features include
 - first, second, and third order (magneto)hydrodynamics with
   - RK1, RK2, RK3, VL2 integrators
-  - piecewise constant (DC), piecewise linear (PLM), piecewise parabolic (PPM), WENO3, and WENOZ reconstruction
+  - piecewise constant (DC), piecewise linear (PLM), piecewise parabolic (PPM), WENO3, LimO3, and WENOZ reconstruction
   - HLLE (hydro and MHD), HLLC (hydro), and HLLD (MHD) Riemann solvers
   - adiabatic equation of state
   - MHD based on hyperbolic divergence cleaning following Dedner+ 2002
diff --git a/docs/input.md b/docs/input.md
index e59088b5..f48b4442 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -45,6 +45,7 @@ Parameter: `reconstruction` (string)
 - `dc` : donor cell/piecewise constant (first order)
 - `plm` : piecewise linear (second order)
 - `ppm` : piecewise parabolic (third order)
+- `limo3` : LimO3 (third order)
 - `weno3` : WENO3 (third order)
 - `wenoz` : WENO-Z (third order but more accurate than WENO3)
 
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index f8be73a6..49d99d20 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -18,6 +18,7 @@
 #include "../main.hpp"
 #include "../pgen/pgen.hpp"
 #include "../recon/dc_simple.hpp"
+#include "../recon/limo3_simple.hpp"
 #include "../recon/plm_simple.hpp"
 #include "../recon/ppm_simple.hpp"
 #include "../recon/weno3_simple.hpp"
@@ -240,6 +241,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
   } else if (recon_str == "ppm") {
     recon = Reconstruction::ppm;
     recon_need_nghost = 3;
+  } else if (recon_str == "limo3") {
+    recon = Reconstruction::limo3;
+    recon_need_nghost = 2;
   } else if (recon_str == "weno3") {
     recon = Reconstruction::weno3;
     recon_need_nghost = 2;
@@ -300,22 +304,26 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
   add_flux_fun<Fluid::euler, Reconstruction::plm, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::ppm, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::weno3, RiemannSolver::hlle>(flux_functions);
+  add_flux_fun<Fluid::euler, Reconstruction::limo3, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::wenoz, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::dc, RiemannSolver::hllc>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::plm, RiemannSolver::hllc>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::ppm, RiemannSolver::hllc>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::weno3, RiemannSolver::hllc>(flux_functions);
+  add_flux_fun<Fluid::euler, Reconstruction::limo3, RiemannSolver::hllc>(flux_functions);
   add_flux_fun<Fluid::euler, Reconstruction::wenoz, RiemannSolver::hllc>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::dc, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::dc, RiemannSolver::none>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::plm, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::ppm, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::weno3, RiemannSolver::hlle>(flux_functions);
+  add_flux_fun<Fluid::glmmhd, Reconstruction::limo3, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::wenoz, RiemannSolver::hlle>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::dc, RiemannSolver::hlld>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::plm, RiemannSolver::hlld>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::ppm, RiemannSolver::hlld>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::weno3, RiemannSolver::hlld>(flux_functions);
+  add_flux_fun<Fluid::glmmhd, Reconstruction::limo3, RiemannSolver::hlld>(flux_functions);
   add_flux_fun<Fluid::glmmhd, Reconstruction::wenoz, RiemannSolver::hlld>(flux_functions);
   // Add first order recon with LLF fluxes (implemented for testing as tight loop)
   flux_functions[std::make_tuple(Fluid::euler, Reconstruction::dc, RiemannSolver::llf)] =
diff --git a/src/main.hpp b/src/main.hpp
index 84a352cf..b73efbf8 100644
--- a/src/main.hpp
+++ b/src/main.hpp
@@ -31,7 +31,7 @@ enum {
 enum { IV1 = 1, IV2 = 2, IV3 = 3, IPR = 4 };
 
 enum class RiemannSolver { undefined, none, hlle, llf, hllc, hlld };
-enum class Reconstruction { undefined, dc, plm, ppm, wenoz, weno3 };
+enum class Reconstruction { undefined, dc, plm, ppm, wenoz, weno3, limo3 };
 enum class Integrator { undefined, rk1, rk2, vl2, rk3 };
 enum class Fluid { undefined, euler, glmmhd };
 enum class Cooling { none, tabular };
diff --git a/src/recon/limo3_simple.hpp b/src/recon/limo3_simple.hpp
new file mode 100644
index 00000000..dbe9bc19
--- /dev/null
+++ b/src/recon/limo3_simple.hpp
@@ -0,0 +1,109 @@
+//========================================================================================
+// AthenaPK - a performance portable block structured AMR MHD code
+// Copyright (c) 2022, Athena Parthenon Collaboration. All rights reserved.
+// Licensed under the 3-Clause License (the "LICENSE")
+//========================================================================================
+#ifndef RECONSTRUCT_LIMO3_SIMPLE_HPP_
+#define RECONSTRUCT_LIMO3_SIMPLE_HPP_
+//! \file limo3_simple.hpp
+//  \brief  LimO3 reconstruction implemented as inline functions
+//  This version only works with uniform mesh spacing
+//
+// REFERENCES:
+// Čada, Miroslav / Torrilhon, Manuel Compact third-order limiter functions for finite
+// volume methods 2009 Journal of Computational Physics , Vol. 228, No. 11 p. 4118-4145
+// https://doi.org/10.1016/j.jcp.2009.02.020
+
+#include "Kokkos_Macros.hpp"
+#include "config.hpp"
+#include <limits>
+#include <parthenon/parthenon.hpp>
+
+using parthenon::ScratchPad2D;
+
+//----------------------------------------------------------------------------------------
+//! \fn limo3_limiter()
+//  \brief Helper function to reuse common smoothness indicator in cell i for left and
+//  right reconstructed states.
+KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
+  const Real q = (2.0 + theta) / 3.0;
+
+  // (3.13) in CT09
+  const Real phi = std::max(
+      0.0, std::min(q, std::max(-0.5 * theta, std::min(2.0 * theta, std::min(q, 1.6)))));
+
+  // "a small positive number, which is about the size of the particular machine prec."
+  constexpr Real eps = 2.0 * std::numeric_limits<Real>::epsilon();
+  // (3.22) in CT09: q for eta <= 1 - eps, phi for eta >= 1 + eps, and in between a
+  // linear blend (the 0.5 scales the whole sum) that matches q and phi at both edges.
+  if (eta <= 1.0 - eps) {
+    return q;
+  } else if (eta >= 1.0 + eps) {
+    return phi;
+  } else {
+    return 0.5 * ((1.0 - (eta - 1.0) / eps) * q + (1.0 + (eta - 1.0) / eps) * phi);
+  }
+}
+
+//----------------------------------------------------------------------------------------
+//! \fn LimO3()
+//  \brief Reconstructs linear slope in cell i to compute ql(i+1) and qr(i). Works for
+//  reconstruction in any dimension by passing in the appropriate q_im1, q_i, and q_ip1.
+
+KOKKOS_INLINE_FUNCTION
+void LimO3(const Real &q_im1, const Real &q_i, const Real &q_ip1, Real &ql_ip1,
+           Real &qr_i, const Real &dx) {
+  // Inputs: three neighboring cell values and spacing dx; outputs: ql_ip1 and qr_i.
+  constexpr Real r = 1.0; // radius of asymptotic region
+  // Forward (dqp) and backward (dqm) differences around cell i
+  const Real dqp = q_ip1 - q_i;
+  const Real dqm = q_i - q_im1;
+
+  const Real theta = dqm / (dqp + TINY_NUMBER); // (2.8) in CT09; local smoothness measure
+  // NOTE(review): dqm == 0 gives theta == 0, so 1.0/theta below is inf; check for NaN.
+  Real eta = r * dx; // (3.17) in CT09; indicator for asymp. region
+  eta = (dqm * dqm + dqp * dqp) / (eta * eta);
+
+  // (3.5) in CT09
+  ql_ip1 = q_i + 0.5 * dqp * limo3_limiter(theta, eta);
+  qr_i = q_i - 0.5 * dqm * limo3_limiter(1.0/theta, eta);
+}
+
+//! \fn Reconstruct<Reconstruction::limo3, int DIR>()
+//  \brief Wrapper function for LimO3 reconstruction
+//  In X1DIR call over [is-1,ie+1] to get BOTH L/R states over [is,ie]
+//  In X2DIR call over [js-1,je+1] to get BOTH L/R states over [js,je]
+//  In X3DIR call over [ks-1,ke+1] to get BOTH L/R states over [ks,ke]
+//  Note that in the CalculateFlux function ql and qr contain stencils in i-direction that
+//  have been cached for the appropriate k, j (and plus 1) values. Thus, in x1dir ql needs
+//  to be offset by i+1 but for the other direction the offset has been set outside in the
+//  cached stencil.
+template <Reconstruction recon, int XNDIR>
+KOKKOS_INLINE_FUNCTION typename std::enable_if<recon == Reconstruction::limo3, void>::type
+Reconstruct(parthenon::team_mbr_t const &member, const int k, const int j, const int il,
+            const int iu, const parthenon::VariablePack<Real> &q, ScratchPad2D<Real> &ql,
+            ScratchPad2D<Real> &qr) {
+  const auto nvar = q.GetDim(4); // extent of the variable index n looped over below
+  for (auto n = 0; n < nvar; ++n) {
+    parthenon::par_for_inner(member, il, iu, [&](const int i) {
+      auto dx = q.GetCoords().Dx(XNDIR, k, j, i); // spacing handed to LimO3 as dx
+      if constexpr (XNDIR == parthenon::X1DIR) {
+        // ql is ql_ip1 (written at i + 1) and qr is qr_i; the stencil runs along i
+        LimO3(q(n, k, j, i - 1), q(n, k, j, i), q(n, k, j, i + 1), ql(n, i + 1), qr(n, i),
+              dx);
+      } else if constexpr (XNDIR == parthenon::X2DIR) {
+        // ql is ql_jp1 and qr is qr_j (both written at i); the stencil runs along j
+        LimO3(q(n, k, j - 1, i), q(n, k, j, i), q(n, k, j + 1, i), ql(n, i), qr(n, i),
+              dx);
+      } else if constexpr (XNDIR == parthenon::X3DIR) {
+        // ql is ql_kp1 and qr is qr_k (both written at i); the stencil runs along k
+        LimO3(q(n, k - 1, j, i), q(n, k, j, i), q(n, k + 1, j, i), ql(n, i), qr(n, i),
+              dx);
+      } else { // XNDIR is none of X1DIR, X2DIR, X3DIR
+        PARTHENON_FAIL("Unknow direction for LimO3 reconstruction.")
+      }
+    });
+  }
+}
+
+#endif // RECONSTRUCT_LIMO3_SIMPLE_HPP_
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index e81d1a39..13cdaa58 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -6,7 +6,7 @@
 include(${PROJECT_SOURCE_DIR}/external/parthenon/cmake/TestSetup.cmake)
 
 setup_test_serial("riemann_hydro" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 33" "other")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 39" "other")
 
 setup_test_serial("convergence" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 36" "convergence")
diff --git a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
index 08789a23..da80f047 100644
--- a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
+++ b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
@@ -25,6 +25,8 @@
     {"nx1": 64, "integrator": "rk1", "recon": "dc", "riemann": "hllc"},
     {"nx1": 64, "integrator": "vl2", "recon": "plm", "riemann": "hlle"},
     {"nx1": 64, "integrator": "vl2", "recon": "plm", "riemann": "hllc"},
+    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hlle"},
+    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hlle"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "ppm", "riemann": "hlle"},

From 4ebb5d66e8e2bd9327b9e27e7427c995d018bbc8 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Fri, 13 May 2022 17:00:37 +0200
Subject: [PATCH 24/42] Add limo3 convergence

---
 src/recon/limo3_simple.hpp                                  | 3 ++-
 tst/regression/CMakeLists.txt                               | 6 +++---
 tst/regression/test_suites/convergence/convergence.py       | 1 +
 .../test_suites/mhd_convergence/mhd_convergence.py          | 1 +
 tst/regression/test_suites/riemann_hydro/riemann_hydro.py   | 2 --
 5 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/recon/limo3_simple.hpp b/src/recon/limo3_simple.hpp
index dbe9bc19..61800bfb 100644
--- a/src/recon/limo3_simple.hpp
+++ b/src/recon/limo3_simple.hpp
@@ -26,6 +26,7 @@ using parthenon::ScratchPad2D;
 //  \brief Helper function to reuse common smoothness indicator in cell i for left and
 //  right reconstructed states.
 KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
+  // unlimited 3rd order reconstruction
   const Real q = (2.0 + theta) / 3.0;
 
   // (3.13) in CT09
@@ -66,7 +67,7 @@ void LimO3(const Real &q_im1, const Real &q_i, const Real &q_ip1, Real &ql_ip1,
 
   // (3.5) in CT09
   ql_ip1 = q_i + 0.5 * dqp * limo3_limiter(theta, eta);
-  qr_i = q_i - 0.5 * dqm * limo3_limiter(1.0/theta, eta);
+  qr_i = q_i - 0.5 * dqm * limo3_limiter(1.0 / theta, eta);
 }
 
 //! \fn Reconstruct<Reconstruction::limo3, int DIR>()
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index 13cdaa58..f7f622c2 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -6,13 +6,13 @@
 include(${PROJECT_SOURCE_DIR}/external/parthenon/cmake/TestSetup.cmake)
 
 setup_test_serial("riemann_hydro" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 39" "other")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 33" "other")
 
 setup_test_serial("convergence" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 36" "convergence")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 40" "convergence")
 
 setup_test_serial("mhd_convergence" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 40" "convergence")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 44" "convergence")
 
 setup_test_serial("performance" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 20" "performance")
diff --git a/tst/regression/test_suites/convergence/convergence.py b/tst/regression/test_suites/convergence/convergence.py
index 4fbf47d1..f9215118 100644
--- a/tst/regression/test_suites/convergence/convergence.py
+++ b/tst/regression/test_suites/convergence/convergence.py
@@ -39,6 +39,7 @@
     {"integrator" : "rk2", "recon" : "weno3"},
     {"integrator" : "rk3", "recon" : "ppm"},
     {"integrator" : "rk3", "recon" : "weno3"},
+    {"integrator" : "rk3", "recon" : "limo3"},
     {"integrator" : "rk3", "recon" : "wenoz"},
 ]
 
diff --git a/tst/regression/test_suites/mhd_convergence/mhd_convergence.py b/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
index 9be0b8e5..7700eea6 100644
--- a/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
+++ b/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
@@ -40,6 +40,7 @@
     {"integrator" : "rk2", "recon" : "weno3"},
     {"integrator" : "rk3", "recon" : "ppm"},
     {"integrator" : "rk3", "recon" : "weno3"},
+    {"integrator" : "rk3", "recon" : "limo3"},
     {"integrator" : "rk3", "recon" : "wenoz"},
 ]
 
diff --git a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
index da80f047..08789a23 100644
--- a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
+++ b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
@@ -25,8 +25,6 @@
     {"nx1": 64, "integrator": "rk1", "recon": "dc", "riemann": "hllc"},
     {"nx1": 64, "integrator": "vl2", "recon": "plm", "riemann": "hlle"},
     {"nx1": 64, "integrator": "vl2", "recon": "plm", "riemann": "hllc"},
-    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hlle"},
-    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hlle"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "ppm", "riemann": "hlle"},

From 8800b516c7e9be12c05f3f35224528a923f06db6 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sat, 14 May 2022 16:55:04 +0200
Subject: [PATCH 25/42] Fix LimO3 recon

---
 src/recon/limo3_simple.hpp                    | 54 +++++++++++--------
 tst/regression/CMakeLists.txt                 |  6 +--
 .../mhd_convergence/mhd_convergence.py        |  6 ++-
 .../test_suites/performance/performance.py    |  1 +
 .../riemann_hydro/riemann_hydro.py            |  2 +
 5 files changed, 42 insertions(+), 27 deletions(-)

diff --git a/src/recon/limo3_simple.hpp b/src/recon/limo3_simple.hpp
index 61800bfb..3757313e 100644
--- a/src/recon/limo3_simple.hpp
+++ b/src/recon/limo3_simple.hpp
@@ -14,7 +14,7 @@
 // volume methods 2009 Journal of Computational Physics , Vol. 228, No. 11 p. 4118-4145
 // https://doi.org/10.1016/j.jcp.2009.02.020
 
-#include "Kokkos_Macros.hpp"
+#include "../hydro/diffusion/diffusion.hpp"
 #include "config.hpp"
 #include <limits>
 #include <parthenon/parthenon.hpp>
@@ -23,9 +23,16 @@ using parthenon::ScratchPad2D;
 
 //----------------------------------------------------------------------------------------
 //! \fn limo3_limiter()
-//  \brief Helper function to reuse common smoothness indicator in cell i for left and
-//  right reconstructed states.
-KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
+//  \brief Helper function that actually applies the LimO3 limiter
+KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real dvp, const Real dvm, const Real dx) {
+  constexpr Real r = 0.1; // radius of asymptotic region
+
+  // "a small positive number, which is about the size of the particular machine prec."
+  constexpr Real eps = 1e-12;
+
+  // (2.8) in CT09; local smoothness measure
+  const Real theta = dvm / (dvp + TINY_NUMBER);
+
   // unlimited 3rd order reconstruction
   const Real q = (2.0 + theta) / 3.0;
 
@@ -33,8 +40,9 @@ KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
   const Real phi = std::max(
       0.0, std::min(q, std::max(-0.5 * theta, std::min(2.0 * theta, std::min(q, 1.6)))));
 
-  // "a small positive number, which is about the size of the particular machine prec."
-  constexpr Real eps = 2.0 * std::numeric_limits<Real>::epsilon();
+  // (3.17) in CT09; indicator for asymp. region
+  Real eta = r * dx;
+  eta = (dvm * dvm + dvp * dvp) / (eta * eta);
 
   // (3.22) in CT09
   if (eta <= 1.0 - eps) {
@@ -42,7 +50,7 @@ KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
   } else if (eta >= 1.0 + eps) {
     return phi;
   } else {
-    return 0.5 * (1.0 - (eta - 1.0) / eps) * q + (1.0 + (eta - 1.0) / eps) * phi;
+    return 0.5 * ((1.0 - (eta - 1.0) / eps) * q + (1.0 + (eta - 1.0) / eps) * phi);
   }
 }
 
@@ -53,21 +61,20 @@ KOKKOS_INLINE_FUNCTION Real limo3_limiter(const Real theta, const Real eta) {
 
 KOKKOS_INLINE_FUNCTION
 void LimO3(const Real &q_im1, const Real &q_i, const Real &q_ip1, Real &ql_ip1,
-           Real &qr_i, const Real &dx) {
-
-  constexpr Real r = 1.0; // radius of asymptotic region
+           Real &qr_i, const Real &dx, const bool ensure_positivity) {
 
   const Real dqp = q_ip1 - q_i;
   const Real dqm = q_i - q_im1;
 
-  const Real theta = dqm / (dqp + TINY_NUMBER); // (2.8) in CT09; local smoothness measure
-
-  Real eta = r * dx; // (3.17) in CT09; indicator for asymp. region
-  eta = (dqm * dqm + dqp * dqp) / (eta * eta);
-
   // (3.5) in CT09
-  ql_ip1 = q_i + 0.5 * dqp * limo3_limiter(theta, eta);
-  qr_i = q_i - 0.5 * dqm * limo3_limiter(1.0 / theta, eta);
+  ql_ip1 = q_i + 0.5 * dqp * limo3_limiter(dqp, dqm, dx);
+  qr_i = q_i - 0.5 * dqm * limo3_limiter(dqm, dqp, dx);
+
+  if (ensure_positivity && (ql_ip1 <= 0.0 || qr_i <= 0.0)) {
+    Real dqmm = limiters::minmod(dqp, dqm);
+    ql_ip1 = q_i + 0.5 * dqmm;
+    qr_i = q_i - 0.5 * dqmm;
+  }
 }
 
 //! \fn Reconstruct<Reconstruction::limo3, int DIR>()
@@ -86,20 +93,23 @@ Reconstruct(parthenon::team_mbr_t const &member, const int k, const int j, const
             ScratchPad2D<Real> &qr) {
   const auto nvar = q.GetDim(4);
   for (auto n = 0; n < nvar; ++n) {
+    // Note, this may be unsafe as we implicitly assume how this function is called with
+    // respect to the entries in the single state vector containing all components
+    const bool ensure_positivity = (n == IDN || n == IPR);
     parthenon::par_for_inner(member, il, iu, [&](const int i) {
       auto dx = q.GetCoords().Dx(XNDIR, k, j, i);
       if constexpr (XNDIR == parthenon::X1DIR) {
         // ql is ql_ip1 and qr is qr_i
         LimO3(q(n, k, j, i - 1), q(n, k, j, i), q(n, k, j, i + 1), ql(n, i + 1), qr(n, i),
-              dx);
+              dx, ensure_positivity);
       } else if constexpr (XNDIR == parthenon::X2DIR) {
         // ql is ql_jp1 and qr is qr_j
-        LimO3(q(n, k, j - 1, i), q(n, k, j, i), q(n, k, j + 1, i), ql(n, i), qr(n, i),
-              dx);
+        LimO3(q(n, k, j - 1, i), q(n, k, j, i), q(n, k, j + 1, i), ql(n, i), qr(n, i), dx,
+              ensure_positivity);
       } else if constexpr (XNDIR == parthenon::X3DIR) {
         // ql is ql_kp1 and qr is qr_k
-        LimO3(q(n, k - 1, j, i), q(n, k, j, i), q(n, k + 1, j, i), ql(n, i), qr(n, i),
-              dx);
+        LimO3(q(n, k - 1, j, i), q(n, k, j, i), q(n, k + 1, j, i), ql(n, i), qr(n, i), dx,
+              ensure_positivity);
       } else {
         PARTHENON_FAIL("Unknow direction for LimO3 reconstruction.")
       }
diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index f7f622c2..c2126935 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -6,16 +6,16 @@
 include(${PROJECT_SOURCE_DIR}/external/parthenon/cmake/TestSetup.cmake)
 
 setup_test_serial("riemann_hydro" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 33" "other")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/sod.in --num_steps 39" "other")
 
 setup_test_serial("convergence" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 40" "convergence")
 
 setup_test_serial("mhd_convergence" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 44" "convergence")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 48" "convergence")
 
 setup_test_serial("performance" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 20" "performance")
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/linear_wave3d.in --num_steps 21" "performance")
 
 setup_test_serial("cluster_hse" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/cluster/hse.in --num_steps 1" "convergence")
diff --git a/tst/regression/test_suites/mhd_convergence/mhd_convergence.py b/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
index 7700eea6..d879d90b 100644
--- a/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
+++ b/tst/regression/test_suites/mhd_convergence/mhd_convergence.py
@@ -38,6 +38,7 @@
     {"integrator" : "rk2", "recon" : "plm"},
     {"integrator" : "rk2", "recon" : "ppm"},
     {"integrator" : "rk2", "recon" : "weno3"},
+    {"integrator" : "rk2", "recon" : "limo3"},
     {"integrator" : "rk3", "recon" : "ppm"},
     {"integrator" : "rk3", "recon" : "weno3"},
     {"integrator" : "rk3", "recon" : "limo3"},
@@ -151,10 +152,11 @@ def Analyse(self,parameters):
         data = np.genfromtxt(os.path.join(parameters.output_path, "linearwave-errors.dat"))
 
         # quick and dirty test
-        if data[39,4] > 6.14e-12:
+        if data[47,4] > 6.14e-12:
+            print("QUICK AND DIRTY TEST FAILED")
             analyze_status = False
 
-        markers = 'ov^<>sp*hXD'
+        markers = 'ov^<>sp*hDXd+|x'
         for i, cfg in enumerate(method_cfgs):
             plt.plot(data[i * n_res:(i + 1) * n_res, 0],
                     data[i * n_res:(i + 1) * n_res, 4],
diff --git a/tst/regression/test_suites/performance/performance.py b/tst/regression/test_suites/performance/performance.py
index 9a50c224..c677f700 100644
--- a/tst/regression/test_suites/performance/performance.py
+++ b/tst/regression/test_suites/performance/performance.py
@@ -47,6 +47,7 @@
     {"mx" : 256, "mb" : 128, "integrator" : "vl2", "recon" : "plm", "fluid" : "glmmhd"},
     {"mx" : 256, "mb" : 128, "integrator" : "vl2", "recon" : "weno3", "fluid" : "glmmhd"},
     {"mx" : 256, "mb" : 128, "integrator" : "rk2", "recon" : "weno3", "fluid" : "glmmhd"},
+    {"mx" : 256, "mb" : 128, "integrator" : "rk2", "recon" : "limo3", "fluid" : "glmmhd"},
     {"mx" : 256, "mb" : 128, "integrator" : "rk3", "recon" : "weno3", "fluid" : "glmmhd"},
     {"mx" : 256, "mb" : 128, "integrator" : "rk3", "recon" : "wenoz", "fluid" : "glmmhd"},
 ]
diff --git a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
index 08789a23..dfa74af7 100644
--- a/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
+++ b/tst/regression/test_suites/riemann_hydro/riemann_hydro.py
@@ -27,6 +27,8 @@
     {"nx1": 64, "integrator": "vl2", "recon": "plm", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hlle"},
     {"nx1": 64, "integrator": "rk3", "recon": "weno3", "riemann": "hllc"},
+    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hlle"},
+    {"nx1": 64, "integrator": "rk3", "recon": "limo3", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "ppm", "riemann": "hlle"},
     {"nx1": 64, "integrator": "rk3", "recon": "ppm", "riemann": "hllc"},
     {"nx1": 64, "integrator": "rk3", "recon": "wenoz", "riemann": "hlle"},

From 3f071d4584615ccd23e8e6c910d633911f4b7ff8 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Tue, 7 Jun 2022 14:05:42 +0200
Subject: [PATCH 26/42] Fix saturated conduction prefactor

---
 src/hydro/diffusion/conduction.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 6e2bf6dc..03398a60 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -36,9 +36,10 @@ Real ThermalDiffusivity::Get(const Real pres, const Real rho, const Real gradTma
     const Real kappa_spitzer = coeff_ * std::pow(T_cgs, 5. / 2.); // Full spitzer
 
     const Real cs_e = std::sqrt(kb_ * T_cgs / me_); // electron isothermal speed of sound
-    // assuming neutral plasma so n_e = n = \rho / mbar
+    // assuming neutral plasma so n_e = 0.5 * n = 0.5 * \rho / mbar
+    // 0.4 * sqrt(2/pi) * 0.5 = 0.16 (prefac from (7) in CM77 times 0.5 from n_e)
     const Real kappa_sat =
-        0.34 * rho / mbar_ * kb_ * T_cgs * cs_e / (gradTmag + TINY_NUMBER);
+        0.16 * rho / mbar_ * kb_ * T_cgs * cs_e / (gradTmag + TINY_NUMBER);
 
     // Convert conductivity to diffusivity
     return std::min(kappa_spitzer, kappa_sat) * mbar_ / kb_ / rho;

From 95827647c67c9eea03e3fda83949026709c092b5 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Tue, 7 Jun 2022 15:42:15 +0200
Subject: [PATCH 27/42] Remove calc of saturated conduction from cond coeff

---
 src/hydro/diffusion/conduction.cpp | 35 +++++++++++-------------------
 src/hydro/diffusion/diffusion.hpp  |  2 +-
 2 files changed, 14 insertions(+), 23 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 03398a60..f03b8d1b 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -25,24 +25,16 @@ using namespace parthenon::package::prelude;
 
 // Calculate the thermal *diffusivity*, \chi, in code units as the energy flux itself
 // is calculated from -\chi \rho \nabla (p/\rho).
-// To match the latter, note that |\nabla T| is expected to be calculated outside
-// using the gradient of p/\rho, i.e., not included the mbar_/kb_ conversion factor.
 KOKKOS_INLINE_FUNCTION
-Real ThermalDiffusivity::Get(const Real pres, const Real rho, const Real gradTmag) const {
+Real ThermalDiffusivity::Get(const Real pres, const Real rho) const {
   if (conduction_coeff_type_ == ConductionCoeff::fixed) {
     return coeff_;
   } else if (conduction_coeff_type_ == ConductionCoeff::spitzer) {
     const Real T_cgs = mbar_ / kb_ * pres / rho;
     const Real kappa_spitzer = coeff_ * std::pow(T_cgs, 5. / 2.); // Full spitzer
 
-    const Real cs_e = std::sqrt(kb_ * T_cgs / me_); // electron isothermal speed of sound
-    // assuming neutral plasma so n_e = 0.5 * n = 0.5 * \rho / mbar
-    // 0.4 * sqrt(2/pi) * 0.5 = 0.16 (prefac from (7) in CM77 times 0.5 from n_e)
-    const Real kappa_sat =
-        0.16 * rho / mbar_ * kb_ * T_cgs * cs_e / (gradTmag + TINY_NUMBER);
-
     // Convert conductivity to diffusivity
-    return std::min(kappa_spitzer, kappa_sat) * mbar_ / kb_ / rho;
+    return kappa_spitzer * mbar_ / kb_ / rho;
 
   } else {
     return 0.0;
@@ -75,7 +67,8 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
       thermal_diff.GetCoeffType() == ConductionCoeff::fixed) {
     // TODO(pgrete): once mindx is properly calculated before this loop, we can get rid of
     // it entirely.
-    const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
+    // Using 0.0 as parameters rho and p as they're not used anyway for a fixed coeff.
+    const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0);
     Kokkos::parallel_reduce(
         "EstimateConductionTimestep (iso fixed)",
         Kokkos::MDRangePolicy<Kokkos::Rank<4>>(
@@ -133,7 +126,7 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           if (gradTmag == 0.0) {
             return;
           }
-          auto thermal_diff_coeff = thermal_diff.Get(p, rho, gradTmag);
+          auto thermal_diff_coeff = thermal_diff.Get(p, rho);
 
           if (thermal_diff.GetType() == Conduction::isotropic) {
             min_dt = fmin(min_dt,
@@ -198,7 +191,8 @@ void ThermalFluxIsoFixed(MeshData<Real> *md) {
 
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
   // Using fixed and uniform coefficient so it's safe to get it outside the kernel.
-  const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0, 0.0);
+  // Using 0.0 as parameters rho and p as they're not used anyway for a fixed coeff.
+  const auto thermal_diff_coeff = thermal_diff.Get(0.0, 0.0);
 
   parthenon::par_for(
       DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (iso)",
@@ -335,9 +329,8 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
         const auto thermal_diff_f =
-            0.5 *
-            (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
-             thermal_diff.Get(prim(IPR, k, j, i - 1), prim(IDN, k, j, i - 1), gradTmag));
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k, j, i - 1), prim(IDN, k, j, i - 1)));
         cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
 
@@ -408,9 +401,8 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
         const auto thermal_diff_f =
-            0.5 *
-            (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
-             thermal_diff.Get(prim(IPR, k, j - 1, i), prim(IDN, k, j - 1, i), gradTmag));
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k, j - 1, i), prim(IDN, k, j - 1, i)));
         cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
   /* Compute heat fluxes in 3-direction, 3D problem ONLY  ---------------------*/
@@ -474,9 +466,8 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
         const auto thermal_diff_f =
-            0.5 *
-            (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i), gradTmag) +
-             thermal_diff.Get(prim(IPR, k - 1, j, i), prim(IDN, k - 1, j, i), gradTmag));
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k - 1, j, i), prim(IDN, k - 1, j, i)));
 
         cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
       });
diff --git a/src/hydro/diffusion/diffusion.hpp b/src/hydro/diffusion/diffusion.hpp
index 2b75ac5d..7d522902 100644
--- a/src/hydro/diffusion/diffusion.hpp
+++ b/src/hydro/diffusion/diffusion.hpp
@@ -83,7 +83,7 @@ struct ThermalDiffusivity {
         coeff_(coeff), mbar_(mbar), me_(me), kb_(kb) {}
 
   KOKKOS_INLINE_FUNCTION
-  Real Get(const Real pres, const Real rho, const Real gradTmag) const;
+  Real Get(const Real pres, const Real rho) const;
 
   KOKKOS_INLINE_FUNCTION
   Conduction GetType() const { return conduction_; }

From 115285d6c94d22152d31c0ca3326f053ea4867dc Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Tue, 7 Jun 2022 16:58:27 +0200
Subject: [PATCH 28/42] Add upwinded saturated conduction in x-dir

---
 src/hydro/diffusion/conduction.cpp | 46 +++++++++++++++++++++---------
 1 file changed, 33 insertions(+), 13 deletions(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index f03b8d1b..05f3f344 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -309,29 +309,49 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto T_im1 = prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1);
         const auto dTdx = (T_i - T_im1) / coords.Dx(parthenon::X1DIR, k, j, i);
 
-        Real flux_grad = 0.0;
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
+        const auto thermal_diff_f =
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k, j, i - 1), prim(IDN, k, j, i - 1)));
+        const auto gradTmag = std::sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
+
+        // Calculate "classic" fluxes
+        Real flux_classic = 0.0;
+        Real flux_classic_mag = 0.0;
         if (thermal_diff.GetType() == Conduction::anisotropic) {
           const auto Bx = 0.5 * (prim(IB1, k, j, i - 1) + prim(IB1, k, j, i));
           const auto By = 0.5 * (prim(IB2, k, j, i - 1) + prim(IB2, k, j, i));
           const auto Bz =
               ndim >= 3 ? 0.5 * (prim(IB3, k, j, i - 1) + prim(IB3, k, j, i)) : 0.0;
-          auto B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
-          flux_grad = (Bx * bDotGradT) / B02;
+          auto Bmag = std::sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
+          Bmag = std::max(Bmag, TINY_NUMBER); /* limit in case B=0 */
+          const auto bx = Bx / Bmag;          // unit vector component
+          const auto bDotGradT = (Bx * dTdx + By * dTdy + Bz * dTdz) / Bmag;
+          flux_classic = thermal_diff_f * denf * bDotGradT * bx;
+          flux_classic_mag = std::abs(thermal_diff_f * denf * bDotGradT);
         } else if (thermal_diff.GetType() == Conduction::isotropic) {
-          flux_grad = dTdx;
+          flux_classic = thermal_diff_f * denf * dTdx;
+          flux_classic_mag = thermal_diff_f * denf * gradTmag;
         } else {
           PARTHENON_FAIL("Unknown thermal diffusion flux.");
         }
 
-        // Calc interface values
-        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
-        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
-        const auto thermal_diff_f =
-            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
-                   thermal_diff.Get(prim(IPR, k, j, i - 1), prim(IDN, k, j, i - 1)));
-        cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
+        // Calculate saturated fluxes using upwinding, see (A3) in Mignone+12
+        Real flux_sat;
+        // Use first order limiting for now
+        // TODO(pgrete) This assumes a fixed mu = 0.6. Need update! Also pot. add phi.
+        if (flux_classic > 0.0) {
+          flux_sat = 5.31 * std::sqrt(prim(IPR, k, j, i) / denf) * prim(IPR, k, j, i);
+        } else if (flux_classic < 0.0) {
+          flux_sat =
+              5.31 * std::sqrt(prim(IPR, k, j, i - 1) / denf) * prim(IPR, k, j, i - 1);
+        } else {
+          const auto presf = 0.5 * (prim(IPR, k, j, i) + prim(IPR, k, j, i - 1));
+          flux_sat = 5.31 * std::sqrt(presf / denf) * presf;
+        }
+
+        cons.flux(X1DIR, IEN, k, j, i) -=
+            (flux_sat / (flux_sat + flux_classic_mag)) * flux_classic;
       });
 
   if (ndim < 2) {

From 51dca4f031a0bc6b1291b51cd5942e8da1edf7f0 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 8 Jun 2022 12:25:57 +0200
Subject: [PATCH 29/42] Add saturated conduction prefactor

---
 docs/input.md                      | 34 ++++++++++++++++++++++++++++--
 src/hydro/diffusion/conduction.cpp | 23 ++++++++++++--------
 src/hydro/hydro.cpp                | 18 +++++++++++++++-
 3 files changed, 63 insertions(+), 12 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index 8f8707f5..3c3455cf 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -60,8 +60,15 @@ the local magnetic field direction typically being much stronger than the flux p
 From a theoretical point of view, thermal conduction is included in the system of MHD equations by an additional
 term in the total energy equation:
 ```math
-\delta_t E + \nabla \cdot (... + \mathbf{F}) \quad \mathrm{with}\\
-\mathbf{F} = - \kappa \mathbf{\hat b} (\mathbf{\hat b \cdot \nabla T})
+\delta_t E + \nabla \cdot (... + \mathbf{F}_\mathrm{c})
+```
+where the full thermal conduction flux $`\mathbf{F}_\mathrm{c}`$ contains both the classic thermal conduction
+```math
+\mathbf{F}_\mathrm{classic} = - \kappa \mathbf{\hat b} (\mathbf{\hat b \cdot \nabla T})
+```
+as well as the saturated flux (as introduced by [^CM77])
+```math
+\mathbf{F}_\mathrm{sat} = - 5 \phi \rho^{-1/2} p^{3/2} \mathrm{sgn}(\mathbf{\hat b \cdot \nabla T}) \mathbf{\hat b}
 ```
 
 From an implementation point of view, two options implemented and can be configured within a `<diffusion>` block in the input file.
@@ -70,6 +77,12 @@ the integration step (before flux correction in case of AMR, and calculating the
 Moreover, they are implemented explicitly, i.e., they add a (potentially very restrictive) constraint to the timestep due to the scaling with $`\propto \Delta_x^2`$.
 Finally, we employ limiters for calculating the temperature gradients following Sharma & Hammett (2007)[^SH07].
 This prevents unphysical conduction against the gradient, which may be introduced because the off-axis gradients are not centered on the interfaces.
+Similarly, to account for the different nature of classic and saturated fluxes (parabolic and hyperbolic, respectively),
+we follow [^M+12] and use a smooth transition
+```math
+\mathbf{F}_\mathrm{c} = \frac{q}{q + F_\mathrm{classic}} \mathbf{F}_\mathrm{classic} \quad \mathrm{with} \quad q = 5 \phi \rho^{-1/2} p^{3/2}
+```
+and upwinding of the hyperbolic, saturated fluxes.
 
 To enable thermal conduction, set
 
@@ -90,9 +103,26 @@ the conduction flux is $`\mathbf{F} = - \chi \rho \mathbf{\hat b} (\mathbf{\hat
 Here, the strength, $`\chi`$, is controlled via the additional `thermal_diff_coeff_code` parameter in code units.
 Given the dimensions of $`L^2/T`$ it is referred to a thermal diffusivity rather than thermal conductivity.
 
+Parameter: `conduction_sat_phi` (float)
+- Default value 0.3\
+Factor to account for the uncertainty in the estimate of saturated fluxes, see [^CM77].
+Default value corresponds to the typical value used in literature and goes back to [^MMM80] and [^BM82].
+
+
 [^SH07]:
     P. Sharma and G. W. Hammett, "Preserving monotonicity in anisotropic diffusion," Journal of Computational Physics, vol. 227, no. 1, Art. no. 1, 2007, doi: https://doi.org/10.1016/j.jcp.2007.07.026.
 
+[^M+12]:
+    A. Mignone, C. Zanni, P. Tzeferacos, B. van Straalen, P. Colella, and G. Bodo, “THE PLUTO CODE FOR ADAPTIVE MESH COMPUTATIONS IN ASTROPHYSICAL FLUID DYNAMICS,” The Astrophysical Journal Supplement Series, vol. 198, Art. no. 1, Dec. 2011, doi: https://doi.org/10.1088/0067-0049/198/1/7
+
+[^CM77]:
+    L. Cowie and C. F. McKee, “The evaporation of spherical clouds in a hot gas. I. Classical and saturated mass loss rates.,” The Astrophysical Journal, vol. 211, pp. 135–146, Jan. 1977, doi: https://doi.org/10.1086/154911
+
+[^MMM80]:
+    C. E. Max, C. F. McKee, and W. C. Mead, “A model for laser driven ablative implosions,” The Physics of Fluids, vol. 23, Art. no. 8, 1980, doi: https://doi.org/10.1063/1.863183
+  
+[^BM82]:
+    S. A. Balbus and C. F. McKee, “The evaporation of spherical clouds in a hot gas. III - Suprathermal evaporation,” The Astrophysical Journal, vol. 252, pp. 529–552, Jan. 1982, doi: https://doi.org/10.1086/159581
 
 
 ### Additional MHD options in `<hydro>` block
diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 05f3f344..fa98df4b 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -266,6 +266,7 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
   const int ndim = pmb->pmy_mesh->ndim;
 
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
+  const auto &flux_sat_prefac = hydro_pkg->Param<Real>("conduction_sat_prefac");
 
   parthenon::par_for(
       DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (general)",
@@ -327,30 +328,34 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
           Bmag = std::max(Bmag, TINY_NUMBER); /* limit in case B=0 */
           const auto bx = Bx / Bmag;          // unit vector component
           const auto bDotGradT = (Bx * dTdx + By * dTdy + Bz * dTdz) / Bmag;
-          flux_classic = thermal_diff_f * denf * bDotGradT * bx;
+          flux_classic = -thermal_diff_f * denf * bDotGradT * bx;
           flux_classic_mag = std::abs(thermal_diff_f * denf * bDotGradT);
         } else if (thermal_diff.GetType() == Conduction::isotropic) {
-          flux_classic = thermal_diff_f * denf * dTdx;
+          flux_classic = -thermal_diff_f * denf * dTdx;
           flux_classic_mag = thermal_diff_f * denf * gradTmag;
         } else {
           PARTHENON_FAIL("Unknown thermal diffusion flux.");
         }
 
-        // Calculate saturated fluxes using upwinding, see (A3) in Mignone+12
+        // Calculate saturated fluxes using upwinding, see (A3) in Mignone+12.
+        // Note that we are not concerned about the sign of flux_sat here. The way it is
+        // calculated it's always positive because we use it in the geometric mean with
+        // the flux_classic_mag below. The correct sign is eventually picked up again from
+        // flux_classic.
         Real flux_sat;
-        // Use first order limiting for now
-        // TODO(pgrete) This assumes a fixed mu = 0.6. Need update! Also pot. add phi.
+        // Use first order limiting for now.
         if (flux_classic > 0.0) {
-          flux_sat = 5.31 * std::sqrt(prim(IPR, k, j, i) / denf) * prim(IPR, k, j, i);
+          flux_sat = flux_sat_prefac * std::sqrt(prim(IPR, k, j, i - 1) / denf) *
+                     prim(IPR, k, j, i - 1);
         } else if (flux_classic < 0.0) {
           flux_sat =
-              5.31 * std::sqrt(prim(IPR, k, j, i - 1) / denf) * prim(IPR, k, j, i - 1);
+              flux_sat_prefac * std::sqrt(prim(IPR, k, j, i) / denf) * prim(IPR, k, j, i);
         } else {
           const auto presf = 0.5 * (prim(IPR, k, j, i) + prim(IPR, k, j, i - 1));
-          flux_sat = 5.31 * std::sqrt(presf / denf) * presf;
+          flux_sat = flux_sat_prefac * std::sqrt(presf / denf) * presf;
         }
 
-        cons.flux(X1DIR, IEN, k, j, i) -=
+        cons.flux(X1DIR, IEN, k, j, i) +=
             (flux_sat / (flux_sat + flux_classic_mag)) * flux_classic;
       });
 
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 6b579f57..884b8645 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -443,6 +443,7 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       const auto He_mass_fraction = pin->GetReal("hydro", "He_mass_fraction");
       const auto H_mass_fraction = 1.0 - He_mass_fraction;
       const auto mu = 1 / (He_mass_fraction * 3. / 4. + (1 - He_mass_fraction) * 2);
+      pkg->AddParam<>("mu", mu);
       pkg->AddParam<>("mbar", mu * units.atomic_mass_unit());
       pkg->AddParam<>("mbar_over_kb",
                       mu * units.atomic_mass_unit() / units.k_boltzmann());
@@ -479,6 +480,14 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
       auto conduction_coeff_str =
           pin->GetOrAddString("diffusion", "conduction_coeff", "none");
       auto conduction_coeff = ConductionCoeff::none;
+
+      // Saturated conduction factor to account for "uncertainty", see
+      // Cowie & McKee 77 and a value of 0.3 is typically chosen (though using "weak
+      // evidence", see Balbus & McKee 1982 and Max, McKee, and Mead 1980).
+      const auto conduction_sat_phi =
+          pin->GetOrAddReal("diffusion", "conduction_sat_phi", 0.3);
+      Real conduction_sat_prefac = 0.0;
+
       if (conduction_coeff_str == "spitzer") {
         if (!pkg->AllParams().hasKey("mbar")) {
           PARTHENON_FAIL("Spitzer thermal conduction requires units and gas composition. "
@@ -493,12 +502,15 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         auto units = pkg->Param<Units>("units");
         spitzer_coeff *= units.erg() / (units.s() * units.cm());
 
-        auto mbar = pkg->Param<Real>("mbar");
+        const auto mbar = pkg->Param<Real>("mbar");
         auto thermal_diff =
             ThermalDiffusivity(conduction, conduction_coeff, spitzer_coeff, mbar,
                                units.electron_mass(), units.k_boltzmann());
         pkg->AddParam<>("thermal_diff", thermal_diff);
 
+        const auto mu = pkg->Param<Real>("mu");
+        conduction_sat_prefac = 6.86 * std::sqrt(mu) * conduction_sat_phi;
+
       } else if (conduction_coeff_str == "fixed") {
         conduction_coeff = ConductionCoeff::fixed;
         Real thermal_diff_coeff_code =
@@ -506,11 +518,15 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         auto thermal_diff = ThermalDiffusivity(conduction, conduction_coeff,
                                                thermal_diff_coeff_code, 0.0, 0.0, 0.0);
         pkg->AddParam<>("thermal_diff", thermal_diff);
+        conduction_sat_prefac = 5.0 * conduction_sat_phi;
 
       } else {
         PARTHENON_FAIL("Thermal conduction is enabled but no coefficient is set. Please "
                        "set diffusion/conduction_coeff to either 'spitzer' or 'fixed'");
       }
+      PARTHENON_REQUIRE(conduction_sat_prefac != 0.0,
+                        "Saturated thermal conduction prefactor uninitialized.");
+      pkg->AddParam<>("conduction_sat_prefac", conduction_sat_prefac);
     }
     pkg->AddParam<>("conduction", conduction);
 

From 8194e9746279443e2f896bb4174c284aa20e682e Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 8 Jun 2022 12:55:10 +0200
Subject: [PATCH 30/42] Add x2 and x3 sat cond fluxes

---
 docs/input.md                      |  2 +-
 src/hydro/diffusion/conduction.cpp | 90 +++++++++++++++++++++---------
 2 files changed, 66 insertions(+), 26 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index 3c3455cf..b43f4e02 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -120,7 +120,7 @@ Default value corresponds to the typical value used in literature and goes back
 
 [^MMM80]:
     C. E. Max, C. F. McKee, and W. C. Mead, “A model for laser driven ablative implosions,” The Physics of Fluids, vol. 23, Art. no. 8, 1980, doi: https://doi.org/10.1063/1.863183
-  
+
 [^BM82]:
     S. A. Balbus and C. F. McKee, “The evaporation of spherical clouds in a hot gas. III - Suprathermal evaporation,” , vol. 252, pp. 529–552, Jan. 1982, doi: https://doi.org/10.1086/159581
 
diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index fa98df4b..46814aaf 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -406,29 +406,50 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto T_jm1 = prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i);
         const auto dTdy = (T_j - T_jm1) / coords.Dx(parthenon::X2DIR, k, j, i);
 
-        Real flux_grad = 0.0;
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
+        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
+        const auto thermal_diff_f =
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k, j - 1, i), prim(IDN, k, j - 1, i)));
+
+        // Calculate "classic" fluxes
+        Real flux_classic = 0.0;
+        Real flux_classic_mag = 0.0;
         if (thermal_diff.GetType() == Conduction::anisotropic) {
           const auto Bx = 0.5 * (prim(IB1, k, j - 1, i) + prim(IB1, k, j, i));
           const auto By = 0.5 * (prim(IB2, k, j - 1, i) + prim(IB2, k, j, i));
           const auto Bz =
               ndim >= 3 ? 0.5 * (prim(IB3, k, j - 1, i) + prim(IB3, k, j, i)) : 0.0;
-          Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
-          flux_grad = (By * bDotGradT) / B02;
+          auto Bmag = std::sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
+          Bmag = std::max(Bmag, TINY_NUMBER); /* limit in case B=0 */
+          const auto by = By / Bmag;          // unit vector component
+          const auto bDotGradT = (Bx * dTdx + By * dTdy + Bz * dTdz) / Bmag;
+          flux_classic = -thermal_diff_f * denf * bDotGradT * by;
+          flux_classic_mag = std::abs(thermal_diff_f * denf * bDotGradT);
         } else if (thermal_diff.GetType() == Conduction::isotropic) {
-          flux_grad = dTdy;
+          flux_classic = -thermal_diff_f * denf * dTdy;
+          flux_classic_mag = thermal_diff_f * denf * gradTmag;
         } else {
           PARTHENON_FAIL("Unknown thermal diffusion flux.");
         }
 
+        // Calculate saturated fluxes, see comment above.
+        Real flux_sat;
+        // Use first order limiting for now.
+        if (flux_classic > 0.0) {
+          flux_sat = flux_sat_prefac * std::sqrt(prim(IPR, k, j - 1, i) / denf) *
+                     prim(IPR, k, j - 1, i);
+        } else if (flux_classic < 0.0) {
+          flux_sat =
+              flux_sat_prefac * std::sqrt(prim(IPR, k, j, i) / denf) * prim(IPR, k, j, i);
+        } else {
+          const auto presf = 0.5 * (prim(IPR, k, j, i) + prim(IPR, k, j - 1, i));
+          flux_sat = flux_sat_prefac * std::sqrt(presf / denf) * presf;
+        }
+
         // Calc interface values
-        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
-        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
-        const auto thermal_diff_f =
-            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
-                   thermal_diff.Get(prim(IPR, k, j - 1, i), prim(IDN, k, j - 1, i)));
-        cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
+        cons.flux(X2DIR, IEN, k, j, i) +=
+            (flux_sat / (flux_sat + flux_classic_mag)) * flux_classic;
       });
   /* Compute heat fluxes in 3-direction, 3D problem ONLY  ---------------------*/
   if (ndim < 3) {
@@ -473,27 +494,46 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
         const auto T_km1 = prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i);
         const auto dTdz = (T_k - T_km1) / coords.Dx(parthenon::X3DIR, k, j, i);
 
-        Real flux_grad = 0.0;
+        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
+        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
+        const auto thermal_diff_f =
+            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
+                   thermal_diff.Get(prim(IPR, k - 1, j, i), prim(IDN, k - 1, j, i)));
+
+        // Calculate "classic" fluxes
+        Real flux_classic = 0.0;
+        Real flux_classic_mag = 0.0;
         if (thermal_diff.GetType() == Conduction::anisotropic) {
           const auto Bx = 0.5 * (prim(IB1, k - 1, j, i) + prim(IB1, k, j, i));
           const auto By = 0.5 * (prim(IB2, k - 1, j, i) + prim(IB2, k, j, i));
           const auto Bz = 0.5 * (prim(IB3, k - 1, j, i) + prim(IB3, k, j, i));
-          Real B02 = SQR(Bx) + SQR(By) + SQR(Bz);
-          B02 = std::max(B02, TINY_NUMBER); /* limit in case B=0 */
-          const auto bDotGradT = Bx * dTdx + By * dTdy + Bz * dTdz;
-          flux_grad = (Bz * bDotGradT) / B02;
+          auto Bmag = std::sqrt(SQR(Bx) + SQR(By) + SQR(Bz));
+          Bmag = std::max(Bmag, TINY_NUMBER); /* limit in case B=0 */
+          const auto bz = Bz / Bmag;          // unit vector component
+          const auto bDotGradT = (Bx * dTdx + By * dTdy + Bz * dTdz) / Bmag;
+          flux_classic = -thermal_diff_f * denf * bDotGradT * bz;
+          flux_classic_mag = std::abs(thermal_diff_f * denf * bDotGradT);
         } else if (thermal_diff.GetType() == Conduction::isotropic) {
-          flux_grad = dTdz;
+          flux_classic = -thermal_diff_f * denf * dTdz;
+          flux_classic_mag = thermal_diff_f * denf * gradTmag;
         } else {
           PARTHENON_FAIL("Unknown thermal diffusion flux.");
         }
+        // Calculate saturated fluxes, see comment above.
+        Real flux_sat;
+        // Use first order limiting for now.
+        if (flux_classic > 0.0) {
+          flux_sat = flux_sat_prefac * std::sqrt(prim(IPR, k - 1, j, i) / denf) *
+                     prim(IPR, k - 1, j, i);
+        } else if (flux_classic < 0.0) {
+          flux_sat =
+              flux_sat_prefac * std::sqrt(prim(IPR, k, j, i) / denf) * prim(IPR, k, j, i);
+        } else {
+          const auto presf = 0.5 * (prim(IPR, k, j, i) + prim(IPR, k - 1, j, i));
+          flux_sat = flux_sat_prefac * std::sqrt(presf / denf) * presf;
+        }
 
-        const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
-        const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
-        const auto thermal_diff_f =
-            0.5 * (thermal_diff.Get(prim(IPR, k, j, i), prim(IDN, k, j, i)) +
-                   thermal_diff.Get(prim(IPR, k - 1, j, i), prim(IDN, k - 1, j, i)));
-
-        cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_f * denf * flux_grad;
+        cons.flux(X3DIR, IEN, k, j, i) +=
+            (flux_sat / (flux_sat + flux_classic_mag)) * flux_classic;
       });
 }

From ca3c397ef0fcb1f2caaaae554757f0124fdcc9cd Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 8 Jun 2022 16:47:25 +0200
Subject: [PATCH 31/42] Increase default rkl2 ratio to 400 and allow flux
 correction for all integrators

---
 docs/input.md              |  2 +-
 inputs/diffusion.in        |  2 +-
 src/hydro/hydro.cpp        | 13 +------------
 src/hydro/hydro_driver.cpp |  4 ++--
 4 files changed, 5 insertions(+), 16 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index b43f4e02..b76a6640 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -66,7 +66,7 @@ where the full thermal conduction flux $`\mathbf{F}_\mathrm{c}`$ contains both t
 ```math
 \mathbf{F}_\mathrm{classic} = - \kappa \mathbf{\hat b} (\mathbf{\hat b \cdot \nabla T})
 ```
-as well as the saturated flux (as introduced by ^[CM77])
+as well as the saturated flux (as introduced by [^CM77])
 ```math
 \mathbf{F}_\mathrm{sat} = - 5 \phi \rho^{-1/2} p^{3/2} \mathrm{sgn}(\mathbf{\hat b \cdot \nabla T}) \mathbf{\hat b}
 ```
diff --git a/inputs/diffusion.in b/inputs/diffusion.in
index fd5c09c7..44d02bcb 100644
--- a/inputs/diffusion.in
+++ b/inputs/diffusion.in
@@ -64,7 +64,7 @@ integrator = unsplit
 conduction = anisotropic
 conduction_coeff = fixed
 thermal_diff_coeff_code = 0.01
-rkl2_max_dt_ratio = 200.0
+rkl2_max_dt_ratio = 400.0
 
 <parthenon/output0>
 file_type = hdf5
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 884b8645..0013afcd 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -413,10 +413,6 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
 
   auto first_order_flux_correct =
       pin->GetOrAddBoolean("hydro", "first_order_flux_correct", false);
-  if (first_order_flux_correct && integrator != Integrator::vl2) {
-    PARTHENON_FAIL("Please use 'vl2' integrator with first order flux correction. Other "
-                   "integrators have not been tested.")
-  }
   pkg->AddParam<>("first_order_flux_correct", first_order_flux_correct);
   if (first_order_flux_correct) {
     if (fluid == Fluid::euler) {
@@ -1056,6 +1052,7 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
 
   std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
   auto u0_cons_pack = u0_data->PackVariablesAndFluxes(flags_ind);
+  auto const &u0_prim_pack = u0_data->PackVariables(std::vector<std::string>{"prim"});
   auto u1_cons_pack = u1_data->PackVariablesAndFluxes(flags_ind);
   auto pkg = pmb->packages.Get("Hydro");
   const int nhydro = pkg->Param<int>("nhydro");
@@ -1069,14 +1066,6 @@ TaskStatus FirstOrderFluxCorrect(MeshData<Real> *u0_data, MeshData<Real> *u1_dat
   if (fluid == Fluid::glmmhd) {
     c_h = pkg->Param<Real>("c_h");
   }
-  // Using "u1_prim" as "u0_prim" here because all current integrators start with copying
-  // the initial state to the "u0" register, see conditional for `stage == 1` in the
-  // hydro_driver where normally only "cons" is copied but in case for flux correction
-  // "prim", too. This means both during stage 1 and during stage 2 `u1` holds the
-  // original data at the beginning of the timestep. For flux correction we want to make a
-  // full (dt) low order update using the original data and thus use the "prim" data from
-  // u1 here.
-  auto const &u0_prim_pack = u1_data->PackVariables(std::vector<std::string>{"prim"});
 
   const int ndim = pmb->pmy_mesh->ndim;
 
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index d34c869d..3539ac70 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -222,8 +222,8 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   if (parthenon::Globals::my_rank == 0) {
     const auto ratio = 2.0 * tau / mindt_diff;
     std::cout << "STS ratio: " << ratio << " Taking " << s_rkl << " steps." << std::endl;
-    if (ratio > 200.1) {
-      std::cout << "WARNING: ratio is > 200. Proceed at own risk." << std::endl;
+    if (ratio > 400.1) {
+      std::cout << "WARNING: ratio is > 400. Proceed at own risk." << std::endl;
     }
   }
 

From 6494ab27349e7ce9b372e72f1f9c2d9d8d1cbdbc Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Tue, 9 Aug 2022 17:14:19 +0200
Subject: [PATCH 32/42] Remove parabolic timestep constraint for saturated
 conduction limit regime

---
 src/hydro/diffusion/conduction.cpp | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)

diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 46814aaf..e6c8abf3 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -62,6 +62,7 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
 
   const auto gm1 = hydro_pkg->Param<Real>("AdiabaticIndex");
   const auto &thermal_diff = hydro_pkg->Param<ThermalDiffusivity>("thermal_diff");
+  const auto &flux_sat_prefac = hydro_pkg->Param<Real>("conduction_sat_prefac");
 
   if (thermal_diff.GetType() == Conduction::isotropic &&
       thermal_diff.GetCoeffType() == ConductionCoeff::fixed) {
@@ -149,6 +150,19 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           if (Bmag == 0.0) {
             return;
           }
+
+          // In the saturated regime, i.e., when the ratio of classic to saturated fluxes
+          // is large, the equation becomes hyperbolic with the signal speed of the
+          // conduction front being comparable to the sound speed, see [Balsara, Tilley,
+          // and Howk MNRAS 2008]. Therefore, we don't need to constrain the "parabolic"
+          // timestep here (and the hyperbolic one is constrained automatically by the
+          // fluid EstimateTimestep call).
+          auto const flux_sat = flux_sat_prefac * std::sqrt(p / rho) * p;
+          auto const flux_classic = thermal_diff_coeff * rho * gradTmag;
+          if (flux_classic / flux_sat > 100.) {
+            return;
+          }
+
           const auto costheta =
               fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / (Bmag * gradTmag);
 
@@ -339,7 +353,7 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
 
         // Calculate saturated fluxes using upwinding, see (A3) in Mignone+12.
         // Note that we are not concerned about the sign of flux_sat here. The way it is
-        // calculated it's always positive because we use it in the geometric mean with
+        // calculated it's always positive because we use it in the harmonic mean with
         // the flux_classic_mag below. The correct sign is eventually picked up again from
         // flux_classic.
         Real flux_sat;

From 6df018fe852fe4c285ac5929ed61d5fc67b17ddf Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 10 Aug 2022 16:07:47 +0200
Subject: [PATCH 33/42] Add perturb to cloud pgen

---
 src/pgen/cloud.cpp | 61 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 55 insertions(+), 6 deletions(-)

diff --git a/src/pgen/cloud.cpp b/src/pgen/cloud.cpp
index b6d0dded..2e0f0f37 100644
--- a/src/pgen/cloud.cpp
+++ b/src/pgen/cloud.cpp
@@ -1,6 +1,6 @@
 //========================================================================================
 // AthenaPK - a performance portable block structured AMR astrophysical MHD code.
-// Copyright (c) 2021, Athena-Parthenon Collaboration. All rights reserved.
+// Copyright (c) 2021-2022, Athena-Parthenon Collaboration. All rights reserved.
 // Licensed under the 3-clause BSD License, see LICENSE file for details
 //========================================================================================
 //! \file cloud.cpp
@@ -13,6 +13,8 @@
 #include <cstring>   // strcmp()
 
 // Parthenon headers
+#include "Kokkos_CopyViews.hpp"
+#include "globals.hpp"
 #include "mesh/mesh.hpp"
 #include <iomanip>
 #include <ios>
@@ -24,6 +26,7 @@
 // AthenaPK headers
 #include "../main.hpp"
 #include "../units.hpp"
+#include "parthenon/prelude.hpp"
 
 namespace cloud {
 using namespace parthenon::driver::prelude;
@@ -31,6 +34,9 @@ using namespace parthenon::driver::prelude;
 Real rho_wind, mom_wind, rhoe_wind, r_cloud, rho_cloud;
 Real Bx = 0.0;
 Real By = 0.0;
+Real perturb = 0.0;
+std::mt19937 gen; // Standard mersenne_twister_engine seeded with gid
+std::uniform_real_distribution<Real> ran;
 
 //========================================================================================
 //! \fn void InitUserMeshData(Mesh *mesh, ParameterInput *pin)
@@ -70,6 +76,11 @@ void InitUserMeshData(Mesh *mesh, ParameterInput *pin) {
   const auto T_cloud = pressure / gm1 / rho_cloud * mu_m_u_gm1_by_k_B_;
 
   auto plasma_beta = pin->GetOrAddReal("problem/cloud", "plasma_beta", -1.0);
+  perturb = pin->GetOrAddReal("problem/cloud", "perturb", 0.0);
+  if (perturb > 0.0) {
+    gen = std::mt19937(parthenon::Globals::my_rank);
+    ran = std::uniform_real_distribution<Real>(-perturb, perturb);
+  }
 
   auto mag_field_angle_str =
       pin->GetOrAddString("problem/cloud", "mag_field_angle", "undefined");
@@ -191,9 +202,14 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
         }
 
         u(IDN, k, j, i) = rho;
-        u(IM2, k, j, i) = mom;
+        u(IM1, k, j, i) = perturb > 0.0 ? mom * ran(gen) : 0.0;
+        u(IM2, k, j, i) = perturb > 0.0 ? mom + mom * ran(gen) : mom;
+        u(IM3, k, j, i) = perturb > 0.0 ? mom * ran(gen) : 0.0;
         // Can use rhoe_wind here as simulation is setup in pressure equil.
-        u(IEN, k, j, i) = rhoe_wind + 0.5 * mom * mom / rho;
+        u(IEN, k, j, i) =
+            rhoe_wind +
+            0.5 * (SQR(u(IM1, k, j, i)) + SQR(u(IM2, k, j, i)) + SQR(u(IM3, k, j, i))) /
+                rho;
 
         if (mhd_enabled) {
           u(IB1, k, j, i) = Bx;
@@ -225,12 +241,45 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
   const auto rhoe_wind_ = rhoe_wind;
   const auto Bx_ = Bx;
   const auto By_ = By;
+
+  const auto domain = IndexDomain::inner_x2;
+  const auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
+  const auto ib = bounds.GetBoundsI(domain);
+  const auto jb = bounds.GetBoundsJ(domain);
+  const auto kb = bounds.GetBoundsK(domain);
+
+  const auto Ni = ib.e - ib.s + 1;
+  const auto Nj = jb.e - jb.s + 1;
+  const auto Nk = kb.e - kb.s + 1;
+
+  const auto num_cells = Ni * Nj * Nk;
+
+  parthenon::ParArray2D<Real> rand_num("inflow rand num", num_cells, 3);
+  auto rand_num_h = Kokkos::create_mirror_view(rand_num);
+
+  for (int j = 0; j < num_cells; j++) {
+    for (int i = 0; i < 3; i++) {
+      rand_num_h(j, i) = ran(gen);
+    }
+  }
+  Kokkos::deep_copy(rand_num, rand_num_h);
+
+  auto perturb_ = perturb;
+
   pmb->par_for_bndry(
-      "InflowWindX2", nb, IndexDomain::inner_x2, coarse,
+      "InflowWindX2", nb, domain, coarse,
       KOKKOS_LAMBDA(const int, const int &k, const int &j, const int &i) {
+        const auto idx = (k - kb.s) * Ni * Nj + (j - jb.s) * Ni + (i - ib.s);
         cons(IDN, k, j, i) = rho_wind_;
-        cons(IM2, k, j, i) = mom_wind_;
-        cons(IEN, k, j, i) = rhoe_wind_ + 0.5 * mom_wind_ * mom_wind_ / rho_wind_;
+        cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
+        cons(IM2, k, j, i) =
+            perturb > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
+        cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
+        cons(IEN, k, j, i) =
+            rhoe_wind_ + 0.5 *
+                             (SQR(cons(IM1, k, j, i)) + SQR(cons(IM2, k, j, i)) +
+                              SQR(cons(IM3, k, j, i))) /
+                             rho_wind_;
         if (Bx_ != 0.0) {
           cons(IB1, k, j, i) = Bx_;
           cons(IEN, k, j, i) += 0.5 * Bx_ * Bx_;

From 1d0cb198c92b9eee579d3ecb5617d0629b1f9941 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Fri, 12 Aug 2022 11:00:54 +0200
Subject: [PATCH 34/42] Add perturb to B (knowing this is not great...)

---
 src/pgen/cloud.cpp | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/pgen/cloud.cpp b/src/pgen/cloud.cpp
index 2e0f0f37..e98d0d2e 100644
--- a/src/pgen/cloud.cpp
+++ b/src/pgen/cloud.cpp
@@ -271,10 +271,13 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
       KOKKOS_LAMBDA(const int, const int &k, const int &j, const int &i) {
         const auto idx = (k - kb.s) * Ni * Nj + (j - jb.s) * Ni + (i - ib.s);
         cons(IDN, k, j, i) = rho_wind_;
-        cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
-        cons(IM2, k, j, i) =
-            perturb > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
-        cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
+        // cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
+        // cons(IM2, k, j, i) =
+        // perturb_ > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
+        // cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
+        cons(IM1, k, j, i) = 0.0;
+        cons(IM2, k, j, i) = mom_wind_;
+        cons(IM3, k, j, i) = 0.0;
         cons(IEN, k, j, i) =
             rhoe_wind_ + 0.5 *
                              (SQR(cons(IM1, k, j, i)) + SQR(cons(IM2, k, j, i)) +
@@ -282,11 +285,17 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
                              rho_wind_;
         if (Bx_ != 0.0) {
           cons(IB1, k, j, i) = Bx_;
-          cons(IEN, k, j, i) += 0.5 * Bx_ * Bx_;
+          cons(IB2, k, j, i) = perturb_ > 0.0 ? Bx_ * rand_num(idx, 1) : 0.0;
+          cons(IB3, k, j, i) = perturb_ > 0.0 ? Bx_ * rand_num(idx, 2) : 0.0;
+          cons(IEN, k, j, i) +=
+              0.5 * (Bx_ * Bx_ + SQR(cons(IB2, k, j, i)) + SQR(cons(IB3, k, j, i)));
         }
         if (By_ != 0.0) {
+          cons(IB1, k, j, i) = perturb_ > 0.0 ? By_ * rand_num(idx, 0) : 0.0;
           cons(IB2, k, j, i) = By_;
-          cons(IEN, k, j, i) += 0.5 * By_ * By_;
+          cons(IB3, k, j, i) = perturb_ > 0.0 ? By_ * rand_num(idx, 2) : 0.0;
+          cons(IEN, k, j, i) +=
+              0.5 * (By_ * By_ + SQR(cons(IB1, k, j, i)) + SQR(cons(IB3, k, j, i)));
         }
       });
 }

From 300c6a0c2a42a377ae59540bc426137d2f58f564 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sat, 24 Sep 2022 12:40:05 +0200
Subject: [PATCH 35/42] Revert "Add perturb to B (knowing this is not
 great...)"

This reverts commit 1d0cb198c92b9eee579d3ecb5617d0629b1f9941.
---
 src/pgen/cloud.cpp | 21 ++++++---------------
 1 file changed, 6 insertions(+), 15 deletions(-)

diff --git a/src/pgen/cloud.cpp b/src/pgen/cloud.cpp
index e98d0d2e..2e0f0f37 100644
--- a/src/pgen/cloud.cpp
+++ b/src/pgen/cloud.cpp
@@ -271,13 +271,10 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
       KOKKOS_LAMBDA(const int, const int &k, const int &j, const int &i) {
         const auto idx = (k - kb.s) * Ni * Nj + (j - jb.s) * Ni + (i - ib.s);
         cons(IDN, k, j, i) = rho_wind_;
-        // cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
-        // cons(IM2, k, j, i) =
-        // perturb_ > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
-        // cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
-        cons(IM1, k, j, i) = 0.0;
-        cons(IM2, k, j, i) = mom_wind_;
-        cons(IM3, k, j, i) = 0.0;
+        cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
+        cons(IM2, k, j, i) =
+            perturb > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
+        cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
         cons(IEN, k, j, i) =
             rhoe_wind_ + 0.5 *
                              (SQR(cons(IM1, k, j, i)) + SQR(cons(IM2, k, j, i)) +
@@ -285,17 +282,11 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
                              rho_wind_;
         if (Bx_ != 0.0) {
           cons(IB1, k, j, i) = Bx_;
-          cons(IB2, k, j, i) = perturb_ > 0.0 ? Bx_ * rand_num(idx, 1) : 0.0;
-          cons(IB3, k, j, i) = perturb_ > 0.0 ? Bx_ * rand_num(idx, 2) : 0.0;
-          cons(IEN, k, j, i) +=
-              0.5 * (Bx_ * Bx_ + SQR(cons(IB2, k, j, i)) + SQR(cons(IB3, k, j, i)));
+          cons(IEN, k, j, i) += 0.5 * Bx_ * Bx_;
         }
         if (By_ != 0.0) {
-          cons(IB1, k, j, i) = perturb_ > 0.0 ? By_ * rand_num(idx, 0) : 0.0;
           cons(IB2, k, j, i) = By_;
-          cons(IB3, k, j, i) = perturb_ > 0.0 ? By_ * rand_num(idx, 2) : 0.0;
-          cons(IEN, k, j, i) +=
-              0.5 * (By_ * By_ + SQR(cons(IB1, k, j, i)) + SQR(cons(IB3, k, j, i)));
+          cons(IEN, k, j, i) += 0.5 * By_ * By_;
         }
       });
 }

From 9205e5642d9cb2979712924f3bbb7f33536cb9a9 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sat, 24 Sep 2022 12:40:13 +0200
Subject: [PATCH 36/42] Revert "Add perturb to cloud pgen"

This reverts commit 6df018fe852fe4c285ac5929ed61d5fc67b17ddf.
---
 src/pgen/cloud.cpp | 61 +++++-----------------------------------------
 1 file changed, 6 insertions(+), 55 deletions(-)

diff --git a/src/pgen/cloud.cpp b/src/pgen/cloud.cpp
index 2e0f0f37..b6d0dded 100644
--- a/src/pgen/cloud.cpp
+++ b/src/pgen/cloud.cpp
@@ -1,6 +1,6 @@
 //========================================================================================
 // AthenaPK - a performance portable block structured AMR astrophysical MHD code.
-// Copyright (c) 2021-2022, Athena-Parthenon Collaboration. All rights reserved.
+// Copyright (c) 2021, Athena-Parthenon Collaboration. All rights reserved.
 // Licensed under the 3-clause BSD License, see LICENSE file for details
 //========================================================================================
 //! \file cloud.cpp
@@ -13,8 +13,6 @@
 #include <cstring>   // strcmp()
 
 // Parthenon headers
-#include "Kokkos_CopyViews.hpp"
-#include "globals.hpp"
 #include "mesh/mesh.hpp"
 #include <iomanip>
 #include <ios>
@@ -26,7 +24,6 @@
 // AthenaPK headers
 #include "../main.hpp"
 #include "../units.hpp"
-#include "parthenon/prelude.hpp"
 
 namespace cloud {
 using namespace parthenon::driver::prelude;
@@ -34,9 +31,6 @@ using namespace parthenon::driver::prelude;
 Real rho_wind, mom_wind, rhoe_wind, r_cloud, rho_cloud;
 Real Bx = 0.0;
 Real By = 0.0;
-Real perturb = 0.0;
-std::mt19937 gen; // Standard mersenne_twister_engine seeded with gid
-std::uniform_real_distribution<Real> ran;
 
 //========================================================================================
 //! \fn void InitUserMeshData(Mesh *mesh, ParameterInput *pin)
@@ -76,11 +70,6 @@ void InitUserMeshData(Mesh *mesh, ParameterInput *pin) {
   const auto T_cloud = pressure / gm1 / rho_cloud * mu_m_u_gm1_by_k_B_;
 
   auto plasma_beta = pin->GetOrAddReal("problem/cloud", "plasma_beta", -1.0);
-  perturb = pin->GetOrAddReal("problem/cloud", "perturb", 0.0);
-  if (perturb > 0.0) {
-    gen = std::mt19937(parthenon::Globals::my_rank);
-    ran = std::uniform_real_distribution<Real>(-perturb, perturb);
-  }
 
   auto mag_field_angle_str =
       pin->GetOrAddString("problem/cloud", "mag_field_angle", "undefined");
@@ -202,14 +191,9 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
         }
 
         u(IDN, k, j, i) = rho;
-        u(IM1, k, j, i) = perturb > 0.0 ? mom * ran(gen) : 0.0;
-        u(IM2, k, j, i) = perturb > 0.0 ? mom + mom * ran(gen) : mom;
-        u(IM3, k, j, i) = perturb > 0.0 ? mom * ran(gen) : 0.0;
+        u(IM2, k, j, i) = mom;
         // Can use rhoe_wind here as simulation is setup in pressure equil.
-        u(IEN, k, j, i) =
-            rhoe_wind +
-            0.5 * (SQR(u(IM1, k, j, i)) + SQR(u(IM2, k, j, i)) + SQR(u(IM3, k, j, i))) /
-                rho;
+        u(IEN, k, j, i) = rhoe_wind + 0.5 * mom * mom / rho;
 
         if (mhd_enabled) {
           u(IB1, k, j, i) = Bx;
@@ -241,45 +225,12 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
   const auto rhoe_wind_ = rhoe_wind;
   const auto Bx_ = Bx;
   const auto By_ = By;
-
-  const auto domain = IndexDomain::inner_x2;
-  const auto bounds = coarse ? pmb->c_cellbounds : pmb->cellbounds;
-  const auto ib = bounds.GetBoundsI(domain);
-  const auto jb = bounds.GetBoundsJ(domain);
-  const auto kb = bounds.GetBoundsK(domain);
-
-  const auto Ni = ib.e - ib.s + 1;
-  const auto Nj = jb.e - jb.s + 1;
-  const auto Nk = kb.e - kb.s + 1;
-
-  const auto num_cells = Ni * Nj * Nk;
-
-  parthenon::ParArray2D<Real> rand_num("inflow rand num", num_cells, 3);
-  auto rand_num_h = Kokkos::create_mirror_view(rand_num);
-
-  for (int j = 0; j < num_cells; j++) {
-    for (int i = 0; i < 3; i++) {
-      rand_num_h(j, i) = ran(gen);
-    }
-  }
-  Kokkos::deep_copy(rand_num, rand_num_h);
-
-  auto perturb_ = perturb;
-
   pmb->par_for_bndry(
-      "InflowWindX2", nb, domain, coarse,
+      "InflowWindX2", nb, IndexDomain::inner_x2, coarse,
       KOKKOS_LAMBDA(const int, const int &k, const int &j, const int &i) {
-        const auto idx = (k - kb.s) * Ni * Nj + (j - jb.s) * Ni + (i - ib.s);
         cons(IDN, k, j, i) = rho_wind_;
-        cons(IM1, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 0) : 0.0;
-        cons(IM2, k, j, i) =
-            perturb > 0.0 ? mom_wind_ + mom_wind_ * rand_num(idx, 1) : mom_wind_;
-        cons(IM3, k, j, i) = perturb_ > 0.0 ? mom_wind_ * rand_num(idx, 2) : 0.0;
-        cons(IEN, k, j, i) =
-            rhoe_wind_ + 0.5 *
-                             (SQR(cons(IM1, k, j, i)) + SQR(cons(IM2, k, j, i)) +
-                              SQR(cons(IM3, k, j, i))) /
-                             rho_wind_;
+        cons(IM2, k, j, i) = mom_wind_;
+        cons(IEN, k, j, i) = rhoe_wind_ + 0.5 * mom_wind_ * mom_wind_ / rho_wind_;
         if (Bx_ != 0.0) {
           cons(IB1, k, j, i) = Bx_;
           cons(IEN, k, j, i) += 0.5 * Bx_ * Bx_;

From 1749bb862dcd8c6b1c7a7095542870f740d9c7bb Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sat, 24 Sep 2022 12:45:38 +0200
Subject: [PATCH 37/42] Limit cooling to the larger of T_floor and the cooling
 table cutoff

---
 src/hydro/srcterms/tabular_cooling.cpp | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/hydro/srcterms/tabular_cooling.cpp b/src/hydro/srcterms/tabular_cooling.cpp
index c158c8fd..52475076 100644
--- a/src/hydro/srcterms/tabular_cooling.cpp
+++ b/src/hydro/srcterms/tabular_cooling.cpp
@@ -242,7 +242,8 @@ void TabularCooling::SubcyclingFixedIntSrcTerm(MeshData<Real> *md, const Real dt
   const Real min_sub_dt = dt / max_iter;
 
   const Real d_e_tol = d_e_tol_;
-  const Real internal_e_floor = T_floor_ / mu_m_u_gm1_by_k_B;
+  const Real internal_e_floor =
+      std::max(T_floor_, std::pow(10.0, log_temp_start)) / mu_m_u_gm1_by_k_B;
 
   // Grab some necessary variables
   const auto &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});
@@ -401,7 +402,10 @@ void TabularCooling::SubcyclingFixedIntSrcTerm(MeshData<Real> *md, const Real dt
           sub_iter++;
         }
 
-        PARTHENON_REQUIRE(internal_e > internal_e_floor, "cooled below floor");
+        PARTHENON_REQUIRE(internal_e > 0.9 * internal_e_floor, "cooled way below floor");
+        if (internal_e < internal_e_floor) {
+          internal_e = internal_e_floor;
+        }
 
         // Remove the cooling from the specific total energy
         cons(IEN, k, j, i) += rho * (internal_e - internal_e_initial);

From 2bc56f88b32c047140bd096adcfc383f8d7f160b Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Sat, 24 Sep 2022 13:01:45 +0200
Subject: [PATCH 38/42] Add oblique B field

---
 src/pgen/cloud.cpp | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/pgen/cloud.cpp b/src/pgen/cloud.cpp
index b6d0dded..223f67d1 100644
--- a/src/pgen/cloud.cpp
+++ b/src/pgen/cloud.cpp
@@ -31,6 +31,7 @@ using namespace parthenon::driver::prelude;
 Real rho_wind, mom_wind, rhoe_wind, r_cloud, rho_cloud;
 Real Bx = 0.0;
 Real By = 0.0;
+Real Bz = 0.0;
 
 //========================================================================================
 //! \fn void InitUserMeshData(Mesh *mesh, ParameterInput *pin)
@@ -80,9 +81,13 @@ void InitUserMeshData(Mesh *mesh, ParameterInput *pin) {
       By = std::sqrt(2.0 * pressure / plasma_beta);
     } else if (mag_field_angle_str == "transverse") {
       Bx = std::sqrt(2.0 * pressure / plasma_beta);
+    } else if (mag_field_angle_str == "oblique") {
+      const auto B = std::sqrt(2.0 * pressure / plasma_beta);
+      Bx = B / std::sqrt(5.0);
+      Bz = 2 * Bx;
     } else {
       PARTHENON_FAIL("Unsupported problem/cloud/mag_field_angle. Please use either "
-                     "'aligned' or 'transverse'.");
+                     "'aligned', 'transverse', or 'oblique'.");
     }
   }
 
@@ -155,7 +160,7 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
   const auto nscalars = hydro_pkg->Param<int>("nscalars");
 
   const bool mhd_enabled = hydro_pkg->Param<Fluid>("fluid") == Fluid::glmmhd;
-  if (((Bx != 0.0) || (By != 0.0)) && !mhd_enabled) {
+  if (((Bx != 0.0) || (By != 0.0) || (Bz != 0.0)) && !mhd_enabled) {
     PARTHENON_FAIL("Requested to initialize magnetic fields by `cloud/plasma_beta > 0`, "
                    "but `hydro/fluid` is not supporting MHD.");
   }
@@ -198,7 +203,8 @@ void ProblemGenerator(MeshBlock *pmb, ParameterInput *pin) {
         if (mhd_enabled) {
           u(IB1, k, j, i) = Bx;
           u(IB2, k, j, i) = By;
-          u(IEN, k, j, i) += 0.5 * (Bx * Bx + By * By);
+          u(IB3, k, j, i) = Bz;
+          u(IEN, k, j, i) += 0.5 * (Bx * Bx + By * By + Bz * Bz);
         }
 
         // Init passive scalars
@@ -225,6 +231,7 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
   const auto rhoe_wind_ = rhoe_wind;
   const auto Bx_ = Bx;
   const auto By_ = By;
+  const auto Bz_ = Bz;
   pmb->par_for_bndry(
       "InflowWindX2", nb, IndexDomain::inner_x2, coarse,
       KOKKOS_LAMBDA(const int, const int &k, const int &j, const int &i) {
@@ -239,6 +246,10 @@ void InflowWindX2(std::shared_ptr<MeshBlockData<Real>> &mbd, bool coarse) {
           cons(IB2, k, j, i) = By_;
           cons(IEN, k, j, i) += 0.5 * By_ * By_;
         }
+        if (Bz_ != 0.0) {
+          cons(IB3, k, j, i) = Bz_;
+          cons(IEN, k, j, i) += 0.5 * Bz_ * Bz_;
+        }
       });
 }
 

From b0bd7caa81d97fc2c35676a932ea8ae7836d0e12 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Mon, 25 Sep 2023 18:42:12 +0200
Subject: [PATCH 39/42] Update coords and driver

---
 .github/workflows/ci.yml           |   1 +
 src/hydro/diffusion/conduction.cpp |  53 ++++-----
 src/hydro/hydro_driver.cpp         | 176 +++++++++--------------------
 3 files changed, 82 insertions(+), 148 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index e24e197b..05049ba7 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -53,6 +53,7 @@ jobs:
             build/tst/regression/outputs/cluster_hse/analytic_comparison.png
             build/tst/regression/outputs/cluster_tabular_cooling/convergence.png
             build/tst/regression/outputs/aniso_therm_cond_ring_conv/ring_convergence.png
+            build/tst/regression/outputs/aniso_therm_cond_gauss_conv/cond.png
             build/tst/regression/outputs/field_loop/field_loop.png
             build/tst/regression/outputs/riemann_hydro/shock_tube.png
             build/tst/regression/outputs/turbulence/parthenon.hst
diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp
index 0103ed1b..d6e57d30 100644
--- a/src/hydro/diffusion/conduction.cpp
+++ b/src/hydro/diffusion/conduction.cpp
@@ -78,14 +78,14 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
             {1, 1, 1, ib.e + 1 - ib.s}),
         KOKKOS_LAMBDA(const int b, const int k, const int j, const int i, Real &min_dt) {
           const auto &coords = prim_pack.GetCoords(b);
-          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
-                                    (thermal_diff_coeff + TINY_NUMBER));
+          min_dt = fmin(min_dt,
+                        SQR(coords.Dxc<1>(k, j, i)) / (thermal_diff_coeff + TINY_NUMBER));
           if (ndim >= 2) {
-            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
+            min_dt = fmin(min_dt, SQR(coords.Dxc<2>(k, j, i)) /
                                       (thermal_diff_coeff + TINY_NUMBER));
           }
           if (ndim >= 3) {
-            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
+            min_dt = fmin(min_dt, SQR(coords.Dxc<3>(k, j, i)) /
                                       (thermal_diff_coeff + TINY_NUMBER));
           }
         },
@@ -106,20 +106,20 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           const auto dTdx = 0.5 *
                             (prim(IPR, k, j, i + 1) / prim(IDN, k, j, i + 1) -
                              prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1)) /
-                            coords.dx1v(i);
+                            coords.Dxc<1>(i);
 
           const auto dTdy = ndim >= 2
                                 ? 0.5 *
                                       (prim(IPR, k, j + 1, i) / prim(IDN, k, j + 1, i) -
                                        prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i)) /
-                                      coords.dx2v(j)
+                                      coords.Dxc<2>(j)
                                 : 0.0;
 
           const auto dTdz = ndim >= 3
                                 ? 0.5 *
                                       (prim(IPR, k + 1, j, i) / prim(IDN, k + 1, j, i) -
                                        prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i)) /
-                                      coords.dx3v(k)
+                                      coords.Dxc<3>(k)
                                 : 0.0;
           const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
 
@@ -130,15 +130,12 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           auto thermal_diff_coeff = thermal_diff.Get(p, rho);
 
           if (thermal_diff.GetType() == Conduction::isotropic) {
-            min_dt = fmin(min_dt,
-                          SQR(coords.Dx(parthenon::X1DIR, k, j, i)) / thermal_diff_coeff);
+            min_dt = fmin(min_dt, SQR(coords.Dxc<1>(k, j, i)) / thermal_diff_coeff);
             if (ndim >= 2) {
-              min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
-                                        thermal_diff_coeff);
+              min_dt = fmin(min_dt, SQR(coords.Dxc<2>(k, j, i)) / thermal_diff_coeff);
             }
             if (ndim >= 3) {
-              min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
-                                        thermal_diff_coeff);
+              min_dt = fmin(min_dt, SQR(coords.Dxc<3>(k, j, i)) / thermal_diff_coeff);
             }
             return;
           }
@@ -166,16 +163,16 @@ Real EstimateConductionTimestep(MeshData<Real> *md) {
           const auto costheta =
               fabs(Bx * dTdx + By * dTdy + Bz * dTdz) / (Bmag * gradTmag);
 
-          min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X1DIR, k, j, i)) /
+          min_dt = fmin(min_dt, SQR(coords.Dxc<1>(k, j, i)) /
                                     (thermal_diff_coeff * fabs(Bx) / Bmag * costheta +
                                      TINY_NUMBER));
           if (ndim >= 2) {
-            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X2DIR, k, j, i)) /
+            min_dt = fmin(min_dt, SQR(coords.Dxc<2>(k, j, i)) /
                                       (thermal_diff_coeff * fabs(By) / Bmag * costheta +
                                        TINY_NUMBER));
           }
           if (ndim >= 3) {
-            min_dt = fmin(min_dt, SQR(coords.Dx(parthenon::X3DIR, k, j, i)) /
+            min_dt = fmin(min_dt, SQR(coords.Dxc<3>(k, j, i)) /
                                       (thermal_diff_coeff * fabs(Bz) / Bmag * costheta +
                                        TINY_NUMBER));
           }
@@ -217,7 +214,7 @@ void ThermalFluxIsoFixed(MeshData<Real> *md) {
         const auto &prim = prim_pack(b);
         const auto T_i = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_im1 = prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1);
-        const auto dTdx = (T_i - T_im1) / coords.Dx(parthenon::X1DIR, k, j, i);
+        const auto dTdx = (T_i - T_im1) / coords.Dxc<1>(k, j, i);
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
         cons.flux(X1DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdx;
       });
@@ -236,7 +233,7 @@ void ThermalFluxIsoFixed(MeshData<Real> *md) {
 
         const auto T_j = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_jm1 = prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i);
-        const auto dTdy = (T_j - T_jm1) / coords.Dx(parthenon::X2DIR, k, j, i);
+        const auto dTdy = (T_j - T_jm1) / coords.Dxc<2>(k, j, i);
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
         cons.flux(X2DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdy;
       });
@@ -255,7 +252,7 @@ void ThermalFluxIsoFixed(MeshData<Real> *md) {
 
         const auto T_k = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_km1 = prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i);
-        const auto dTdz = (T_k - T_km1) / coords.Dx(parthenon::X3DIR, k, j, i);
+        const auto dTdz = (T_k - T_km1) / coords.Dxc<3>(k, j, i);
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
         cons.flux(X3DIR, IEN, k, j, i) -= thermal_diff_coeff * denf * dTdz;
       });
@@ -304,7 +301,7 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                            prim(IPR, k, j    , i - 1) / prim(IDN, k, j    , i - 1),
                            prim(IPR, k, j    , i - 1) / prim(IDN, k, j    , i - 1) -
                            prim(IPR, k, j - 1, i - 1) / prim(IDN, k, j - 1, i - 1)) /
-            coords.Dx(parthenon::X2DIR, k, j, i);
+            coords.Dxc<2>( k, j, i);
 
         if (ndim >= 3) {
           /* Monotonized temperature difference dT/dz, 3D problem ONLY */
@@ -316,13 +313,13 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                                 prim(IPR, k    , j, i - 1) / prim(IDN, k    , j, i - 1),
                                 prim(IPR, k    , j, i - 1) / prim(IDN, k    , j, i - 1) -
                                 prim(IPR, k - 1, j, i - 1) / prim(IDN, k - 1, j, i - 1)) /
-                 coords.Dx(parthenon::X3DIR, k, j, i);
+                 coords.Dxc<3>( k, j, i);
         }
         // clang-format on
 
         const auto T_i = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_im1 = prim(IPR, k, j, i - 1) / prim(IDN, k, j, i - 1);
-        const auto dTdx = (T_i - T_im1) / coords.Dx(parthenon::X1DIR, k, j, i);
+        const auto dTdx = (T_i - T_im1) / coords.Dxc<1>(k, j, i);
 
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j, i - 1));
         const auto thermal_diff_f =
@@ -399,7 +396,7 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                            prim(IPR, k, j - 1, i    ) / prim(IDN, k, j - 1, i    ),
                            prim(IPR, k, j - 1, i    ) / prim(IDN, k, j - 1, i    ) -
                            prim(IPR, k, j - 1, i - 1) / prim(IDN, k, j - 1, i - 1)) /
-            coords.Dx(parthenon::X1DIR, k, j, i);
+            coords.Dxc<1>(k, j, i);
 
         if (ndim >= 3) {
           /* Monotonized temperature difference dT/dz, 3D problem ONLY */
@@ -411,14 +408,14 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                                 prim(IPR, k    , j - 1, i) / prim(IDN, k    , j - 1, i),
                                 prim(IPR, k    , j - 1, i) / prim(IDN, k    , j - 1, i) -
                                 prim(IPR, k - 1, j - 1, i) / prim(IDN, k - 1, j - 1, i)) /
-                 coords.Dx(parthenon::X3DIR, k, j, i);
+                 coords.Dxc<3>(k, j, i);
 
         }
         // clang-format on
 
         const auto T_j = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_jm1 = prim(IPR, k, j - 1, i) / prim(IDN, k, j - 1, i);
-        const auto dTdy = (T_j - T_jm1) / coords.Dx(parthenon::X2DIR, k, j, i);
+        const auto dTdy = (T_j - T_jm1) / coords.Dxc<2>(k, j, i);
 
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k, j - 1, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
@@ -489,7 +486,7 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                            prim(IPR, k - 1, j, i    ) / prim(IDN, k - 1, j, i    ),
                            prim(IPR, k - 1, j, i    ) / prim(IDN, k - 1, j, i    ) -
                            prim(IPR, k - 1, j, i - 1) / prim(IDN, k - 1, j, i - 1)) /
-            coords.Dx(parthenon::X1DIR, k, j, i);
+            coords.Dxc<1>(k, j, i);
 
         /* Monotonized temperature difference dT/dy */
         const auto dTdy =
@@ -501,12 +498,12 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
                            prim(IPR, k - 1, j    , i) / prim(IDN, k - 1, j    , i),
                            prim(IPR, k - 1, j    , i) / prim(IDN, k - 1, j    , i) -
                            prim(IPR, k - 1, j - 1, i) / prim(IDN, k - 1, j - 1, i)) /
-            coords.Dx(parthenon::X2DIR, k, j, i);
+            coords.Dxc<2>(k, j, i);
         // clang-format on
 
         const auto T_k = prim(IPR, k, j, i) / prim(IDN, k, j, i);
         const auto T_km1 = prim(IPR, k - 1, j, i) / prim(IDN, k - 1, j, i);
-        const auto dTdz = (T_k - T_km1) / coords.Dx(parthenon::X3DIR, k, j, i);
+        const auto dTdz = (T_k - T_km1) / coords.Dxc<3>(k, j, i);
 
         const auto denf = 0.5 * (prim(IDN, k, j, i) + prim(IDN, k - 1, j, i));
         const auto gradTmag = sqrt(SQR(dTdx) + SQR(dTdy) + SQR(dTdz));
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index 60295eec..bbc16e9d 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -250,8 +250,6 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     auto &pmb = blocks[i];
     auto &tl = region_init[i];
     auto &base = pmb->meshblock_data.Get();
-    auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, base.get(),
-                                 BoundaryCommSubset::all);
 
     // Add extra registers. No-op for existing variables so it's safe to call every
     // time.
@@ -266,6 +264,11 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
   for (int i = 0; i < num_partitions; i++) {
     auto &tl = region_calc_fluxes_step_init[i];
     auto &base = pmesh->mesh_data.GetOrAdd("base", i);
+    const auto any = parthenon::BoundaryType::any;
+    auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, base);
+    auto start_flxcor_recv =
+        tl.AddTask(none, parthenon::StartReceiveFluxCorrections, base);
+
     // Reset flux arrays (not guaranteed to be zero)
     auto reset_fluxes = tl.AddTask(none, ResetFluxes, base.get());
 
@@ -274,27 +277,19 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     // (in every subsetp).
     auto hydro_diff_fluxes =
         tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), base.get());
-  }
 
-  TaskRegion &region_flux_correct_step_init = ptask_coll->AddRegion(blocks.size());
-  for (int i = 0; i < blocks.size(); i++) {
-    auto &tl = region_flux_correct_step_init[i];
-    auto &base = blocks[i]->meshblock_data.Get("base");
-    auto send_flux =
-        tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, base.get());
-    auto recv_flux =
-        tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, base.get());
-  }
+    auto send_flx =
+        tl.AddTask(hydro_diff_fluxes, parthenon::LoadAndSendFluxCorrections, base);
+    auto recv_flx =
+        tl.AddTask(start_flxcor_recv, parthenon::ReceiveFluxCorrections, base);
+    auto set_flx =
+        tl.AddTask(recv_flx | hydro_diff_fluxes, parthenon::SetFluxCorrections, base);
 
-  TaskRegion &region_rkl2_step_init = ptask_coll->AddRegion(num_partitions);
-  for (int i = 0; i < num_partitions; i++) {
-    auto &tl = region_rkl2_step_init[i];
     auto &Y0 = pmesh->mesh_data.GetOrAdd("u1", i);
     auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-    auto &base = pmesh->mesh_data.GetOrAdd("base", i);
     auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
-    auto init_MY0 = tl.AddTask(none, parthenon::Update::FluxDivergence<MeshData<Real>>,
+    auto init_MY0 = tl.AddTask(set_flx, parthenon::Update::FluxDivergence<MeshData<Real>>,
                                base.get(), MY0.get());
 
     // Initialize Y0 and Y1 and the recursion relation starting with j = 2 needs data from
@@ -302,36 +297,18 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     auto rkl2_step_first = tl.AddTask(init_MY0, RKL2StepFirst, Y0.get(), base.get(),
                                       Yjm2.get(), MY0.get(), s_rkl, tau);
 
-    // Update ghost cells of Y1 (as MY1 is calculated for each Y_j)
+    // Update ghost cells of Y1 (as MY1 is calculated for each Y_j).
+    // Y1 stored in "base", see rkl2_step_first task.
+    // Update ghost cells (local and non local), prolongate and apply bound cond.
+    // TODO(someone) experiment with split (local/nonlocal) comms with respect to
+    // performance for various tests (static, amr, block sizes) and then decide on the
+    // best impl. Go with default call (split local/nonlocal) for now.
     // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
-    auto send = tl.AddTask(rkl2_step_first,
-                           parthenon::cell_centered_bvars::SendBoundaryBuffers, base);
-    auto recv =
-        tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, base);
-    auto fill_from_bufs =
-        tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, base);
-  }
+    auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
+        rkl2_step_first | start_bnd, tl, base, pmesh->multilevel);
 
-  TaskRegion &region_clear_bnd = ptask_coll->AddRegion(blocks.size());
-  for (int i = 0; i < blocks.size(); i++) {
-    auto &tl = region_clear_bnd[i];
-    auto &base = blocks[i]->meshblock_data.Get();
-    auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                       base.get(), BoundaryCommSubset::all);
-    auto prolongBound = none;
-    if (pmesh->multilevel) {
-      prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, base);
-    }
-
-    // set physical boundaries
-    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, base);
-  }
-  TaskRegion &region_cons_to_prim = ptask_coll->AddRegion(num_partitions);
-  for (int i = 0; i < num_partitions; i++) {
-    auto &tl = region_cons_to_prim[i];
-    auto &base = pmesh->mesh_data.GetOrAdd("base", i);
-    auto fill_derived =
-        tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, base.get());
+    tl.AddTask(bounds_exchange, parthenon::Update::FillDerived<MeshData<Real>>,
+               base.get());
   }
 
   // Compute coefficients. Meyer+2012 eq. (16)
@@ -351,82 +328,52 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
     mu_tilde_j = mu_j * w1;
     gamma_tilde_j = -(1.0 - b_jm1) * mu_tilde_j; // -a_jm1*mu_tilde_j
 
-    TaskRegion &region_init_other = ptask_coll->AddRegion(blocks.size());
-    for (int i = 0; i < blocks.size(); i++) {
-      auto &pmb = blocks[i];
-      auto &tl = region_init_other[i];
-      auto &base = pmb->meshblock_data.Get();
-      // Only need boundaries for base as it's the only "active" container exchanging
-      // data/fluxes with neighbors. All other containers are passive (i.e., data is only
-      // used but not exchanged).
-      auto start_recv = tl.AddTask(none, &MeshBlockData<Real>::StartReceiving, base.get(),
-                                   BoundaryCommSubset::all);
-    }
-
     TaskRegion &region_calc_fluxes_step_other = ptask_coll->AddRegion(num_partitions);
     for (int i = 0; i < num_partitions; i++) {
       auto &tl = region_calc_fluxes_step_other[i];
       auto &base = pmesh->mesh_data.GetOrAdd("base", i);
 
+      // Only need boundaries for base as it's the only "active" container exchanging
+      // data/fluxes with neighbors. All other containers are passive (i.e., data is only
+      // used but not exchanged).
+      const auto any = parthenon::BoundaryType::any;
+      auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, base);
+      auto start_flxcor_recv =
+          tl.AddTask(none, parthenon::StartReceiveFluxCorrections, base);
+
       // Reset flux arrays (not guaranteed to be zero)
       auto reset_fluxes = tl.AddTask(none, ResetFluxes, base.get());
 
       // Calculate the diffusive fluxes for Yjm1 (here u1)
       auto hydro_diff_fluxes =
           tl.AddTask(reset_fluxes, CalcDiffFluxes, hydro_pkg.get(), base.get());
-    }
 
-    TaskRegion &region_flux_correct_step_other = ptask_coll->AddRegion(blocks.size());
-    for (int i = 0; i < blocks.size(); i++) {
-      auto &tl = region_flux_correct_step_other[i];
-      auto &base = blocks[i]->meshblock_data.Get();
-      auto send_flux =
-          tl.AddTask(none, &MeshBlockData<Real>::SendFluxCorrection, base.get());
-      auto recv_flux =
-          tl.AddTask(none, &MeshBlockData<Real>::ReceiveFluxCorrection, base.get());
-    }
+      auto send_flx =
+          tl.AddTask(hydro_diff_fluxes, parthenon::LoadAndSendFluxCorrections, base);
+      auto recv_flx =
+          tl.AddTask(start_flxcor_recv, parthenon::ReceiveFluxCorrections, base);
+      auto set_flx =
+          tl.AddTask(recv_flx | hydro_diff_fluxes, parthenon::SetFluxCorrections, base);
 
-    TaskRegion &region_rkl2_step_other = ptask_coll->AddRegion(num_partitions);
-    for (int i = 0; i < num_partitions; i++) {
-      auto &tl = region_rkl2_step_other[i];
       auto &Y0 = pmesh->mesh_data.GetOrAdd("u1", i);
       auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
-      auto &base = pmesh->mesh_data.GetOrAdd("base", i);
       auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);
 
       auto rkl2_step_other =
-          tl.AddTask(none, RKL2StepOther, Y0.get(), base.get(), Yjm2.get(), MY0.get(),
+          tl.AddTask(set_flx, RKL2StepOther, Y0.get(), base.get(), Yjm2.get(), MY0.get(),
                      mu_j, nu_j, mu_tilde_j, gamma_tilde_j, tau);
 
       // update ghost cells of base (currently storing Yj)
+      // Update ghost cells (local and non local), prolongate and apply bound cond.
+      // TODO(someone) experiment with split (local/nonlocal) comms with respect to
+      // performance for various tests (static, amr, block sizes) and then decide on the
+      // best impl. Go with default call (split local/nonlocal) for now.
       // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
-      auto send = tl.AddTask(rkl2_step_other,
-                             parthenon::cell_centered_bvars::SendBoundaryBuffers, base);
-      auto recv =
-          tl.AddTask(send, parthenon::cell_centered_bvars::ReceiveBoundaryBuffers, base);
-      auto fill_from_bufs =
-          tl.AddTask(recv, parthenon::cell_centered_bvars::SetBoundaries, base);
-    }
-    TaskRegion &region_clear_bnd_other = ptask_coll->AddRegion(blocks.size());
-    for (int i = 0; i < blocks.size(); i++) {
-      auto &tl = region_clear_bnd_other[i];
-      auto &base = blocks[i]->meshblock_data.Get();
-      auto clear_comm_flags = tl.AddTask(none, &MeshBlockData<Real>::ClearBoundary,
-                                         base.get(), BoundaryCommSubset::all);
-      auto prolongBound = none;
-      if (pmesh->multilevel) {
-        prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, base);
-      }
-
-      // set physical boundaries
-      auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, base);
-    }
-    TaskRegion &region_cons_to_prim_other = ptask_coll->AddRegion(num_partitions);
-    for (int i = 0; i < num_partitions; i++) {
-      auto &tl = region_cons_to_prim_other[i];
-      auto &base = pmesh->mesh_data.GetOrAdd("base", i);
-      auto fill_derived =
-          tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, base.get());
+      auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
+          rkl2_step_other | start_bnd, tl, base, pmesh->multilevel);
+
+      tl.AddTask(bounds_exchange, parthenon::Update::FillDerived<MeshData<Real>>,
+                 base.get());
     }
 
     b_jm2 = b_jm1;
@@ -639,7 +586,11 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
     auto &tl = single_tasklist_per_pack_region[i];
     auto &mu0 = pmesh->mesh_data.GetOrAdd("base", i);
     auto &mu1 = pmesh->mesh_data.GetOrAdd("u1", i);
-    tl.AddTask(none, parthenon::StartReceiveFluxCorrections, mu0);
+
+    const auto any = parthenon::BoundaryType::any;
+    auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, mu0);
+    auto start_flxcor_recv =
+        tl.AddTask(none, parthenon::StartReceiveFluxCorrections, mu0);
 
     const auto flux_str = (stage == 1) ? "flux_first_stage" : "flux_other_stage";
     FluxFun_t *calc_flux_fun = hydro_pkg->Param<FluxFun_t *>(flux_str);
@@ -660,9 +611,9 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
 
     auto send_flx =
         tl.AddTask(first_order_flux_correct, parthenon::LoadAndSendFluxCorrections, mu0);
-    auto recv_flx =
-        tl.AddTask(first_order_flux_correct, parthenon::ReceiveFluxCorrections, mu0);
-    auto set_flx = tl.AddTask(recv_flx, parthenon::SetFluxCorrections, mu0);
+    auto recv_flx = tl.AddTask(start_flxcor_recv, parthenon::ReceiveFluxCorrections, mu0);
+    auto set_flx = tl.AddTask(recv_flx | first_order_flux_correct,
+                              parthenon::SetFluxCorrections, mu0);
 
     // compute the divergence of fluxes of conserved variables
     auto update = tl.AddTask(
@@ -694,29 +645,14 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
           tl.AddTask(source_split_strang_final, AddSplitSourcesFirstOrder, mu0.get(), tm);
     }
 
-    // Update ghost cells (local and non local)
+    // Update ghost cells (local and non local), prolongate and apply bound cond.
     // TODO(someone) experiment with split (local/nonlocal) comms with respect to
     // performance for various tests (static, amr, block sizes) and then decide on the
     // best impl. Go with default call (split local/nonlocal) for now.
-    parthenon::AddBoundaryExchangeTasks(source_split_first_order, tl, mu0,
+    parthenon::AddBoundaryExchangeTasks(source_split_first_order | start_bnd, tl, mu0,
                                         pmesh->multilevel);
   }
 
-  TaskRegion &async_region_3 = tc.AddRegion(num_task_lists_executed_independently);
-  for (int i = 0; i < blocks.size(); i++) {
-    auto &tl = async_region_3[i];
-    auto &u0 = blocks[i]->meshblock_data.Get("base");
-    auto prolongBound = none;
-    // Currently taken care of by AddBoundaryExchangeTasks above.
-    // Needs to be reintroduced once we reintroduce split (local/nonlocal) communication.
-    // if (pmesh->multilevel) {
-    //  prolongBound = tl.AddTask(none, parthenon::ProlongateBoundaries, u0);
-    //}
-
-    // set physical boundaries
-    auto set_bc = tl.AddTask(prolongBound, parthenon::ApplyBoundaryConditions, u0);
-  }
-
   // Single task in single (serial) region to reset global vars used in reductions in the
   // first stage.
   if (stage == integrator->nstages && hydro_pkg->Param<bool>("calc_c_h")) {

From 4ab883459579b6f47e701145204c495601538b66 Mon Sep 17 00:00:00 2001
From: Philipp Grete <gretephi@msu.edu>
Date: Mon, 25 Sep 2023 22:24:08 +0200
Subject: [PATCH 40/42] Fix test cases and add success check

---
 tst/regression/CMakeLists.txt                 |  4 +-
 .../aniso_therm_cond_gauss_conv.py            | 58 ++++++++++++++-----
 2 files changed, 47 insertions(+), 15 deletions(-)

diff --git a/tst/regression/CMakeLists.txt b/tst/regression/CMakeLists.txt
index e43d579f..c43b4de5 100644
--- a/tst/regression/CMakeLists.txt
+++ b/tst/regression/CMakeLists.txt
@@ -46,8 +46,8 @@ setup_test_both("aniso_therm_cond_ring_conv" "--driver ${PROJECT_BINARY_DIR}/bin
 setup_test_both("aniso_therm_cond_ring_multid" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 4" "convergence")
  
-  setup_test_serial("aniso_therm_cond_gauss_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
-  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 16" "convergence")
+setup_test_both("aniso_therm_cond_gauss_conv" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
+  --driver_input ${PROJECT_SOURCE_DIR}/inputs/diffusion.in --num_steps 24" "convergence")
 
 setup_test_both("field_loop" "--driver ${PROJECT_BINARY_DIR}/bin/athenaPK \
   --driver_input ${PROJECT_SOURCE_DIR}/inputs/field_loop.in --num_steps 12" "convergence")
diff --git a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
index c3ce3bb7..9f002723 100644
--- a/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
+++ b/tst/regression/test_suites/aniso_therm_cond_gauss_conv/aniso_therm_cond_gauss_conv.py
@@ -26,12 +26,13 @@
 import os
 import itertools
 import utils.test_case
+from scipy.optimize import curve_fit
 
 # To prevent littering up imported folders with .pyc files or __pycache_ folder
 sys.dont_write_bytecode = True
 
 int_cfgs = ["unsplit", "rkl2"]
-res_cfgs = [128, 256]
+res_cfgs = [128, 256, 512]
 field_cfgs = ["none", "aligned", "angle", "perp"]
 tlim = 2.0
 
@@ -65,7 +66,6 @@ def get_B(field_cfg):
 
 class TestCase(utils.test_case.TestCaseAbs):
     def Prepare(self, parameters, step):
-
         assert parameters.num_ranks <= 4, "Use <= 4 ranks for diffusion test."
 
         res, field_cfg, int_cfg = all_cfgs[step - 1]
@@ -90,6 +90,8 @@ def Prepare(self, parameters, step):
             "parthenon/mesh/x2max=1.0",
             "parthenon/mesh/nx3=1",
             "parthenon/meshblock/nx3=1",
+            "parthenon/time/integrator=%s"
+            % ("rk2" if (int_cfg == "unsplit") else "rk1"),
             "problem/diffusion/Bx=%f" % Bx,
             "problem/diffusion/By=%f" % By,
             "problem/diffusion/iprob=10",
@@ -104,7 +106,6 @@ def Prepare(self, parameters, step):
         return parameters
 
     def Analyse(self, parameters):
-
         sys.path.insert(
             1,
             parameters.parthenon_path
@@ -117,12 +118,14 @@ def Analyse(self, parameters):
             print("Couldn't find module to read Parthenon hdf5 files.")
             return False
 
+        tests_passed = True
+
         def get_ref(x, Bx, field_cfg):
             eff_diff_coeff = 0.25 if Bx == 0.0 else 0.25 * Bx * Bx
             tlim_ = 0.0 if field_cfg == "perp" else tlim
             return 1.0 + 1e-6 / (
                 np.sqrt(4 * np.pi * eff_diff_coeff * (0.5 + tlim_))
-                / np.exp(-(x ** 2) / (4.0 * eff_diff_coeff * (0.5 + tlim_)))
+                / np.exp(-(x**2) / (4.0 * eff_diff_coeff * (0.5 + tlim_)))
             )
 
         num_rows = len(res_cfgs)
@@ -137,7 +140,7 @@ def get_ref(x, Bx, field_cfg):
             prim = data_file.Get("prim")
             zz, yy, xx = data_file.GetVolumeLocations()
             mask = yy == yy[0]
-            temp = prim[:, 4][mask]
+            temp = prim[4][mask]
             x = xx[mask]
             res, field_cfg, int_cfg = all_cfgs[step]
             row = res_cfgs.index(res)
@@ -151,24 +154,51 @@ def get_ref(x, Bx, field_cfg):
                 int_cfgs.index(int_cfg),
                 res_cfgs.index(res),
             ] = l1
-            p[row, col].plot(x, temp, label=field_cfg + " L$_1$=%.2g" % l1)
+            p[row, col].plot(x, temp, label=f"{field_cfg} N={res} L$_1$={l1:.2g}")
 
         # Plot convergence
         for i, field_cfg in enumerate(field_cfgs):
             for j, int_cfg in enumerate(int_cfgs):
+                p[0, j].set_title(f"Integrator: {int_cfg}")
                 if field_cfg == "perp":
                     continue
 
-                est_conv = np.diff(np.log(l1_err[i, j, :])) / np.diff(np.log(res_cfgs))
                 p[-1, j].plot(
                     res_cfgs,
                     l1_err[i, j, :],
-                    label=field_cfg + " conv: %.2f" % est_conv,
+                    label=f"{field_cfg} data",
+                )
+
+                # Simple convergence estimator
+                conv_model = lambda log_n, log_a, conv_rate: conv_rate * log_n + log_a
+                popt, pconv = curve_fit(
+                    conv_model, np.log(res_cfgs), np.log(l1_err[i, j, :])
                 )
+                conv_a, conv_measured = popt
+                # Note that the RKL2 convergence on the plots is currently significantly better
+                # than expected (<-3) though the L1 errors themselves are larger than the unsplit
+                # integrator (as expected).
+                # For a more reasonable test (which would take longer), reduce the RKL2 ratio to,
+                # say, 200 and extend the resolution grid to 1024 (as the first data point at N=128
+                # is comparatively worse than at N>128).
+                if conv_measured > -1.98:
+                    print(
+                        f"!!!\nConvergence for {field_cfg} test with {int_cfg} integrator "
+                        f"is worse ({conv_measured}) than expected (-1.98).\n!!!"
+                    )
+                    tests_passed = False
+                p[-1, j].plot(
+                    res_cfgs,
+                    np.exp(conv_a) * res_cfgs**conv_measured,
+                    ":",
+                    lw=0.75,
+                    label=f"{field_cfg} Measured conv: {conv_measured:.2f}",
+                )
+
         p[-1, 0].set_xscale("log")
         p[-1, 0].set_yscale("log")
-        p[-1, 0].legend(fontsize=8)
-        p[-1, 1].legend(fontsize=8)
+        p[-1, 0].legend(fontsize=6)
+        p[-1, 1].legend(fontsize=6)
 
         # Plot reference lines
         x = np.linspace(-6, 6, 400)
@@ -179,11 +209,13 @@ def get_ref(x, Bx, field_cfg):
                     y = get_ref(x, Bx, field_cfg)
                     p[i, j].plot(x, y, "-", lw=0.5, color="black", alpha=0.8)
                     p[i, j].grid()
-                    p[i, j].legend(fontsize=8)
+                    p[i, j].legend(fontsize=6)
 
         fig.tight_layout()
         fig.savefig(
-            os.path.join(parameters.output_path, "cond.png"), bbox_inches="tight"
+            os.path.join(parameters.output_path, "cond.png"),
+            bbox_inches="tight",
+            dpi=300,
         )
 
-        return True
+        return tests_passed

From e43c64783e6e387858ca16e73fcba409ed353635 Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 21 Aug 2024 10:59:23 +0200
Subject: [PATCH 41/42] Add changelog

---
 CHANGELOG.md | 1 +
 README.md    | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 83bc715d..47b1e93d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ## Current develop (i.e., `main` branch)
 
 ### Added (new features/APIs/variables/...)
+- [[PR 1]](https://github.com/parthenon-hpc-lab/athenapk/pull/1) Add isotropic thermal conduction and RKL2 supertimestepping
 
 ### Changed (changing behavior/API/variables/...)
 - [[PR 84]](https://github.com/parthenon-hpc-lab/athenapk/pull/84) Bump Parthenon to latest develop (2024-02-15)
diff --git a/README.md b/README.md
index 04881cd0..13d7280a 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,8 @@ Current features include
   - HLLE (hydro and MHD), HLLC (hydro), and HLLD (MHD) Riemann solvers
   - adiabatic equation of state
   - MHD based on hyperbolic divergence cleaning following Dedner+ 2002
-  - anisotropic thermal conduction
+  - isotropic and anisotropic thermal conduction
+  - operator-split, second-order RKL2 supertimestepping for diffusive terms
   - optically thin cooling based on tabulated cooling tables with either Townsend 2009 exact integration or operator-split subcycling
 - static and adaptive mesh refinement
 - problem generators for

From e1ea14a769e10077958e1aecaf73c584c8bc1f2e Mon Sep 17 00:00:00 2001
From: Philipp Grete <pgrete@hs.uni-hamburg.de>
Date: Wed, 21 Aug 2024 16:22:22 +0200
Subject: [PATCH 42/42] Address comments

---
 docs/input.md              | 4 ++--
 src/hydro/hydro.cpp        | 7 +++++++
 src/hydro/hydro_driver.cpp | 6 +++++-
 3 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/docs/input.md b/docs/input.md
index 75f827fc..11dae8c5 100644
--- a/docs/input.md
+++ b/docs/input.md
@@ -69,7 +69,7 @@ conserved to primitive conversion if both are defined.
 
 #### Diffusive processes
 
-##### Isotropic (hydro and MHD )and anisotropic thermal conduction (only MHD)
+##### Isotropic (hydro and MHD) and anisotropic thermal conduction (only MHD)
 In the presence of magnetic fields thermal conduction is becoming anisotropic with the flux along
 the local magnetic field direction typically being much stronger than the flux perpendicular to the magnetic field.
 
@@ -112,7 +112,7 @@ In addition the coefficient (or diffusivity) needs to be set
 Parameter: `conduction_coeff` (string)
 - `spitzer` : Anisotropic thermal conduction with a temperature dependent classic Spitzer thermal conductivity
   $`\kappa (T) = c_\kappa T^{5/2} \mathrm{erg/s/K/cm}`$ and
-  $`c_\kappa`$ being constant prefactor (set via the additional `diffusion/spitzer_cond_in_erg_by_s_K_cm` parameter with a default value of $`4.6\times10^{-7}`$). Note, as indicated by the units in the input parameter name, this kind of thermal conductivity requires a full set of units
+  $`c_\kappa`$ being a constant prefactor (set via the additional `diffusion/spitzer_cond_in_erg_by_s_K_cm` parameter with a default value of $`4.6\times10^{-7}`$, which assumes a fully ionized hydrogen plasma [^CM77] with $`\ln \lambda = 40`$ approximating ICM conditions). Note, as indicated by the units in the input parameter name, this kind of thermal conductivity requires a full set of units
   to be defined for the simulation.
 - `fixed` : Contrary to a temperature dependent conductivity, a simple thermal diffusivity can be used instead for which
 the conduction flux is $`\mathbf{F} = - \chi \rho \mathbf{\hat b} (\mathbf{\hat b \cdot \nabla \frac{p_\mathrm{th}}{\rho}})`$
diff --git a/src/hydro/hydro.cpp b/src/hydro/hydro.cpp
index 5d2f3d04..54a97b7f 100644
--- a/src/hydro/hydro.cpp
+++ b/src/hydro/hydro.cpp
@@ -503,6 +503,8 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         }
         conduction_coeff = ConductionCoeff::spitzer;
 
+        // Default value assumes a fully ionized hydrogen plasma with a Coulomb logarithm
+        // of 40 to approximate ICM conditions, i.e., 1.84e-5/ln Lambda = 4.6e-7.
         Real spitzer_coeff =
             pin->GetOrAddReal("diffusion", "spitzer_cond_in_erg_by_s_K_cm", 4.6e-7);
         // Convert to code units. No temp conversion as [T_phys] = [T_code].
@@ -516,6 +518,9 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         pkg->AddParam<>("thermal_diff", thermal_diff);
 
         const auto mu = pkg->Param<Real>("mu");
+        // 6.86 again assumes a fully ionized hydrogen plasma in agreement with
+        // the assumptions above (technically this means mu = 0.5) and can be derived
+        // from eq (7) in CM77 assuming T_e = T_i.
         conduction_sat_prefac = 6.86 * std::sqrt(mu) * conduction_sat_phi;
 
       } else if (conduction_coeff_str == "fixed") {
@@ -525,6 +530,8 @@ std::shared_ptr<StateDescriptor> Initialize(ParameterInput *pin) {
         auto thermal_diff = ThermalDiffusivity(conduction, conduction_coeff,
                                                thermal_diff_coeff_code, 0.0, 0.0, 0.0);
         pkg->AddParam<>("thermal_diff", thermal_diff);
+        // 5.0 prefactor comes from eq (8) in Cowie & McKee 1977
+        // https://doi.org/10.1086/154911
         conduction_sat_prefac = 5.0 * conduction_sat_phi;
 
       } else {
diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp
index bbc16e9d..36bdce91 100644
--- a/src/hydro/hydro_driver.cpp
+++ b/src/hydro/hydro_driver.cpp
@@ -228,7 +228,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
 
   TaskID none(0);
 
-  // Store initial u0 in u1 as "base" will continusouly be updated but initial state Y0 is
+  // Store initial u0 in u1 as "base" will continuously be updated but initial state Y0 is
   // required for each stage.
   TaskRegion &region_copy_out = ptask_coll->AddRegion(blocks.size());
   for (int i = 0; i < blocks.size(); i++) {
@@ -537,6 +537,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
 
   // First add split sources before the main time integration
   if (stage == 1) {
+    // If any tasks modify the conserved variables before this place, then
+    // the STS tasks should be updated to not assume prim and cons are in sync.
     const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
     if (diffint == DiffInt::rkl2) {
       AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
@@ -676,6 +678,8 @@ TaskCollection HydroDriver::MakeTaskCollection(BlockList_t &blocks, int stage) {
         tl.AddTask(none, parthenon::Update::FillDerived<MeshData<Real>>, mu0.get());
   }
   const auto &diffint = hydro_pkg->Param<DiffInt>("diffint");
+  // If any tasks modify the conserved variables before this place and after FillDerived,
+  // then the STS tasks should be updated to not assume prim and cons are in sync.
   if (diffint == DiffInt::rkl2 && stage == integrator->nstages) {
     AddSTSTasks(&tc, pmesh, blocks, 0.5 * tm.dt);
   }