Use sparse storage & operations in C++ VODE for aprox13 (#307)

This PR adds a sparse linear system storage and arithmetic to C++ VODE for aprox13. The basic idea is to create sparse and dense matrix data structures for the Jacobian so the rhs and integrator can interact with it through provided functions agnostic to the underlying storage. - Adds a sparse jacobian struct for aprox13 that handles 2D->1D indexing for access and arithmetic at compile time. - Adds a dense jacobian struct (MathArray2D) that includes the equivalent arithmetic operations we need to supply for a consistent interface between sparse & dense jacobian data structures. - Modifies rhs, temperature rhs, and integrator to be agnostic to the storage type - Modifies test_rhs to use the sparse or dense jacobians to call actual_jac depending on USE_NETWORK_SOLVER.
AMReX-Astro · Apr 17, 2020 · 3068949 · 3068949
1 parent f9823e0
commit 3068949
Show file tree

Hide file tree

Showing 13 changed files with 832 additions and 68 deletions.
diff --git a/integration/VODE/vode_dvjac.H b/integration/VODE/vode_dvjac.H
@@ -70,22 +70,15 @@ void dvjac (IArray1D& pivot, int& IERPJ, burn_t& state, dvode_t& vstate)
  // Indicate that the Jacobian is current for this solve.
  vstate.JCUR = 1;
 
- for (int j = 1; j <= VODE_NEQS; ++j) {
- for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac(i,j) = 0.0_rt;
- }
- }
+ // Initialize the Jacobian to zero
+ vstate.jac.zero();
 
  jac(state, vstate, vstate.jac);
 
 #ifndef AMREX_USE_GPU
  // Store the Jacobian if we're caching.
  if (use_jacobian_caching == 1) {
- for (int j = 1; j <= VODE_NEQS; ++j) {
- for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac_save(i,j) = vstate.jac(i,j);
- }
- }
+ vstate.jac_save = vstate.jac;
  }
 #endif
 
@@ -123,7 +116,7 @@ void dvjac (IArray1D& pivot, int& IERPJ, burn_t& state, dvode_t& vstate)
 
  rhs(vstate.tn, state, vstate, vstate.acor);
  for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac(i,j) = (vstate.acor(i) - vstate.savf(i)) * fac;
+ vstate.jac.set(i, j, (vstate.acor(i) - vstate.savf(i)) * fac);
  }
 
  vstate.y(j) = yj;
@@ -135,11 +128,7 @@ void dvjac (IArray1D& pivot, int& IERPJ, burn_t& state, dvode_t& vstate)
 #ifndef AMREX_USE_GPU
  // Store the Jacobian if we're caching.
  if (use_jacobian_caching == 1) {
- for (int j = 1; j <= VODE_NEQS; ++j) {
- for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac_save(i,j) = vstate.jac(i,j);
- }
- }
+ vstate.jac_save = vstate.jac;
  }
 #endif
 
@@ -153,11 +142,7 @@ void dvjac (IArray1D& pivot, int& IERPJ, burn_t& state, dvode_t& vstate)
 
  // Indicate the Jacobian is not current for this step.
  vstate.JCUR = 0;
- for (int j = 1; j <= VODE_NEQS; ++j) {
- for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac(i,j) = vstate.jac_save(i,j);
- }
- }
+ vstate.jac = vstate.jac_save;
 
  }
 #endif
@@ -167,16 +152,9 @@ void dvjac (IArray1D& pivot, int& IERPJ, burn_t& state, dvode_t& vstate)
 
  Real hrl1 = vstate.H * vstate.RL1;
  Real con = -hrl1;
- for (int j = 1; j <= VODE_NEQS; ++j) {
- for (int i = 1; i <= VODE_NEQS; ++i) {
- vstate.jac(i,j) *= con;
- }
- }
 
- for (int i = 1; i <= VODE_NEQS; ++i) {
- // Add 1 from the identity matrix.
- vstate.jac(i,i) += 1.0_rt;
- }
+ vstate.jac.mul(con);
+ vstate.jac.add_identity();
 
 #ifndef NETWORK_SOLVER
  int IER;

diff --git a/integration/VODE/vode_dvnlsd.H b/integration/VODE/vode_dvnlsd.H
@@ -6,7 +6,7 @@
 #include <vode_dvjac.H>
 
 #ifdef NETWORK_SOLVER
-#include <actual_linear_solver.H>
+#include <actual_matrix.H>
 #endif
 
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE

diff --git a/integration/VODE/vode_rhs.H b/integration/VODE/vode_rhs.H
@@ -64,9 +64,9 @@ void rhs (const Real time, burn_t& state, dvode_t& vode_state, RArray1D& ydot)
 
 
 // Analytical Jacobian
-
+template<class MatrixType>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void jac (burn_t& state, dvode_t& vode_state, RArray2D& pd)
+void jac (burn_t& state, dvode_t& vode_state, MatrixType& pd)
 {
  // NOTE: the time at which to evaluate the Jacobian is not
  // explicitly passed. VODE always evaluates the analytic
@@ -81,16 +81,16 @@ void jac (burn_t& state, dvode_t& vode_state, RArray2D& pd)
  // We integrate X, not Y
  for (int j = 1; j <= NumSpec; ++j) {
  for (int i = 1; i <= VODE_NEQS; ++i) {
- pd(j,i) *= aion[j-1];
- pd(i,j) *= aion_inv[j-1];
+ pd.mul(j, i, aion[j-1]);
+ pd.mul(i, j, aion_inv[j-1]);
  }
  }
 
  // apply fudge factor:
  if (react_boost > 0.0_rt) {
  for (int j = 1; j <= VODE_NEQS; ++j) {
  for (int i = 1; i <= VODE_NEQS; ++i) {
- pd(i,j) *= react_boost;
+ pd.mul(i, j, react_boost);
  }
  }
  }

diff --git a/integration/VODE/vode_type.H b/integration/VODE/vode_type.H
@@ -3,13 +3,19 @@
 
 #include <AMReX_REAL.H>
 #include <AMReX_Array.H>
+
+#include <ArrayUtilities.H>
 #include <network.H>
 
+#ifdef NETWORK_SOLVER
+#include <actual_matrix.H>
+#endif
+
 const int VODE_NEQS = NumSpec + 2;
 
 typedef amrex::Array1D<int, 1, VODE_NEQS> IArray1D;
 typedef amrex::Array1D<Real, 1, VODE_NEQS> RArray1D;
-typedef amrex::Array2D<Real, 1, VODE_NEQS, 1, VODE_NEQS> RArray2D;
+typedef ArrayUtil::MathArray2D<1, VODE_NEQS, 1, VODE_NEQS> RArray2D;
 
 const amrex::Real UROUND = std::numeric_limits<amrex::Real>::epsilon();
 const amrex::Real CCMXJ = 0.2e0_rt;
@@ -49,12 +55,26 @@ struct dvode_t
  // Integration array
  RArray1D y;
 
+#ifdef NETWORK_SOLVER
+
+ // Jacobian
+ SparseMatrix jac;
+
+#ifndef AMREX_USE_GPU
+ // Saved Jacobian
+ SparseMatrix jac_save;
+#endif
+
+#else
+
  // Jacobian
  RArray2D jac;
 
 #ifndef AMREX_USE_GPU
  // Saved Jacobian
  RArray2D jac_save;
+#endif
+
 #endif
 
  amrex::Array2D<Real, 1, VODE_NEQS, 1, VODE_LMAX> yh;

diff --git a/integration/utils/temperature_integration.H b/integration/utils/temperature_integration.H
@@ -79,9 +79,9 @@ void temperature_rhs (burn_t& state, Array1D<Real, 1, neqs>& ydot)
 // Sets up the temperature entries in the Jacobian. This should be called from
 // within the actual_jac routine but is provided here as a convenience
 // since most networks will use the same temperature ODE.
-
+template<class MatrixType>
 AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
-void temperature_jac (burn_t& state, Array2D<Real, 1, neqs, 1, neqs>& jac)
+void temperature_jac (burn_t& state, MatrixType& jac)
 {
 
  if (state.self_heat) {
@@ -134,13 +134,13 @@ void temperature_jac (burn_t& state, Array2D<Real, 1, neqs, 1, neqs>& jac)
 
  // d(itemp) / d(enuc)
 
- jac(net_itemp, net_ienuc) = 0.0_rt;
+ jac.set(net_itemp, net_ienuc, 0.0_rt);
 
  }
  else {
 
  for (int k = 1; k <= neqs; ++k) {
- jac(net_itemp, k) = 0.0_rt;
+ jac.set(net_itemp, k, 0.0_rt);
  }
 
  }

diff --git a/interfaces/ArrayUtilities.H b/interfaces/ArrayUtilities.H
@@ -8,6 +8,102 @@ namespace ArrayUtil
 {
  using namespace amrex;
 
+ template <int XLO, int XHI>
+ struct MathArray1D
+ {
+ AMREX_GPU_HOST_DEVICE AMREX_INLINE
+ void zero()
+ {
+ for (int i = 0; i < (XHI-XLO+1); ++i) {
+ arr[i] = 0.0_rt;
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void operator= (const MathArray1D<XLO, XHI>& other) noexcept {
+ for (int i = 0; i < (XHI-XLO+1); ++i) {
+ arr[i] = other.arr[i];
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ const Real& operator() (int i) const noexcept {
+ return arr[i-XLO];
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ Real& operator() (int i) noexcept {
+ return arr[i-XLO];
+ }
+
+ Real arr[(XHI-XLO+1)];
+ };
+
+ template <int XLO, int XHI, int YLO, int YHI>
+ struct MathArray2D
+ {
+ AMREX_GPU_HOST_DEVICE AMREX_INLINE
+ void zero()
+ {
+ for (int i = 0; i < (YHI-YLO+1)*(XHI-XLO+1); ++i) {
+ arr[i] = 0.0_rt;
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void mul (const Real x) noexcept {
+ for (int i = 0; i < (YHI-YLO+1)*(XHI-XLO+1); ++i) {
+ arr[i] *= x;
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void set (const int i, const int j, const Real x) noexcept {
+ arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)] = x;
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void add (const int i, const int j, const Real x) noexcept {
+ arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)] += x;
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void mul (const int i, const int j, const Real x) noexcept {
+ arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)] *= x;
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ Real get (const int i, const int j) const noexcept {
+ return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void add_identity () noexcept {
+ for (int i = XLO; i <= XHI; ++i) {
+ arr[i+i*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)] += 1.0_rt;
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ void operator= (const MathArray2D<XLO, XHI, YLO, YHI>& other) noexcept {
+ for (int i = 0; i < (YHI-YLO+1)*(XHI-XLO+1); ++i) {
+ arr[i] = other.arr[i];
+ }
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ const Real& operator() (int i, int j) const noexcept {
+ return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
+ }
+
+ AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
+ Real& operator() (int i, int j) noexcept {
+ return arr[i+j*(XHI-XLO+1)-(YLO*(XHI-XLO+1)+XLO)];
+ }
+
+ Real arr[(XHI-XLO+1)*(YHI-YLO+1)];
+ };
+
  namespace Math
  {
  template <int XLO, int XHI>

diff --git a/networks/aprox13/Make.package b/networks/aprox13/Make.package
@@ -13,6 +13,7 @@ F90EXE_sources += actual_rhs.F90
 
 CEXE_sources += actual_rhs_data.cpp
 CEXE_headers += actual_rhs.H
+CEXE_headers += actual_matrix.H
 
 USE_RATES = TRUE
 USE_SCREENING = TRUE