SmallMatrix: Matrix class with compile time size

Add amrex::SmallMatrix class with compile time size.
AMReX-Codes · Oct 1, 2024 · a2d7c22 · a2d7c22
1 parent 6d9c25b
commit a2d7c22
Show file tree

Hide file tree

Showing 12 changed files with 481 additions and 84 deletions.
diff --git a/Src/Base/AMReX_Array.H b/Src/Base/AMReX_Array.H
@@ -10,6 +10,7 @@
 #include <AMReX_REAL.H>
 #include <AMReX_Algorithm.H>
 #include <AMReX_Dim3.H>
+#include <AMReX_SmallMatrix.H>
 
 #include <array>
 #include <memory>
@@ -148,10 +149,6 @@ namespace amrex {
  * order (last index moving fastest). If not specified, Fortran order is
  * assumed.
  */
- namespace Order {
- struct C {};
- struct F {};
- }
 
  /**
  * A GPU-compatible one-dimensional array.
@@ -280,7 +277,7 @@ namespace amrex {
  * default if not given)
  */
  template <class T, int XLO, int XHI, int YLO, int YHI,
- class ORDER=Order::F>
+ Order ORDER = Order::F>
  struct Array2D
  {
  /**
@@ -370,8 +367,7 @@ namespace amrex {
  * If the order is not specified, Fortran column-major order is assumed
  * (the index \c i moves the fastest)
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  const T& operator() (int i, int j) const noexcept {
  AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -384,8 +380,7 @@ namespace amrex {
  * If the order is not specified, Fortran column-major order is assumed
  * (the index \c i moves the fastest)
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  T& operator() (int i, int j) noexcept {
  AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -398,8 +393,7 @@ namespace amrex {
  * When the order is manually specified as Order::C, row-major order
  * is used (the index \c j moves the fastest).
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  const T& operator() (int i, int j) const noexcept {
  AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -412,8 +406,7 @@ namespace amrex {
  * When the order is manually specified as Order::C, row-major order
  * is used (the index \c j moves the fastest).
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  T& operator() (int i, int j) noexcept {
  AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
@@ -551,7 +544,7 @@ namespace amrex {
  * default if not given)
  */
  template <class T, int XLO, int XHI, int YLO, int YHI, int ZLO, int ZHI,
- class ORDER=Order::F>
+ Order ORDER=Order::F>
  struct Array3D
  {
  /**
@@ -662,8 +655,7 @@ namespace amrex {
  * If the order is not specified, Fortran column-major order is assumed
  * (the index \c i moves the fastest)
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  const T& operator() (int i, int j, int k) const noexcept {
  return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
@@ -676,8 +668,7 @@ namespace amrex {
  * If the order is not specified, Fortran column-major order is assumed
  * (the index \c i moves the fastest)
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  T& operator() (int i, int j, int k) noexcept {
  return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
@@ -690,8 +681,7 @@ namespace amrex {
  * When the order is manually specified as Order::C, row-major order
  * is used (the index \c k moves the fastest).
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  const T& operator() (int i, int j, int k) const noexcept {
  return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))
@@ -704,8 +694,7 @@ namespace amrex {
  * When the order is manually specified as Order::C, row-major order
  * is used (the index \c k moves the fastest).
  */
- template <typename O=ORDER,
- std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
+ template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
  [[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
  T& operator() (int i, int j, int k) noexcept {
  return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))

diff --git a/Src/Base/AMReX_ConstexprFor.H b/Src/Base/AMReX_ConstexprFor.H
@@ -0,0 +1,38 @@
+#ifndef AMREX_CONSTEXPR_FOR_H_
+#define AMREX_CONSTEXPR_FOR_H_
+#include <AMReX_Config.H>
+
+#include <AMReX_GpuQualifiers.H>
+#include <AMReX_Extension.H>
+
+#include <type_traits>
+
+namespace amrex {
+
+// Implementation of "constexpr for" based on
+// https://artificial-mind.net/blog/2020/10/31/constexpr-for
+//
+// Approximates what one would get from a compile-time
+// unrolling of the loop
+// for (int i = 0; i < N; ++i) {
+// f(i);
+// }
+//
+// The mechanism is recursive: we evaluate f(i) at the current
+// i and then call the for loop at i+1. f() is a lambda function
+// that provides the body of the loop and takes only an integer
+// i as its argument.
+
+template<auto I, auto N, class F>
+AMREX_GPU_HOST_DEVICE AMREX_INLINE
+constexpr void constexpr_for (F const& f)
+{
+ if constexpr (I < N) {
+ f(std::integral_constant<decltype(I), I>());
+ constexpr_for<I+1, N>(f);
+ }
+}
+
+}
+
+#endif
diff --git a/Src/Base/AMReX_Loop.H b/Src/Base/AMReX_Loop.H
@@ -3,6 +3,7 @@
 #include <AMReX_Config.H>
 
 #include <AMReX_Box.H>
+#include <AMReX_ConstexprFor.H>
 #include <AMReX_Extension.H>
 
 namespace amrex {
@@ -567,30 +568,6 @@ void LoopConcurrentOnCpu (BoxND<dim> const& bx, int ncomp, F const& f) noexcept
  }
 }
 
-// Implementation of "constexpr for" based on
-// https://artificial-mind.net/blog/2020/10/31/constexpr-for
-//
-// Approximates what one would get from a compile-time
-// unrolling of the loop
-// for (int i = 0; i < N; ++i) {
-// f(i);
-// }
-//
-// The mechanism is recursive: we evaluate f(i) at the current
-// i and then call the for loop at i+1. f() is a lambda function
-// that provides the body of the loop and takes only an integer
-// i as its argument.
-
-template<auto I, auto N, class F>
-AMREX_GPU_HOST_DEVICE AMREX_INLINE
-constexpr void constexpr_for (F const& f)
-{
- if constexpr (I < N) {
- f(std::integral_constant<decltype(I), I>());
- constexpr_for<I+1, N>(f);
- }
-}
-
 #include <AMReX_Loop.nolint.H>
 
 }