Skip to content

Commit

Permalink
SmallMatrix: Matrix class with compile time size
Browse files Browse the repository at this point in the history
Add amrex::SmallMatrix class with compile time size.
  • Loading branch information
WeiqunZhang committed Oct 1, 2024
1 parent 6d9c25b commit a2d7c22
Show file tree
Hide file tree
Showing 12 changed files with 481 additions and 84 deletions.
33 changes: 11 additions & 22 deletions Src/Base/AMReX_Array.H
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <AMReX_REAL.H>
#include <AMReX_Algorithm.H>
#include <AMReX_Dim3.H>
#include <AMReX_SmallMatrix.H>

#include <array>
#include <memory>
Expand Down Expand Up @@ -148,10 +149,6 @@ namespace amrex {
* order (last index moving fastest). If not specified, Fortran order is
* assumed.
*/
namespace Order {
struct C {};
struct F {};
}

/**
* A GPU-compatible one-dimensional array.
Expand Down Expand Up @@ -280,7 +277,7 @@ namespace amrex {
* default if not given)
*/
template <class T, int XLO, int XHI, int YLO, int YHI,
class ORDER=Order::F>
Order ORDER = Order::F>
struct Array2D
{
/**
Expand Down Expand Up @@ -370,8 +367,7 @@ namespace amrex {
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j) const noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
Expand All @@ -384,8 +380,7 @@ namespace amrex {
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j) noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
Expand All @@ -398,8 +393,7 @@ namespace amrex {
* When the order is manually specified as Order::C, row-major order
* is used (the index \c j moves the fastest).
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j) const noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
Expand All @@ -412,8 +406,7 @@ namespace amrex {
* When the order is manually specified as Order::C, row-major order
* is used (the index \c j moves the fastest).
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j) noexcept {
AMREX_ASSERT(i >= XLO && i <= XHI && j >= YLO && j <= YHI);
Expand Down Expand Up @@ -551,7 +544,7 @@ namespace amrex {
* default if not given)
*/
template <class T, int XLO, int XHI, int YLO, int YHI, int ZLO, int ZHI,
class ORDER=Order::F>
Order ORDER=Order::F>
struct Array3D
{
/**
Expand Down Expand Up @@ -662,8 +655,7 @@ namespace amrex {
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j, int k) const noexcept {
return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
Expand All @@ -676,8 +668,7 @@ namespace amrex {
* If the order is not specified, Fortran column-major order is assumed
* (the index \c i moves the fastest)
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::F>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::F,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j, int k) noexcept {
return arr[i+j*(XHI-XLO+1)+k*((XHI-XLO+1)*(YHI-YLO+1))
Expand All @@ -690,8 +681,7 @@ namespace amrex {
* When the order is manually specified as Order::C, row-major order
* is used (the index \c k moves the fastest).
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
const T& operator() (int i, int j, int k) const noexcept {
return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))
Expand All @@ -704,8 +694,7 @@ namespace amrex {
* When the order is manually specified as Order::C, row-major order
* is used (the index \c k moves the fastest).
*/
template <typename O=ORDER,
std::enable_if_t<std::is_same_v<O,Order::C>,int> = 0>
template <Order Ord=ORDER, std::enable_if_t<Ord==Order::C,int> = 0>
[[nodiscard]] AMREX_GPU_HOST_DEVICE AMREX_FORCE_INLINE
T& operator() (int i, int j, int k) noexcept {
return arr[k+j*(ZHI-ZLO+1)+i*((ZHI-ZLO+1)*(YHI-YLO+1))
Expand Down
38 changes: 38 additions & 0 deletions Src/Base/AMReX_ConstexprFor.H
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#ifndef AMREX_CONSTEXPR_FOR_H_
#define AMREX_CONSTEXPR_FOR_H_
#include <AMReX_Config.H>

#include <AMReX_GpuQualifiers.H>
#include <AMReX_Extension.H>

#include <type_traits>

namespace amrex {

// Implementation of "constexpr for" based on
// https://artificial-mind.net/blog/2020/10/31/constexpr-for
//
// Approximates what one would get from a compile-time
// unrolling of the loop
// for (int i = 0; i < N; ++i) {
// f(i);
// }
//
// The mechanism is recursive: we evaluate f(i) at the current
// i and then call the for loop at i+1. f() is a lambda function
// that provides the body of the loop and takes only an integer
// i as its argument.

template<auto I, auto N, class F>
AMREX_GPU_HOST_DEVICE AMREX_INLINE
constexpr void constexpr_for (F const& f)
{
if constexpr (I < N) {
f(std::integral_constant<decltype(I), I>());
constexpr_for<I+1, N>(f);
}
}

}

#endif
25 changes: 1 addition & 24 deletions Src/Base/AMReX_Loop.H
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
#include <AMReX_Config.H>

#include <AMReX_Box.H>
#include <AMReX_ConstexprFor.H>
#include <AMReX_Extension.H>

namespace amrex {
Expand Down Expand Up @@ -567,30 +568,6 @@ void LoopConcurrentOnCpu (BoxND<dim> const& bx, int ncomp, F const& f) noexcept
}
}

// Implementation of "constexpr for" based on
// https://artificial-mind.net/blog/2020/10/31/constexpr-for
//
// Approximates what one would get from a compile-time
// unrolling of the loop
// for (int i = 0; i < N; ++i) {
// f(i);
// }
//
// The mechanism is recursive: we evaluate f(i) at the current
// i and then call the for loop at i+1. f() is a lambda function
// that provides the body of the loop and takes only an integer
// i as its argument.

template<auto I, auto N, class F>
AMREX_GPU_HOST_DEVICE AMREX_INLINE
constexpr void constexpr_for (F const& f)
{
if constexpr (I < N) {
f(std::integral_constant<decltype(I), I>());
constexpr_for<I+1, N>(f);
}
}

#include <AMReX_Loop.nolint.H>

}
Expand Down
Loading

0 comments on commit a2d7c22

Please sign in to comment.