Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add benchmark for expression templates
Browse files Browse the repository at this point in the history
vaithak committed Oct 3, 2023
1 parent bba8a47 commit 01bf9b1
Showing 6 changed files with 221 additions and 65 deletions.
104 changes: 104 additions & 0 deletions benchmark/ArrayExpressionTemplates.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "benchmark/benchmark.h"

#include "clad/Differentiator/Differentiator.h"

#include <memory>

// Benchmark the expression x*y + y*z + z*x between clad arrays,
// this is to compare the performance of expression templates.
// We will evaluate the expression using four different methods:
// 1. Using operations on clad arrays - this will use expression templates.
// 2. Using clad arrays but creating temporaries manually.
// 3. Using loops on clad arrays.
// 4. Using loops on native arrays.

// Benchmark expression templates: the whole expression x*y + y*z + z*x is
// written with clad::array operators and assigned to the result in a single
// statement.
static void BM_ExpressionTemplates(benchmark::State& state) {
  constexpr int n = 1000;
  clad::array<double> x(n), y(n), z(n);
  // Populate the three operands with distinct values per index.
  for (int idx = 0; idx != n; ++idx) {
    const int base = idx + 1;
    x[idx] = base;
    y[idx] = base + 1;
    z[idx] = base + 2;
  }

  clad::array<double> res(n);
  for (auto _ : state) {
    // The assignment is passed to DoNotOptimize so that the evaluation is not
    // discarded by the optimizer.
    benchmark::DoNotOptimize(res = x * y + y * z + z * x);
  }
}
BENCHMARK(BM_ExpressionTemplates);

// Benchmark manually creating temporaries: every intermediate product and sum
// is first stored in its own clad::array before the final assignment.
static void BM_ManualTemporaries(benchmark::State& state) {
  constexpr int n = 1000;
  clad::array<double> x(n), y(n), z(n);
  // Populate the three operands with distinct values per index.
  for (int idx = 0; idx != n; ++idx) {
    const int base = idx + 1;
    x[idx] = base;
    y[idx] = base + 1;
    z[idx] = base + 2;
  }

  clad::array<double> res(n);
  for (auto _ : state) {
    // One named array per sub-expression, in the same order as the
    // expression x*y + y*z + z*x is evaluated.
    clad::array<double> xy = x * y;
    clad::array<double> yz = y * z;
    clad::array<double> zx = z * x;
    clad::array<double> xy_plus_yz = xy + yz;
    benchmark::DoNotOptimize(res = xy_plus_yz + zx);
  }
}
BENCHMARK(BM_ManualTemporaries);

// Benchmark loops on clad arrays: the expression is evaluated element by
// element with an explicit index loop instead of whole-array operators.
static void BM_LoopsOnCladArrays(benchmark::State& state) {
  constexpr int n = 1000;
  clad::array<double> x(n), y(n), z(n);
  // Populate the three operands with distinct values per index.
  for (int idx = 0; idx != n; ++idx) {
    const int base = idx + 1;
    x[idx] = base;
    y[idx] = base + 1;
    z[idx] = base + 2;
  }

  clad::array<double> res(n);
  for (auto _ : state) {
    for (int k = 0; k != n; ++k) {
      // Each element store goes through DoNotOptimize so it is not elided.
      benchmark::DoNotOptimize(res[k] =
                                   x[k] * y[k] + y[k] * z[k] + z[k] * x[k]);
    }
  }
}
BENCHMARK(BM_LoopsOnCladArrays);

// Benchmark loops on native arrays: the baseline that evaluates
// x*y + y*z + z*x element by element on plain heap-allocated double buffers.
static void BM_LoopsOnNativeArrays(benchmark::State& state) {
  constexpr int n = 1000;

  // The buffers are owned by unique_ptr so they are released on every exit
  // path without manual delete[] calls. `new double[n]` is kept (rather than
  // make_unique) so the allocation itself is unchanged.
  std::unique_ptr<double[]> x_owner(new double[n]);
  std::unique_ptr<double[]> y_owner(new double[n]);
  std::unique_ptr<double[]> z_owner(new double[n]);
  std::unique_ptr<double[]> res_owner(new double[n]);

  // The timed loop works on raw pointers, so the measured code still indexes
  // native arrays directly.
  double* x = x_owner.get();
  double* y = y_owner.get();
  double* z = z_owner.get();
  double* res = res_owner.get();

  for (int i = 0; i < n; ++i) {
    x[i] = i + 1;
    y[i] = i + 2;
    z[i] = i + 3;
  }

  for (auto _ : state) {
    for (int i = 0; i < n; ++i) {
      // Each element store goes through DoNotOptimize so it is not elided.
      benchmark::DoNotOptimize(res[i] =
                                   x[i] * y[i] + y[i] * z[i] + z[i] * x[i]);
    }
  }
}
BENCHMARK(BM_LoopsOnNativeArrays);

// Define our main: BENCHMARK_MAIN() supplies the program's main() for this
// benchmark executable.
BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -6,6 +6,7 @@ include(AddCladBenchmark)

CB_ADD_GBENCHMARK(Simple Simple.cpp)
CB_ADD_GBENCHMARK(AlgorithmicComplexity AlgorithmicComplexity.cpp)
CB_ADD_GBENCHMARK(ArrayExpressionTemplates ArrayExpressionTemplates.cpp)
CB_ADD_GBENCHMARK(EnzymeCladComparison EnzymeCladComparison.cpp)
CB_ADD_GBENCHMARK(MemoryComplexity MemoryComplexity.cpp)
CB_ADD_GBENCHMARK(VectorModeComparison VectorModeComparison.cpp)
Binary file added benchmark/mybenchmark
Binary file not shown.
69 changes: 49 additions & 20 deletions include/clad/Differentiator/Array.h
Original file line number Diff line number Diff line change
@@ -58,6 +58,13 @@ template <typename T> class array {
m_arr[i] = expression[i];
}

/// Constructs an array from an array expression: allocates
/// `expression.size()` elements and stores `expression[i]` in each slot.
template <typename L, typename BinaryOp, typename R>
CUDA_HOST_DEVICE array(const array_expression<L, BinaryOp, R>& expression)
    : m_arr(new T[expression.size()]), m_size(expression.size()) {
  std::size_t idx = 0;
  while (idx < expression.size()) {
    m_arr[idx] = expression[idx];
    ++idx;
  }
}

// initializing all entries using the same value
template <typename U>
CUDA_HOST_DEVICE array(std::size_t size, U val)
@@ -293,17 +300,19 @@ template <typename T> class array {
}

/// Negate the array and return a new array.
CUDA_HOST_DEVICE array_expression<T, BinarySub, array<T>> operator-() const {
return array_expression<T, BinarySub, array<T>>(static_cast<T>(0), *this);
CUDA_HOST_DEVICE array_expression<T, BinarySub, array<T> const&>
operator-() const {
return array_expression<T, BinarySub, array<T> const&>(static_cast<T>(0),
*this);
}

/// Subtracts the number from every element in the array and returns a new
/// array, when the number is on the left side.
template <typename U, typename std::enable_if<std::is_arithmetic<U>::value,
int>::type = 0>
CUDA_HOST_DEVICE friend array_expression<U, BinarySub, array<T>>
CUDA_HOST_DEVICE friend array_expression<U, BinarySub, array<T> const&>
operator-(U n, const array<T>& arr) {
return array_expression<U, BinarySub, array<T>>(n, arr);
return array_expression<U, BinarySub, array<T> const&>(n, arr);
}

/// Implicitly converts from clad::array to pointer to an array of type T
@@ -333,69 +342,89 @@ template <typename T> CUDA_HOST_DEVICE array<T> zero_vector(std::size_t n) {
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryMul, U>
operator*(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryMul, U>(arr, n);
return array_expression<array<T> const&, BinaryMul, U>(arr, n);
}

/// Multiplies the number to every element in the array and returns an array
/// expression, when the number is on the left side.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryMul, U>
operator*(U n, const array<T>& arr) {
return array_expression<array<T>, BinaryMul, U>(arr, n);
return array_expression<array<T> const&, BinaryMul, U>(arr, n);
}

/// Divides the number from every element in the array and returns an array
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryDiv, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryDiv, U>
operator/(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryDiv, U>(arr, n);
return array_expression<array<T> const&, BinaryDiv, U>(arr, n);
}

/// Adds the number to every element in the array and returns a new array
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryAdd, U>
operator+(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryAdd, U>(arr, n);
return array_expression<array<T> const&, BinaryAdd, U>(arr, n);
}

/// Adds the number to every element in the array and returns an array
/// expression, when the number is on the left side.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryAdd, U>
operator+(U n, const array<T>& arr) {
return array_expression<array<T>, BinaryAdd, U>(arr, n);
return array_expression<array<T> const&, BinaryAdd, U>(arr, n);
}

/// Subtracts the number from every element in the array and returns an array
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinarySub, U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinarySub, U>
operator-(const array<T>& arr, U n) {
return array_expression<array<T>, BinarySub, U>(arr, n);
return array_expression<array<T> const&, BinarySub, U>(arr, n);
}

/// Function to define element wise adding of two arrays.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, array<U>>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryAdd, array<U> const&>
operator+(const array<T>& arr1, const array<U>& arr2) {
assert(arr1.size() == arr2.size());
return array_expression<array<T>, BinaryAdd, array<U>>(arr1, arr2);
return array_expression<array<T> const&, BinaryAdd, array<U> const&>(arr1,
arr2);
}

/// Function to define element wise subtraction of two arrays.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T>, BinarySub, array<U>>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinarySub, array<U> const&>
operator-(const array<T>& arr1, const array<U>& arr2) {
assert(arr1.size() == arr2.size());
return array_expression<array<T>, BinarySub, array<U>>(arr1, arr2);
return array_expression<array<T> const&, BinarySub, array<U> const&>(arr1,
arr2);
}

/// Element-wise multiplication of two arrays. Asserts that both arrays have
/// the same size and returns an array expression over the two operands.
/// NOTE(review): the expression type is parameterized on const references to
/// the operands, so they presumably must outlive the returned expression.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryMul, array<U> const&>
operator*(const array<T>& arr1, const array<U>& arr2) {
  using product_expression =
      array_expression<array<T> const&, BinaryMul, array<U> const&>;
  assert(arr1.size() == arr2.size());
  return product_expression(arr1, arr2);
}

/// Element-wise division of two arrays. Asserts that both arrays have the
/// same size and returns an array expression over the two operands.
/// NOTE(review): the expression type is parameterized on const references to
/// the operands, so they presumably must outlive the returned expression.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T> const&, BinaryDiv, array<U> const&>
operator/(const array<T>& arr1, const array<U>& arr2) {
  using quotient_expression =
      array_expression<array<T> const&, BinaryDiv, array<U> const&>;
  assert(arr1.size() == arr2.size());
  return quotient_expression(arr1, arr2);
}

} // namespace clad
52 changes: 33 additions & 19 deletions include/clad/Differentiator/ArrayExpression.h
Original file line number Diff line number Diff line change
@@ -48,7 +48,7 @@ class array_expression {
RightExp r;

public:
array_expression(LeftExp const& l, RightExp const& r) : l(l), r(r) {}
array_expression(LeftExp l, RightExp r) : l(l), r(r) {}

// for scalars
template <typename T, typename std::enable_if<std::is_arithmetic<T>::value,
@@ -84,65 +84,79 @@ class array_expression {

// Operator overload for addition.
template <typename RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp>, BinaryAdd, RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp> const&,
BinaryAdd, RE>
operator+(RE const& r) const {
return array_expression<array_expression<LeftExp, BinaryOp, RightExp>,
BinaryAdd, RE>(*this, r);
return array_expression<
array_expression<LeftExp, BinaryOp, RightExp> const&, BinaryAdd, RE>(
*this, r);
}

// Operator overload for multiplication.
template <typename RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp>, BinaryMul, RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp> const&,
BinaryMul, RE>
operator*(RE const& r) const {
return array_expression<array_expression<LeftExp, BinaryOp, RightExp>,
BinaryMul, RE>(*this, r);
return array_expression<
array_expression<LeftExp, BinaryOp, RightExp> const&, BinaryMul, RE>(
*this, r);
}

// Operator overload for subtraction.
template <typename RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp>, BinarySub, RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp> const&,
BinarySub, RE>
operator-(RE const& r) const {
return array_expression<array_expression<LeftExp, BinaryOp, RightExp>,
BinarySub, RE>(*this, r);
return array_expression<
array_expression<LeftExp, BinaryOp, RightExp> const&, BinarySub, RE>(
*this, r);
}

// Operator overload for division.
template <typename RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp>, BinaryDiv, RE>
array_expression<array_expression<LeftExp, BinaryOp, RightExp> const&,
BinaryDiv, RE>
operator/(RE const& r) const {
return array_expression<array_expression<LeftExp, BinaryOp, RightExp>,
BinaryDiv, RE>(*this, r);
return array_expression<
array_expression<LeftExp, BinaryOp, RightExp> const&, BinaryDiv, RE>(
*this, r);
}
};

// Operator overload for addition, when the right operand is an array_expression
// and the left operand is a scalar.
template <typename T, typename LeftExp, typename BinaryOp, typename RightExp,
typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
array_expression<T, BinaryAdd, array_expression<LeftExp, BinaryOp, RightExp>>
array_expression<T, BinaryAdd,
array_expression<LeftExp, BinaryOp, RightExp> const&>
operator+(T const& l, array_expression<LeftExp, BinaryOp, RightExp> const& r) {
return array_expression<T, BinaryAdd,
array_expression<LeftExp, BinaryOp, RightExp>>(l, r);
array_expression<LeftExp, BinaryOp, RightExp> const&>(
l, r);
}

// Operator overload for multiplication, when the right operand is an
// array_expression and the left operand is a scalar.
template <typename T, typename LeftExp, typename BinaryOp, typename RightExp,
typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
array_expression<T, BinaryMul, array_expression<LeftExp, BinaryOp, RightExp>>
array_expression<T, BinaryMul,
array_expression<LeftExp, BinaryOp, RightExp> const&>
operator*(T const& l, array_expression<LeftExp, BinaryOp, RightExp> const& r) {
return array_expression<T, BinaryMul,
array_expression<LeftExp, BinaryOp, RightExp>>(l, r);
array_expression<LeftExp, BinaryOp, RightExp> const&>(
l, r);
}

// Operator overload for subtraction, when the right operand is an
// array_expression and the left operand is a scalar.
template <typename T, typename LeftExp, typename BinaryOp, typename RightExp,
typename std::enable_if<std::is_arithmetic<T>::value, int>::type = 0>
array_expression<T, BinarySub, array_expression<LeftExp, BinaryOp, RightExp>>
array_expression<T, BinarySub,
array_expression<LeftExp, BinaryOp, RightExp> const&>
operator-(T const& l, array_expression<LeftExp, BinaryOp, RightExp> const& r) {
return array_expression<T, BinarySub,
array_expression<LeftExp, BinaryOp, RightExp>>(l, r);
array_expression<LeftExp, BinaryOp, RightExp> const&>(
l, r);
}
} // namespace clad
// NOLINTEND(*-pointer-arithmetic)
60 changes: 34 additions & 26 deletions include/clad/Differentiator/ArrayRef.h
Original file line number Diff line number Diff line change
@@ -163,99 +163,107 @@ template <typename T> class array_ref {

/// Multiplies the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryMul, array_ref<U>>
operator*(const array_ref<T>& Ar, const array_ref<U>& Br) {
CUDA_HOST_DEVICE
array_expression<array_ref<T> const&, BinaryMul, array_ref<U> const&>
operator*(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
"Size of both the array_refs must be equal for carrying out "
"multiplication assignment");
return array_expression<array_ref<T>, BinaryMul, array_ref<U>>(Ar, Br);
return array_expression<array_ref<T> const&, BinaryMul, array_ref<U> const&>(
Ar, Br);
}

/// Adds the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryAdd, array_ref<U>>
operator+(const array_ref<T>& Ar, const array_ref<U>& Br) {
CUDA_HOST_DEVICE
array_expression<array_ref<T> const&, BinaryAdd, array_ref<U> const&>
operator+(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
"Size of both the array_refs must be equal for carrying out addition "
"assignment");
return array_expression<array_ref<T>, BinaryAdd, array_ref<U>>(Ar, Br);
return array_expression<array_ref<T> const&, BinaryAdd, array_ref<U> const&>(
Ar, Br);
}

/// Subtracts the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinarySub, array_ref<U>>
operator-(const array_ref<T>& Ar, const array_ref<U>& Br) {
CUDA_HOST_DEVICE
array_expression<array_ref<T> const&, BinarySub, array_ref<U> const&>
operator-(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(
Ar.size() == Br.size() &&
"Size of both the array_refs must be equal for carrying out subtraction "
"assignment");
return array_expression<array_ref<T>, BinarySub, array_ref<U>>(Ar, Br);
return array_expression<array_ref<T> const&, BinarySub, array_ref<U> const&>(
Ar, Br);
}

/// Divides the arrays element wise
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryDiv, array_ref<U>>
operator/(const array_ref<T>& Ar, const array_ref<U>& Br) {
CUDA_HOST_DEVICE
array_expression<array_ref<T> const&, BinaryDiv, array_ref<U> const&>
operator/(const array_ref<T>& Ar, const array_ref<U>& Br) {
assert(Ar.size() == Br.size() &&
"Size of both the array_refs must be equal for carrying out division "
"assignment");
return array_expression<array_ref<T>, BinaryDiv, array_ref<U>>(Ar, Br);
return array_expression<array_ref<T> const&, BinaryDiv, array_ref<U> const&>(
Ar, Br);
}

/// Multiplies array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinaryMul, U>
operator*(const array_ref<T>& Ar, U a) {
return array_expression<array_ref<T>, BinaryMul, U>(Ar, a);
return array_expression<array_ref<T> const&, BinaryMul, U>(Ar, a);
}

/// Multiplies array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinaryMul, U>
operator*(U a, const array_ref<T>& Ar) {
return array_expression<array_ref<T>, BinaryMul, U>(Ar, a);
return array_expression<array_ref<T> const&, BinaryMul, U>(Ar, a);
}

/// Divides array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryDiv, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinaryDiv, U>
operator/(const array_ref<T>& Ar, U a) {
return array_expression<array_ref<T>, BinaryDiv, U>(Ar, a);
return array_expression<array_ref<T> const&, BinaryDiv, U>(Ar, a);
}

/// Adds array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinaryAdd, U>
operator+(const array_ref<T>& Ar, U a) {
return array_expression<array_ref<T>, BinaryAdd, U>(Ar, a);
return array_expression<array_ref<T> const&, BinaryAdd, U>(Ar, a);
}

/// Adds array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinaryAdd, U>
operator+(U a, const array_ref<T>& Ar) {
return array_expression<array_ref<T>, BinaryAdd, U>(Ar, a);
return array_expression<array_ref<T> const&, BinaryAdd, U>(Ar, a);
}

/// Subtracts array_ref by a scalar
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array_ref<T>, BinarySub, U>
CUDA_HOST_DEVICE array_expression<array_ref<T> const&, BinarySub, U>
operator-(const array_ref<T>& Ar, U a) {
return array_expression<array_ref<T>, BinarySub, U>(Ar, a);
return array_expression<array_ref<T> const&, BinarySub, U>(Ar, a);
}

/// Subtracts array_ref by a scalar (reverse order)
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<U, BinarySub, array_ref<T>>
CUDA_HOST_DEVICE array_expression<U, BinarySub, array_ref<T> const&>
operator-(U a, const array_ref<T>& Ar) {
return array_expression<U, BinarySub, array_ref<T>>(a, Ar);
return array_expression<U, BinarySub, array_ref<T> const&>(a, Ar);
}

/// `array_ref<void>` specialisation is created to be used as a placeholder

0 comments on commit 01bf9b1

Please sign in to comment.