Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add benchmark for expression templates #634

Merged
merged 2 commits into from
Oct 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .clang-format
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ Language: Cpp
Standard: Cpp11
PointerAlignment: Left
RemoveBracesLLVM: true
QualifierAlignment: Left

IncludeCategories:
- Regex: '^"[^/]+\"'
Expand Down
104 changes: 104 additions & 0 deletions benchmark/ArrayExpressionTemplates.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
#include "benchmark/benchmark.h"

#include "clad/Differentiator/Differentiator.h"

#include <memory>

// Benchmark the expression x*y + y*z + z*x on clad arrays in order to
// measure the performance of expression templates.
// We will evaluate the expression using four different methods:
// 1. Using operations on clad arrays - this will use expression templates.
// 2. Using clad arrays but creating temporaries manually.
// 3. Using loops on clad arrays.
// 4. Using loops on native arrays.

// Method 1: evaluate x*y + y*z + z*x directly with the clad array operators,
// which goes through expression templates.
static void BM_ExpressionTemplates(benchmark::State& state) {
  constexpr int kSize = 1000;
  clad::array<double> x(kSize);
  clad::array<double> y(kSize);
  clad::array<double> z(kSize);
  // Give the three operands distinct, non-trivial contents.
  for (int idx = 0; idx != kSize; ++idx) {
    x[idx] = idx + 1;
    y[idx] = idx + 2;
    z[idx] = idx + 3;
  }

  clad::array<double> res(kSize);
  // Only the assignment of the whole expression is timed.
  for (auto _ : state) {
    benchmark::DoNotOptimize(res = x * y + y * z + z * x);
  }
}
BENCHMARK(BM_ExpressionTemplates);

// Method 2: evaluate the same expression on clad arrays, but store every
// intermediate result in an explicitly named clad array.
static void BM_ManualTemporaries(benchmark::State& state) {
  constexpr int kSize = 1000;
  clad::array<double> x(kSize);
  clad::array<double> y(kSize);
  clad::array<double> z(kSize);
  // Give the three operands distinct, non-trivial contents.
  for (int idx = 0; idx != kSize; ++idx) {
    x[idx] = idx + 1;
    y[idx] = idx + 2;
    z[idx] = idx + 3;
  }

  clad::array<double> res(kSize);
  for (auto _ : state) {
    // Each product and the partial sum get their own array.
    clad::array<double> xy = x * y;
    clad::array<double> yz = y * z;
    clad::array<double> zx = z * x;
    clad::array<double> xyPlusYz = xy + yz;
    benchmark::DoNotOptimize(res = xyPlusYz + zx);
  }
}
BENCHMARK(BM_ManualTemporaries);

// Method 3: evaluate the expression element by element with an explicit loop
// over clad arrays.
static void BM_LoopsOnCladArrays(benchmark::State& state) {
  constexpr int kSize = 1000;
  clad::array<double> x(kSize);
  clad::array<double> y(kSize);
  clad::array<double> z(kSize);
  // Give the three operands distinct, non-trivial contents.
  for (int idx = 0; idx != kSize; ++idx) {
    x[idx] = idx + 1;
    y[idx] = idx + 2;
    z[idx] = idx + 3;
  }

  clad::array<double> res(kSize);
  for (auto _ : state) {
    // Every element store is passed to DoNotOptimize individually.
    for (int k = 0; k != kSize; ++k) {
      benchmark::DoNotOptimize(res[k] =
                                   x[k] * y[k] + y[k] * z[k] + z[k] * x[k]);
    }
  }
}
BENCHMARK(BM_LoopsOnCladArrays);

// Method 4: evaluate the expression element by element with an explicit loop
// over native (heap-allocated) double arrays. This is the baseline the clad
// array variants are compared against.
static void BM_LoopsOnNativeArrays(benchmark::State& state) {
  constexpr int n = 1000;
  // The buffers are owned by unique_ptr so that they are released on every
  // exit path without manual delete[] calls. The storage is still allocated
  // with plain `new double[n]` (uninitialized), exactly as before.
  std::unique_ptr<double[]> xStorage(new double[n]);
  std::unique_ptr<double[]> yStorage(new double[n]);
  std::unique_ptr<double[]> zStorage(new double[n]);
  std::unique_ptr<double[]> resStorage(new double[n]);

  // The benchmark itself works on raw pointers so that the timed loop is
  // the same native-array code as before.
  double* x = xStorage.get();
  double* y = yStorage.get();
  double* z = zStorage.get();
  double* res = resStorage.get();

  for (int i = 0; i < n; ++i) {
    x[i] = i + 1;
    y[i] = i + 2;
    z[i] = i + 3;
  }

  for (auto _ : state) {
    for (int i = 0; i < n; ++i) {
      benchmark::DoNotOptimize(res[i] =
                                   x[i] * y[i] + y[i] * z[i] + z[i] * x[i]);
    }
  }
}
BENCHMARK(BM_LoopsOnNativeArrays);

// Define our main.
BENCHMARK_MAIN();
1 change: 1 addition & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ include(AddCladBenchmark)

CB_ADD_GBENCHMARK(Simple Simple.cpp)
CB_ADD_GBENCHMARK(AlgorithmicComplexity AlgorithmicComplexity.cpp)
CB_ADD_GBENCHMARK(ArrayExpressionTemplates ArrayExpressionTemplates.cpp)
CB_ADD_GBENCHMARK(EnzymeCladComparison EnzymeCladComparison.cpp)
CB_ADD_GBENCHMARK(MemoryComplexity MemoryComplexity.cpp)
CB_ADD_GBENCHMARK(VectorModeComparison VectorModeComparison.cpp)
Expand Down
69 changes: 49 additions & 20 deletions include/clad/Differentiator/Array.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ template <typename T> class array {
m_arr[i] = expression[i];
}

/// Constructs an array from an array expression: allocates
/// `expression.size()` elements and stores `expression[i]` into element i.
template <typename L, typename BinaryOp, typename R>
CUDA_HOST_DEVICE array(const array_expression<L, BinaryOp, R>& expression)
    : m_arr(new T[expression.size()]), m_size(expression.size()) {
  std::size_t idx = 0;
  while (idx < expression.size()) {
    m_arr[idx] = expression[idx];
    ++idx;
  }
}

// initializing all entries using the same value
template <typename U>
CUDA_HOST_DEVICE array(std::size_t size, U val)
Expand Down Expand Up @@ -293,17 +300,19 @@ template <typename T> class array {
}

/// Negates the array and returns an array expression.
CUDA_HOST_DEVICE array_expression<T, BinarySub, array<T>> operator-() const {
return array_expression<T, BinarySub, array<T>>(static_cast<T>(0), *this);
CUDA_HOST_DEVICE array_expression<T, BinarySub, const array<T>&>
operator-() const {
return array_expression<T, BinarySub, const array<T>&>(static_cast<T>(0),
*this);
}

/// Subtracts every element in the array from the number and returns an
/// array expression, when the number is on the left side.
template <typename U, typename std::enable_if<std::is_arithmetic<U>::value,
int>::type = 0>
CUDA_HOST_DEVICE friend array_expression<U, BinarySub, array<T>>
CUDA_HOST_DEVICE friend array_expression<U, BinarySub, const array<T>&>
operator-(U n, const array<T>& arr) {
return array_expression<U, BinarySub, array<T>>(n, arr);
return array_expression<U, BinarySub, const array<T>&>(n, arr);
}

/// Implicitly converts from clad::array to pointer to an array of type T
Expand Down Expand Up @@ -333,69 +342,89 @@ template <typename T> CUDA_HOST_DEVICE array<T> zero_vector(std::size_t n) {
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryMul, U>
operator*(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryMul, U>(arr, n);
return array_expression<const array<T>&, BinaryMul, U>(arr, n);
}

/// Multiplies the number to every element in the array and returns an array
/// expression, when the number is on the left side.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryMul, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryMul, U>
operator*(U n, const array<T>& arr) {
return array_expression<array<T>, BinaryMul, U>(arr, n);
return array_expression<const array<T>&, BinaryMul, U>(arr, n);
}

/// Divides every element in the array by the number and returns an array
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryDiv, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryDiv, U>
operator/(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryDiv, U>(arr, n);
return array_expression<const array<T>&, BinaryDiv, U>(arr, n);
}

/// Adds the number to every element in the array and returns an array
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryAdd, U>
operator+(const array<T>& arr, U n) {
return array_expression<array<T>, BinaryAdd, U>(arr, n);
return array_expression<const array<T>&, BinaryAdd, U>(arr, n);
}

/// Adds the number to every element in the array and returns an array
/// expression, when the number is on the left side.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryAdd, U>
operator+(U n, const array<T>& arr) {
return array_expression<array<T>, BinaryAdd, U>(arr, n);
return array_expression<const array<T>&, BinaryAdd, U>(arr, n);
}

/// Subtracts the number from every element in the array and returns an array
/// expression.
template <typename T, typename U,
typename std::enable_if<std::is_arithmetic<U>::value, int>::type = 0>
CUDA_HOST_DEVICE array_expression<array<T>, BinarySub, U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinarySub, U>
operator-(const array<T>& arr, U n) {
return array_expression<array<T>, BinarySub, U>(arr, n);
return array_expression<const array<T>&, BinarySub, U>(arr, n);
}

/// Function to define element wise adding of two arrays.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T>, BinaryAdd, array<U>>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryAdd, const array<U>&>
operator+(const array<T>& arr1, const array<U>& arr2) {
assert(arr1.size() == arr2.size());
return array_expression<array<T>, BinaryAdd, array<U>>(arr1, arr2);
return array_expression<const array<T>&, BinaryAdd, const array<U>&>(arr1,
arr2);
}

/// Function to define element wise subtraction of two arrays.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<array<T>, BinarySub, array<U>>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinarySub, const array<U>&>
operator-(const array<T>& arr1, const array<U>& arr2) {
assert(arr1.size() == arr2.size());
return array_expression<array<T>, BinarySub, array<U>>(arr1, arr2);
return array_expression<const array<T>&, BinarySub, const array<U>&>(arr1,
arr2);
}

/// Element-wise multiplication of two arrays.
/// Asserts that both arrays have the same size and returns an array
/// expression over the two operands instead of a materialized array.
/// NOTE(review): the expression type is parameterized on const references,
/// so presumably both operands must outlive it -- confirm against
/// array_expression.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryMul, const array<U>&>
operator*(const array<T>& arr1, const array<U>& arr2) {
  using product_expression =
      array_expression<const array<T>&, BinaryMul, const array<U>&>;
  assert(arr1.size() == arr2.size());
  return product_expression(arr1, arr2);
}

/// Element-wise division of two arrays.
/// Asserts that both arrays have the same size and returns an array
/// expression over the two operands instead of a materialized array.
/// NOTE(review): the expression type is parameterized on const references,
/// so presumably both operands must outlive it -- confirm against
/// array_expression.
template <typename T, typename U>
CUDA_HOST_DEVICE array_expression<const array<T>&, BinaryDiv, const array<U>&>
operator/(const array<T>& arr1, const array<U>& arr2) {
  using quotient_expression =
      array_expression<const array<T>&, BinaryDiv, const array<U>&>;
  assert(arr1.size() == arr2.size());
  return quotient_expression(arr1, arr2);
}

} // namespace clad
Expand Down
Loading
Loading