diff --git a/benchmark/ArrayExpressionTemplates.cpp b/benchmark/ArrayExpressionTemplates.cpp new file mode 100644 index 000000000..5be6e070f --- /dev/null +++ b/benchmark/ArrayExpressionTemplates.cpp @@ -0,0 +1,104 @@ +#include "benchmark/benchmark.h" + +#include "clad/Differentiator/Differentiator.h" + +// Benchmark the expression x*y + y*z + z*x between clad arrays, +// this is to compare the performance of expression templates. +// We will evaluate the expression on using four different methods: +// 1. Using operations on clad arrays - this will use expression templates. +// 2. Using clad arrays but creating temporaries manually. +// 3. Using loops on clad arrays. +// 4. Using loops on native arrays. + +// Benchmark expression templates. +static void BM_ExpressionTemplates(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) + benchmark::DoNotOptimize(res = x * y + y * z + z * x); +} +BENCHMARK(BM_ExpressionTemplates); + +// Benchmark manually creating temporaries. +static void BM_ManualTemporaries(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) { + clad::array temp1 = x * y; + clad::array temp2 = y * z; + clad::array temp3 = z * x; + clad::array temp4 = temp1 + temp2; + benchmark::DoNotOptimize(res = temp4 + temp3); + } +} +BENCHMARK(BM_ManualTemporaries); + +// Benchmark loops on clad arrays. +static void BM_LoopsOnCladArrays(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) { + for (int i = 0; i < n; ++i) { + benchmark::DoNotOptimize(res[i] = + x[i] * y[i] + y[i] * z[i] + z[i] * x[i]); + } + } +} +BENCHMARK(BM_LoopsOnCladArrays); + +// Benchmark loops on native arrays. +static void BM_LoopsOnNativeArrays(benchmark::State& state) { + constexpr int n = 1000; + double* x = new double[n]; + double* y = new double[n]; + double* z = new double[n]; + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + double* res = new double[n]; + for (auto _ : state) { + for (int i = 0; i < n; ++i) { + benchmark::DoNotOptimize(res[i] = + x[i] * y[i] + y[i] * z[i] + z[i] * x[i]); + } + } + + delete[] x; + delete[] y; + delete[] z; + delete[] res; +} +BENCHMARK(BM_LoopsOnNativeArrays); + +// Define our main. +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/benchmark/mybenchmark b/benchmark/mybenchmark new file mode 100755 index 000000000..eadc186ec Binary files /dev/null and b/benchmark/mybenchmark differ diff --git a/include/clad/Differentiator/Array.h b/include/clad/Differentiator/Array.h index 604137382..c7c45d2f8 100644 --- a/include/clad/Differentiator/Array.h +++ b/include/clad/Differentiator/Array.h @@ -58,6 +58,13 @@ template class array { m_arr[i] = expression[i]; } + template + CUDA_HOST_DEVICE array(const array_expression& expression) + : m_arr(new T[expression.size()]), m_size(expression.size()) { + for (std::size_t i = 0; i < expression.size(); ++i) + m_arr[i] = expression[i]; + } + // initializing all entries using the same value template CUDA_HOST_DEVICE array(std::size_t size, U val) @@ -293,17 +300,19 @@ template class array { } /// Negate the array and return a new array. - CUDA_HOST_DEVICE array_expression> operator-() const { - return array_expression>(static_cast(0), *this); + CUDA_HOST_DEVICE array_expression const&> + operator-() const { + return array_expression const&>(static_cast(0), + *this); } /// Subtracts the number from every element in the array and returns a new /// array, when the number is on the left side. template ::value, int>::type = 0> - CUDA_HOST_DEVICE friend array_expression> + CUDA_HOST_DEVICE friend array_expression const&> operator-(U n, const array& arr) { - return array_expression>(n, arr); + return array_expression const&>(n, arr); } /// Implicitly converts from clad::array to pointer to an array of type T @@ -333,69 +342,89 @@ template CUDA_HOST_DEVICE array zero_vector(std::size_t n) { /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression const&, BinaryMul, U> operator*(const array& arr, U n) { - return array_expression, BinaryMul, U>(arr, n); + return array_expression const&, BinaryMul, U>(arr, n); } /// Multiplies the number to every element in the array and returns an array /// expression, when the number is on the left side. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression const&, BinaryMul, U> operator*(U n, const array& arr) { - return array_expression, BinaryMul, U>(arr, n); + return array_expression const&, BinaryMul, U>(arr, n); } /// Divides the number from every element in the array and returns an array /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryDiv, U> +CUDA_HOST_DEVICE array_expression const&, BinaryDiv, U> operator/(const array& arr, U n) { - return array_expression, BinaryDiv, U>(arr, n); + return array_expression const&, BinaryDiv, U>(arr, n); } /// Adds the number to every element in the array and returns a new array template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression const&, BinaryAdd, U> operator+(const array& arr, U n) { - return array_expression, BinaryAdd, U>(arr, n); + return array_expression const&, BinaryAdd, U>(arr, n); } /// Adds the number to every element in the array and returns an array /// expression, when the number is on the left side. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression const&, BinaryAdd, U> operator+(U n, const array& arr) { - return array_expression, BinaryAdd, U>(arr, n); + return array_expression const&, BinaryAdd, U>(arr, n); } /// Subtracts the number from every element in the array and returns an array /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinarySub, U> +CUDA_HOST_DEVICE array_expression const&, BinarySub, U> operator-(const array& arr, U n) { - return array_expression, BinarySub, U>(arr, n); + return array_expression const&, BinarySub, U>(arr, n); } /// Function to define element wise adding of two arrays. template -CUDA_HOST_DEVICE array_expression, BinaryAdd, array> +CUDA_HOST_DEVICE array_expression const&, BinaryAdd, array const&> operator+(const array& arr1, const array& arr2) { assert(arr1.size() == arr2.size()); - return array_expression, BinaryAdd, array>(arr1, arr2); + return array_expression const&, BinaryAdd, array const&>(arr1, + arr2); } /// Function to define element wise subtraction of two arrays. template -CUDA_HOST_DEVICE array_expression, BinarySub, array> +CUDA_HOST_DEVICE array_expression const&, BinarySub, array const&> operator-(const array& arr1, const array& arr2) { assert(arr1.size() == arr2.size()); - return array_expression, BinarySub, array>(arr1, arr2); + return array_expression const&, BinarySub, array const&>(arr1, + arr2); +} + +/// Function to define element wise multiplication of two arrays. +template +CUDA_HOST_DEVICE array_expression const&, BinaryMul, array const&> +operator*(const array& arr1, const array& arr2) { + assert(arr1.size() == arr2.size()); + return array_expression const&, BinaryMul, array const&>(arr1, + arr2); +} + +/// Function to define element wise division of two arrays. +template +CUDA_HOST_DEVICE array_expression const&, BinaryDiv, array const&> +operator/(const array& arr1, const array& arr2) { + assert(arr1.size() == arr2.size()); + return array_expression const&, BinaryDiv, array const&>(arr1, + arr2); } } // namespace clad diff --git a/include/clad/Differentiator/ArrayExpression.h b/include/clad/Differentiator/ArrayExpression.h index 9d9c5a94c..b608bc902 100644 --- a/include/clad/Differentiator/ArrayExpression.h +++ b/include/clad/Differentiator/ArrayExpression.h @@ -48,7 +48,7 @@ class array_expression { RightExp r; public: - array_expression(LeftExp const& l, RightExp const& r) : l(l), r(r) {} + array_expression(LeftExp l, RightExp r) : l(l), r(r) {} // for scalars template ::value, @@ -84,34 +84,42 @@ class array_expression { // Operator overload for addition. template - array_expression, BinaryAdd, RE> + array_expression const&, + BinaryAdd, RE> operator+(RE const& r) const { - return array_expression, - BinaryAdd, RE>(*this, r); + return array_expression< + array_expression const&, BinaryAdd, RE>( + *this, r); } // Operator overload for multiplication. template - array_expression, BinaryMul, RE> + array_expression const&, + BinaryMul, RE> operator*(RE const& r) const { - return array_expression, - BinaryMul, RE>(*this, r); + return array_expression< + array_expression const&, BinaryMul, RE>( + *this, r); } // Operator overload for subtraction. template - array_expression, BinarySub, RE> + array_expression const&, + BinarySub, RE> operator-(RE const& r) const { - return array_expression, - BinarySub, RE>(*this, r); + return array_expression< + array_expression const&, BinarySub, RE>( + *this, r); } // Operator overload for division. template - array_expression, BinaryDiv, RE> + array_expression const&, + BinaryDiv, RE> operator/(RE const& r) const { - return array_expression, - BinaryDiv, RE>(*this, r); + return array_expression< + array_expression const&, BinaryDiv, RE>( + *this, r); } }; @@ -119,30 +127,36 @@ class array_expression { // and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> +array_expression const&> operator+(T const& l, array_expression const& r) { return array_expression>(l, r); + array_expression const&>( + l, r); } // Operator overload for multiplication, when the right operand is an // array_expression and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> +array_expression const&> operator*(T const& l, array_expression const& r) { return array_expression>(l, r); + array_expression const&>( + l, r); } // Operator overload for subtraction, when the right operand is an // array_expression and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> +array_expression const&> operator-(T const& l, array_expression const& r) { return array_expression>(l, r); + array_expression const&>( + l, r); } } // namespace clad // NOLINTEND(*-pointer-arithmetic) diff --git a/include/clad/Differentiator/ArrayRef.h b/include/clad/Differentiator/ArrayRef.h index 4d99391a9..0ceaa52fb 100644 --- a/include/clad/Differentiator/ArrayRef.h +++ b/include/clad/Differentiator/ArrayRef.h @@ -163,99 +163,107 @@ template class array_ref { /// Multiplies the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryMul, array_ref> -operator*(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression const&, BinaryMul, array_ref const&> + operator*(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out " "multiplication assignment"); - return array_expression, BinaryMul, array_ref>(Ar, Br); + return array_expression const&, BinaryMul, array_ref const&>( + Ar, Br); } /// Adds the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryAdd, array_ref> -operator+(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression const&, BinaryAdd, array_ref const&> + operator+(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out addition " "assignment"); - return array_expression, BinaryAdd, array_ref>(Ar, Br); + return array_expression const&, BinaryAdd, array_ref const&>( + Ar, Br); } /// Subtracts the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinarySub, array_ref> -operator-(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression const&, BinarySub, array_ref const&> + operator-(const array_ref& Ar, const array_ref& Br) { assert( Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out subtraction " "assignment"); - return array_expression, BinarySub, array_ref>(Ar, Br); + return array_expression const&, BinarySub, array_ref const&>( + Ar, Br); } /// Divides the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryDiv, array_ref> -operator/(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression const&, BinaryDiv, array_ref const&> + operator/(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out division " "assignment"); - return array_expression, BinaryDiv, array_ref>(Ar, Br); + return array_expression const&, BinaryDiv, array_ref const&>( + Ar, Br); } /// Multiplies array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression const&, BinaryMul, U> operator*(const array_ref& Ar, U a) { - return array_expression, BinaryMul, U>(Ar, a); + return array_expression const&, BinaryMul, U>(Ar, a); } /// Multiplies array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression const&, BinaryMul, U> operator*(U a, const array_ref& Ar) { - return array_expression, BinaryMul, U>(Ar, a); + return array_expression const&, BinaryMul, U>(Ar, a); } /// Divides array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryDiv, U> +CUDA_HOST_DEVICE array_expression const&, BinaryDiv, U> operator/(const array_ref& Ar, U a) { - return array_expression, BinaryDiv, U>(Ar, a); + return array_expression const&, BinaryDiv, U>(Ar, a); } /// Adds array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression const&, BinaryAdd, U> operator+(const array_ref& Ar, U a) { - return array_expression, BinaryAdd, U>(Ar, a); + return array_expression const&, BinaryAdd, U>(Ar, a); } /// Adds array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression const&, BinaryAdd, U> operator+(U a, const array_ref& Ar) { - return array_expression, BinaryAdd, U>(Ar, a); + return array_expression const&, BinaryAdd, U>(Ar, a); } /// Subtracts array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinarySub, U> +CUDA_HOST_DEVICE array_expression const&, BinarySub, U> operator-(const array_ref& Ar, U a) { - return array_expression, BinarySub, U>(Ar, a); + return array_expression const&, BinarySub, U>(Ar, a); } /// Subtracts array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression> +CUDA_HOST_DEVICE array_expression const&> operator-(U a, const array_ref& Ar) { - return array_expression>(a, Ar); + return array_expression const&>(a, Ar); } /// `array_ref` specialisation is created to be used as a placeholder