diff --git a/.clang-format b/.clang-format index 9d3caf0f5..3cae5f179 100644 --- a/.clang-format +++ b/.clang-format @@ -4,6 +4,7 @@ Language: Cpp Standard: Cpp11 PointerAlignment: Left RemoveBracesLLVM: true +QualifierAlignment: Left IncludeCategories: - Regex: '^"[^/]+\"' diff --git a/benchmark/ArrayExpressionTemplates.cpp b/benchmark/ArrayExpressionTemplates.cpp new file mode 100644 index 000000000..5be6e070f --- /dev/null +++ b/benchmark/ArrayExpressionTemplates.cpp @@ -0,0 +1,104 @@ +#include "benchmark/benchmark.h" + +#include "clad/Differentiator/Differentiator.h" + +// Benchmark the expression x*y + y*z + z*x between clad arrays, +// this is to compare the performance of expression templates. +// We will evaluate the expression on using four different methods: +// 1. Using operations on clad arrays - this will use expression templates. +// 2. Using clad arrays but creating temporaries manually. +// 3. Using loops on clad arrays. +// 4. Using loops on native arrays. + +// Benchmark expression templates. +static void BM_ExpressionTemplates(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) + benchmark::DoNotOptimize(res = x * y + y * z + z * x); +} +BENCHMARK(BM_ExpressionTemplates); + +// Benchmark manually creating temporaries. +static void BM_ManualTemporaries(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) { + clad::array temp1 = x * y; + clad::array temp2 = y * z; + clad::array temp3 = z * x; + clad::array temp4 = temp1 + temp2; + benchmark::DoNotOptimize(res = temp4 + temp3); + } +} +BENCHMARK(BM_ManualTemporaries); + +// Benchmark loops on clad arrays. +static void BM_LoopsOnCladArrays(benchmark::State& state) { + constexpr int n = 1000; + clad::array x(n); + clad::array y(n); + clad::array z(n); + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + clad::array res(n); + for (auto _ : state) { + for (int i = 0; i < n; ++i) { + benchmark::DoNotOptimize(res[i] = + x[i] * y[i] + y[i] * z[i] + z[i] * x[i]); + } + } +} +BENCHMARK(BM_LoopsOnCladArrays); + +// Benchmark loops on native arrays. +static void BM_LoopsOnNativeArrays(benchmark::State& state) { + constexpr int n = 1000; + double* x = new double[n]; + double* y = new double[n]; + double* z = new double[n]; + for (int i = 0; i < n; ++i) { + x[i] = i + 1; + y[i] = i + 2; + z[i] = i + 3; + } + + double* res = new double[n]; + for (auto _ : state) { + for (int i = 0; i < n; ++i) { + benchmark::DoNotOptimize(res[i] = + x[i] * y[i] + y[i] * z[i] + z[i] * x[i]); + } + } + + delete[] x; + delete[] y; + delete[] z; + delete[] res; +} +BENCHMARK(BM_LoopsOnNativeArrays); + +// Define our main. +BENCHMARK_MAIN(); \ No newline at end of file diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 2d22a6ea4..da16b6936 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -6,6 +6,7 @@ include(AddCladBenchmark) CB_ADD_GBENCHMARK(Simple Simple.cpp) CB_ADD_GBENCHMARK(AlgorithmicComplexity AlgorithmicComplexity.cpp) +CB_ADD_GBENCHMARK(ArrayExpressionTemplates ArrayExpressionTemplates.cpp) CB_ADD_GBENCHMARK(EnzymeCladComparison EnzymeCladComparison.cpp) CB_ADD_GBENCHMARK(MemoryComplexity MemoryComplexity.cpp) CB_ADD_GBENCHMARK(VectorModeComparison VectorModeComparison.cpp) diff --git a/include/clad/Differentiator/Array.h b/include/clad/Differentiator/Array.h index 604137382..a78083b64 100644 --- a/include/clad/Differentiator/Array.h +++ b/include/clad/Differentiator/Array.h @@ -58,6 +58,13 @@ template class array { m_arr[i] = expression[i]; } + template + CUDA_HOST_DEVICE array(const array_expression& expression) + : m_arr(new T[expression.size()]), m_size(expression.size()) { + for (std::size_t i = 0; i < expression.size(); ++i) + m_arr[i] = expression[i]; + } + // initializing all entries using the same value template CUDA_HOST_DEVICE array(std::size_t size, U val) @@ -293,17 +300,19 @@ template class array { } /// Negate the array and return a new array. - CUDA_HOST_DEVICE array_expression> operator-() const { - return array_expression>(static_cast(0), *this); + CUDA_HOST_DEVICE array_expression&> + operator-() const { + return array_expression&>(static_cast(0), + *this); } /// Subtracts the number from every element in the array and returns a new /// array, when the number is on the left side. template ::value, int>::type = 0> - CUDA_HOST_DEVICE friend array_expression> + CUDA_HOST_DEVICE friend array_expression&> operator-(U n, const array& arr) { - return array_expression>(n, arr); + return array_expression&>(n, arr); } /// Implicitly converts from clad::array to pointer to an array of type T @@ -333,69 +342,89 @@ template CUDA_HOST_DEVICE array zero_vector(std::size_t n) { /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(const array& arr, U n) { - return array_expression, BinaryMul, U>(arr, n); + return array_expression&, BinaryMul, U>(arr, n); } /// Multiplies the number to every element in the array and returns an array /// expression, when the number is on the left side. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(U n, const array& arr) { - return array_expression, BinaryMul, U>(arr, n); + return array_expression&, BinaryMul, U>(arr, n); } /// Divides the number from every element in the array and returns an array /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryDiv, U> +CUDA_HOST_DEVICE array_expression&, BinaryDiv, U> operator/(const array& arr, U n) { - return array_expression, BinaryDiv, U>(arr, n); + return array_expression&, BinaryDiv, U>(arr, n); } /// Adds the number to every element in the array and returns a new array template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(const array& arr, U n) { - return array_expression, BinaryAdd, U>(arr, n); + return array_expression&, BinaryAdd, U>(arr, n); } /// Adds the number to every element in the array and returns an array /// expression, when the number is on the left side. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(U n, const array& arr) { - return array_expression, BinaryAdd, U>(arr, n); + return array_expression&, BinaryAdd, U>(arr, n); } /// Subtracts the number from every element in the array and returns an array /// expression. template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinarySub, U> +CUDA_HOST_DEVICE array_expression&, BinarySub, U> operator-(const array& arr, U n) { - return array_expression, BinarySub, U>(arr, n); + return array_expression&, BinarySub, U>(arr, n); } /// Function to define element wise adding of two arrays. template -CUDA_HOST_DEVICE array_expression, BinaryAdd, array> +CUDA_HOST_DEVICE array_expression&, BinaryAdd, const array&> operator+(const array& arr1, const array& arr2) { assert(arr1.size() == arr2.size()); - return array_expression, BinaryAdd, array>(arr1, arr2); + return array_expression&, BinaryAdd, const array&>(arr1, + arr2); } /// Function to define element wise subtraction of two arrays. template -CUDA_HOST_DEVICE array_expression, BinarySub, array> +CUDA_HOST_DEVICE array_expression&, BinarySub, const array&> operator-(const array& arr1, const array& arr2) { assert(arr1.size() == arr2.size()); - return array_expression, BinarySub, array>(arr1, arr2); + return array_expression&, BinarySub, const array&>(arr1, + arr2); +} + +/// Function to define element wise multiplication of two arrays. +template +CUDA_HOST_DEVICE array_expression&, BinaryMul, const array&> +operator*(const array& arr1, const array& arr2) { + assert(arr1.size() == arr2.size()); + return array_expression&, BinaryMul, const array&>(arr1, + arr2); +} + +/// Function to define element wise division of two arrays. +template +CUDA_HOST_DEVICE array_expression&, BinaryDiv, const array&> +operator/(const array& arr1, const array& arr2) { + assert(arr1.size() == arr2.size()); + return array_expression&, BinaryDiv, const array&>(arr1, + arr2); } } // namespace clad diff --git a/include/clad/Differentiator/ArrayExpression.h b/include/clad/Differentiator/ArrayExpression.h index 9d9c5a94c..c420f9177 100644 --- a/include/clad/Differentiator/ArrayExpression.h +++ b/include/clad/Differentiator/ArrayExpression.h @@ -12,7 +12,7 @@ namespace clad { // Operator to add two elements. struct BinaryAdd { template - static auto apply(T const& t, U const& u) -> decltype(t + u) { + static auto apply(const T& t, const U& u) -> decltype(t + u) { return t + u; } }; @@ -20,7 +20,7 @@ struct BinaryAdd { // Operator to add two elements. struct BinaryMul { template - static auto apply(T const& t, U const& u) -> decltype(t * u) { + static auto apply(const T& t, const U& u) -> decltype(t * u) { return t * u; } }; @@ -28,7 +28,7 @@ struct BinaryMul { // Operator to divide two elements. struct BinaryDiv { template - static auto apply(T const& t, U const& u) -> decltype(t / u) { + static auto apply(const T& t, const U& u) -> decltype(t / u) { return t / u; } }; @@ -36,7 +36,7 @@ struct BinaryDiv { // Operator to subtract two elements. struct BinarySub { template - static auto apply(T const& t, U const& u) -> decltype(t - u) { + static auto apply(const T& t, const U& u) -> decltype(t - u) { return t - u; } }; @@ -48,29 +48,29 @@ class array_expression { RightExp r; public: - array_expression(LeftExp const& l, RightExp const& r) : l(l), r(r) {} + array_expression(LeftExp l, RightExp r) : l(l), r(r) {} // for scalars template ::value, int>::type = 0> - std::size_t get_size(T const& t) const { + std::size_t get_size(const T& t) const { return 1; } template ::value, int>::type = 0> - T get(T const& t, std::size_t i) const { + T get(const T& t, std::size_t i) const { return t; } // for vectors template ::value, int>::type = 0> - std::size_t get_size(T const& t) const { + std::size_t get_size(const T& t) const { return t.size(); } template ::value, int>::type = 0> - auto get(T const& t, std::size_t i) const -> decltype(t[i]) { + auto get(const T& t, std::size_t i) const -> decltype(t[i]) { return t[i]; } @@ -84,34 +84,42 @@ class array_expression { // Operator overload for addition. template - array_expression, BinaryAdd, RE> - operator+(RE const& r) const { - return array_expression, - BinaryAdd, RE>(*this, r); + array_expression&, + BinaryAdd, RE> + operator+(const RE& r) const { + return array_expression< + const array_expression&, BinaryAdd, RE>( + *this, r); } // Operator overload for multiplication. template - array_expression, BinaryMul, RE> - operator*(RE const& r) const { - return array_expression, - BinaryMul, RE>(*this, r); + array_expression&, + BinaryMul, RE> + operator*(const RE& r) const { + return array_expression< + const array_expression&, BinaryMul, RE>( + *this, r); } // Operator overload for subtraction. template - array_expression, BinarySub, RE> - operator-(RE const& r) const { - return array_expression, - BinarySub, RE>(*this, r); + array_expression&, + BinarySub, RE> + operator-(const RE& r) const { + return array_expression< + const array_expression&, BinarySub, RE>( + *this, r); } // Operator overload for division. template - array_expression, BinaryDiv, RE> - operator/(RE const& r) const { - return array_expression, - BinaryDiv, RE>(*this, r); + array_expression&, + BinaryDiv, RE> + operator/(const RE& r) const { + return array_expression< + const array_expression&, BinaryDiv, RE>( + *this, r); } }; @@ -119,30 +127,36 @@ class array_expression { // and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> -operator+(T const& l, array_expression const& r) { +array_expression&> +operator+(const T& l, const array_expression& r) { return array_expression>(l, r); + const array_expression&>( + l, r); } // Operator overload for multiplication, when the right operand is an // array_expression and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> -operator*(T const& l, array_expression const& r) { +array_expression&> +operator*(const T& l, const array_expression& r) { return array_expression>(l, r); + const array_expression&>( + l, r); } // Operator overload for subtraction, when the right operand is an // array_expression and the left operand is a scalar. template ::value, int>::type = 0> -array_expression> -operator-(T const& l, array_expression const& r) { +array_expression&> +operator-(const T& l, const array_expression& r) { return array_expression>(l, r); + const array_expression&>( + l, r); } } // namespace clad // NOLINTEND(*-pointer-arithmetic) diff --git a/include/clad/Differentiator/ArrayRef.h b/include/clad/Differentiator/ArrayRef.h index 4d99391a9..efc227522 100644 --- a/include/clad/Differentiator/ArrayRef.h +++ b/include/clad/Differentiator/ArrayRef.h @@ -163,99 +163,107 @@ template class array_ref { /// Multiplies the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryMul, array_ref> -operator*(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression&, BinaryMul, const array_ref&> + operator*(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out " "multiplication assignment"); - return array_expression, BinaryMul, array_ref>(Ar, Br); + return array_expression&, BinaryMul, const array_ref&>( + Ar, Br); } /// Adds the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryAdd, array_ref> -operator+(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression&, BinaryAdd, const array_ref&> + operator+(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out addition " "assignment"); - return array_expression, BinaryAdd, array_ref>(Ar, Br); + return array_expression&, BinaryAdd, const array_ref&>( + Ar, Br); } /// Subtracts the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinarySub, array_ref> -operator-(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression&, BinarySub, const array_ref&> + operator-(const array_ref& Ar, const array_ref& Br) { assert( Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out subtraction " "assignment"); - return array_expression, BinarySub, array_ref>(Ar, Br); + return array_expression&, BinarySub, const array_ref&>( + Ar, Br); } /// Divides the arrays element wise template -CUDA_HOST_DEVICE array_expression, BinaryDiv, array_ref> -operator/(const array_ref& Ar, const array_ref& Br) { +CUDA_HOST_DEVICE + array_expression&, BinaryDiv, const array_ref&> + operator/(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && "Size of both the array_refs must be equal for carrying out division " "assignment"); - return array_expression, BinaryDiv, array_ref>(Ar, Br); + return array_expression&, BinaryDiv, const array_ref&>( + Ar, Br); } /// Multiplies array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(const array_ref& Ar, U a) { - return array_expression, BinaryMul, U>(Ar, a); + return array_expression&, BinaryMul, U>(Ar, a); } /// Multiplies array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryMul, U> +CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(U a, const array_ref& Ar) { - return array_expression, BinaryMul, U>(Ar, a); + return array_expression&, BinaryMul, U>(Ar, a); } /// Divides array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryDiv, U> +CUDA_HOST_DEVICE array_expression&, BinaryDiv, U> operator/(const array_ref& Ar, U a) { - return array_expression, BinaryDiv, U>(Ar, a); + return array_expression&, BinaryDiv, U>(Ar, a); } /// Adds array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(const array_ref& Ar, U a) { - return array_expression, BinaryAdd, U>(Ar, a); + return array_expression&, BinaryAdd, U>(Ar, a); } /// Adds array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinaryAdd, U> +CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(U a, const array_ref& Ar) { - return array_expression, BinaryAdd, U>(Ar, a); + return array_expression&, BinaryAdd, U>(Ar, a); } /// Subtracts array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression, BinarySub, U> +CUDA_HOST_DEVICE array_expression&, BinarySub, U> operator-(const array_ref& Ar, U a) { - return array_expression, BinarySub, U>(Ar, a); + return array_expression&, BinarySub, U>(Ar, a); } /// Subtracts array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression> +CUDA_HOST_DEVICE array_expression&> operator-(U a, const array_ref& Ar) { - return array_expression>(a, Ar); + return array_expression&>(a, Ar); } /// `array_ref` specialisation is created to be used as a placeholder