diff --git a/docs/userDocs/source/index.rst b/docs/userDocs/source/index.rst index 8d41a0135..69da583e3 100644 --- a/docs/userDocs/source/index.rst +++ b/docs/userDocs/source/index.rst @@ -90,6 +90,7 @@ The User Guide user/tutorials user/UsingEnzymeWithinClad user/UsingVectorMode.rst + user/UsingImmediateMode user/FAQ user/DevelopersDocumentation user/IntroductionToClangForCladContributors diff --git a/docs/userDocs/source/user/UsingImmediateMode.rst b/docs/userDocs/source/user/UsingImmediateMode.rst new file mode 100644 index 000000000..e350958b2 --- /dev/null +++ b/docs/userDocs/source/user/UsingImmediateMode.rst @@ -0,0 +1,71 @@ +Using Clad-generated derivatives in an immediate context +********************************************************** + +The derivatives that Clad generates are valid C++ code, which could in theory +be executed at compile-time (or in an immediate context, as the C++ standard +calls it). When a function is differentiated, all specifiers, such as +`constexpr` and `consteval`, are kept, but it is important to understand the +interface that Clad provides for those derivatives to the user. + +When Clad differentiates a function (e.g. with `clad::differentiate`), the user +receives a `CladFunction`, which contains a function pointer to the generated +derivative, among many other things. Unfortunately, due to how the C++ standard +is written, handling function pointers in an immediate context is very +restricted, and care needs to be taken not to violate the rules, or the compiler +won't be able to evaluate our `constexpr`/`consteval` functions during +translation. + +Currently, to get a `CladFunction` that is usable in immediate mode, the user has +to pass `clad::immediate_mode` to the differentiation function. This removes +the ability to dump the generated derivative, but it may be possible to add +support for that in the future.
+ +Usage of Clad's immediate mode +================================================ + +The following code snippet shows how one can request Clad to use the immediate +mode for differentiation:: + + #include "clad/Differentiator/Differentiator.h" + + constexpr double fn(double x, double y) { + return (x + y) / 2; + } + + constexpr double fn_test() { + auto dx = clad::differentiate<clad::immediate_mode>(fn, "x"); + + return dx.execute(4, 7); + } + + int main(){ + constexpr double fn_result = fn_test(); + + printf("%.2f\n", fn_result); + } + +It is necessary both to pass the `clad::immediate_mode` option to +`clad::differentiate` and to keep both the call to `clad::differentiate` and +all its `.execute(...)` calls in the same immediate context, as the C++ +standard forbids having a function pointer to an immediate function outside of +an immediate context. (It is not possible to do the differentiation and +executions in `main`, as `dx` would contain such a pointer, but `main` is not and +cannot be immediate.) + +When using `constexpr`, there is no easy way to tell whether the functions are +actually being evaluated during translation, so it is a good idea to use either +`consteval` or an `if consteval` (in C++23 and newer) to check if the immediate +contexts are behaving as expected, or assign the results to a variable marked +`constexpr`, as that would fail if the expression that is being assigned isn't +immediate. + +Use cases supported by Clad's immediate mode +================================================ + +Currently, Clad's immediate mode is primarily meant to be used in the forward +mode (`clad::differentiate`), as internal data structures that Clad needs for +differentiating loops, etc. are not yet usable in an immediate context. + +Both `constexpr` and `consteval` are supported, as Clad doesn't actually rely on +these specific keywords for its support, but instead uses clang's API to +determine if the functions are immediate and should be differentiated earlier.
diff --git a/include/clad/Differentiator/ArrayRef.h b/include/clad/Differentiator/ArrayRef.h index fac7ab320..5fe173ee4 100644 --- a/include/clad/Differentiator/ArrayRef.h +++ b/include/clad/Differentiator/ArrayRef.h @@ -25,18 +25,19 @@ template class array_ref { array_ref() = default; /// Constructor to store the pointer to and size of an array supplied by the /// user - CUDA_HOST_DEVICE array_ref(T* arr, std::size_t size) + constexpr CUDA_HOST_DEVICE array_ref(T* arr, std::size_t size) : m_arr(arr), m_size(size) {} /// Constructor for arrays having size equal to 1 or non pointer types to /// store their addresses - CUDA_HOST_DEVICE array_ref(T* a) : m_arr(a), m_size(1) {} + constexpr CUDA_HOST_DEVICE array_ref(T* a) : m_arr(a), m_size(1) {} /// Constructor for clad::array types - CUDA_HOST_DEVICE array_ref(array& a) : m_arr(a.ptr()), m_size(a.size()) {} + constexpr CUDA_HOST_DEVICE array_ref(array& a) + : m_arr(a.ptr()), m_size(a.size()) {} /// Operator for conversion from array_ref to T*. - CUDA_HOST_DEVICE operator T*() { return m_arr; } + constexpr CUDA_HOST_DEVICE operator T*() { return m_arr; } /// Operator for conversion from array_ref to const T*. 
- CUDA_HOST_DEVICE operator const T*() const { return m_arr; } + constexpr CUDA_HOST_DEVICE operator const T*() const { return m_arr; } template CUDA_HOST_DEVICE array_ref& operator=(const array& a) { @@ -46,25 +47,26 @@ template class array_ref { return *this; } template - CUDA_HOST_DEVICE array_ref& operator=(const array_ref& a) { + constexpr CUDA_HOST_DEVICE array_ref& operator=(const array_ref& a) { m_arr = a.ptr(); m_size = a.size(); return *this; } /// Returns the size of the underlying array - CUDA_HOST_DEVICE std::size_t size() const { return m_size; } - CUDA_HOST_DEVICE PUREFUNC T* ptr() const { return m_arr; } - CUDA_HOST_DEVICE PUREFUNC T*& ptr_ref() { return m_arr; } + constexpr CUDA_HOST_DEVICE std::size_t size() const { return m_size; } + constexpr CUDA_HOST_DEVICE PUREFUNC T* ptr() const { return m_arr; } + constexpr CUDA_HOST_DEVICE PUREFUNC T*& ptr_ref() { return m_arr; } /// Returns an array_ref to a part of the underlying array starting at /// offset and having the specified size - CUDA_HOST_DEVICE array_ref slice(std::size_t offset, std::size_t size) { + constexpr CUDA_HOST_DEVICE array_ref slice(std::size_t offset, + std::size_t size) { assert((offset >= 0) && (offset + size <= m_size) && "Window is outside array. 
Please provide an offset and size " "inside the array size."); return array_ref(&m_arr[offset], size); } /// Returns the reference to the underlying array - CUDA_HOST_DEVICE PUREFUNC T& operator*() { return *m_arr; } + constexpr CUDA_HOST_DEVICE PUREFUNC T& operator*() { return *m_arr; } // Arithmetic overloads /// Divides the arrays element wise @@ -171,7 +173,7 @@ template class array_ref { /// Multiplies the arrays element wise template -CUDA_HOST_DEVICE +constexpr CUDA_HOST_DEVICE array_expression&, BinaryMul, const array_ref&> operator*(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && @@ -183,7 +185,7 @@ CUDA_HOST_DEVICE /// Adds the arrays element wise template -CUDA_HOST_DEVICE +constexpr CUDA_HOST_DEVICE array_expression&, BinaryAdd, const array_ref&> operator+(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && @@ -195,7 +197,7 @@ CUDA_HOST_DEVICE /// Subtracts the arrays element wise template -CUDA_HOST_DEVICE +constexpr CUDA_HOST_DEVICE array_expression&, BinarySub, const array_ref&> operator-(const array_ref& Ar, const array_ref& Br) { assert( @@ -208,7 +210,7 @@ CUDA_HOST_DEVICE /// Divides the arrays element wise template -CUDA_HOST_DEVICE +constexpr CUDA_HOST_DEVICE array_expression&, BinaryDiv, const array_ref&> operator/(const array_ref& Ar, const array_ref& Br) { assert(Ar.size() == Br.size() && @@ -221,7 +223,7 @@ CUDA_HOST_DEVICE /// Multiplies array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinaryMul, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(const array_ref& Ar, U a) { return array_expression&, BinaryMul, U>(Ar, a); } @@ -229,7 +231,7 @@ operator*(const array_ref& Ar, U a) { /// Multiplies array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinaryMul, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinaryMul, U> operator*(U a, const array_ref& Ar) { return 
array_expression&, BinaryMul, U>(Ar, a); } @@ -237,7 +239,7 @@ operator*(U a, const array_ref& Ar) { /// Divides array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinaryDiv, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinaryDiv, U> operator/(const array_ref& Ar, U a) { return array_expression&, BinaryDiv, U>(Ar, a); } @@ -245,7 +247,7 @@ operator/(const array_ref& Ar, U a) { /// Adds array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(const array_ref& Ar, U a) { return array_expression&, BinaryAdd, U>(Ar, a); } @@ -253,7 +255,7 @@ operator+(const array_ref& Ar, U a) { /// Adds array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinaryAdd, U> operator+(U a, const array_ref& Ar) { return array_expression&, BinaryAdd, U>(Ar, a); } @@ -261,7 +263,7 @@ operator+(U a, const array_ref& Ar) { /// Subtracts array_ref by a scalar template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&, BinarySub, U> +constexpr CUDA_HOST_DEVICE array_expression&, BinarySub, U> operator-(const array_ref& Ar, U a) { return array_expression&, BinarySub, U>(Ar, a); } @@ -269,7 +271,7 @@ operator-(const array_ref& Ar, U a) { /// Subtracts array_ref by a scalar (reverse order) template ::value, int>::type = 0> -CUDA_HOST_DEVICE array_expression&> +constexpr CUDA_HOST_DEVICE array_expression&> operator-(U a, const array_ref& Ar) { return array_expression&>(a, Ar); } @@ -303,16 +305,18 @@ operator-(U a, const array_ref& Ar) { template ::value || std::is_same::value>::type> - CUDA_HOST_DEVICE array_ref(T arr, std::size_t size = 1) + constexpr CUDA_HOST_DEVICE array_ref(T arr, std::size_t size = 1) : m_arr((void*)arr), m_size(size) {} template - CUDA_HOST_DEVICE array_ref(const array_ref& other) + 
constexpr CUDA_HOST_DEVICE array_ref(const array_ref& other) : m_arr(other.ptr()), m_size(other.size()) {} - template CUDA_HOST_DEVICE operator array_ref() { + template constexpr CUDA_HOST_DEVICE operator array_ref() { return array_ref((T*)(m_arr), m_size); } - CUDA_HOST_DEVICE void* ptr() const { return m_arr; } - CUDA_HOST_DEVICE std::size_t size() const { return m_size; } + [[nodiscard]] constexpr CUDA_HOST_DEVICE void* ptr() const { return m_arr; } + [[nodiscard]] constexpr CUDA_HOST_DEVICE std::size_t size() const { + return m_size; + } }; // NOLINTEND(*-pointer-arithmetic) } // namespace clad diff --git a/include/clad/Differentiator/CladConfig.h b/include/clad/Differentiator/CladConfig.h index 39d47efd8..a81c0cef7 100644 --- a/include/clad/Differentiator/CladConfig.h +++ b/include/clad/Differentiator/CladConfig.h @@ -34,6 +34,9 @@ enum opts : unsigned { // Specifying whether we only want the diagonal of the hessian. diagonal_only = 1 << (ORDER_BITS + 4), + + // Specify that we need a constexpr-enabled CladFunction + immediate_mode = 1 << (ORDER_BITS + 7), }; // enum opts constexpr unsigned GetDerivativeOrder(const unsigned bitmasked_opts) { diff --git a/include/clad/Differentiator/DiffPlanner.h b/include/clad/Differentiator/DiffPlanner.h index 2116d5ea0..d2b74592b 100644 --- a/include/clad/Differentiator/DiffPlanner.h +++ b/include/clad/Differentiator/DiffPlanner.h @@ -65,6 +65,9 @@ struct DiffRequest { /// A flag to enable TBR analysis during reverse-mode differentiation. bool EnableTBRAnalysis = false; bool EnableVariedAnalysis = false; + /// A flag specifying whether this differentiation is to be used + /// in immediate contexts. 
+ bool ImmediateMode = false; /// Puts the derived function and its code in the diff call void updateCall(clang::FunctionDecl* FD, clang::FunctionDecl* OverloadedFD, clang::Sema& SemaRef); diff --git a/include/clad/Differentiator/Differentiator.h b/include/clad/Differentiator/Differentiator.h index dfb900e1e..d5e51c4c0 100644 --- a/include/clad/Differentiator/Differentiator.h +++ b/include/clad/Differentiator/Differentiator.h @@ -120,15 +120,14 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template ::type = true> - CUDA_HOST_DEVICE return_type_t + constexpr CUDA_HOST_DEVICE return_type_t execute_with_default_args(list, F f, list, CUDA_ARGS CUDA_REST_ARGS Args&&... args) { #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) if (CUDAkernel) { constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest); - std::vector argPtrs; - argPtrs.reserve(totalArgs); - (argPtrs.push_back(static_cast(&args)), ...); + std::array argPtrs = {static_cast(&args)..., + static_cast(nullptr)...}; void* null_param = nullptr; for (size_t i = sizeof...(args); i < totalArgs; ++i) @@ -148,7 +147,7 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template ::type = true> - return_type_t + constexpr return_type_t execute_with_default_args(list, F f, list, CUDA_ARGS CUDA_REST_ARGS Args&&... args) { #if defined(__CUDACC__) && !defined(__CUDA_ARCH__) @@ -167,10 +166,10 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template ::type = true> - CUDA_HOST_DEVICE auto + constexpr CUDA_HOST_DEVICE auto execute_with_default_args(list, ReturnType C::*f, Obj&& obj, - list, Args&&... args) - -> return_type_t { + list, + Args&&... args) -> return_type_t { return (static_cast(obj).*f)((fArgTypes)(args)..., static_cast(nullptr)...); } @@ -178,9 +177,10 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template ::type = true> - auto execute_with_default_args(list, ReturnType C::*f, Obj&& obj, - list, Args&&... 
args) - -> return_type_t { + constexpr auto + execute_with_default_args(list, ReturnType C::*f, Obj&& obj, + list, + Args&&... args) -> return_type_t { return (static_cast(obj).*f)(static_cast(args)...); } @@ -192,7 +192,7 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { /// Default value of `Functor` here is temporary, and should be removed /// once all clad differentiation functions support differentiating functors. template , - bool EnablePadding = false> + bool EnablePadding = false, bool ImmediateMode = false> class CladFunction { public: using CladFunctionType = F; @@ -200,46 +200,79 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { private: CladFunctionType m_Function; - char* m_Code; + const char* m_Code; FunctorType *m_Functor = nullptr; bool m_CUDAkernel = false; public: +#ifdef __cpp_concepts CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, FunctorType* functor = nullptr, bool CUDAkernel = false) + requires(!ImmediateMode) : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { -#ifndef __CLAD_SO_LOADED +#ifndef __CLAD__ static_assert(false, "clad doesn't appear to be loaded; make sure that " "you pass clad.so to clang."); #endif + size_t length = GetLength(code); + char* temp = (char*)malloc(length + 1); + m_Code = temp; + while ((*temp++ = *code++)) + ; + } + constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, + FunctorType* functor = nullptr, + bool CUDAkernel = false) + requires(ImmediateMode) + : m_Function(f), m_Code(""), + m_Functor(functor), m_CUDAkernel(CUDAkernel) { +#ifndef __CLAD__ + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you pass clad.so to clang."); +#endif + } +#else + CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, + FunctorType* functor = nullptr, + bool CUDAkernel = false) + : m_Function(f), m_Functor(functor), m_CUDAkernel(CUDAkernel) { +#ifndef __CLAD__ + static_assert(false, "clad doesn't appear to be loaded; make sure that " + "you 
pass clad.so to clang."); +#endif size_t length = GetLength(code); char* temp = (char*)malloc(length + 1); m_Code = temp; while ((*temp++ = *code++)) ; } +#endif + /// Constructor overload for initializing `m_Functor` when functor /// is passed by reference. - CUDA_HOST_DEVICE - CladFunction(CladFunctionType f, const char* code, FunctorType& functor) + CUDA_HOST_DEVICE CladFunction(CladFunctionType f, const char* code, + FunctorType& functor) : CladFunction(f, code, &functor) {}; + constexpr CUDA_HOST_DEVICE CladFunction(CladFunctionType f, + FunctorType& functor) + : CladFunction(f, &functor) {}; + // Intentionally leak m_Code, otherwise we have to link against c++ runtime, // i.e -lstdc++. //~CladFunction() { /*free(m_Code);*/ } - CladFunctionType getFunctionPtr() { return m_Function; } + constexpr CladFunctionType getFunctionPtr() const { return m_Function; } template typename std::enable_if::value, - return_type_t>::type - execute(Args&&... args) CUDA_HOST_DEVICE { - if (!m_Function) { - printf("CladFunction is invalid\n"); + return_type_t>::type constexpr CUDA_HOST_DEVICE + execute(Args&&... args) const { + if (!m_Function) return static_cast>(return_type_t()); - } if (m_CUDAkernel) { printf("Use execute_kernel() for global CUDA kernels\n"); return static_cast>(return_type_t()); @@ -279,18 +312,18 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { /// subsystem. template typename std::enable_if::value, - return_type_t>::type - execute(Args&&... args) CUDA_HOST_DEVICE { + return_type_t>::type constexpr CUDA_HOST_DEVICE + execute(Args&&... args) const { return static_cast>(0); } /// Return the string representation for the generated derivative. - const char* getCode() const { + constexpr const char* getCode() const { if (m_Code) return m_Code; - else - return ""; + return ""; } + void dump() const { printf("The code is: \n%s\n", getCode()); } @@ -315,8 +348,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { private: /// Helper function for executing non-member derived functions. template - CUDA_HOST_DEVICE return_type_t - execute_helper(Fn f, CUDA_ARGS Args&&... args) { + constexpr CUDA_HOST_DEVICE return_type_t + execute_helper(Fn f, CUDA_ARGS Args&&... args) const { // `static_cast` is required here for perfect forwarding. #if defined(__CUDACC__) if constexpr (sizeof...(Args) >= 2) { @@ -354,27 +387,25 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { /// Helper functions for executing member derived functions. /// If user have passed object explicitly, then this specialization will /// be used and derived function will be called through the passed object. - template < - class ReturnType, - class C, - class Obj, - class = typename std::enable_if< - std::is_same::type, C>::value>::type, - class... Args> - return_type_t - execute_helper(ReturnType C::*f, Obj&& obj, Args&&... args) { + template ::type, C>::value>::type, + class... Args> + constexpr return_type_t + execute_helper(ReturnType C::*f, Obj&& obj, Args&&... args) const { // `static_cast` is required here for perfect forwarding. - return execute_with_default_args( - DropArgs_t{}, f, static_cast(obj), - TakeNFirstArgs_t{}, - static_cast(args)...); + return execute_with_default_args( + DropArgs_t{}, f, + static_cast(obj), + TakeNFirstArgs_t{}, + static_cast(args)...); } /// If user have not passed object explicitly, then this specialization /// will be used and derived function will be called through the object /// saved in `CladFunction`. template - return_type_t execute_helper(ReturnType C::*f, - Args&&... args) { + constexpr return_type_t + execute_helper(ReturnType C::*f, Args&&... args) const { // `static_cast` is required here for perfect forwarding. return execute_with_default_args( DropArgs_t{}, f, *m_Functor, @@ -406,6 +437,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { typename = typename std::enable_if< !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), opts::vector_mode) && + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && !std::is_class>::value>::type> CladFunction> __attribute__(( annotate("D"))) @@ -416,6 +449,23 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { code); } + template , + typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::vector_mode) && + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> + constexpr CladFunction, false, + true> __attribute__((annotate("D"))) + differentiate(F fn, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr)) { + return CladFunction, false, true>( + derivedFn); + } + /// Specialization for differentiating functors. /// The specialization is needed because objects have to be passed /// by reference whereas functions have to be passed by value. @@ -426,13 +476,13 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), opts::vector_mode) && std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("D"))) + constexpr CladFunction< + DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("D"))) differentiate(F&& f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - return CladFunction>(derivedFn, - code, f); + return CladFunction>(derivedFn, + code, f); } /// Generates function which computes derivative of `fn` argument w.r.t @@ -449,8 +499,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), opts::vector_mode) && !std::is_class>::value>::type> - CladFunction, true> __attribute__(( - annotate("D"))) + constexpr CladFunction, + true> __attribute__((annotate("D"))) differentiate(F fn, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { @@ -468,9 +518,11 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template , typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && !std::is_class>::value>::type> - CladFunction, true> __attribute__(( - annotate("G"))) CUDA_HOST_DEVICE + constexpr CladFunction, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE gradient(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "", bool CUDAkernel = false) { @@ -478,6 +530,21 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { derivedFn /* will be replaced by gradient*/, code, nullptr, CUDAkernel); } + template , + typename = typename std::enable_if< + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> + constexpr CladFunction, true, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE + gradient(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr), + bool CUDAkernel = false) { + return CladFunction, true, true>( + derivedFn /* will be replaced by gradient*/, nullptr, CUDAkernel); + } + /// Specialization for differentiating functors. /// The specialization is needed because objects have to be passed /// by reference whereas functions have to be passed by value. @@ -485,13 +552,13 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { typename F, typename DerivedFnType = GradientDerivedFnTraits_t, typename = typename std::enable_if< std::is_class>::value>::type> - CladFunction, true> __attribute__(( - annotate("G"))) CUDA_HOST_DEVICE + constexpr CladFunction, + true> __attribute__((annotate("G"))) CUDA_HOST_DEVICE gradient(F&& f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - return CladFunction, true>( - derivedFn /* will be replaced by gradient*/, code, f); + return CladFunction, true>( + derivedFn /* will be replaced by gradient*/, code, f); } /// Generates function which computes hessian matrix of the given function wrt @@ -504,9 +571,11 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { template , typename = typename std::enable_if< + !clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && !std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("H"))) + constexpr CladFunction< + DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("H"))) hessian(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { @@ -514,6 +583,20 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { derivedFn /* will be replaced by hessian*/, code); } + template , + typename = typename std::enable_if< + clad::HasOption(GetBitmaskedOpts(BitMaskedOpts...), + opts::immediate_mode) && + !std::is_class>::value>::type> + constexpr CladFunction, false, + true> __attribute__((annotate("H"))) + hessian(F f, ArgSpec args = "", + DerivedFnType derivedFn = static_cast(nullptr)) { + return CladFunction, false, true>( + derivedFn /* will be replaced by hessian*/); + } + /// Specialization for differentiating functors. /// The specialization is needed because objects have to be passed /// by reference whereas functions have to be passed by value. @@ -521,13 +604,13 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { typename F, typename DerivedFnType = HessianDerivedFnTraits_t, typename = typename std::enable_if< std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("H"))) + constexpr CladFunction< + DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("H"))) hessian(F&& f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - return CladFunction>( - derivedFn /* will be replaced by hessian*/, code, f); + return CladFunction>( + derivedFn /* will be replaced by hessian*/, code, f); } /// Generates function which computes jacobian matrix of the given function @@ -541,8 +624,8 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... val) { typename F, typename DerivedFnType = JacobianDerivedFnTraits_t, typename = typename std::enable_if< !std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("J"))) + constexpr CladFunction< + DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("J"))) jacobian(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { @@ -557,18 +640,18 @@ CUDA_HOST_DEVICE T push(tape& to, ArgsT... 
val) { typename F, typename DerivedFnType = JacobianDerivedFnTraits_t, typename = typename std::enable_if< std::is_class>::value>::type> - CladFunction> __attribute__(( - annotate("J"))) + constexpr CladFunction< + DerivedFnType, ExtractFunctorTraits_t> __attribute__((annotate("J"))) jacobian(F&& f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { - return CladFunction>( - derivedFn /* will be replaced by Jacobian*/, code, f); + return CladFunction>( + derivedFn /* will be replaced by Jacobian*/, code, f); } template > - CladFunction __attribute__((annotate("E"))) + constexpr CladFunction __attribute__((annotate("E"))) estimate_error(F f, ArgSpec args = "", DerivedFnType derivedFn = static_cast(nullptr), const char* code = "") { diff --git a/include/clad/Differentiator/DynamicGraph.h b/include/clad/Differentiator/DynamicGraph.h index f7b5f61b0..2ef8cf992 100644 --- a/include/clad/Differentiator/DynamicGraph.h +++ b/include/clad/Differentiator/DynamicGraph.h @@ -106,7 +106,8 @@ template class DynamicGraph { bool isProcessingNode() { return m_currentId != -1; } /// Get the nodes in the graph. - const std::vector& getNodes() { return m_nodes; } + const std::vector& getNodes() const { return m_nodes; } + std::vector& getNodes() { return m_nodes; } /// Print the nodes and edges in the graph. 
void print() { diff --git a/include/clad/Differentiator/FunctionTraits.h b/include/clad/Differentiator/FunctionTraits.h index c15eeb270..bc568e51d 100644 --- a/include/clad/Differentiator/FunctionTraits.h +++ b/include/clad/Differentiator/FunctionTraits.h @@ -763,17 +763,15 @@ namespace clad { /// Specialization for free function pointer type template struct ExtractDerivedFnTraitsForwMode< - F*, - typename std::enable_if::value>::type> { + F*, typename std::enable_if::value>::type> { using type = remove_reference_and_pointer_t*; }; /// Specialization for member function pointer type template struct ExtractDerivedFnTraitsForwMode< - F, - typename std::enable_if< - std::is_member_function_pointer::value>::type> { + F, typename std::enable_if< + std::is_member_function_pointer::value>::type> { using type = typename std::decay::type; }; @@ -783,20 +781,19 @@ namespace clad { /// defines member typedef `type` as the type of `NoFunction*`. template struct ExtractDerivedFnTraitsForwMode< - F, - typename std::enable_if< - std::is_class>::value && - has_call_operator::value>::type> { + F, typename std::enable_if< + std::is_class>::value && + has_call_operator::value>::type> { using ClassType = typename std::decay>::type; using type = decltype(&ClassType::operator()); }; + template struct ExtractDerivedFnTraitsForwMode< - F, - typename std::enable_if< - std::is_class>::value && - !has_call_operator::value>::type> { + F, typename std::enable_if< + std::is_class>::value && + !has_call_operator::value>::type> { using type = NoFunction*; }; diff --git a/lib/Differentiator/DiffPlanner.cpp b/lib/Differentiator/DiffPlanner.cpp index b4c7018a2..28485a413 100644 --- a/lib/Differentiator/DiffPlanner.cpp +++ b/lib/Differentiator/DiffPlanner.cpp @@ -190,6 +190,20 @@ namespace clad { FunctionDecl* replacementFD = OverloadedFD ? 
OverloadedFD : FD; + auto codeArgIdx = -1; + auto derivedFnArgIdx = -1; + auto idx = 0; + for (auto* arg : call->arguments()) { + if (auto* default_arg_expr = dyn_cast(arg)) { + std::string argName = default_arg_expr->getParam()->getNameAsString(); + if (argName == "derivedFn") + derivedFnArgIdx = idx; + else if (argName == "code") + codeArgIdx = idx; + } + ++idx; + } + // Index of "CUDAkernel" parameter: int numArgs = static_cast(call->getNumArgs()); if (numArgs > 4) { @@ -204,8 +218,6 @@ namespace clad { call->setArg(kernelArgIdx, cudaKernelFlag); numArgs--; } - auto codeArgIdx = numArgs - 1; - auto derivedFnArgIdx = numArgs - 2; // Create ref to generated FD. DeclRefExpr* DRE = @@ -221,31 +233,35 @@ namespace clad { if (isa(DRE->getDecl())) DRE->setValueKind(CLAD_COMPAT_ExprValueKind_R_or_PR_Value); - // Add the "&" operator - auto newUnOp = - SemaRef.BuildUnaryOp(nullptr, noLoc, UnaryOperatorKind::UO_AddrOf, DRE) - .get(); - call->setArg(derivedFnArgIdx, newUnOp); - - // Update the code parameter. - if (CXXDefaultArgExpr* Arg - = dyn_cast(call->getArg(codeArgIdx))) { - clang::LangOptions LangOpts; - LangOpts.CPlusPlus = true; - clang::PrintingPolicy Policy(LangOpts); - Policy.Bool = true; - - std::string s; - llvm::raw_string_ostream Out(s); - FD->print(Out, Policy); - Out.flush(); - - StringLiteral* SL = utils::CreateStringLiteral(C, Out.str()); - Expr* newArg = - SemaRef.ImpCastExprToType(SL, - Arg->getType(), - CK_ArrayToPointerDecay).get(); - call->setArg(codeArgIdx, newArg); + if (derivedFnArgIdx != -1) { + // Add the "&" operator + auto* newUnOp = + SemaRef + .BuildUnaryOp(nullptr, noLoc, UnaryOperatorKind::UO_AddrOf, DRE) + .get(); + call->setArg(derivedFnArgIdx, newUnOp); + } + + // Update the code parameter if it was found. 
+ if (codeArgIdx != -1) { + if (auto* Arg = dyn_cast(call->getArg(codeArgIdx))) { + clang::LangOptions LangOpts; + LangOpts.CPlusPlus = true; + clang::PrintingPolicy Policy(LangOpts); + Policy.Bool = true; + + std::string s; + llvm::raw_string_ostream Out(s); + FD->print(Out, Policy); + Out.flush(); + + StringLiteral* SL = utils::CreateStringLiteral(C, Out.str()); + Expr* newArg = + SemaRef + .ImpCastExprToType(SL, Arg->getType(), CK_ArrayToPointerDecay) + .get(); + call->setArg(codeArgIdx, newArg); + } } } @@ -732,6 +748,8 @@ namespace clad { request.RequestedDerivativeOrder = derivative_order; if (clad::HasOption(bitmasked_opts_value, clad::opts::use_enzyme)) request.use_enzyme = true; + if (clad::HasOption(bitmasked_opts_value, clad::opts::immediate_mode)) + request.ImmediateMode = true; if (enable_tbr_in_req) { utils::EmitDiag(m_Sema, DiagnosticsEngine::Error, endLoc, "TBR analysis is not meant for forward mode AD."); diff --git a/test/ForwardMode/ConstevalTest.C b/test/ForwardMode/ConstevalTest.C new file mode 100644 index 000000000..95b0fc936 --- /dev/null +++ b/test/ForwardMode/ConstevalTest.C @@ -0,0 +1,53 @@ +// RUN: %cladclang %s -I%S/../../include -std=c++23 -oConstevalTest.out | %filecheck %s +// RUN: ./ConstevalTest.out | %filecheck_exec %s +// UNSUPPORTED: clang-8, clang-9, clang-10, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16 + +#include "clad/Differentiator/Differentiator.h" + +consteval double fn(double x, double y) { + return (x+y)/2; +} + +//CHECK: consteval double fn_darg0(double x, double y) { +//CHECK-NEXT: double _d_x = 1; +//CHECK-NEXT: double _d_y = 0; +//CHECK-NEXT: double _t0 = (x + y); +//CHECK-NEXT: return ((_d_x + _d_y) * 2 - _t0 * 0) / (2 * 2); +//CHECK-NEXT:} + +consteval double mul(double a, double b, double c) { + double val = 99.00; + double result = val * a + 100 - b + c; + return result; +} + +//CHECK: consteval double mul_darg0(double a, double b, double c) { +//CHECK-NEXT: double _d_a = 1; +//CHECK-NEXT: double 
_d_b = 0; +//CHECK-NEXT: double _d_c = 0; +//CHECK-NEXT: double _d_val = 0.; +//CHECK-NEXT: double val = 99.; +//CHECK-NEXT: double _d_result = _d_val * a + val * _d_a + 0 - _d_b + _d_c; +//CHECK-NEXT: double result = val * a + 100 - b + c; +//CHECK-NEXT: return _d_result; +//CHECK-NEXT:} + +consteval double fn_test() { + auto dx = clad::differentiate(fn, "x"); + + return dx.execute(4, 7); +} + +consteval double mul_test() { + auto dx = clad::differentiate(mul, "a"); + + return dx.execute(5, 6, 10); +} + +int main() { + constexpr double fn_result = fn_test(); + printf("%.2f\n", fn_result); // CHECK-EXEC: 0.50 + + constexpr double mul_result = mul_test(); + printf("%.2f\n", mul_result); // CHECK-EXEC: 99.00 +} diff --git a/test/ForwardMode/ConstexprTest.C b/test/ForwardMode/ConstexprTest.C new file mode 100644 index 000000000..cdf3dcd67 --- /dev/null +++ b/test/ForwardMode/ConstexprTest.C @@ -0,0 +1,63 @@ +// RUN: %cladclang %s -I%S/../../include -std=c++23 -oConstexprTest.out | %filecheck %s +// RUN: ./ConstexprTest.out | %filecheck_exec %s +// UNSUPPORTED: clang-8, clang-9, clang-10, clang-11, clang-12, clang-13, clang-14, clang-15, clang-16 + +#include "clad/Differentiator/Differentiator.h" + +constexpr double fn(double x, double y) { + return (x + y) / 2; +} + +//CHECK: constexpr double fn_darg0(double x, double y) { +//CHECK-NEXT: double _d_x = 1; +//CHECK-NEXT: double _d_y = 0; +//CHECK-NEXT: double _t0 = (x + y); +//CHECK-NEXT: return ((_d_x + _d_y) * 2 - _t0 * 0) / (2 * 2); +//CHECK-NEXT:} + +constexpr double mul(double a, double b, double c) { + double val = 99.00; + double result = val * a + 100 - b + c; + return result; +} + +//CHECK: constexpr double mul_darg0(double a, double b, double c) { +//CHECK-NEXT: double _d_a = 1; +//CHECK-NEXT: double _d_b = 0; +//CHECK-NEXT: double _d_c = 0; +//CHECK-NEXT: double _d_val = 0.; +//CHECK-NEXT: double val = 99.; +//CHECK-NEXT: double _d_result = _d_val * a + val * _d_a + 0 - _d_b + _d_c; +//CHECK-NEXT: double 
result = val * a + 100 - b + c; +//CHECK-NEXT: return _d_result; +//CHECK-NEXT:} + +constexpr double fn_test() { + if consteval { + auto dx = clad::differentiate(fn, "x"); + + return dx.execute(4, 7); + } else { + assert(false && "fn non-immediate context"); + return -1.; + } +} + +constexpr double mul_test() { + if consteval { + auto dx = clad::differentiate(mul, "a"); + + return dx.execute(5, 6, 10); + } else { + assert(false && "mul non-immediate context"); + return -1.; + } +} + +int main() { + constexpr double fn_result = fn_test(); + printf("%.2f\n", fn_result); // CHECK-EXEC: 0.50 + + constexpr double mul_result = mul_test(); + printf("%.2f\n", mul_result); // CHECK-EXEC: 99.0 +} diff --git a/test/ForwardMode/NotEnoughArgError.C b/test/ForwardMode/NotEnoughArgError.C index df49c5b14..1660a5056 100644 --- a/test/ForwardMode/NotEnoughArgError.C +++ b/test/ForwardMode/NotEnoughArgError.C @@ -13,8 +13,8 @@ int main () { // expected-error@clad/Differentiator/Differentiator.h:* {{too few arguments to function call, expected 2, have 1}} // expected-note@clad/Differentiator/Differentiator.h:* {{in instantiation of function template specialization 'clad::execute_with_default_args' requested here}} #if __clang_major__ < 16 - // expected-note@clad/Differentiator/Differentiator.h:* {{in instantiation of function template specialization 'clad::CladFunction::execute_helper' requested here}} - // expected-note@NotEnoughArgError.C:12 {{in instantiation of function template specialization 'clad::CladFunction::execute' requested here}} + // expected-note@clad/Differentiator/Differentiator.h:* {{in instantiation of function template specialization 'clad::CladFunction::execute_helper' requested here}} + // expected-note@NotEnoughArgError.C:12 {{in instantiation of function template specialization 'clad::CladFunction::execute' requested here}} #else // expected-note@clad/Differentiator/Differentiator.h:* {{in instantiation of function template specialization 
'clad::CladFunction::execute_helper' requested here}} // expected-note@NotEnoughArgError.C:12 {{in instantiation of function template specialization 'clad::CladFunction::execute' requested here}} diff --git a/test/ForwardMode/constexprTest.C b/test/ForwardMode/constexprTest.C deleted file mode 100644 index 5ead5b1b0..000000000 --- a/test/ForwardMode/constexprTest.C +++ /dev/null @@ -1,43 +0,0 @@ -// RUN: %cladclang %s -I%S/../../include -oconstexprTest.out | %filecheck %s -// RUN: ./constexprTest.out | %filecheck_exec %s - -#include "clad/Differentiator/Differentiator.h" - -#include "../TestUtils.h" - - -constexpr double fn(double a, double b) { - return (a+b)/2; -} - -//CHECK: constexpr double fn_darg0(double a, double b) { -//CHECK-NEXT: double _d_a = 1; -//CHECK-NEXT: double _d_b = 0; -//CHECK-NEXT: double _t0 = (a + b); -//CHECK-NEXT: return ((_d_a + _d_b) * 2 - _t0 * 0) / (2 * 2); -//CHECK-NEXT:} - -constexpr double mul(double a, double b, double c) { - double val = 99.00; - double result = val * a + 100 - b + c; - return result; -} - -//CHECK: constexpr double mul_darg0(double a, double b, double c) { -//CHECK-NEXT: double _d_a = 1; -//CHECK-NEXT: double _d_b = 0; -//CHECK-NEXT: double _d_c = 0; -//CHECK-NEXT: double _d_val = 0.; -//CHECK-NEXT: double val = 99.; -//CHECK-NEXT: double _d_result = _d_val * a + val * _d_a + 0 - _d_b + _d_c; -//CHECK-NEXT: double result = val * a + 100 - b + c; -//CHECK-NEXT: return _d_result; -//CHECK-NEXT:} - -int main() { - INIT_DIFFERENTIATE(fn,"a"); - INIT_DIFFERENTIATE(mul, "a"); - - TEST_DIFFERENTIATE(fn, 4, 7); // CHECK-EXEC: {0.50} - TEST_DIFFERENTIATE(mul, 5, 6, 10); // CHECK-EXEC: {99.00} -} diff --git a/tools/ClangPlugin.cpp b/tools/ClangPlugin.cpp index d228a2dc3..c868336dd 100644 --- a/tools/ClangPlugin.cpp +++ b/tools/ClangPlugin.cpp @@ -91,10 +91,10 @@ namespace clad { } #endif // CLANG_VERSION_MAJOR > 8 - // Add define for __CLAD_SO_LOADED, so that CladFunction::CladFunction() + // Add define for __CLAD__, so 
that CladFunction::CladFunction() // doesn't throw an error. auto predefines = m_CI.getPreprocessor().getPredefines(); - predefines.append("#define __CLAD_SO_LOADED 1\n"); + predefines.append("#define __CLAD__ 1\n"); m_CI.getPreprocessor().setPredefines(predefines); } @@ -137,6 +137,17 @@ namespace clad { SetRequestOptions(opts); DiffCollector collector(DGR, CladEnabledRange, m_DiffRequestGraph, S, opts); + +#if CLANG_VERSION_MAJOR > 16 + for (DiffRequest& request : m_DiffRequestGraph.getNodes()) { + if (request.ImmediateMode && request.Function->isConstexpr()) { + m_DiffRequestGraph.setCurrentProcessingNode(request); + ProcessDiffRequest(request); + m_DiffRequestGraph.markCurrentNodeProcessed(); + } + } +#endif + // We could not delay the processing of derivatives, inform act as if each // call is final. That would still have vgvassilev/clad#248 unresolved. if (!m_Multiplexer)