
Merge branch 'master' into 710-fix-loop-continue
vgvassilev authored Sep 23, 2024
2 parents 75bad98 + e2b8e35 commit 0622a9b
Showing 126 changed files with 1,459 additions and 545 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/ci.yml
@@ -238,11 +238,11 @@ jobs:
             clang-runtime: '10'
             cuda: true

-          - name: selfh-ubu20-clang9-runtime10-cuda
-            os: self-hosted #ubuntu-20.04
+          - name: selfh-ubu22-clang16-runtime18-cuda
+            os: self-hosted #ubuntu-22.04
             runs-on: cuda
-            compiler: clang-9
-            clang-runtime: '10'
+            compiler: clang-16
+            clang-runtime: '18'
             cuda: true

           - name: ubu20-clang9-runtime15
@@ -739,7 +739,7 @@ jobs:
           echo "PATH_TO_LLVM_BUILD=$env:PATH_TO_LLVM_BUILD"
           echo "PATH_TO_LLVM_BUILD=$env:PATH_TO_LLVM_BUILD" >> $env:GITHUB_ENV
     - name: Setup CUDA 8 on Linux
-      if: ${{ matrix.cuda == true }}
+      if: ${{ matrix.cuda == true && !matrix.os == 'self-hosted'}}
       run: |
         wget --no-verbose https://developer.nvidia.com/compute/cuda/8.0/Prod2/local_installers/cuda_8.0.61_375.26_linux-run
         wget --no-verbose https://developer.nvidia.com/compute/cuda/8.0/Prod2/patches/2/cuda_8.0.61.2_linux-run
27 changes: 27 additions & 0 deletions include/clad/Differentiator/BuiltinDerivatives.h
@@ -30,6 +30,11 @@ template <typename T, typename U> struct ValueAndPushforward {
   }
 };

+template <typename T, typename U>
+ValueAndPushforward<T, U> make_value_and_pushforward(T value, U pushforward) {
+  return {value, pushforward};
+}
+
 template <typename T, typename U> struct ValueAndAdjoint {
   T value;
   U adjoint;
@@ -178,6 +183,25 @@ CUDA_HOST_DEVICE ValueAndPushforward<T, T> floor_pushforward(T x, T /*d_x*/) {
   return {::std::floor(x), (T)0};
 }

+template <typename T>
+CUDA_HOST_DEVICE ValueAndPushforward<T, T> atan2_pushforward(T y, T x, T d_y,
+                                                             T d_x) {
+  return {::std::atan2(y, x),
+          -(y / ((x * x) + (y * y))) * d_x + x / ((x * x) + (y * y)) * d_y};
+}
+
+template <typename T, typename U>
+CUDA_HOST_DEVICE void atan2_pullback(T y, T x, U d_z, T* d_y, T* d_x) {
+  *d_y += x / ((x * x) + (y * y)) * d_z;
+
+  *d_x += -(y / ((x * x) + (y * y))) * d_z;
+}
+
+template <typename T>
+CUDA_HOST_DEVICE ValueAndPushforward<T, T> acos_pushforward(T x, T d_x) {
+  return {::std::acos(x), ((-1) / (::std::sqrt(1 - x * x))) * d_x};
+}
+
 template <typename T>
 CUDA_HOST_DEVICE ValueAndPushforward<T, T> ceil_pushforward(T x, T /*d_x*/) {
   return {::std::ceil(x), (T)0};
@@ -316,6 +340,9 @@ inline void free_pushforward(void* ptr, void* d_ptr) {
 // These are required because C variants of mathematical functions are
 // defined in global namespace.
 using std::abs_pushforward;
+using std::acos_pushforward;
+using std::atan2_pullback;
+using std::atan2_pushforward;
 using std::ceil_pushforward;
 using std::cos_pushforward;
 using std::exp_pushforward;
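
Note, not part of the commit: the new overloads encode the partial derivatives d/dy atan2(y, x) = x/(x^2 + y^2), d/dx atan2(y, x) = -y/(x^2 + y^2), and d/dx acos(x) = -1/sqrt(1 - x^2). The pushforward contracts the partials with the input tangents d_y and d_x, while the pullback scales them by the output adjoint d_z and accumulates into *d_y and *d_x. Below is a minimal standalone sanity check of the atan2 formulas against central finite differences (plain C++, no clad headers; all names are local to this sketch):

    #include <cassert>
    #include <cmath>
    #include <cstdio>

    int main() {
      const double y = 0.3, x = 1.7, h = 1e-6;
      // Analytic partials, exactly as written in atan2_pushforward/atan2_pullback.
      const double dy_analytic = x / ((x * x) + (y * y));
      const double dx_analytic = -(y / ((x * x) + (y * y)));
      // Central finite differences of std::atan2 for comparison.
      const double dy_numeric =
          (std::atan2(y + h, x) - std::atan2(y - h, x)) / (2 * h);
      const double dx_numeric =
          (std::atan2(y, x + h) - std::atan2(y, x - h)) / (2 * h);
      assert(std::abs(dy_analytic - dy_numeric) < 1e-8);
      assert(std::abs(dx_analytic - dx_numeric) < 1e-8);
      std::printf("d/dy = %.12f, d/dx = %.12f\n", dy_analytic, dx_analytic);
      return 0;
    }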
18 changes: 14 additions & 4 deletions include/clad/Differentiator/Differentiator.h
@@ -130,8 +130,18 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
                                      CUDA_ARGS CUDA_REST_ARGS Args&&... args) {
 #if defined(__CUDACC__) && !defined(__CUDA_ARCH__)
   if (CUDAkernel) {
-    void* argPtrs[] = {(void*)&args..., (void*)static_cast<Rest>(nullptr)...};
-    cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream);
+    constexpr size_t totalArgs = sizeof...(args) + sizeof...(Rest);
+    std::vector<void*> argPtrs;
+    argPtrs.reserve(totalArgs);
+    (argPtrs.push_back(static_cast<void*>(&args)), ...);
+
+    void* null_param = nullptr;
+    for (size_t i = sizeof...(args); i < totalArgs; ++i)
+      argPtrs[i] = &null_param;
+
+    cudaLaunchKernel((void*)f, grid, block, argPtrs.data(), shared_mem,
+                     stream);
+    return return_type_t<F>();
   } else {
     return f(static_cast<Args>(args)..., static_cast<Rest>(nullptr)...);
   }
@@ -150,9 +160,9 @@ CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
   if (CUDAkernel) {
     void* argPtrs[] = {(void*)&args...};
     cudaLaunchKernel((void*)f, grid, block, argPtrs, shared_mem, stream);
-  } else {
-    return f(static_cast<Args>(args)...);
+    return return_type_t<F>();
   }
+  return f(static_cast<Args>(args)...);
 #else
   return f(static_cast<Args>(args)...);
 #endif
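
Aside, not part of the commit: cudaLaunchKernel expects a contiguous array of void*, one slot per kernel parameter, each pointing at the caller's copy of that argument; that invariant is what both the old stack array and the new std::vector packing maintain. A minimal standalone sketch of the same packing for a fixed kernel signature (the axpy kernel and launch_axpy wrapper are hypothetical, for illustration only):

    #include <cuda_runtime.h>

    // Hypothetical kernel used only to illustrate the argument packing.
    __global__ void axpy(float a, const float* x, float* y) {
      unsigned i = blockIdx.x * blockDim.x + threadIdx.x;
      y[i] += a * x[i];
    }

    cudaError_t launch_axpy(float a, const float* x, float* y, dim3 grid,
                            dim3 block, cudaStream_t stream) {
      // One void* per kernel parameter; cudaLaunchKernel reads each argument's
      // value through its pointer, so the variables must outlive the call.
      void* argPtrs[] = {(void*)&a, (void*)&x, (void*)&y};
      return cudaLaunchKernel((void*)axpy, grid, block, argPtrs,
                              /*sharedMem=*/0, stream);
    }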
[Diffs for the remaining 123 changed files not shown.]
