Skip to content

Commit

Permalink
Use connected lists for tape data structure
Browse files Browse the repository at this point in the history
Instead of a single list, use multiple connected lists to store elements.
This allows the tape to grow dynamically without the need to
relocate elements.
  • Loading branch information
rohanjulka19 committed Sep 12, 2024
1 parent 50a0b5f commit 2495073
Show file tree
Hide file tree
Showing 5 changed files with 125 additions and 3 deletions.
2 changes: 1 addition & 1 deletion benchmark/AlgorithmicComplexity.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ static void BM_NumericGausP(benchmark::State& state) {
double p[] = {1, 2, 3, 4, 5};
double dx[5] = {0, 0, 0, 0, 0};
double dp[5] = {0, 0, 0, 0, 0};
clad::tape<clad::array_ref<double>> results = {};
clad::old_tape<clad::array_ref<double>> results = {};
int dim = 5;
results.emplace_back(dx, dim);
results.emplace_back(dp, dim);
Expand Down
2 changes: 1 addition & 1 deletion demos/CustomTypeNumDiff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ int main() {
// This is how we return the derivative with respect to all arguments.
// The order of being placed in this tape should be the same as the order of
// the arguments being passed to the function.
clad::tape<clad::array_ref<
clad::old_tape<clad::array_ref<
double /*This should be the return value of the function you want to differentiate.*/>>
grad = {};
// Place the l-value reference of the variables in the tape.
Expand Down
3 changes: 3 additions & 0 deletions include/clad/Differentiator/Differentiator.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
#include "DynamicGraph.h"
#include "FunctionTraits.h"
#include "Matrix.h"
#include "NewTape.h"
#include "NumericalDiff.h"
#include "Tape.h"

Expand Down Expand Up @@ -49,6 +50,8 @@ inline CUDA_HOST_DEVICE unsigned int GetLength(const char* code) {
/// Tape type used for storing values in reverse-mode AD inside loops.
template <typename T> using tape = tape_impl<T>;

/// Explicit name for the `tape_impl`-based tape; at present it aliases the
/// same type as `tape`.
template <typename T> using old_tape = tape_impl<T>;

/// Add value to the end of the tape, return the same value.
template <typename T, typename... ArgsT>
CUDA_HOST_DEVICE T push(tape<T>& to, ArgsT... val) {
Expand Down
119 changes: 119 additions & 0 deletions include/clad/Differentiator/NewTape.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
#ifndef CLAD_DIFFERENTIATOR_NEWTAPE_H
#define CLAD_DIFFERENTIATOR_NEWTAPE_H

#include <cassert>
#include <cstddef>
#include <cstdio>
#include <new>
#include <type_traits>
#include <utility>

#include "clad/Differentiator/CladConfig.h"

namespace clad {

/// Number of element slots provided by every Block of a new_tape_impl.
static const int capacity = 32;

/// One fixed-size node of the doubly linked list that backs new_tape_impl.
///
/// A Block only provides uninitialized storage for `capacity` elements plus
/// the links to its neighbours. It never constructs or destroys elements on
/// its own: the owner placement-constructs them into `data` and has to destroy
/// them (for example through destroy()) before deleting the Block.
template <typename T> class Block {
public:
  // The anonymous union keeps `data` usable as an array of T while
  // suppressing the automatic construction and destruction of all `capacity`
  // elements, so only the slots that are actually in use ever hold objects.
  union {
    T data[capacity];
  };
  Block<T>* next;
  Block<T>* prev;
  using pointer = T*;
  using iterator = pointer;

  /// Creates an unlinked block; no element of `data` is constructed.
  CUDA_HOST_DEVICE Block() : next(nullptr), prev(nullptr) {}

  /// Releases nothing: live elements must already have been destroyed by the
  /// owner, because the block does not know how many slots are in use.
  CUDA_HOST_DEVICE ~Block() {}

  Block(const Block& other) = delete;
  Block& operator=(const Block& other) = delete;

  Block(Block&& other) = delete;
  Block& operator=(Block&& other) = delete;

  /// Pointer to the first slot of the block.
  CUDA_HOST_DEVICE iterator block_begin() { return data; }

  /// Pointer one past the last slot of the block.
  CUDA_HOST_DEVICE iterator block_end() { return data + capacity; }

  /// Type of the object an iterator refers to. The reference is stripped on
  /// purpose: references always count as trivially destructible, which would
  /// make the non-trivial destroy() overload unreachable.
  template <typename It>
  using value_type_of =
      typename std::remove_reference<decltype(*std::declval<It>())>::type;

  /// Calls the destructor of every object in [B, E), last to first.
  template <typename It>
  CUDA_HOST_DEVICE static typename std::enable_if<
      !std::is_trivially_destructible<value_type_of<It>>::value>::type
  destroy(It B, It E) {
    using Elem = value_type_of<It>;
    // Walk backwards without ever forming an iterator before B.
    while (E != B) {
      --E;
      E->~Elem();
    }
  }

  /// Trivially destructible elements need no work.
  template <typename It>
  CUDA_HOST_DEVICE static typename std::enable_if<
      std::is_trivially_destructible<value_type_of<It>>::value>::type
  destroy(It /*B*/, It /*E*/) {}
};

/// Stack-like tape that stores its elements in a chain of fixed-size blocks.
///
/// Growing the tape allocates a new Block instead of moving the elements that
/// are already stored, so pointers and references to stored elements stay
/// valid until those elements are popped.
///
/// Invariant: `m_cur_block` is null exactly when the tape is empty; otherwise
/// it is the last block of the chain and holds `m_block_used >= 1` live
/// elements, while every earlier block is completely full.
template <typename T> class new_tape_impl {
  using NonConstT = typename std::remove_cv<T>::type;
  using BlockT = Block<NonConstT>;

  /// Owns a freshly allocated block until it has been linked into the chain,
  /// so the block is released if constructing the first element throws.
  struct PendingBlock {
    BlockT* block;
    CUDA_HOST_DEVICE ~PendingBlock() { delete block; }
  };

  BlockT* m_cur_block = nullptr; ///< Last block of the chain, or null.
  std::size_t m_block_used = 0;  ///< Live elements inside `m_cur_block`.
  std::size_t m_size = 0;        ///< Live elements in the whole tape.

  CUDA_HOST_DEVICE static std::size_t block_capacity() {
    return static_cast<std::size_t>(capacity);
  }

public:
  new_tape_impl() = default;

  /// Destroys all remaining elements and frees every block.
  CUDA_HOST_DEVICE ~new_tape_impl() { destroy(); }

  new_tape_impl(const new_tape_impl& other) = delete;
  new_tape_impl& operator=(const new_tape_impl& other) = delete;

  new_tape_impl(new_tape_impl&& other) = delete;
  new_tape_impl& operator=(new_tape_impl&& other) = delete;

  /// Constructs a new element from `args` at the end of the tape.
  ///
  /// A new block is allocated when the tape is empty or the last block is
  /// full. The tape is only modified after the element has been constructed
  /// successfully.
  template <typename... ArgsT>
  CUDA_HOST_DEVICE void emplace_back(ArgsT&&... args) {
    PendingBlock pending = {nullptr};
    BlockT* target = m_cur_block;
    std::size_t slot = m_block_used;
    if (!target || slot == block_capacity()) {
      pending.block = new BlockT();
      target = pending.block;
      slot = 0;
    }
    ::new (const_cast<void*>(
        static_cast<const volatile void*>(target->data + slot)))
        T(std::forward<ArgsT>(args)...);
    if (pending.block) {
      // Construction succeeded: append the new block to the chain and hand
      // its ownership over from the guard to the tape.
      pending.block->prev = m_cur_block;
      if (m_cur_block)
        m_cur_block->next = pending.block;
      m_cur_block = pending.block;
      pending.block = nullptr;
    }
    m_block_used = slot + 1;
    ++m_size;
  }

  /// Total number of elements currently stored in the tape.
  [[nodiscard]] CUDA_HOST_DEVICE std::size_t size() const { return m_size; }

  /// Pointer to the most recently added element, or nullptr when the tape is
  /// empty. Despite its name this is NOT a past-the-end iterator.
  CUDA_HOST_DEVICE T* end() {
    return m_size ? m_cur_block->data + (m_block_used - 1) : nullptr;
  }

  /// Reference to the most recently added element; the tape must not be
  /// empty.
  CUDA_HOST_DEVICE T& back() {
    assert(m_size && "back() called on an empty tape");
    return *end();
  }

  /// Destroys the most recently added element; the tape must not be empty.
  /// A block is freed as soon as its last element has been removed.
  CUDA_HOST_DEVICE void pop_back() {
    assert(m_size && "pop_back() called on an empty tape");
    NonConstT* last = m_cur_block->data + (m_block_used - 1);
    BlockT::destroy(last, last + 1);
    --m_block_used;
    --m_size;
    if (m_block_used == 0) {
      BlockT* emptied = m_cur_block;
      m_cur_block = emptied->prev;
      if (m_cur_block) {
        m_cur_block->next = nullptr;
        // Every block before the last one is full by construction.
        m_block_used = block_capacity();
      }
      delete emptied;
    }
  }

  /// Destroys all elements and frees all blocks, leaving an empty tape that
  /// can be used again.
  CUDA_HOST_DEVICE void destroy() {
    while (m_cur_block != nullptr) {
      BlockT* block = m_cur_block;
      BlockT::destroy(block->block_begin(),
                      block->block_begin() + m_block_used);
      m_cur_block = block->prev;
      delete block;
      // Only the last block can be partially filled.
      m_block_used = block_capacity();
    }
    m_block_used = 0;
    m_size = 0;
  }
};
} // namespace clad

#endif // CLAD_DIFFERENTIATOR_NEWTAPE_H
2 changes: 1 addition & 1 deletion test/NumericalDiff/PureCentralDiffCalls.C
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ int main() { // expected-no-diagnostics
printf("Result is = %f\n", func1_res); // CHECK-EXEC: Result is = 2.000000

// Gradients, derivative wrt all args
clad::tape<clad::array_ref<double>> grad = {};
clad::old_tape<clad::array_ref<double>> grad = {};
grad.emplace_back(dx, 3);
grad.emplace_back(&dy);
grad.emplace_back(&dz);
Expand Down

0 comments on commit 2495073

Please sign in to comment.