Skip to content

Commit

Permalink
Merge branch 'vgvassilev:master' into void-function-support
Browse files Browse the repository at this point in the history
  • Loading branch information
kchristin22 authored Mar 14, 2024
2 parents 24e55e4 + d2df900 commit 80bbdae
Show file tree
Hide file tree
Showing 37 changed files with 1,026 additions and 1,107 deletions.
94 changes: 62 additions & 32 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,56 +24,81 @@ jobs:
matrix:

include:
- name: osx-clang-runtime11
os: macos-11
- name: osx14-arm-clang-runtime13
os: macos-14
compiler: clang
clang-runtime: '11'
clang-runtime: '13'

- name: osx14-arm-clang-runtime14
os: macos-14
compiler: clang
clang-runtime: '14'

- name: osx14-arm-clang-runtime15
os: macos-14
compiler: clang
clang-runtime: '15'

- name: osx14-arm-clang-runtime16
os: macos-14
compiler: clang
clang-runtime: '16'

- name: osx-clang-runtime12
os: macos-latest
- name: osx14-arm-clang-runtime17
os: macos-14
compiler: clang
clang-runtime: '17'

- name: osx13-x86-clang-runtime12
os: macos-13
compiler: clang
clang-runtime: '12'

- name: osx-clang-runtime13
os: macos-latest
- name: osx13-x86-clang-runtime13
os: macos-13
compiler: clang
clang-runtime: '13'

- name: osx-clang-runtime14
os: macos-latest
- name: osx13-x86-clang-runtime14
os: macos-13
compiler: clang
clang-runtime: '14'

- name: osx-clang-runtime15
os: macos-latest
- name: osx13-x86-clang-runtime15
os: macos-13
compiler: clang
clang-runtime: '15'

- name: osx-clang-runtime16
os: macos-latest
- name: osx13-x86-clang-runtime16
os: macos-13
compiler: clang
clang-runtime: '16'

- name: osx-clang-runtime17
os: macos-latest
- name: osx13-x86-clang-runtime17
os: macos-13
compiler: clang
clang-runtime: '17'

- name: win-msvc-runtime14
os: windows-latest
- name: win2022-msvc-runtime14
os: windows-2022
compiler: msvc
clang-runtime: '14'

- name: win-msvc-runtime15
os: windows-latest
- name: win2022-msvc-runtime15
os: windows-2022
compiler: msvc
clang-runtime: '15'

- name: win-msvc-runtime16
os: windows-latest
- name: win2022-msvc-runtime16
os: windows-2022
compiler: msvc
clang-runtime: '16'

- name: win2022-msvc-runtime17
os: windows-2022
compiler: msvc
clang-runtime: '17'

- name: ubu22-clang15-runtime16-debug
os: ubuntu-22.04
compiler: clang-15
Expand Down Expand Up @@ -559,7 +584,6 @@ jobs:
# Update openssl on osx because the current one is deprecated by python.
curl -L https://bootstrap.pypa.io/get-pip.py | sudo python3
echo "/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
PATH_TO_LLVM_BUILD=/usr/local/opt/llvm@${{ matrix.clang-runtime }}/
# For now, package llvm@18 is unsupported on brew; llvm@11 and older are deprecated or deleted.
# Install llvm from github releases.
Expand Down Expand Up @@ -591,16 +615,22 @@ jobs:
# allowing clang to work with system's SDK.
sudo rm -fr /usr/local/opt/llvm*/include/c++
fi
PATH_TO_LLVM_BUILD=$(brew --prefix llvm@${{ matrix.clang-runtime }})
pip3 install lit # LLVM lit is not part of the llvm releases...
# We need headers in correct place
for file in $(xcrun --show-sdk-path)/usr/include/*
do
if [ ! -f /usr/local/include/$(basename $file) ]; then
ln -s $file /usr/local/include/$(basename $file)
fi
done
#FIXME: ln solution fails with error message No such file or directory on osx arm,
#Copying over files as a temporary solution
sudo cp -r -n $(xcrun --show-sdk-path)/usr/include/ /usr/local/include/
#for file in $(xcrun --show-sdk-path)/usr/include/*
#do
# if [ ! -f /usr/local/include/$(basename $file) ]; then
# echo ${file}
# ln -s ${file} /usr/local/include/$(basename $file)
# fi
#done
# We need PATH_TO_LLVM_BUILD later
echo "PATH_TO_LLVM_BUILD=$PATH_TO_LLVM_BUILD" >> $GITHUB_ENV
Expand Down Expand Up @@ -740,10 +770,10 @@ jobs:
if: ${{ runner.os != 'windows' }}
run: |
mkdir obj && cd obj
cmake -DClang_DIR="$PATH_TO_LLVM_BUILD" \
-DLLVM_DIR="$PATH_TO_LLVM_BUILD" \
cmake -DClang_DIR=${{ env.PATH_TO_LLVM_BUILD }} \
-DLLVM_DIR=${{ env.PATH_TO_LLVM_BUILD }} \
-DCMAKE_BUILD_TYPE=$([[ -z "$BUILD_TYPE" ]] && echo RelWithDebInfo || echo $BUILD_TYPE) \
-DCLAD_CODE_COVERAGE=${CLAD_CODE_COVERAGE} \
-DCLAD_CODE_COVERAGE=${{ env.CLAD_CODE_COVERAGE }} \
-DLLVM_EXTERNAL_LIT="`which lit`" \
-DLLVM_ENABLE_WERROR=On \
$GITHUB_WORKSPACE \
Expand Down
17 changes: 17 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
cmake_minimum_required(VERSION 3.7.0)

enable_language(CXX)
set(CMAKE_CXX_EXTENSIONS NO)

include(GNUInstallDirs)

# MUST be done before call to clad project
# Collect every cache variable that was given as -D<var>=<value> on the command
# line. CMake tags such cache entries with the help string compared against
# below, which is what distinguishes them from entries created by the project.
get_cmake_property(_cache_vars CACHE_VARIABLES)
foreach(_cache_var ${_cache_vars})
  get_property(_helpstring CACHE ${_cache_var} PROPERTY HELPSTRING)
  if(_helpstring STREQUAL
      "No help, variable specified on the command line.")
    # Re-quote the value so arguments containing spaces survive a later replay.
    set(CMAKE_ARGS "${CMAKE_ARGS} -D${_cache_var}=\"${${_cache_var}}\"")
  endif()
endforeach()

# Generate CMakeArgs.txt file with source, build dir and command line args.
# file(WRITE) is used instead of write_file(), which is deprecated since
# CMake 3.0; the explicit "\n" keeps the trailing newline write_file() emitted.
file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/CMakeArgs.txt"
  "-S${CMAKE_SOURCE_DIR} -B${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_ARGS}\n")

if(POLICY CMP0075)
cmake_policy(SET CMP0075 NEW)
endif()
Expand Down
9 changes: 5 additions & 4 deletions demos/Arrays.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,13 +76,13 @@ int main() {
// the indexes of the array by using the format arr[0:<last index of arr>]
auto hessian_all = clad::hessian(weighted_avg, "arr[0:2], weights[0:2]");
// Generates the Hessian matrix for weighted_avg w.r.t. arr.
auto hessian_arr = clad::hessian(weighted_avg, "arr[0:2]");
// auto hessian_arr = clad::hessian(weighted_avg, "arr[0:2]");

double matrix_all[36] = {0};
double matrix_arr[9] = {0};
// double matrix_arr[9] = {0};

clad::array_ref<double> matrix_all_ref(matrix_all, 36);
clad::array_ref<double> matrix_arr_ref(matrix_arr, 9);
// clad::array_ref<double> matrix_arr_ref(matrix_arr, 9);

hessian_all.execute(arr, weights, matrix_all_ref);
printf("Hessian Mode w.r.t. to all:\n matrix =\n"
Expand All @@ -102,12 +102,13 @@ int main() {
matrix_all[28], matrix_all[29], matrix_all[30], matrix_all[31],
matrix_all[32], matrix_all[33], matrix_all[34], matrix_all[35]);

hessian_arr.execute(arr, weights, matrix_arr_ref);
/*hessian_arr.execute(arr, weights, matrix_arr_ref);
printf("Hessian Mode w.r.t. to arr:\n matrix =\n"
" {%.2g, %.2g, %.2g}\n"
" {%.2g, %.2g, %.2g}\n"
" {%.2g, %.2g, %.2g}\n",
matrix_arr[0], matrix_arr[1], matrix_arr[2], matrix_arr[3],
matrix_arr[4], matrix_arr[5], matrix_arr[6], matrix_arr[7],
matrix_arr[8]);
*/
}
Binary file added docs/userDocs/source/_static/vector-mode.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
46 changes: 46 additions & 0 deletions docs/userDocs/source/user/CoreConcepts.rst
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,52 @@ Substituting `s = z` we will get `sz` = 1
Thus we don't need to run the program twice for each input. However, as mentioned
above the only drawback is we need to re-run the program for a different output.


Vectorized Forward Mode Automatic Differentiation
===================================================

Vectorized Forward Mode Automatic Differentiation is a computational technique
that combines two powerful concepts: vectorization and forward mode automatic
differentiation. This approach is used to efficiently compute derivatives of
functions with respect to multiple input variables by taking advantage of both
parallel processing capabilities and the structure of the computation graph.

Working
--------

To compute the gradient of a function with an n-dimensional input, forward mode
requires n forward passes.

We can do this in a single forward pass: instead of accumulating a single
scalar derivative with respect to one particular input node, we maintain a
gradient vector at each node. The strategy is otherwise very similar to ordinary
forward mode, which, for a function with three scalar inputs, requires three
passes to compute the partial derivatives w.r.t. each of the inputs.

At each node, we maintain a vector storing the complete gradient of that node's
output w.r.t. all the input parameters. All operations are now vector operations;
for example, applying the sum rule will result in the addition of vectors.
Initialization of the input nodes is done using one-hot vectors.

.. figure:: ../_static/vector-mode.png
:width: 600
:align: center
:alt: Vectorized Forward Mode Automatic Differentiation

Vectorized Forward Mode Automatic Differentiation to compute the gradient.

Benefits
----------

Each node now requires computing a vector, which takes more memory and more
time, in part because of the additional memory allocation calls. This must be
offset by some improvement in computing efficiency.

This can prevent the recomputation of some expensive functions, which would have
been executed repeatedly in a non-vectorized version due to multiple forward
passes. This approach can also take advantage of the hardware's vectorization and
parallelization capabilities using SIMD techniques.

Derived Function Types and Derivative Types
=============================================

Expand Down
15 changes: 10 additions & 5 deletions include/clad/Differentiator/CladConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,23 +21,28 @@ enum order {
third = 3,
}; // enum order

enum opts {
/// Option flags that are combined into one unsigned bitmask (see HasOption,
/// which tests such a mask for a flag). Every enumerator is a single bit at
/// position ORDER_BITS or higher.
// NOTE(review): presumably the bits below ORDER_BITS hold the derivative
// order read by GetDerivativeOrder via ORDER_MASK - confirm against the
// definitions of ORDER_BITS / ORDER_MASK.
enum opts : unsigned {
use_enzyme = 1 << ORDER_BITS,
vector_mode = 1 << (ORDER_BITS + 1),

// Storing two bits for tbr analysis.
// 00 - default, 01 - enable, 10 - disable, 11 - not used / invalid
enable_tbr = 1 << (ORDER_BITS + 2),
disable_tbr = 1 << (ORDER_BITS + 3),
}; // enum opts

constexpr unsigned GetDerivativeOrder(unsigned const bitmasked_opts) {
/// Returns the part of `bitmasked_opts` selected by ORDER_MASK, i.e. the
/// derivative order encoded in the option bitmask.
constexpr unsigned GetDerivativeOrder(const unsigned bitmasked_opts) {
  return ORDER_MASK & bitmasked_opts;
}

constexpr bool HasOption(unsigned const bitmasked_opts, unsigned const option) {
/// Tells whether every bit set in `option` is also set in `bitmasked_opts`.
///
/// \param bitmasked_opts the combined option bitmask to inspect.
/// \param option the bit (or group of bits) to look for.
/// \returns true when no bit of `option` is missing from `bitmasked_opts`;
/// an `option` of 0 is therefore always reported as present.
constexpr bool HasOption(const unsigned bitmasked_opts, const unsigned option) {
  // A bit of `option` that survives masking with the complement is one that
  // `bitmasked_opts` lacks.
  return (option & ~bitmasked_opts) == 0U;
}

/// Zero-argument case: with no options given, the bitmask is empty.
constexpr unsigned GetBitmaskedOpts() {
  return 0U;
}
constexpr unsigned GetBitmaskedOpts(unsigned const first) { return first; }
/// Single-argument case: the bitmask is exactly the one option passed in.
constexpr unsigned GetBitmaskedOpts(const unsigned first) {
  return first;
}
template <typename... Opts>
constexpr unsigned GetBitmaskedOpts(unsigned const first, Opts... opts) {
constexpr unsigned GetBitmaskedOpts(const unsigned first, Opts... opts) {
return first | GetBitmaskedOpts(opts...);
}

Expand Down
10 changes: 9 additions & 1 deletion include/clad/Differentiator/DiffPlanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,12 @@ namespace clad {
using DiffSchedule = llvm::SmallVector<DiffRequest, 16>;
using DiffInterval = std::vector<clang::SourceRange>;

/// Settings object that DiffCollector receives by reference on construction.
struct RequestOptions {
  /// Default choice for enabling/disabling TBR analysis during reverse-mode
  /// differentiation; it starts out disabled.
  bool EnableTBRAnalysis{false};
};

class DiffCollector: public clang::RecursiveASTVisitor<DiffCollector> {
/// The source interval where clad was activated.
///
Expand All @@ -101,9 +107,11 @@ namespace clad {
const clang::FunctionDecl* m_TopMostFD = nullptr;
clang::Sema& m_Sema;

RequestOptions& m_Options;

public:
DiffCollector(clang::DeclGroupRef DGR, DiffInterval& Interval,
DiffSchedule& plans, clang::Sema& S);
DiffSchedule& plans, clang::Sema& S, RequestOptions& opts);
bool VisitCallExpr(clang::CallExpr* E);

private:
Expand Down
Loading

0 comments on commit 80bbdae

Please sign in to comment.