Merge branch 'vgvassilev:master' into void-function-support

vgvassilev · Mar 14, 2024 · 80bbdae · 80bbdae
2 parents 24e55e4 + d2df900
commit 80bbdae
Show file tree

Hide file tree

Showing 37 changed files with 1,026 additions and 1,107 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -24,56 +24,81 @@ jobs:
       matrix:
 
         include:
-          - name: osx-clang-runtime11
-            os: macos-11
+          - name: osx14-arm-clang-runtime13
+            os: macos-14
             compiler: clang
-            clang-runtime: '11'
+            clang-runtime: '13'
+
+          - name: osx14-arm-clang-runtime14
+            os: macos-14
+            compiler: clang
+            clang-runtime: '14'
+
+          - name: osx14-arm-clang-runtime15
+            os: macos-14
+            compiler: clang
+            clang-runtime: '15'
+
+          - name: osx14-arm-clang-runtime16
+            os: macos-14
+            compiler: clang
+            clang-runtime: '16'
 
-          - name: osx-clang-runtime12
-            os: macos-latest
+          - name: osx14-arm-clang-runtime17
+            os: macos-14
+            compiler: clang
+            clang-runtime: '17'
+
+          - name: osx13-x86-clang-runtime12
+            os: macos-13
             compiler: clang
             clang-runtime: '12'
 
-          - name: osx-clang-runtime13
-            os: macos-latest
+          - name: osx13-x86-clang-runtime13
+            os: macos-13
             compiler: clang
             clang-runtime: '13'
 
-          - name: osx-clang-runtime14
-            os: macos-latest
+          - name: osx13-x86-clang-runtime14
+            os: macos-13
             compiler: clang
             clang-runtime: '14'
 
-          - name: osx-clang-runtime15
-            os: macos-latest
+          - name: osx13-x86-clang-runtime15
+            os: macos-13
             compiler: clang
             clang-runtime: '15'
 
-          - name: osx-clang-runtime16
-            os: macos-latest
+          - name: osx13-x86-clang-runtime16
+            os: macos-13
             compiler: clang
             clang-runtime: '16'
 
-          - name: osx-clang-runtime17
-            os: macos-latest
+          - name: osx13-x86-clang-runtime17
+            os: macos-13
             compiler: clang
             clang-runtime: '17'
 
-          - name: win-msvc-runtime14
-            os: windows-latest
+          - name: win2022-msvc-runtime14
+            os: windows-2022
             compiler: msvc
             clang-runtime: '14'
 
-          - name: win-msvc-runtime15
-            os: windows-latest
+          - name: win2022-msvc-runtime15
+            os: windows-2022
             compiler: msvc
             clang-runtime: '15'
 
-          - name: win-msvc-runtime16
-            os: windows-latest
+          - name: win2022-msvc-runtime16
+            os: windows-2022
             compiler: msvc
             clang-runtime: '16'
 
+          - name: win2022-msvc-runtime17
+            os: windows-2022
+            compiler: msvc
+            clang-runtime: '17'
+
           - name: ubu22-clang15-runtime16-debug
             os: ubuntu-22.04
             compiler: clang-15
@@ -559,7 +584,6 @@ jobs:
         # Update openssl on osx because the current one is deprecated by python.
         curl -L https://bootstrap.pypa.io/get-pip.py | sudo python3
         echo "/usr/local/opt/ccache/libexec" >> $GITHUB_PATH
-        PATH_TO_LLVM_BUILD=/usr/local/opt/llvm@${{ matrix.clang-runtime }}/
 
         # For now Package llvm@18 is unsupported on brew, llvm <=@11 are deprecated or deleted.
         # Install llvm from github releases.
@@ -591,16 +615,22 @@ jobs:
           # allowing clang to work with system's SDK.
           sudo rm -fr /usr/local/opt/llvm*/include/c++
         fi
-
+        
+        PATH_TO_LLVM_BUILD=$(brew --prefix llvm@${{ matrix.clang-runtime }})
+        
         pip3 install lit # LLVM lit is not part of the llvm releases...
 
         # We need headers in correct place
-        for file in $(xcrun --show-sdk-path)/usr/include/*
-        do
-          if [ ! -f /usr/local/include/$(basename $file) ]; then
-            ln -s $file /usr/local/include/$(basename $file)
-          fi
-        done
+        # FIXME: the ln-based solution fails with "No such file or directory" on osx arm;
+        # copying the files over is a temporary workaround.
+        sudo cp -r -n $(xcrun --show-sdk-path)/usr/include/ /usr/local/include/
+        #for file in $(xcrun --show-sdk-path)/usr/include/*
+        #do
+        #  if [ ! -f /usr/local/include/$(basename $file) ]; then
+        #    echo ${file}
+        #    ln -s ${file} /usr/local/include/$(basename $file)
+        #  fi
+        #done
         # We need PATH_TO_LLVM_BUILD later
         echo "PATH_TO_LLVM_BUILD=$PATH_TO_LLVM_BUILD" >> $GITHUB_ENV
 
@@ -740,10 +770,10 @@ jobs:
       if: ${{ runner.os != 'windows' }}
       run: |
         mkdir obj && cd obj
-        cmake -DClang_DIR="$PATH_TO_LLVM_BUILD" \
-          -DLLVM_DIR="$PATH_TO_LLVM_BUILD"  \
+        cmake -DClang_DIR=${{ env.PATH_TO_LLVM_BUILD }}  \
+          -DLLVM_DIR=${{ env.PATH_TO_LLVM_BUILD }}       \
           -DCMAKE_BUILD_TYPE=$([[ -z "$BUILD_TYPE" ]] && echo RelWithDebInfo || echo $BUILD_TYPE) \
-          -DCLAD_CODE_COVERAGE=${CLAD_CODE_COVERAGE}  \
+          -DCLAD_CODE_COVERAGE=${{ env.CLAD_CODE_COVERAGE }}   \
           -DLLVM_EXTERNAL_LIT="`which lit`" \
           -DLLVM_ENABLE_WERROR=On           \
           $GITHUB_WORKSPACE                 \

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1,7 +1,24 @@
 cmake_minimum_required(VERSION 3.7.0)
 
+enable_language(CXX)
+set(CMAKE_CXX_EXTENSIONS NO)
+
 include(GNUInstallDirs)
 
+# MUST be done before call to clad project
+get_cmake_property(_cache_vars CACHE_VARIABLES)
+foreach(_cache_var ${_cache_vars})
+  get_property(_helpstring CACHE ${_cache_var} PROPERTY HELPSTRING)
+  if(_helpstring STREQUAL 
+    "No help, variable specified on the command line.")
+    set(CMAKE_ARGS "${CMAKE_ARGS} -D${_cache_var}=\"${${_cache_var}}\"")
+  endif()
+endforeach()
+
+# Generate CMakeArgs.txt file with source, build dir and command line args
+write_file("${CMAKE_CURRENT_BINARY_DIR}/CMakeArgs.txt"
+  "-S${CMAKE_SOURCE_DIR} -B${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_ARGS}")
+
 if(POLICY CMP0075)
   cmake_policy(SET CMP0075 NEW)
 endif()

diff --git a/demos/Arrays.cpp b/demos/Arrays.cpp
@@ -76,13 +76,13 @@ int main() {
   // the indexes of the array by using the format arr[0:<last index of arr>]
   auto hessian_all = clad::hessian(weighted_avg, "arr[0:2], weights[0:2]");
   // Generates the Hessian matrix for weighted_avg w.r.t. to arr.
-  auto hessian_arr = clad::hessian(weighted_avg, "arr[0:2]");
+  // auto hessian_arr = clad::hessian(weighted_avg, "arr[0:2]");
 
   double matrix_all[36] = {0};
-  double matrix_arr[9] = {0};
+  // double matrix_arr[9] = {0};
 
   clad::array_ref<double> matrix_all_ref(matrix_all, 36);
-  clad::array_ref<double> matrix_arr_ref(matrix_arr, 9);
+  // clad::array_ref<double> matrix_arr_ref(matrix_arr, 9);
 
   hessian_all.execute(arr, weights, matrix_all_ref);
   printf("Hessian Mode w.r.t. to all:\n matrix =\n"
@@ -102,12 +102,13 @@ int main() {
          matrix_all[28], matrix_all[29], matrix_all[30], matrix_all[31],
          matrix_all[32], matrix_all[33], matrix_all[34], matrix_all[35]);
 
-  hessian_arr.execute(arr, weights, matrix_arr_ref);
+  /*hessian_arr.execute(arr, weights, matrix_arr_ref);
   printf("Hessian Mode w.r.t. to arr:\n matrix =\n"
          "  {%.2g, %.2g, %.2g}\n"
          "  {%.2g, %.2g, %.2g}\n"
          "  {%.2g, %.2g, %.2g}\n",
          matrix_arr[0], matrix_arr[1], matrix_arr[2], matrix_arr[3],
          matrix_arr[4], matrix_arr[5], matrix_arr[6], matrix_arr[7],
          matrix_arr[8]);
+  */
 }
diff --git a/docs/userDocs/source/_static/vector-mode.png b/docs/userDocs/source/_static/vector-mode.png
diff --git a/docs/userDocs/source/user/CoreConcepts.rst b/docs/userDocs/source/user/CoreConcepts.rst
@@ -127,6 +127,52 @@ Substituting `s = z` we will get `sz` = 1
 Thus we don't need to run the program twice for each input. However, as mentioned
 above the only drawback is we need to re-run the program for a different output.
 
+
+Vectorized Forward Mode Automatic Differentiation
+===================================================
+
+Vectorized Forward Mode Automatic Differentiation is a computational technique
+that combines two powerful concepts: vectorization and forward mode automatic
+differentiation. This approach is used to efficiently compute derivatives of
+functions with respect to multiple input variables by taking advantage of both
+parallel processing capabilities and the structure of the computation graph.
+
+Working
+--------
+
+To compute the gradient of a function with an n-dimensional input, forward mode
+requires n forward passes.
+
+We can do this in a single forward pass: instead of accumulating a single
+scalar derivative value with respect to a particular input node, we maintain a
+gradient vector at each node. The strategy is otherwise very similar to plain
+forward mode, which would require three passes to compute the partial
+derivatives w.r.t. the three scalar inputs of the function.
+
+At each node, we maintain a vector, storing the complete gradient of that node's
+output w.r.t. all the input parameters. All operations are now vector operations;
+for example, applying the sum rule will result in the addition of vectors.
+Initialization for input nodes is done using one-hot vectors.
+
+.. figure:: ../_static/vector-mode.png
+  :width: 600
+  :align: center
+  :alt: Vectorized Forward Mode Automatic Differentiation
+
+  Vectorized Forward Mode Automatic Differentiation to compute the gradient.
+
+Benefits
+----------
+
+Each node now requires computing a vector, which takes more memory and more
+time, in part because of the additional memory allocation calls. This must be
+offset by some improvement in computing efficiency.
+
+Vectorized mode can avoid recomputing some expensive functions that a
+non-vectorized version would execute repeatedly across its multiple forward
+passes. This approach can also take advantage of the hardware's vectorization
+and parallelization capabilities using SIMD techniques.
+
 Derived Function Types and Derivative Types
 =============================================
 

diff --git a/include/clad/Differentiator/CladConfig.h b/include/clad/Differentiator/CladConfig.h
@@ -21,23 +21,28 @@ enum order {
   third = 3,
 }; // enum order
 
-enum opts {
+enum opts : unsigned {
   use_enzyme = 1 << ORDER_BITS,
   vector_mode = 1 << (ORDER_BITS + 1),
+
+  // Storing two bits for tbr analysis.
+  // 00 - default, 01 - enable, 10 - disable, 11 - not used / invalid
+  enable_tbr = 1 << (ORDER_BITS + 2),
+  disable_tbr = 1 << (ORDER_BITS + 3),
 }; // enum opts
 
-constexpr unsigned GetDerivativeOrder(unsigned const bitmasked_opts) {
+constexpr unsigned GetDerivativeOrder(const unsigned bitmasked_opts) {
   return bitmasked_opts & ORDER_MASK;
 }
 
-constexpr bool HasOption(unsigned const bitmasked_opts, unsigned const option) {
+constexpr bool HasOption(const unsigned bitmasked_opts, const unsigned option) {
   return (bitmasked_opts & option) == option;
 }
 
 constexpr unsigned GetBitmaskedOpts() { return 0; }
-constexpr unsigned GetBitmaskedOpts(unsigned const first) { return first; }
+constexpr unsigned GetBitmaskedOpts(const unsigned first) { return first; }
 template <typename... Opts>
-constexpr unsigned GetBitmaskedOpts(unsigned const first, Opts... opts) {
+constexpr unsigned GetBitmaskedOpts(const unsigned first, Opts... opts) {
   return first | GetBitmaskedOpts(opts...);
 }
 

diff --git a/include/clad/Differentiator/DiffPlanner.h b/include/clad/Differentiator/DiffPlanner.h
@@ -86,6 +86,12 @@ namespace clad {
   using DiffSchedule = llvm::SmallVector<DiffRequest, 16>;
   using DiffInterval = std::vector<clang::SourceRange>;
 
+  struct RequestOptions {
+    /// This is a flag to indicate the default behaviour to enable/disable
+    /// TBR analysis during reverse-mode differentiation.
+    bool EnableTBRAnalysis = false;
+  };
+
   class DiffCollector: public clang::RecursiveASTVisitor<DiffCollector> {
     /// The source interval where clad was activated.
     ///
@@ -101,9 +107,11 @@ namespace clad {
     const clang::FunctionDecl* m_TopMostFD = nullptr;
     clang::Sema& m_Sema;
 
+    RequestOptions& m_Options;
+
   public:
     DiffCollector(clang::DeclGroupRef DGR, DiffInterval& Interval,
-                  DiffSchedule& plans, clang::Sema& S);
+                  DiffSchedule& plans, clang::Sema& S, RequestOptions& opts);
     bool VisitCallExpr(clang::CallExpr* E);
 
   private: