From d908e8e8038ebbdefe1b7410d55a6e4f97bc3330 Mon Sep 17 00:00:00 2001
From: "petro.zarytskyi"
Date: Fri, 15 Mar 2024 12:48:23 +0200
Subject: [PATCH] Remove array_ref from benchmarks

---
 benchmark/AlgorithmicComplexity.cpp |  5 ++---
 benchmark/EnzymeCladComparison.cpp  | 21 ++++++---------------
 benchmark/MemoryComplexity.cpp      |  3 +--
 benchmark/Simple.cpp                |  8 +++-----
 benchmark/VectorModeComparison.cpp  |  4 +---
 demos/Arrays.cpp                    | 19 +++++--------------
 demos/Jupyter/Intro.ipynb           |  4 ++--
 7 files changed, 20 insertions(+), 44 deletions(-)

diff --git a/benchmark/AlgorithmicComplexity.cpp b/benchmark/AlgorithmicComplexity.cpp
index 1f45feb23..9717cb0cd 100644
--- a/benchmark/AlgorithmicComplexity.cpp
+++ b/benchmark/AlgorithmicComplexity.cpp
@@ -63,11 +63,10 @@ static void BM_ReverseGausP(benchmark::State& state) {
   long double sum = 0;
   int dim = 5;
   double result[5] = {};
-  clad::array_ref<double> result_ref(result, dim);
   for (auto _ : state) {
-    dfdp_grad.execute(x, p, /*sigma*/ 2, dim, result_ref);
+    dfdp_grad.execute(x, p, /*sigma*/ 2, dim, result);
     for (int i = 0; i < dim; i++) {
-      benchmark::DoNotOptimize(sum += result_ref[i]);
+      benchmark::DoNotOptimize(sum += result[i]);
       result[i] = 0; // clear for the next benchmark iteration
     }
   }
diff --git a/benchmark/EnzymeCladComparison.cpp b/benchmark/EnzymeCladComparison.cpp
index d69d40520..50488699e 100644
--- a/benchmark/EnzymeCladComparison.cpp
+++ b/benchmark/EnzymeCladComparison.cpp
@@ -36,9 +36,8 @@ static void BM_VectorForwardModeAddArrayAndMultiplyWithScalarsExecute(
   int dn = 0;
   double arr[5] = {1, 2, 3, 4, 5};
   double darr[5] = {0};
-  clad::array_ref<double> darr_ref(darr, n);
   for (auto _ : state) {
-    grad.execute(arr, x, y, 5, darr_ref, &dx, &dy, &dn);
+    grad.execute(arr, x, y, 5, darr, &dx, &dy, &dn);
     dx = 0;
     dy = 0;
     for (int i = 0; i < n; i++)
@@ -87,9 +86,8 @@ static void BM_VectorForwardModeSumExecute(benchmark::State& state) {
   auto grad = clad::differentiate(sum, "p");
   double inputs[] = {1, 2, 3, 4, 5};
   double result[5] = {};
-  clad::array_ref<double> result_ref(result, 5);
   for (auto _ : state) {
-    grad.execute(inputs, /*dim*/ 5, result_ref);
+    grad.execute(inputs, /*dim*/ 5, result);
     for (int i = 0; i < 5; i++)
       result[i] = 0;
   }
@@ -126,9 +124,8 @@ static void BM_VectorForwardModeProductExecute(benchmark::State& state) {
   auto grad = clad::differentiate(product, "p");
   double inputs[] = {1, 2, 3, 4, 5};
   double result[5] = {};
-  clad::array_ref<double> result_ref(result, 5);
   for (auto _ : state) {
-    grad.execute(inputs, /*dim*/ 5, result_ref);
+    grad.execute(inputs, /*dim*/ 5, result);
     for (int i = 0; i < 5; i++)
       result[i] = 0;
   }
@@ -210,12 +207,10 @@ static void BM_ReverseModeWeightedSum(benchmark::State& state) {
 
   double dinp[n];
   double dweights[n];
-  clad::array_ref<double> dinp_ref(dinp, n);
-  clad::array_ref<double> dweights_ref(dweights, n);
 
   double sum = 0;
   for (auto _ : state) {
-    grad.execute(inputs, weights, n, dinp_ref, dweights_ref);
+    grad.execute(inputs, weights, n, dinp, dweights);
     for (int i = 0; i < n; ++i) {
       sum += dinp[i] + dweights[i];
       dinp[i] = 0;
@@ -241,12 +236,10 @@ static void BM_VectorForwardModeWeightedSum(benchmark::State& state) {
 
   double dinp[n];
   double dweights[n];
-  clad::array_ref<double> dinp_ref(dinp, n);
-  clad::array_ref<double> dweights_ref(dweights, n);
 
   double sum = 0;
   for (auto _ : state) {
-    vm_grad.execute(inputs, weights, n, dinp_ref, dweights_ref);
+    vm_grad.execute(inputs, weights, n, dinp, dweights);
     for (int i = 0; i < n; ++i) {
       sum += dinp[i] + dweights[i];
       dinp[i] = 0;
@@ -271,12 +264,10 @@ static void BM_ReverseModeWeightedSumEnzyme(benchmark::State& state) {
 
   double dinp[n];
   double dweights[n];
-  clad::array_ref<double> dinp_ref(dinp, n);
-  clad::array_ref<double> dweights_ref(dweights, n);
 
   double sum = 0;
   for (auto _ : state) {
-    grad.execute(inputs, weights, n, dinp_ref, dweights_ref);
+    grad.execute(inputs, weights, n, dinp, dweights);
     for (int i = 0; i < n; ++i) {
       sum += dinp[i] + dweights[i];
       dinp[i] = 0;
diff --git a/benchmark/MemoryComplexity.cpp b/benchmark/MemoryComplexity.cpp
index 7662f2f0a..4afd6371c 100644
--- a/benchmark/MemoryComplexity.cpp
+++ b/benchmark/MemoryComplexity.cpp
@@ -89,10 +89,9 @@ static void BM_ReverseGausMemoryP(benchmark::State& state) {
     x[i] = 1;
     p[i] = i;
   }
-  clad::array_ref<double> result_ref(result, dim);
   AddBMCounterRAII MemCounters(*mm.get(), state);
   for (auto _ : state) {
-    dfdp_grad.execute(x, p, /*sigma*/ 2, dim, result_ref);
+    dfdp_grad.execute(x, p, /*sigma*/ 2, dim, result);
   }
 }
 BENCHMARK(BM_ReverseGausMemoryP)
diff --git a/benchmark/Simple.cpp b/benchmark/Simple.cpp
index 9c5ca8694..6e0848f26 100644
--- a/benchmark/Simple.cpp
+++ b/benchmark/Simple.cpp
@@ -27,7 +27,7 @@ static void BM_ForwardModePow2FwdDecl(benchmark::State &state) {
 BENCHMARK(BM_ForwardModePow2FwdDecl);
 
 // Benchmark calling the gradient via CladFunction::execute.
-inline void sum_grad_0(double*, int, clad::array_ref<double>);
+inline void sum_grad_0(double*, int, double*);
 static void BM_ReverseModeSumFwdDecl(benchmark::State &state) {
   auto grad = clad::gradient(sum, "p");
   (void) grad;
@@ -63,10 +63,9 @@ static void BM_VectorForwardModeSumFwdDecl(benchmark::State &state) {
   (void) vm_grad;
   double inputs[] = {1, 2, 3, 4, 5};
   double result[3] = {};
-  clad::array_ref<double> result_ref(result, 3);
   unsigned long long sum = 0;
   for (auto _ : state) {
-    sum_dvec_0(inputs,/*dim*/ 3, result_ref);
+    sum_dvec_0(inputs, /*dim*/ 3, result);
     benchmark::DoNotOptimize(sum += result[0] + result[1] + result[2]);
   }
 }
@@ -78,10 +77,9 @@ static void BM_VectorForwardModeSumExecute(benchmark::State &state) {
   auto vm_grad = clad::differentiate(sum, "p");
   double inputs[] = {1, 2, 3, 4, 5};
   double result[3] = {};
-  clad::array_ref<double> result_ref(result, 3);
   unsigned long long sum = 0;
   for (auto _ : state) {
-    vm_grad.execute(inputs,/*dim*/ 3, result_ref);
+    vm_grad.execute(inputs, /*dim*/ 3, result);
     benchmark::DoNotOptimize(sum += result[0] + result[1] + result[2]);
   }
 }
diff --git a/benchmark/VectorModeComparison.cpp b/benchmark/VectorModeComparison.cpp
index d55550629..6f9614808 100644
--- a/benchmark/VectorModeComparison.cpp
+++ b/benchmark/VectorModeComparison.cpp
@@ -53,12 +53,10 @@ static void BM_ReverseModeWeightedSum(benchmark::State& state) {
 
   double dinp[n];
   double dweights[n];
-  clad::array_ref<double> dinp_ref(dinp, n);
-  clad::array_ref<double> dweights_ref(dweights, n);
 
   double sum = 0;
   for (auto _ : state) {
-    grad.execute(inputs, weights, n, dinp_ref, dweights_ref);
+    grad.execute(inputs, weights, n, dinp, dweights);
     for (int i = 0; i < n; ++i) {
       sum += dinp[i] + dweights[i];
       dinp[i] = 0;
diff --git a/demos/Arrays.cpp b/demos/Arrays.cpp
index 53d6a2c2d..e6bd1b6f6 100644
--- a/demos/Arrays.cpp
+++ b/demos/Arrays.cpp
@@ -52,20 +52,14 @@ int main() {
   double darr[3] = {0, 0, 0};
   double dweights[3] = {0, 0, 0};
 
-  // clad::array_ref is used by clad::gradient to keep track of the array size
-  // being sent into the generated gradient. Since clad::array_ref is a wrapper
-  // for the supplied array any changes to it will be reflected in the array and
-  // vice versa
-  clad::array_ref<double> darr_ref(darr, 3);
-  clad::array_ref<double> dweights_ref(dweights, 3);
-
-  weighted_avg_dall.execute(arr, weights, darr_ref, dweights_ref);
+
+  weighted_avg_dall.execute(arr, weights, darr, dweights);
   printf("Reverse Mode w.r.t. all:\n darr = {%.2g, %.2g, %.2g}\n dweights = "
          "{%.2g, %.2g, %.2g}\n",
          darr[0], darr[1], darr[2], dweights[0], dweights[1], dweights[2]);
 
   darr[0] = darr[1] = darr[2] = 0;
-  weighted_avg_darr.execute(arr, weights, darr_ref);
+  weighted_avg_darr.execute(arr, weights, darr);
   printf("Reverse Mode w.r.t. arr:\n darr = {%.2g, %.2g, %.2g}\n", darr[0],
          darr[1], darr[2]);
 
@@ -81,10 +75,7 @@ int main() {
 
   double matrix_all[36] = {0};
   // double matrix_arr[9] = {0};
-  clad::array_ref<double> matrix_all_ref(matrix_all, 36);
-  // clad::array_ref<double> matrix_arr_ref(matrix_arr, 9);
-
-  hessian_all.execute(arr, weights, matrix_all_ref);
+  hessian_all.execute(arr, weights, matrix_all);
   printf("Hessian Mode w.r.t. to all:\n matrix =\n"
          " {%.2g, %.2g, %.2g, %.2g, %.2g, %.2g}\n"
          " {%.2g, %.2g, %.2g, %.2g, %.2g, %.2g}\n"
@@ -102,7 +93,7 @@ int main() {
          matrix_all[28], matrix_all[29], matrix_all[30], matrix_all[31],
          matrix_all[32], matrix_all[33], matrix_all[34], matrix_all[35]);
 
-  /*hessian_arr.execute(arr, weights, matrix_arr_ref);
+  /*hessian_arr.execute(arr, weights, matrix_arr);
   printf("Hessian Mode w.r.t. to arr:\n matrix =\n"
          " {%.2g, %.2g, %.2g}\n"
          " {%.2g, %.2g, %.2g}\n"
diff --git a/demos/Jupyter/Intro.ipynb b/demos/Jupyter/Intro.ipynb
index 2bc7ea30c..fd0b803f9 100644
--- a/demos/Jupyter/Intro.ipynb
+++ b/demos/Jupyter/Intro.ipynb
@@ -143,7 +143,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "The code is: void fn_grad(double x, double y, clad::array_ref<double> _d_x, clad::array_ref<double> _d_y) {\n",
+      "The code is: void fn_grad(double x, double y, double *_d_x, double *_d_y) {\n",
       "  double _t2;\n",
       "  double _t3;\n",
       "  double _t4;\n",
@@ -356,7 +356,7 @@
      "output_type": "stream",
      "text": [
       "The code is: \n",
-      "void fn_grad(double x, double y, clad::array_ref<double> _d_x, clad::array_ref<double> _d_y, double &_final_error) {\n",
+      "void fn_grad(double x, double y, double *_d_x, double *_d_y, double &_final_error) {\n",
       "  double _t2;\n",
       "  double _t3;\n",
       "  double _t4;\n",
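
Note (illustrative, not part of the patch): after this change, CladFunction::execute takes plain pointer output buffers sized by the caller, instead of clad::array_ref wrappers. Below is a minimal standalone sketch of the new calling convention, mirroring the weighted-sum call sites updated above. The file name, the function name weighted_sum, and the compiler invocation in the comment are assumptions for illustration; only the gradient/execute usage pattern comes from the patch itself.

    // weighted_sum_example.cpp -- hypothetical example; compile with the clad
    // plugin loaded, e.g. (assumed invocation):
    //   clang++ -std=c++11 -fplugin=/path/to/clad.so weighted_sum_example.cpp
    #include "clad/Differentiator/Differentiator.h"
    #include <cstdio>

    // Primal function, shaped like the benchmarks' weighted sum.
    double weighted_sum(double* inp, double* weights, int n) {
      double res = 0;
      for (int i = 0; i < n; ++i)
        res += inp[i] * weights[i];
      return res;
    }

    int main() {
      // Reverse-mode gradient w.r.t. both array parameters.
      auto grad = clad::gradient(weighted_sum, "inp, weights");

      double inputs[3] = {1, 2, 3};
      double weights[3] = {0.5, 0.25, 0.25};

      // The caller now owns the adjoint storage and passes raw double*
      // buffers; they must be zero-initialized because the generated
      // gradient accumulates into them.
      double dinp[3] = {0};
      double dweights[3] = {0};
      grad.execute(inputs, weights, 3, dinp, dweights);

      // For this primal, expect dinp == weights and dweights == inputs.
      printf("dinp     = {%g, %g, %g}\n", dinp[0], dinp[1], dinp[2]);
      printf("dweights = {%g, %g, %g}\n", dweights[0], dweights[1],
             dweights[2]);
    }

This is also why the benchmark loops above zero the buffers every iteration: calling execute again without re-clearing dinp/dweights would accumulate on top of the previous result.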