Run and verify gradients

vgvassilev · Nov 14, 2024 · 63882b6 · 63882b6
1 parent cab90ed
commit 63882b6
Show file tree

Hide file tree

Showing 2 changed files with 248 additions and 19 deletions.
diff --git a/demos/CUDA/BlackScholes/BlackScholes.cu b/demos/CUDA/BlackScholes/BlackScholes.cu
@@ -31,18 +31,33 @@
  * See supplied whitepaper for more explanations.
  */
 
+/*
+ * DISCLAIMER: The following file has been modified slightly to make it
+ * compatible with Clad. The original file can be found at NVIDIA's cuda-samples
+ * repository at GitHub.
+ *
+ * Relevant documentation regarding the problem at hand can be found at NVIDIA's
+ * cuda-samples repository. With the use of Clad, we compute some of the Greeks
+ * (sensitivities) for Black-Scholes and verify them using the
+ * theoretical values as denoted in Wikipedia
+ * (https://en.wikipedia.org/wiki/Black%E2%80%93Scholes_model).
+ *
+ * To build and run the demo, run the following command: make run
+ */
+
 #include "clad/Differentiator/Differentiator.h"
 
-#include <helper_functions.h>  // helper functions for string parsing
-#include <helper_cuda.h>  // helper functions CUDA error checking and initialization
+#include <helper_cuda.h> // helper functions CUDA error checking and initialization
+#include <helper_functions.h> // helper functions for string parsing
 
 ////////////////////////////////////////////////////////////////////////////////
 // Process an array of optN options on CPU
 ////////////////////////////////////////////////////////////////////////////////
-extern "C" void BlackScholesCPU(float *h_CallResult, float *h_PutResult,
-                                float *h_StockPrice, float *h_OptionStrike,
-                                float *h_OptionYears, float Riskfree,
+extern "C" void BlackScholesCPU(float* h_CallResult, float* h_PutResult,
+                                float* h_StockPrice, float* h_OptionStrike,
+                                float* h_OptionYears, float Riskfree,
                                 float Volatility, int optN);
+extern "C" double CND(double d);
 
 ////////////////////////////////////////////////////////////////////////////////
 // Process an array of OptN options on GPU
@@ -120,6 +135,108 @@ void launch(float* h_CallResultCPU, float* h_CallResultGPU,
   cudaFree(d_CallResult);
 }
 
+double d1(double S, double X, double T) {
+  return (log(S / X) + (RISKFREE + 0.5 * VOLATILITY * VOLATILITY) * T) /
+         (VOLATILITY * sqrt(T));
+}
+
+double N_prime(double d) {
+  const double RSQRT2PI =
+      0.39894228040143267793994605993438; // 1 / sqrt(2 * PI)
+  return RSQRT2PI * exp(-0.5 * d * d);
+}
+
+enum Greek { Delta, dX, Theta };
+
+double computeL1norm_Call(float* S, float* X, float* T, float* d, Greek greek) {
+  double delta, ref, sum_delta, sum_ref;
+  sum_delta = 0;
+  sum_ref = 0;
+  switch (greek) {
+  case Delta:
+    for (int i = 0; i < OPT_N; i++) {
+      double d1_val = d1(S[i], X[i], T[i]);
+      ref = CND(d1_val);
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+    break;
+  case dX:
+    for (int i = 0; i < OPT_N; i++) {
+      double T_val = T[i];
+      double d1_val = d1(S[i], X[i], T_val);
+      double d2_val = d1_val - VOLATILITY * sqrt(T_val);
+      double expRT = exp(-RISKFREE * T_val);
+      ref = -expRT * CND(d2_val);
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+    break;
+  case Theta:
+    for (int i = 0; i < OPT_N; i++) {
+      double S_val = S[i], X_val = X[i], T_val = T[i];
+      double d1_val = d1(S_val, X_val, T_val);
+      double d2_val = d1_val - VOLATILITY * sqrt(T_val);
+      double expRT = exp(-RISKFREE * T_val);
+      ref =
+          (S_val * N_prime(d1_val) * VOLATILITY) / (2 * sqrt(T_val)) +
+          RISKFREE * X_val * expRT *
+              CND(d2_val); // theta is with respect to t, so -theta is the
+                           // approximation of the derivative with respect to T
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+  }
+
+  return sum_delta / sum_ref;
+}
+
+double computeL1norm_Pull(float* S, float* X, float* T, float* d, Greek greek) {
+  double delta, ref, sum_delta, sum_ref;
+  sum_delta = 0;
+  sum_ref = 0;
+  switch (greek) {
+  case Delta:
+    for (int i = 0; i < OPT_N; i++) {
+      double d1_val = d1(S[i], X[i], T[i]);
+      ref = CND(d1_val) - 1.0;
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+    break;
+  case dX:
+    for (int i = 0; i < OPT_N; i++) {
+      double T_val = T[i];
+      double d1_val = d1(S[i], X[i], T_val);
+      double d2_val = d1_val - VOLATILITY * sqrt(T_val);
+      double expRT = exp(-RISKFREE * T_val);
+      ref = expRT * CND(-d2_val);
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+    break;
+  case Theta:
+    for (int i = 0; i < OPT_N; i++) {
+      double S_val = S[i], X_val = X[i], T_val = T[i];
+      double d1_val = d1(S_val, X_val, T_val);
+      double d2_val = d1_val - VOLATILITY * sqrt(T_val);
+      double expRT = exp(-RISKFREE * T_val);
+      ref = (S_val * N_prime(d1_val) * VOLATILITY) / (2 * sqrt(T_val)) -
+            RISKFREE * X_val * expRT * CND(-d2_val);
+      delta = fabs(d[i] - ref);
+      sum_delta += delta;
+      sum_ref += fabs(ref);
+    }
+  }
+
+  return sum_delta / sum_ref;
+}
+
 int main(int argc, char** argv) {
   float* h_CallResultCPU = (float*)malloc(OPT_SZ);
   float* h_PutResultCPU = (float*)malloc(OPT_SZ);
@@ -140,30 +257,136 @@ int main(int argc, char** argv) {
     h_OptionYears[i] = RandFloat(0.25f, 10.0f);
   }
 
+  // Compute gradients
   auto callGrad = clad::gradient(
       launch, "h_CallResultGPU, h_StockPrice, h_OptionStrike, h_OptionYears");
   auto putGrad = clad::gradient(
       launch, "h_PutResultGPU, h_StockPrice, h_OptionStrike, h_OptionYears");
 
+  // Declare and initialize the derivatives
+  float* d_CallResultGPU = (float*)malloc(OPT_SZ);
+  float* d_PutResultGPU = (float*)malloc(OPT_SZ);
+  float* d_StockPrice = (float*)calloc(OPT_N, sizeof(float));
+  float* d_OptionStrike = (float*)calloc(OPT_N, sizeof(float));
+  float* d_OptionYears = (float*)calloc(OPT_N, sizeof(float));
+
+  for (int i = 0; i < OPT_N; i++) {
+    d_CallResultGPU[i] = 1.0f;
+    d_PutResultGPU[i] = 1.0f;
+  }
+
+  // Launch the kernel and the gradient
+
+  // Compute the derivatives of the price of the call options
+  callGrad.execute(h_CallResultCPU, h_CallResultGPU, h_PutResultCPU,
+                   h_PutResultGPU, h_StockPrice, h_OptionStrike, h_OptionYears,
+                   d_CallResultGPU, d_StockPrice, d_OptionStrike,
+                   d_OptionYears);
+
   // Calculate max absolute difference and L1 distance
   // between CPU and GPU results
-  float delta, ref, sum_delta, sum_ref, max_delta, L1norm;
+  double delta, ref, sum_delta, sum_ref, L1norm;
   sum_delta = 0;
   sum_ref = 0;
-  max_delta = 0;
 
   for (int i = 0; i < OPT_N; i++) {
     ref = h_CallResultCPU[i];
-    delta = fabsf(h_CallResultCPU[i] - h_CallResultGPU[i]);
-
-    if (delta > max_delta)
-      max_delta = delta;
-
+    delta = fabs(h_CallResultCPU[i] - h_CallResultGPU[i]);
     sum_delta += delta;
-    sum_ref += fabsf(ref);
+    sum_ref += fabs(ref);
   }
 
   L1norm = sum_delta / sum_ref;
+  printf("L1norm = %E\n", L1norm);
+  if (L1norm > 1e-6) {
+    printf("Original test failed\n");
+    return EXIT_FAILURE;
+  }
+
+  // Verify delta
+  L1norm = computeL1norm_Call(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_StockPrice, Delta);
+  printf("L1norm of delta for Call option = %E\n", L1norm);
+  if (L1norm > 1e-5) {
+    printf("Gradient test failed: the difference between the computed and the "
+           "approximated theoretical delta for Call option is larger than "
+           "expected\n");
+    return EXIT_FAILURE;
+  }
+
+  // Verify derivatives with respect to the Strike price
+  L1norm = computeL1norm_Call(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_OptionStrike, dX);
+  printf("L1norm of derivative of Call w.r.t. the strike price = %E\n", L1norm);
+  if (L1norm > 1e-5) {
+    printf(
+        "Gradient test failed: the difference between the computed and the "
+        "approximated theoretical derivative of Call w.r.t. the strike price "
+        "is larger than expected\n");
+    return EXIT_FAILURE;
+  }
+
+  // Verify theta
+  L1norm = computeL1norm_Call(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_OptionYears, Theta);
+  printf("L1norm of theta for Call option = %E\n", L1norm);
+  if (L1norm > 1e-5) {
+    printf("Gradient test failed: the difference between the computed and the "
+           "approximated theoretical theta for Call option is larger than "
+           "expected\n");
+    return EXIT_FAILURE;
+  }
+
+  // Compute the derivatives of the price of the pull options
+  for (int i = 0; i < OPT_N; i++) {
+    h_CallResultCPU[i] = 0.0f;
+    h_PutResultCPU[i] = -1.0f;
+    d_CallResultGPU[i] = 1.0f;
+    d_PutResultGPU[i] = 1.0f;
+  }
+
+  for (int i = 0; i < OPT_N; i++) {
+    d_StockPrice[i] = 0.f;
+    d_OptionStrike[i] = 0.f;
+    d_OptionYears[i] = 0.f;
+  }
+
+  putGrad.execute(h_CallResultCPU, h_CallResultGPU, h_PutResultCPU,
+                  h_PutResultGPU, h_StockPrice, h_OptionStrike, h_OptionYears,
+                  d_PutResultGPU, d_StockPrice, d_OptionStrike, d_OptionYears);
+
+  // Verify delta
+  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_StockPrice, Delta);
+  printf("L1norm of delta for Pull option = %E\n", L1norm);
+  if (L1norm > 1e-5) {
+    printf("Gradient test failed: the difference between the computed and "
+           "the approximated theoretical delta for Pull option is larger than "
+           "expected\n");
+    return EXIT_FAILURE;
+  }
+
+  // Verify derivatives with respect to the Strike price
+  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_OptionStrike, dX);
+  printf("L1norm of derivative of Pull w.r.t. the strike price = %E\n", L1norm);
+  if (L1norm > 1e-6) {
+    printf("Gradient test failed: the difference between the computed and the "
+           "approximated theoretcial derivative of "
+           "PUll w.r.t. the strike price is larger than expected\n");
+    return EXIT_FAILURE;
+  }
+
+  // Verify theta
+  L1norm = computeL1norm_Pull(h_StockPrice, h_OptionStrike, h_OptionYears,
+                              d_OptionYears, Theta);
+  printf("L1norm of theta for Pull option = %E\n", L1norm);
+  if (L1norm > 1e-5) {
+    printf("Gradient test failed: the difference between the computed and the "
+           "approximated theoretical theta for Pull option is larger than "
+           "expected\n");
+    return EXIT_FAILURE;
+  }
 
   free(h_OptionYears);
   free(h_OptionStrike);
@@ -172,11 +395,11 @@ int main(int argc, char** argv) {
   free(h_CallResultGPU);
   free(h_PutResultCPU);
   free(h_CallResultCPU);
-
-  if (L1norm > 1e-6) {
-    printf("Original test failed\n");
-    return EXIT_FAILURE;
-  }
+  free(d_OptionYears);
+  free(d_OptionStrike);
+  free(d_StockPrice);
+  free(d_PutResultGPU);
+  free(d_CallResultGPU);
 
   return EXIT_SUCCESS;
 }
diff --git a/demos/CUDA/BlackScholes/BlackScholes_gold.cpp b/demos/CUDA/BlackScholes/BlackScholes_gold.cpp
@@ -25,12 +25,18 @@
  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+/*
+ * DISCLAIMER: The following file has been modified slightly to make it
+ * compatible with Clad. The original file can be found at NVIDIA's cuda-samples
+ * repository at GitHub.
+ */
+
 #include <math.h>
 
 ////////////////////////////////////////////////////////////////////////////////
 // Polynomial approximation of cumulative normal distribution function
 ////////////////////////////////////////////////////////////////////////////////
-static double CND(double d) {
+extern "C" double CND(double d) {
   const double A1 = 0.31938153;
   const double A2 = -0.356563782;
   const double A3 = 1.781477937;