diff --git a/demos/CUDA/BlackScholes/BlackScholes.cu b/demos/CUDA/BlackScholes/BlackScholes.cu index 6e9ca4f0d..833401e1b 100644 --- a/demos/CUDA/BlackScholes/BlackScholes.cu +++ b/demos/CUDA/BlackScholes/BlackScholes.cu @@ -35,12 +35,11 @@ * DISCLAIMER: The following file has been slightly modified to ensure * compatibility with Clad and to serve as a Clad demo. Specifically, parts of * the original `main` function have been moved to a separate function to use - * `clad::gradient` on. Furthermore, Clad cannot clone printf statements, so - * some original print statements have been omitted. The same applies to the - * checkCudaErrors function. - * New helper functions are included in another file and invoked here to verify - * the gradient's results. The original file is available in NVIDIA's - * cuda-samples repository on GitHub. + * `clad::gradient` on. Furthermore, Clad cannot clone checkCudaErrors + * successfully, so these calls have been omitted. The same applies to the + * cudaDeviceSynchronize function. New helper functions are included in another + * file and invoked here to verify the gradient's results. The original file is + * available in NVIDIA's cuda-samples repository on GitHub. * * Relevant documentation regarding the problem at hand can be found in NVIDIA's * cuda-samples repository. Using Clad, we compute some of the Greeks @@ -110,6 +109,7 @@ void launch(float* h_CallResultCPU, float* h_CallResultGPU, *d_StockPrice = nullptr, *d_OptionStrike = nullptr, *d_OptionYears = nullptr; + printf("...allocating GPU memory for options.\n"); cudaMalloc((void**)&d_CallResult, OPT_SZ); cudaMalloc((void**)&d_PutResult, OPT_SZ); cudaMalloc((void**)&d_StockPrice, OPT_SZ); @@ -117,9 +117,14 @@ void launch(float* h_CallResultCPU, float* h_CallResultGPU, cudaMalloc((void**)&d_OptionYears, OPT_SZ); // Copy options data to GPU memory for further processing + printf("...copying input data to GPU mem.\n"); cudaMemcpy(d_StockPrice, h_StockPrice, OPT_SZ, cudaMemcpyHostToDevice); cudaMemcpy(d_OptionStrike, h_OptionStrike, OPT_SZ, cudaMemcpyHostToDevice); cudaMemcpy(d_OptionYears, h_OptionYears, OPT_SZ, cudaMemcpyHostToDevice); + printf("Data init done.\n\n"); + + printf("Executing Black-Scholes GPU kernel (%i iterations)...\n", + NUM_ITERATIONS); BlackScholesGPU<<>>( (float2*)d_CallResult, (float2*)d_PutResult, (float2*)d_StockPrice, @@ -128,14 +133,19 @@ void launch(float* h_CallResultCPU, float* h_CallResultGPU, // Both call and put is calculated + printf("\nReading back GPU results...\n"); // Read back GPU results to compare them to CPU results cudaMemcpy(h_CallResultGPU, d_CallResult, OPT_SZ, cudaMemcpyDeviceToHost); cudaMemcpy(h_PutResultGPU, d_PutResult, OPT_SZ, cudaMemcpyDeviceToHost); + // Calculate options values on CPU + printf("Checking the results...\n"); + printf("...running CPU calculations.\n\n"); // Calculate options values on CPU BlackScholesCPU(h_CallResultCPU, h_PutResultCPU, h_StockPrice, h_OptionStrike, h_OptionYears, RISKFREE, VOLATILITY, OPT_N); + printf("...releasing GPU memory.\n"); cudaFree(d_OptionYears); cudaFree(d_OptionStrike); cudaFree(d_StockPrice);