From fc9e33eae11c25d60d6a3f12d3abd97da38d033f Mon Sep 17 00:00:00 2001 From: kchristin Date: Thu, 12 Sep 2024 19:06:46 +0300 Subject: [PATCH] Print results of tests and use check-exec --- test/CUDA/GradientKernels.cu | 51 ++++++++++++++---------------------- 1 file changed, 19 insertions(+), 32 deletions(-) diff --git a/test/CUDA/GradientKernels.cu b/test/CUDA/GradientKernels.cu index 511ac278f..db67b1ed6 100644 --- a/test/CUDA/GradientKernels.cu +++ b/test/CUDA/GradientKernels.cu @@ -129,55 +129,42 @@ int main(void) { cudaMalloc(&dummy_in, sizeof(int)); cudaMalloc(&dummy_out, sizeof(int)); - int *out = (int*)malloc(10 * sizeof(int)); - for(int i = 0; i < 10; i++) { + int *out = (int*)malloc(5 * sizeof(int)); + for(int i = 0; i < 5; i++) { out[i] = 5; } int *d_out; - cudaMalloc(&d_out, 10 * sizeof(int)); - cudaMemcpy(d_out, out, 10 * sizeof(int), cudaMemcpyHostToDevice); + cudaMalloc(&d_out, 5 * sizeof(int)); + cudaMemcpy(d_out, out, 5 * sizeof(int), cudaMemcpyHostToDevice); int *d_in; - cudaMalloc(&d_in, 10 * sizeof(int)); + cudaMalloc(&d_in, 5 * sizeof(int)); auto add = clad::gradient(add_kernel, "in, out"); - add.execute_kernel(dim3(1), dim3(10, 1, 1), dummy_out, dummy_in, d_out, d_in); + add.execute_kernel(dim3(1), dim3(5, 1, 1), dummy_out, dummy_in, d_out, d_in); cudaDeviceSynchronize(); - int *res = (int*)malloc(10 * sizeof(int)); - cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost); - for(int i = 0; i < 10; i++) { - if (res[i] != 5) { - std::cerr << "wrong result of add_kernel_grad at index " << i << std::endl; - return 1; - } - } + int *res = (int*)malloc(5 * sizeof(int)); + cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost); + printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5 - cudaMemset(d_in, 0, 10 * sizeof(int)); + cudaMemset(d_in, 0, 5 * sizeof(int)); auto add_2 = clad::gradient(add_kernel_2, "in, out"); - add_2.execute_kernel(dim3(1), dim3(10, 1, 1), dummy_out, dummy_in, d_out, d_in); + add_2.execute_kernel(dim3(1), dim3(5, 1, 1), dummy_out, dummy_in, d_out, d_in); cudaDeviceSynchronize(); - cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost); - for(int i = 0; i < 10; i++) { - if (res[i] != 5) { - std::cerr << "wrong result of add_kernel_2_grad at index " << i << std::endl; - return 1; - } - } + cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost); + printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5 + - cudaMemset(d_in, 0, 10 * sizeof(int)); + cudaMemset(d_in, 0, 5 * sizeof(int)); auto add_3 = clad::gradient(add_kernel_3, "in, out"); - add_3.execute_kernel(dim3(10), dim3(1), dummy_out, dummy_in, d_out, d_in); + add_3.execute_kernel(dim3(5), dim3(1), dummy_out, dummy_in, d_out, d_in); cudaDeviceSynchronize(); - cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost); - for(int i = 0; i < 10; i++) { - if (res[i] != 5) { - std::cerr << "wrong result of add_kernel_3_grad at index " << i << std::endl; - return 1; - } - } + cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost); + printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5 + return 0; } \ No newline at end of file