Skip to content

Commit

Permalink
Print results of tests and use check-exec
Browse files (browse the repository at this point in the history)
  • Loading branch information
kchristin22 committed Sep 12, 2024
1 parent 52dd28b commit fc9e33e
Showing 1 changed file with 19 additions and 32 deletions.
51 changes: 19 additions & 32 deletions test/CUDA/GradientKernels.cu
Original file line number | Diff line number | Diff line change
Expand Up @@ -129,55 +129,42 @@ int main(void) {
cudaMalloc(&dummy_in, sizeof(int));
cudaMalloc(&dummy_out, sizeof(int));

int *out = (int*)malloc(10 * sizeof(int));
for(int i = 0; i < 10; i++) {
int *out = (int*)malloc(5 * sizeof(int));
for(int i = 0; i < 5; i++) {
out[i] = 5;
}
int *d_out;
cudaMalloc(&d_out, 10 * sizeof(int));
cudaMemcpy(d_out, out, 10 * sizeof(int), cudaMemcpyHostToDevice);
cudaMalloc(&d_out, 5 * sizeof(int));
cudaMemcpy(d_out, out, 5 * sizeof(int), cudaMemcpyHostToDevice);

int *d_in;
cudaMalloc(&d_in, 10 * sizeof(int));
cudaMalloc(&d_in, 5 * sizeof(int));

auto add = clad::gradient(add_kernel, "in, out");
add.execute_kernel(dim3(1), dim3(10, 1, 1), dummy_out, dummy_in, d_out, d_in);
add.execute_kernel(dim3(1), dim3(5, 1, 1), dummy_out, dummy_in, d_out, d_in);
cudaDeviceSynchronize();

int *res = (int*)malloc(10 * sizeof(int));
cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost);
for(int i = 0; i < 10; i++) {
if (res[i] != 5) {
std::cerr << "wrong result of add_kernel_grad at index " << i << std::endl;
return 1;
}
}
int *res = (int*)malloc(5 * sizeof(int));
cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost);
printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5

cudaMemset(d_in, 0, 10 * sizeof(int));
cudaMemset(d_in, 0, 5 * sizeof(int));
auto add_2 = clad::gradient(add_kernel_2, "in, out");
add_2.execute_kernel(dim3(1), dim3(10, 1, 1), dummy_out, dummy_in, d_out, d_in);
add_2.execute_kernel(dim3(1), dim3(5, 1, 1), dummy_out, dummy_in, d_out, d_in);
cudaDeviceSynchronize();

cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost);
for(int i = 0; i < 10; i++) {
if (res[i] != 5) {
std::cerr << "wrong result of add_kernel_2_grad at index " << i << std::endl;
return 1;
}
}
cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost);
printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5


cudaMemset(d_in, 0, 10 * sizeof(int));
cudaMemset(d_in, 0, 5 * sizeof(int));
auto add_3 = clad::gradient(add_kernel_3, "in, out");
add_3.execute_kernel(dim3(10), dim3(1), dummy_out, dummy_in, d_out, d_in);
add_3.execute_kernel(dim3(5), dim3(1), dummy_out, dummy_in, d_out, d_in);
cudaDeviceSynchronize();

cudaMemcpy(res, d_in, 10 * sizeof(int), cudaMemcpyDeviceToHost);
for(int i = 0; i < 10; i++) {
if (res[i] != 5) {
std::cerr << "wrong result of add_kernel_3_grad at index " << i << std::endl;
return 1;
}
}
cudaMemcpy(res, d_in, 5 * sizeof(int), cudaMemcpyDeviceToHost);
printf("%d, %d, %d, %d, %d\n", res[0], res[1], res[2], res[3], res[4]); // CHECK-EXEC: 5, 5, 5, 5, 5


return 0;
}

0 comments on commit fc9e33e

Please sign in to comment.