// conways-cuda-bit-per-cell.c
// Conway's Game of Life on CUDA, storing one cell per bit.
#include <stdio.h>
#include <cuda_runtime.h>
#include <stdlib.h>
#include <time.h>
// Grid dimensions in cells. Cells are packed 32 per unsigned int and rows are
// sized as WIDTH / 32 words throughout this file.
// NOTE(review): 16364 is not a multiple of 32, so WIDTH / 32 truncates to 511
// and only the first 16352 columns of each row have storage -- confirm whether
// 16384 was intended.
#define WIDTH 16364
#define HEIGHT 16364
/*
 * Computes one Game of Life generation on a bit-packed grid.
 *
 * Storage layout: every row occupies (width / 32) consecutive 32-bit words;
 * cell x of a row lives in bit (x % 32) of word (x / 32). Only the columns
 * that fit in whole words, x < (width / 32) * 32, have storage. Columns past
 * that limit are treated as outside the grid (dead), exactly like cells beyond
 * any other edge, so a neighbour lookup can never spill into the next row or
 * past the end of the buffer.
 *
 * Launch layout: one thread per cell. x comes from
 * blockIdx.x * blockDim.x + threadIdx.x and y from blockIdx.y alone, so
 * gridDim.y must cover `height` rows.
 *
 * Parameters:
 *   grid     - current generation (read only by this kernel).
 *   new_grid - next generation. Updated with atomicOr/atomicAnd because the 32
 *              threads that share an output word each write a single bit of it.
 *              Must not be the same buffer as `grid`.
 *   width    - logical grid width in cells.
 *   height   - grid height in rows.
 */
__global__ void update_kernel(unsigned int *grid, unsigned int *new_grid, int width, int height) {
    const int words_per_row = width / 32;
    // Last column (exclusive) that is actually backed by a word in each row.
    const int usable_width = words_per_row * 32;

    const int x = blockIdx.x * blockDim.x + threadIdx.x; // x-coordinate in the grid
    const int y = blockIdx.y;                            // y-coordinate in the grid
    if (x >= usable_width || y >= height) {
        return;
    }

    const unsigned int mask = 1u << (x % 32);
    // size_t keeps the row offset from overflowing int on very large grids.
    const size_t word_index = (size_t)y * words_per_row + (x / 32);
    const unsigned int cell = grid[word_index] & mask;

    // Count live cells among the eight surrounding positions.
    int neighbour_count = 0;
    for (int displacement_y = -1; displacement_y <= 1; displacement_y++) {
        const int neighbour_y = y + displacement_y;
        if (neighbour_y < 0 || neighbour_y >= height) {
            continue; // Row lies outside the grid
        }
        const size_t neighbour_row = (size_t)neighbour_y * words_per_row;
        for (int displacement_x = -1; displacement_x <= 1; displacement_x++) {
            if (displacement_x == 0 && displacement_y == 0) {
                continue; // Skip the cell itself
            }
            const int neighbour_x = x + displacement_x;
            // Bound by usable_width, not width: columns beyond it have no
            // storage and would otherwise alias the next row's first word.
            if (neighbour_x < 0 || neighbour_x >= usable_width) {
                continue;
            }
            const unsigned int neighbour_mask = 1u << (neighbour_x % 32);
            if (grid[neighbour_row + (neighbour_x / 32)] & neighbour_mask) {
                neighbour_count++;
            }
        }
    }

    // Game of Life rules: a live cell survives with 2 or 3 neighbours, a dead
    // cell becomes alive with exactly 3.
    const bool alive_next = (cell && (neighbour_count == 2 || neighbour_count == 3)) ||
                            (!cell && neighbour_count == 3);
    if (alive_next) {
        atomicOr(&new_grid[word_index], mask);
    } else {
        atomicAnd(&new_grid[word_index], ~mask);
    }
}
// Fills a bit-per-cell grid with random live/dead cells.
//
// grid must point to at least HEIGHT * (WIDTH / 32) unsigned ints; every bit
// of every word is set to 0 or 1 using rand(). The generator is re-seeded from
// the current time on each call. A NULL grid is ignored.
void initialize_grid(unsigned int *grid) {
    if (grid == NULL) {
        return;
    }

    // Seed for random number generation
    srand((unsigned int)time(NULL));

    const int word_count = HEIGHT * (WIDTH / 32);
    // Iterate over each 32-bit integer in the grid
    for (int i = 0; i < word_count; i++) {
        unsigned int word = 0;
        for (int j = 0; j < 32; j++) {
            // Convert to unsigned before shifting: shifting a signed 1 into
            // bit 31 overflows int.
            word |= (unsigned int)(rand() % 2) << j;
        }
        grid[i] = word;
    }
}
// Prints a diagnostic and terminates the process if a CUDA runtime call failed.
static void check_cuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Benchmarks 10 Game of Life generations on a WIDTH x HEIGHT bit-packed grid
// and prints the elapsed time reported by CUDA events (milliseconds).
// Returns 0 on success; exits with EXIT_FAILURE on any allocation or CUDA error.
int main() {
    const size_t size = (size_t)(WIDTH / 32) * HEIGHT * sizeof(unsigned int);
    unsigned int *grid = NULL;
    unsigned int *d_grid = NULL, *d_new_grid = NULL;
    cudaEvent_t start, end;
    float elapsed_ms = 0; // named to avoid shadowing time() from <time.h>

    // Allocate and fill the host copy of the initial generation
    grid = (unsigned int *)malloc(size);
    if (grid == NULL) {
        fprintf(stderr, "Failed to allocate %zu bytes of host memory\n", size);
        return EXIT_FAILURE;
    }
    initialize_grid(grid);

    // Allocate device memory
    check_cuda(cudaMalloc(&d_grid, size), "cudaMalloc(d_grid)");
    check_cuda(cudaMalloc(&d_new_grid, size), "cudaMalloc(d_new_grid)");

    // Copy initial grid to device
    check_cuda(cudaMemcpy(d_grid, grid, size, cudaMemcpyHostToDevice), "cudaMemcpy(host to device)");

    dim3 dimBlock(32, 1); // Each block contains 32 threads, i.e. one packed word
    dim3 dimGrid(WIDTH / 32, HEIGHT);

    check_cuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    check_cuda(cudaEventCreate(&end), "cudaEventCreate(end)");
    check_cuda(cudaEventRecord(start), "cudaEventRecord(start)");

    // Main program loop
    for (int iter = 0; iter < 10; iter++) {
        update_kernel<<<dimGrid, dimBlock>>>(d_grid, d_new_grid, WIDTH, HEIGHT);
        // A launch does not return a status; query it without synchronizing.
        check_cuda(cudaGetLastError(), "update_kernel launch");

        // Swap grids so the freshly written generation becomes the next input
        unsigned int *temp = d_grid;
        d_grid = d_new_grid;
        d_new_grid = temp;
    }

    check_cuda(cudaEventRecord(end), "cudaEventRecord(end)");
    // Errors raised while the kernels were executing surface at this sync.
    check_cuda(cudaEventSynchronize(end), "cudaEventSynchronize(end)");
    check_cuda(cudaEventElapsedTime(&elapsed_ms, start, end), "cudaEventElapsedTime");
    printf("The time to complete iterations is: %f\n", elapsed_ms);

    // Cleanup
    check_cuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    check_cuda(cudaEventDestroy(end), "cudaEventDestroy(end)");
    check_cuda(cudaFree(d_grid), "cudaFree(d_grid)");
    check_cuda(cudaFree(d_new_grid), "cudaFree(d_new_grid)");
    free(grid);
    return 0;
}