Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Project 1: RICHARD CHEN #9

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
Coherent works
  • Loading branch information
LaurelinTheGold committed Sep 12, 2021
commit b831e389777cfb616b0db26776092aee0aa2e660
75 changes: 74 additions & 1 deletion src/kernel.cu
Original file line number Diff line number Diff line change
@@ -90,6 +90,7 @@ int *dev_gridCellEndIndices; // to this cell?

// TODO-2.3 - consider what additional buffers you might need to reshuffle
// the position and velocity data to be coherent within cells.
glm::vec3 *dev_newPos;

// LOOK-2.1 - Grid parameters based on simulation parameters.
// These are automatically computed for you in Boids::initSimulation
@@ -191,6 +192,9 @@ void Boids::initSimulation(int N)
cudaMalloc((void **)&dev_gridCellEndIndices, gridCellCount * sizeof(int));
checkCUDAErrorWithLine("cudaMalloc dev_gridCellEndIndices failed!");

cudaMalloc((void **)&dev_newPos, N * sizeof(glm::vec3));
checkCUDAErrorWithLine("cudaMalloc dev_newPos failed!");

cudaDeviceSynchronize();
}

@@ -674,8 +678,8 @@ void Boids::stepSimulationScatteredGrid(float dt)
dev_pos,
dev_vel1,
dev_vel2);
// kernUpdateVelocityBruteForce<<<fullBlocksPerGrid, blockSize>>>(numObjects, dev_pos, dev_vel1, dev_vel2);
checkCUDAErrorWithLine("kernUpdateVelNeighborSearchScattered failed!");
// kernUpdateVelocityBruteForce<<<fullBlocksPerGrid, blockSize>>>(numObjects, dev_pos, dev_vel1, dev_vel2);
// - Update positions
kernUpdatePos<<<fullBlocksPerGrid, blockSize>>>(numObjects,
dt,
@@ -688,23 +692,91 @@ void Boids::stepSimulationScatteredGrid(float dt)
dev_vel2 = tmp;
}

__global__ void kernRearrangeParticleData(
int N, int *particleArrayIndices,
glm::vec3 *oldPos, glm::vec3 *newPos,
glm::vec3 *oldVel, glm::vec3 *newVel)
{
int index = (blockIdx.x * blockDim.x) + threadIdx.x;
if (index >= N)
{
return;
}
newPos[index] = oldPos[particleArrayIndices[index]];
newVel[index] = oldVel[particleArrayIndices[index]];
}
void Boids::stepSimulationCoherentGrid(float dt)
{
dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize);
dim3 fullBlocksPerGridCells((gridCellCount + blockSize - 1) / blockSize);
// TODO-2.3 - start by copying Boids::stepSimulationNaiveGrid
// Uniform Grid Neighbor search using Thrust sort on cell-coherent data.
// In Parallel:
// - Label each particle with its array index as well as its grid index.
// Use 2x width grids
kernComputeIndices<<<fullBlocksPerGrid, blockSize>>>(numObjects,
gridSideCount,
gridMinimum,
gridInverseCellWidth,
dev_pos,
dev_particleArrayIndices,
dev_particleGridIndices);
checkCUDAErrorWithLine("kernComputeIndices failed!");
// - Unstable key sort using Thrust. A stable sort isn't necessary, but you
// are welcome to do a performance comparison.
dev_thrust_particleArrayIndices = thrust::device_ptr<int>(dev_particleArrayIndices);
dev_thrust_particleGridIndices = thrust::device_ptr<int>(dev_particleGridIndices);
thrust::sort_by_key(dev_thrust_particleGridIndices,
dev_thrust_particleGridIndices + numObjects,
dev_thrust_particleArrayIndices);
// - Naively unroll the loop for finding the start and end indices of each
// cell's data pointers in the array of boid indices
kernResetIntBuffer<<<fullBlocksPerGridCells, blockSize>>>(
gridCellCount, dev_gridCellStartIndices, -1);
checkCUDAErrorWithLine("kernResetIntBuffer gridCellStartIndices failed!");
kernResetIntBuffer<<<fullBlocksPerGridCells, blockSize>>>(
gridCellCount, dev_gridCellEndIndices, -1);
checkCUDAErrorWithLine("kernResetIntBuffer gridCellEndIndices failed!");
kernIdentifyCellStartEnd<<<fullBlocksPerGrid, blockSize>>>(numObjects,
dev_particleGridIndices,
dev_gridCellStartIndices,
dev_gridCellEndIndices);
checkCUDAErrorWithLine("kernIdentifyCellStartEnd failed!");
// - BIG DIFFERENCE: use the rearranged array index buffer to reshuffle all
// the particle data in the simulation array.
// CONSIDER WHAT ADDITIONAL BUFFERS YOU NEED

// v1 holds current velocity data, rearrange into v2.
// in next function, flip so v1 after holds the next timesteps
kernRearrangeParticleData<<<fullBlocksPerGrid, blockSize>>>(numObjects,
dev_particleArrayIndices,
dev_pos,
dev_newPos,
dev_vel1,
dev_vel2);
checkCUDAErrorWithLine("kernRearrangeParticleData failed!");
// - Perform velocity updates using neighbor search
kernUpdateVelNeighborSearchCoherent<<<fullBlocksPerGrid, blockSize>>>(numObjects,
gridSideCount,
gridMinimum,
gridInverseCellWidth,
gridCellWidth,
dev_gridCellStartIndices,
dev_gridCellEndIndices,
dev_newPos,
dev_vel2,
dev_vel1);
checkCUDAErrorWithLine("kernUpdateVelNeighborSearchScattered failed!");
// - Update positions
kernUpdatePos<<<fullBlocksPerGrid, blockSize>>>(numObjects,
dt,
dev_newPos,
dev_vel2);
checkCUDAErrorWithLine("kernUpdatePos failed!");
// - Ping-pong buffers as needed. THIS MAY BE DIFFERENT FROM BEFORE.
glm::vec3 *tmp = dev_pos;
dev_pos = dev_newPos;
dev_newPos = tmp;
}

void Boids::endSimulation()
@@ -718,6 +790,7 @@ void Boids::endSimulation()
cudaFree(dev_gridCellStartIndices);
cudaFree(dev_particleGridIndices);
cudaFree(dev_particleArrayIndices);
cudaFree(dev_newPos);
}

void Boids::unitTest()
4 changes: 2 additions & 2 deletions src/main.cpp
Original file line number Diff line number Diff line change
@@ -15,10 +15,10 @@
// LOOK-2.1 LOOK-2.3 - toggles for UNIFORM_GRID and COHERENT_GRID
#define VISUALIZE 1
#define UNIFORM_GRID 1
#define COHERENT_GRID 0
#define COHERENT_GRID 1

// LOOK-1.2 - change this to adjust particle count in the simulation
const int N_FOR_VIS = 5000;
const int N_FOR_VIS = 100000;
// const int N_FOR_VIS = 30;
const float DT = 0.2f;