Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CUDA Verification Vector Copy Back #36

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions cuda/GridInit.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,21 @@ void release_device_memory(SimulationData GSD) {
cudaFree(GSD.verification);
}

/*
 * Releases the arrays referenced by a SimulationData struct by handing each
 * of its six array pointers to free(), in the order: num_nucs, concs, mats,
 * unionized_energy_array, nuclide_grid, verification.
 *
 * SD is received by value, so the caller's copy of the struct still holds
 * the (now dangling) pointers after this call returns.
 *
 * NOTE(review): presumably the host-side counterpart of
 * release_device_memory() -- confirm every field was allocated with
 * malloc-family calls before relying on this.
 */
void release_memory(SimulationData SD) {
	// Collect the pointers in release order so the frees happen in one pass.
	void * host_arrays[] = {
		SD.num_nucs,
		SD.concs,
		SD.mats,
		SD.unionized_energy_array,
		SD.nuclide_grid,
		SD.verification
	};
	const size_t n_arrays = sizeof(host_arrays) / sizeof(host_arrays[0]);

	for( size_t idx = 0; idx < n_arrays; idx++ )
		free(host_arrays[idx]);
}

SimulationData grid_init_do_not_profile( Inputs in, int mype )
{
// Structure to hold all allocated simulation data arrays
SimulationData SD;



// Keep track of how much data we're allocating
size_t nbytes = 0;
Expand Down Expand Up @@ -143,6 +154,11 @@ SimulationData grid_init_do_not_profile( Inputs in, int mype )
printf("E%d = %lf\n", j, SD.nuclide_grid[i * in.n_gridpoints + j].energy);
}
*/

// Allocate Verification Array
size_t sz = in.lookups * sizeof(unsigned long);
SD.verification = (unsigned long *) malloc(sz);
nbytes += sz;


////////////////////////////////////////////////////////////////////
Expand Down
18 changes: 8 additions & 10 deletions cuda/Main.cu
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ int main( int argc, char* argv[] )
if( in.binary_mode == WRITE && mype == 0 )
binary_write(in, SD);

// Move data to GPU
SimulationData GSD = move_simulation_data_to_device( in, mype, SD );

// =====================================================================
// Cross Section (XS) Parallel Lookup Simulation
Expand All @@ -62,19 +60,19 @@ int main( int argc, char* argv[] )
if( in.simulation_method == EVENT_BASED )
{
if( in.kernel_id == 0 )
verification = run_event_based_simulation_baseline(in, GSD, mype);
verification = run_event_based_simulation_baseline(in, SD, mype);
else if( in.kernel_id == 1 )
verification = run_event_based_simulation_optimization_1(in, GSD, mype);
verification = run_event_based_simulation_optimization_1(in, SD, mype);
else if( in.kernel_id == 2 )
verification = run_event_based_simulation_optimization_2(in, GSD, mype);
verification = run_event_based_simulation_optimization_2(in, SD, mype);
else if( in.kernel_id == 3 )
verification = run_event_based_simulation_optimization_3(in, GSD, mype);
verification = run_event_based_simulation_optimization_3(in, SD, mype);
else if( in.kernel_id == 4 )
verification = run_event_based_simulation_optimization_4(in, GSD, mype);
verification = run_event_based_simulation_optimization_4(in, SD, mype);
else if( in.kernel_id == 5 )
verification = run_event_based_simulation_optimization_5(in, GSD, mype);
verification = run_event_based_simulation_optimization_5(in, SD, mype);
else if( in.kernel_id == 6 )
verification = run_event_based_simulation_optimization_6(in, GSD, mype);
verification = run_event_based_simulation_optimization_6(in, SD, mype);
else
{
printf("Error: No kernel ID %d found!\n", in.kernel_id);
Expand All @@ -97,7 +95,7 @@ int main( int argc, char* argv[] )
omp_end = get_time();

// Release device memory
release_device_memory(GSD);
release_memory(SD);

// Final Hash Step
verification = verification % 999983;
Expand Down
16 changes: 11 additions & 5 deletions cuda/Simulation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,11 @@
// line argument.
////////////////////////////////////////////////////////////////////////////////////

unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData GSD, int mype)
unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData SD, int mype)
{
// Move Data to GPU
SimulationData GSD = move_simulation_data_to_device(in, mype, SD);

////////////////////////////////////////////////////////////////////////////////
// Configure & Launch Simulation Kernel
////////////////////////////////////////////////////////////////////////////////
Expand All @@ -22,18 +25,21 @@ unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData
int nthreads = 256;
int nblocks = ceil( (double) in.lookups / (double) nthreads);


xs_lookup_kernel_baseline<<<nblocks, nthreads>>>( in, GSD );
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );

////////////////////////////////////////////////////////////////////////////////
// Reduce Verification Results
////////////////////////////////////////////////////////////////////////////////
if( mype == 0) printf("Reducing verification results...\n");
gpuErrchk(cudaMemcpy(SD.verification, GSD.verification, in.lookups * sizeof(unsigned long), cudaMemcpyDeviceToHost) );

unsigned long verification_scalar = thrust::reduce(thrust::device, GSD.verification, GSD.verification + in.lookups, 0);
gpuErrchk( cudaPeekAtLastError() );
gpuErrchk( cudaDeviceSynchronize() );
unsigned long verification_scalar = 0;
for( int i =0; i < in.lookups; i++ )
verification_scalar += SD.verification[i];

release_device_memory(GSD);

return verification_scalar;
}
Expand Down
1 change: 1 addition & 0 deletions cuda/XSbench_header.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@ unsigned long long run_event_based_simulation_optimization_6(Inputs in, Simulati
SimulationData grid_init_do_not_profile( Inputs in, int mype );
SimulationData move_simulation_data_to_device( Inputs in, int mype, SimulationData SD );
void release_device_memory(SimulationData GSD);
void release_memory(SimulationData SD);

// XSutils.cu
int NGP_compare( const void * a, const void * b );
Expand Down