From 622988e7c03d39975a36f0eaafa9ecb3773ea992 Mon Sep 17 00:00:00 2001 From: Pranav Sivaraman <14294205+pranav-sivaraman@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:52:43 -0500 Subject: [PATCH 1/2] feat: add copy back of verification vector --- cuda/GridInit.cu | 9 +++++++++ cuda/Main.cu | 18 ++++++++---------- cuda/Simulation.cu | 19 ++++++++++++++----- cuda/XSbench_header.cuh | 1 + 4 files changed, 32 insertions(+), 15 deletions(-) diff --git a/cuda/GridInit.cu b/cuda/GridInit.cu index 0cd5d185..13c62db1 100644 --- a/cuda/GridInit.cu +++ b/cuda/GridInit.cu @@ -87,6 +87,15 @@ void release_device_memory(SimulationData GSD) { cudaFree(GSD.verification); } +void release_memory(SimulationData SD) { + free(SD.num_nucs); + free(SD.concs); + free(SD.mats); + free(SD.unionized_energy_array); + free(SD.nuclide_grid); + free(SD.verification); +} + SimulationData grid_init_do_not_profile( Inputs in, int mype ) { // Structure to hold all allocated simuluation data arrays diff --git a/cuda/Main.cu b/cuda/Main.cu index 75ca2b0f..f5b81959 100644 --- a/cuda/Main.cu +++ b/cuda/Main.cu @@ -38,8 +38,6 @@ int main( int argc, char* argv[] ) if( in.binary_mode == WRITE && mype == 0 ) binary_write(in, SD); - // Move data to GPU - SimulationData GSD = move_simulation_data_to_device( in, mype, SD ); // ===================================================================== // Cross Section (XS) Parallel Lookup Simulation @@ -62,19 +60,19 @@ int main( int argc, char* argv[] ) if( in.simulation_method == EVENT_BASED ) { if( in.kernel_id == 0 ) - verification = run_event_based_simulation_baseline(in, GSD, mype); + verification = run_event_based_simulation_baseline(in, SD, mype); else if( in.kernel_id == 1 ) - verification = run_event_based_simulation_optimization_1(in, GSD, mype); + verification = run_event_based_simulation_optimization_1(in, SD, mype); else if( in.kernel_id == 2 ) - verification = run_event_based_simulation_optimization_2(in, GSD, mype); + verification = run_event_based_simulation_optimization_2(in, SD, mype); else if( in.kernel_id == 3 ) - verification = run_event_based_simulation_optimization_3(in, GSD, mype); + verification = run_event_based_simulation_optimization_3(in, SD, mype); else if( in.kernel_id == 4 ) - verification = run_event_based_simulation_optimization_4(in, GSD, mype); + verification = run_event_based_simulation_optimization_4(in, SD, mype); else if( in.kernel_id == 5 ) - verification = run_event_based_simulation_optimization_5(in, GSD, mype); + verification = run_event_based_simulation_optimization_5(in, SD, mype); else if( in.kernel_id == 6 ) - verification = run_event_based_simulation_optimization_6(in, GSD, mype); + verification = run_event_based_simulation_optimization_6(in, SD, mype); else { printf("Error: No kernel ID %d found!\n", in.kernel_id); @@ -97,7 +95,7 @@ int main( int argc, char* argv[] ) omp_end = get_time(); // Release device memory - release_device_memory(GSD); + release_memory(SD); // Final Hash Step verification = verification % 999983; diff --git a/cuda/Simulation.cu b/cuda/Simulation.cu index a757eeaf..8be81ab1 100644 --- a/cuda/Simulation.cu +++ b/cuda/Simulation.cu @@ -12,8 +12,14 @@ // line argument. //////////////////////////////////////////////////////////////////////////////////// -unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData GSD, int mype) +unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData SD, int mype) { + size_t sz = in.lookups * sizeof(unsigned long); + unsigned long * v = (unsigned long *) malloc(sz); + + // Move Data to GPU + SimulationData GSD = move_simulation_data_to_device(in, mype, SD); + //////////////////////////////////////////////////////////////////////////////// // Configure & Launch Simulation Kernel //////////////////////////////////////////////////////////////////////////////// @@ -22,18 +28,21 @@ unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData int nthreads = 256; int nblocks = ceil( (double) in.lookups / (double) nthreads); + xs_lookup_kernel_baseline<<>>( in, GSD ); gpuErrchk( cudaPeekAtLastError() ); - gpuErrchk( cudaDeviceSynchronize() ); //////////////////////////////////////////////////////////////////////////////// // Reduce Verification Results //////////////////////////////////////////////////////////////////////////////// if( mype == 0) printf("Reducing verification results...\n"); + gpuErrchk(cudaMemcpy(v, GSD.verification, sz, cudaMemcpyDeviceToHost) ); - unsigned long verification_scalar = thrust::reduce(thrust::device, GSD.verification, GSD.verification + in.lookups, 0); - gpuErrchk( cudaPeekAtLastError() ); - gpuErrchk( cudaDeviceSynchronize() ); + unsigned long verification_scalar = 0; + for( int i =0; i < in.lookups; i++ ) + verification_scalar += v[i]; + + release_device_memory(GSD); return verification_scalar; } diff --git a/cuda/XSbench_header.cuh b/cuda/XSbench_header.cuh index 53dee7bd..4d45022d 100644 --- a/cuda/XSbench_header.cuh +++ b/cuda/XSbench_header.cuh @@ -139,6 +139,7 @@ unsigned long long run_event_based_simulation_optimization_6(Inputs in, Simulati SimulationData grid_init_do_not_profile( Inputs in, int mype ); SimulationData move_simulation_data_to_device( Inputs in, int mype, SimulationData SD ); void release_device_memory(SimulationData GSD); +void release_memory(SimulationData SD); // XSutils.cu int NGP_compare( const void * a, const void * b ); From 36a4e033482b45539a709ab55ad64952190b6b33 Mon Sep 17 00:00:00 2001 From: Pranav Sivaraman <14294205+pranav-sivaraman@users.noreply.github.com> Date: Sun, 5 Nov 2023 13:59:27 -0500 Subject: [PATCH 2/2] fix: allocation of host verification vector --- cuda/GridInit.cu | 7 +++++++ cuda/Simulation.cu | 7 ++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/cuda/GridInit.cu b/cuda/GridInit.cu index 13c62db1..6eb4d53e 100644 --- a/cuda/GridInit.cu +++ b/cuda/GridInit.cu @@ -100,6 +100,8 @@ SimulationData grid_init_do_not_profile( Inputs in, int mype ) { // Structure to hold all allocated simuluation data arrays SimulationData SD; + + // Keep track of how much data we're allocating size_t nbytes = 0; @@ -152,6 +154,11 @@ SimulationData grid_init_do_not_profile( Inputs in, int mype ) printf("E%d = %lf\n", j, SD.nuclide_grid[i * in.n_gridpoints + j].energy); } */ + + // Allocate Verification Array + size_t sz = in.lookups * sizeof(unsigned long); + SD.verification = (unsigned long *) malloc(sz); + nbytes += sz; //////////////////////////////////////////////////////////////////// diff --git a/cuda/Simulation.cu b/cuda/Simulation.cu index 8be81ab1..c7b8a736 100644 --- a/cuda/Simulation.cu +++ b/cuda/Simulation.cu @@ -14,9 +14,6 @@ unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData SD, int mype) { - size_t sz = in.lookups * sizeof(unsigned long); - unsigned long * v = (unsigned long *) malloc(sz); - // Move Data to GPU SimulationData GSD = move_simulation_data_to_device(in, mype, SD); @@ -36,11 +33,11 @@ unsigned long long run_event_based_simulation_baseline(Inputs in, SimulationData // Reduce Verification Results //////////////////////////////////////////////////////////////////////////////// if( mype == 0) printf("Reducing verification results...\n"); - gpuErrchk(cudaMemcpy(v, GSD.verification, sz, cudaMemcpyDeviceToHost) ); + gpuErrchk(cudaMemcpy(SD.verification, GSD.verification, in.lookups * sizeof(unsigned long), cudaMemcpyDeviceToHost) ); unsigned long verification_scalar = 0; for( int i =0; i < in.lookups; i++ ) - verification_scalar += v[i]; + verification_scalar += SD.verification[i]; release_device_memory(GSD);