From 307f6f173c41203e64fc5ff53a892e1dd2dfcac9 Mon Sep 17 00:00:00 2001 From: Josh Davis Date: Tue, 20 Jun 2023 17:31:15 -0400 Subject: [PATCH] Add working OpenACC port of XSBench --- .gitignore | 2 +- openacc/GridInit.c | 177 ++++++++++++++ openacc/Main.c | 115 +++++++++ openacc/Makefile | 108 +++++++++ openacc/Materials.c | 118 ++++++++++ openacc/Simulation.c | 402 +++++++++++++++++++++++++++++++ openacc/XSbench_header.h | 131 +++++++++++ openacc/XSutils.c | 48 ++++ openacc/io.c | 494 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 1594 insertions(+), 1 deletion(-) create mode 100644 openacc/GridInit.c create mode 100644 openacc/Main.c create mode 100644 openacc/Makefile create mode 100644 openacc/Materials.c create mode 100644 openacc/Simulation.c create mode 100644 openacc/XSbench_header.h create mode 100644 openacc/XSutils.c create mode 100644 openacc/io.c diff --git a/.gitignore b/.gitignore index 08f0e1b3..d6ff91a9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,2 @@ +*~ *.o -*/XSBench diff --git a/openacc/GridInit.c b/openacc/GridInit.c new file mode 100644 index 00000000..3787dff4 --- /dev/null +++ b/openacc/GridInit.c @@ -0,0 +1,177 @@ +#include "XSbench_header.h" + +SimulationData grid_init_do_not_profile( Inputs in, int mype ) +{ + // Structure to hold all allocated simuluation data arrays + SimulationData SD; + + // Keep track of how much data we're allocating + size_t nbytes = 0; + + // Set the initial seed value + uint64_t seed = 42; + + //////////////////////////////////////////////////////////////////// + // Initialize Nuclide Grids + //////////////////////////////////////////////////////////////////// + + if(mype == 0) printf("Intializing nuclide grids...\n"); + + // First, we need to initialize our nuclide grid. This comes in the form + // of a flattened 2D array that hold all the information we need to define + // the cross sections for all isotopes in the simulation. + // The grid is composed of "NuclideGridPoint" structures, which hold the + // energy level of the grid point and all associated XS data at that level. + // An array of structures (AOS) is used instead of + // a structure of arrays, as the grid points themselves are accessed in + // a random order, but all cross section interaction channels and the + // energy level are read whenever the gridpoint is accessed, meaning the + // AOS is more cache efficient. + + // Initialize Nuclide Grid + SD.length_nuclide_grid = in.n_isotopes * in.n_gridpoints; + SD.nuclide_grid = (NuclideGridPoint *) malloc( SD.length_nuclide_grid * sizeof(NuclideGridPoint)); + assert(SD.nuclide_grid != NULL); + nbytes += SD.length_nuclide_grid * sizeof(NuclideGridPoint); + for( int i = 0; i < SD.length_nuclide_grid; i++ ) + { + SD.nuclide_grid[i].energy = LCG_random_double(&seed); + SD.nuclide_grid[i].total_xs = LCG_random_double(&seed); + SD.nuclide_grid[i].elastic_xs = LCG_random_double(&seed); + SD.nuclide_grid[i].absorbtion_xs = LCG_random_double(&seed); + SD.nuclide_grid[i].fission_xs = LCG_random_double(&seed); + SD.nuclide_grid[i].nu_fission_xs = LCG_random_double(&seed); + } + + // Sort so that each nuclide has data stored in ascending energy order. + for( int i = 0; i < in.n_isotopes; i++ ) + qsort( &SD.nuclide_grid[i*in.n_gridpoints], in.n_gridpoints, sizeof(NuclideGridPoint), NGP_compare); + + // error debug check + /* + for( int i = 0; i < in.n_isotopes; i++ ) + { + printf("NUCLIDE %d ==============================\n", i); + for( int j = 0; j < in.n_gridpoints; j++ ) + printf("E%d = %lf\n", j, SD.nuclide_grid[i * in.n_gridpoints + j].energy); + } + */ + + + //////////////////////////////////////////////////////////////////// + // Initialize Acceleration Structure + //////////////////////////////////////////////////////////////////// + + if( in.grid_type == NUCLIDE ) + { + SD.length_unionized_energy_array = 0; + SD.length_index_grid = 0; + } + + if( in.grid_type == UNIONIZED ) + { + if(mype == 0) printf("Intializing unionized grid...\n"); + + // Allocate space to hold the union of all nuclide energy data + SD.length_unionized_energy_array = in.n_isotopes * in.n_gridpoints; + SD.unionized_energy_array = (double *) malloc( SD.length_unionized_energy_array * sizeof(double)); + assert(SD.unionized_energy_array != NULL ); + nbytes += SD.length_unionized_energy_array * sizeof(double); + + // Copy energy data over from the nuclide energy grid + for( int i = 0; i < SD.length_unionized_energy_array; i++ ) + SD.unionized_energy_array[i] = SD.nuclide_grid[i].energy; + + // Sort unionized energy array + qsort( SD.unionized_energy_array, SD.length_unionized_energy_array, sizeof(double), double_compare); + + // Allocate space to hold the acceleration grid indices + SD.length_index_grid = SD.length_unionized_energy_array * in.n_isotopes; + SD.index_grid = (int *) malloc( SD.length_index_grid * sizeof(int)); + assert(SD.index_grid != NULL); + nbytes += SD.length_index_grid * sizeof(int); + + // Generates the double indexing grid + int * idx_low = (int *) calloc( in.n_isotopes, sizeof(int)); + assert(idx_low != NULL ); + double * energy_high = (double *) malloc( in.n_isotopes * sizeof(double)); + assert(energy_high != NULL ); + + for( int i = 0; i < in.n_isotopes; i++ ) + energy_high[i] = SD.nuclide_grid[i * in.n_gridpoints + 1].energy; + + for( long e = 0; e < SD.length_unionized_energy_array; e++ ) + { + double unionized_energy = SD.unionized_energy_array[e]; + for( long i = 0; i < in.n_isotopes; i++ ) + { + if( unionized_energy < energy_high[i] ) + SD.index_grid[e * in.n_isotopes + i] = idx_low[i]; + else if( idx_low[i] == in.n_gridpoints - 2 ) + SD.index_grid[e * in.n_isotopes + i] = idx_low[i]; + else + { + idx_low[i]++; + SD.index_grid[e * in.n_isotopes + i] = idx_low[i]; + energy_high[i] = SD.nuclide_grid[i * in.n_gridpoints + idx_low[i] + 1].energy; + } + } + } + + free(idx_low); + free(energy_high); + } + + if( in.grid_type == HASH ) + { + if(mype == 0) printf("Intializing hash grid...\n"); + SD.length_unionized_energy_array = 0; + SD.length_index_grid = in.hash_bins * in.n_isotopes; + SD.index_grid = (int *) malloc( SD.length_index_grid * sizeof(int)); + assert(SD.index_grid != NULL); + nbytes += SD.length_index_grid * sizeof(int); + + double du = 1.0 / in.hash_bins; + + // For each energy level in the hash table + #pragma omp parallel for + for( long e = 0; e < in.hash_bins; e++ ) + { + double energy = e * du; + + // We need to determine the bounding energy levels for all isotopes + for( long i = 0; i < in.n_isotopes; i++ ) + { + SD.index_grid[e * in.n_isotopes + i] = grid_search_nuclide( in.n_gridpoints, energy, SD.nuclide_grid + i * in.n_gridpoints, 0, in.n_gridpoints-1); + } + } + } + + //////////////////////////////////////////////////////////////////// + // Initialize Materials and Concentrations + //////////////////////////////////////////////////////////////////// + if(mype == 0) printf("Intializing material data...\n"); + + // Set the number of nuclides in each material + SD.num_nucs = load_num_nucs(in.n_isotopes); + SD.length_num_nucs = 12; // There are always 12 materials in XSBench + + // Intialize the flattened 2D grid of material data. The grid holds + // a list of nuclide indices for each of the 12 material types. The + // grid is allocated as a full square grid, even though not all + // materials have the same number of nuclides. + SD.mats = load_mats(SD.num_nucs, in.n_isotopes, &SD.max_num_nucs); + SD.length_mats = SD.length_num_nucs * SD.max_num_nucs; + + // Intialize the flattened 2D grid of nuclide concentration data. The grid holds + // a list of nuclide concentrations for each of the 12 material types. The + // grid is allocated as a full square grid, even though not all + // materials have the same number of nuclides. + SD.concs = load_concs(SD.num_nucs, SD.max_num_nucs); + SD.length_concs = SD.length_mats; + + if(mype == 0) printf("Intialization complete. Allocated %.0lf MB of data.\n", nbytes/1024.0/1024.0 ); + + return SD; + +} diff --git a/openacc/Main.c b/openacc/Main.c new file mode 100644 index 00000000..1d4d4b15 --- /dev/null +++ b/openacc/Main.c @@ -0,0 +1,115 @@ +#include "XSbench_header.h" + +#ifdef MPI +#include +#endif + +int main( int argc, char* argv[] ) +{ + // ===================================================================== + // Initialization & Command Line Read-In + // ===================================================================== + int version = 20; + int mype = 0; + double omp_start, omp_end; + int nprocs = 1; + unsigned long long verification; + + #ifdef MPI + MPI_Status stat; + MPI_Init(&argc, &argv); + MPI_Comm_size(MPI_COMM_WORLD, &nprocs); + MPI_Comm_rank(MPI_COMM_WORLD, &mype); + #endif + + // Process CLI Fields -- store in "Inputs" structure + Inputs in = read_CLI( argc, argv ); + + // Set number of OpenMP Threads + //omp_set_num_threads(in.nthreads); + + // Print-out of Input Summary + if( mype == 0 ) + print_inputs( in, nprocs, version ); + + // ===================================================================== + // Prepare Nuclide Energy Grids, Unionized Energy Grid, & Material Data + // This is not reflective of a real Monte Carlo simulation workload, + // therefore, do not profile this region! + // ===================================================================== + + SimulationData SD; + + // If read from file mode is selected, skip initialization and load + // all simulation data structures from file instead + if( in.binary_mode == READ ) + SD = binary_read(in); + else + SD = grid_init_do_not_profile( in, mype ); + + // If writing from file mode is selected, write all simulation data + // structures to file + if( in.binary_mode == WRITE && mype == 0 ) + binary_write(in, SD); + + + // ===================================================================== + // Cross Section (XS) Parallel Lookup Simulation + // This is the section that should be profiled, as it reflects a + // realistic continuous energy Monte Carlo macroscopic cross section + // lookup kernel. + // ===================================================================== + + if( mype == 0 ) + { + printf("\n"); + border_print(); + center_print("SIMULATION", 79); + border_print(); + } + + // Start Simulation Timer + omp_start = omp_get_wtime(); + + // Run simulation + if( in.simulation_method == EVENT_BASED ) + { + if( in.kernel_id == 0 ) + verification = run_event_based_simulation(in, SD, mype); + else + { + printf("Error: No kernel ID %d found!\n", in.kernel_id); + exit(1); + } + } + else + { + printf("History-based simulation not implemented in OpenMP offload code. Instead,\nuse the event-based method with \"-m event\" argument.\n"); + exit(1); + } + + if( mype == 0) + { + printf("\n" ); + printf("Simulation complete.\n" ); + } + + // End Simulation Timer + omp_end = omp_get_wtime(); + + // ===================================================================== + // Output Results & Finalize + // ===================================================================== + + // Final Hash Step + verification = verification % 999983; + + // Print / Save Results and Exit + int is_invalid_result = print_results( in, mype, omp_end-omp_start, nprocs, verification ); + + #ifdef MPI + MPI_Finalize(); + #endif + + return is_invalid_result; +} diff --git a/openacc/Makefile b/openacc/Makefile new file mode 100644 index 00000000..51a0bbd4 --- /dev/null +++ b/openacc/Makefile @@ -0,0 +1,108 @@ +#=============================================================================== +# User Options +#=============================================================================== + +COMPILER = pgi +OPTIMIZE = yes +DEBUG = yes +PROFILE = no +MPI = no +OPENACC = no + +#=============================================================================== +# Program name & source code list +#=============================================================================== + +program = XSBench + +source = \ +Main.c \ +io.c \ +Simulation.c \ +GridInit.c \ +XSutils.c \ +Materials.c + +obj = $(source:.c=.o) + +#=============================================================================== +# Sets Flags +#=============================================================================== + +# Standard Flags +CFLAGS := -std=gnu99 -Wall + +# Linker Flags +LDFLAGS = -lm + +# PGI Compiler +ifeq ($(COMPILER),pgi) + CC = pgcc + CFLAGS = -c99 -Minform=warn -mp -Minfo=mp + ifeq ($(OPTIMIZE),yes) + CFLAGS += -fast + endif + ifeq ($(VEC_INFO),yes) + CFLAGS += -Minfo=vect + endif + # OpenACC (assumes NVIDA GPU) + ifeq ($(OPENACC),yes) + CFLAGS += -acc -Minfo=accel -gpu=cc80 + endif +endif + +# GNU Compiler +ifeq ($(COMPILER),gnu) + CC = gcc + CFLAGS += -std=gnu99 -Wall -fopenmp + # Optimization + ifeq ($(OPTIMIZE),yes) + CFLAGS += -O3 + # Compiler Vectorization (needs -O3 flag) information + ifeq ($(VEC_INFO),yes) + CFLAGS += -ftree-vectorizer-verbose=6 + endif + endif +endif + +# Debug Flags +ifeq ($(DEBUG),yes) + CFLAGS += -g + LDFLAGS += -g +endif + +# Profiling Flags +ifeq ($(PROFILE),yes) + CFLAGS += -pg + LDFLAGS += -pg +endif + +# Optimization Flags +ifeq ($(OPTIMIZE),yes) + CFLAGS += -O3 +endif + +# MPI +ifeq ($(MPI),yes) + CC = mpicc + CFLAGS += -DMPI +endif + +#=============================================================================== +# Targets to Build +#=============================================================================== + +$(program): $(obj) XSbench_header.h Makefile + $(CC) $(CFLAGS) $(obj) -o $@ $(LDFLAGS) + +%.o: %.c XSbench_header.h Makefile + $(CC) $(CFLAGS) -c $< -o $@ + +clean: + rm -rf $(program) $(obj) + +edit: + vim -p $(source) XSbench_header.h + +run: + ./$(program) diff --git a/openacc/Materials.c b/openacc/Materials.c new file mode 100644 index 00000000..69f1c7f7 --- /dev/null +++ b/openacc/Materials.c @@ -0,0 +1,118 @@ +// Material data is hard coded into the functions in this file. +// Note that there are 12 materials present in H-M (large or small) + +#include "XSbench_header.h" + +// num_nucs represents the number of nuclides that each material contains +int * load_num_nucs(long n_isotopes) +{ + int * num_nucs = (int*)malloc(12*sizeof(int)); + + // Material 0 is a special case (fuel). The H-M small reactor uses + // 34 nuclides, while H-M larges uses 300. + if( n_isotopes == 68 ) + num_nucs[0] = 34; // HM Small is 34, H-M Large is 321 + else + num_nucs[0] = 321; // HM Small is 34, H-M Large is 321 + + num_nucs[1] = 5; + num_nucs[2] = 4; + num_nucs[3] = 4; + num_nucs[4] = 27; + num_nucs[5] = 21; + num_nucs[6] = 21; + num_nucs[7] = 21; + num_nucs[8] = 21; + num_nucs[9] = 21; + num_nucs[10] = 9; + num_nucs[11] = 9; + + return num_nucs; +} + +// Assigns an array of nuclide ID's to each material +int * load_mats( int * num_nucs, long n_isotopes, int * max_num_nucs ) +{ + *max_num_nucs = 0; + int num_mats = 12; + for( int m = 0; m < num_mats; m++ ) + { + if( num_nucs[m] > *max_num_nucs ) + *max_num_nucs = num_nucs[m]; + } + int * mats = (int *) malloc( num_mats * (*max_num_nucs) * sizeof(int) ); + + // Small H-M has 34 fuel nuclides + int mats0_Sml[] = { 58, 59, 60, 61, 40, 42, 43, 44, 45, 46, 1, 2, 3, 7, + 8, 9, 10, 29, 57, 47, 48, 0, 62, 15, 33, 34, 52, 53, + 54, 55, 56, 18, 23, 41 }; //fuel + // Large H-M has 300 fuel nuclides + int mats0_Lrg[321] = { 58, 59, 60, 61, 40, 42, 43, 44, 45, 46, 1, 2, 3, 7, + 8, 9, 10, 29, 57, 47, 48, 0, 62, 15, 33, 34, 52, 53, + 54, 55, 56, 18, 23, 41 }; //fuel + for( int i = 0; i < 321-34; i++ ) + mats0_Lrg[34+i] = 68 + i; // H-M large adds nuclides to fuel only + + // These are the non-fuel materials + int mats1[] = { 63, 64, 65, 66, 67 }; // cladding + int mats2[] = { 24, 41, 4, 5 }; // cold borated water + int mats3[] = { 24, 41, 4, 5 }; // hot borated water + int mats4[] = { 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, 27, 28, 29, + 30, 31, 32, 26, 49, 50, 51, 11, 12, 13, 14, 6, 16, + 17 }; // RPV + int mats5[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // lower radial reflector + int mats6[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // top reflector / plate + int mats7[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // bottom plate + int mats8[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // bottom nozzle + int mats9[] = { 24, 41, 4, 5, 19, 20, 21, 22, 35, 36, 37, 38, 39, 25, + 49, 50, 51, 11, 12, 13, 14 }; // top nozzle + int mats10[] = { 24, 41, 4, 5, 63, 64, 65, 66, 67 }; // top of FA's + int mats11[] = { 24, 41, 4, 5, 63, 64, 65, 66, 67 }; // bottom FA's + + // H-M large v small dependency + if( n_isotopes == 68 ) + memcpy( mats, mats0_Sml, num_nucs[0] * sizeof(int) ); + else + memcpy( mats, mats0_Lrg, num_nucs[0] * sizeof(int) ); + + // Copy other materials + memcpy( mats + *max_num_nucs * 1, mats1, num_nucs[1] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 2, mats2, num_nucs[2] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 3, mats3, num_nucs[3] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 4, mats4, num_nucs[4] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 5, mats5, num_nucs[5] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 6, mats6, num_nucs[6] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 7, mats7, num_nucs[7] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 8, mats8, num_nucs[8] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 9, mats9, num_nucs[9] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 10, mats10, num_nucs[10] * sizeof(int) ); + memcpy( mats + *max_num_nucs * 11, mats11, num_nucs[11] * sizeof(int) ); + + + return mats; +} + +// Randomizes the concentrations of all nuclides in a variety of materials +double * load_concs( int * num_nucs, int max_num_nucs ) +{ + uint64_t seed = STARTING_SEED * STARTING_SEED; + double * concs = (double *) malloc( 12 * max_num_nucs * sizeof( double ) ); + + for( int i = 0; i < 12; i++ ) + for( int j = 0; j < num_nucs[i]; j++ ) + concs[i * max_num_nucs + j] = LCG_random_double(&seed); + + // test + /* + for( int i = 0; i < 12; i++ ) + for( int j = 0; j < num_nucs[i]; j++ ) + printf("concs[%d][%d] = %lf\n", i, j, concs[i][j] ); + */ + + return concs; +} + diff --git a/openacc/Simulation.c b/openacc/Simulation.c new file mode 100644 index 00000000..2f0c459b --- /dev/null +++ b/openacc/Simulation.c @@ -0,0 +1,402 @@ +#include "XSbench_header.h" + +//////////////////////////////////////////////////////////////////////////////////// +// BASELINE FUNCTIONS +//////////////////////////////////////////////////////////////////////////////////// +// All "baseline" code is at the top of this file. The baseline code is a simple +// implementation of the algorithm, with only minor CPU optimizations in place. +// Following these functions are a number of optimized variants, +// which each deploy a different combination of optimizations strategies. By +// default, XSBench will only run the baseline implementation. Optimized variants +// are not yet implemented for this OpenMP targeting offload port. +//////////////////////////////////////////////////////////////////////////////////// + +unsigned long long run_event_based_simulation(Inputs in, SimulationData SD, int mype) +{ + if( mype == 0) + printf("Beginning event based simulation...\n"); + + //////////////////////////////////////////////////////////////////////////////// + // SUMMARY: Simulation Data Structure Manifest for "SD" Object + // Here we list all heap arrays (and lengths) in SD that would need to be + // offloaded manually if using an accelerator with a seperate memory space + //////////////////////////////////////////////////////////////////////////////// + // int * num_nucs; // Length = length_num_nucs; + // double * concs; // Length = length_concs + // int * mats; // Length = length_mats + // double * unionized_energy_array; // Length = length_unionized_energy_array + // int * index_grid; // Length = length_index_grid + // NuclideGridPoint * nuclide_grid; // Length = length_nuclide_grid + // + // Note: "unionized_energy_array" and "index_grid" can be of zero length + // depending on lookup method. + // + // Note: "Lengths" are given as the number of objects in the array, not the + // number of bytes. + //////////////////////////////////////////////////////////////////////////////// + + + //////////////////////////////////////////////////////////////////////////////// + // Begin Actual Simulation Loop + //////////////////////////////////////////////////////////////////////////////// + unsigned long long * verification = (unsigned long long *) malloc(in.lookups * sizeof(unsigned long long)); + + #pragma acc parallel loop\ + copyin( SD)\ + copyin( SD.max_num_nucs)\ + copyin( SD.num_nucs[0:SD.length_num_nucs])\ + copyin( SD.concs[0:SD.length_concs])\ + copyin( SD.mats[0:SD.length_mats])\ + copyin( SD.unionized_energy_array[0:SD.length_unionized_energy_array])\ + copyin( SD.index_grid[0:SD.length_index_grid])\ + copyin( SD.nuclide_grid[0:SD.length_nuclide_grid])\ + copyout( verification[0:in.lookups]) + for( int i = 0; i < in.lookups; i++ ) + { + // Set the initial seed value + uint64_t seed = STARTING_SEED; + + // Forward seed to lookup index (we need 2 samples per lookup) + seed = fast_forward_LCG(seed, 2*i); + + // Randomly pick an energy and material for the particle + double p_energy = LCG_random_double(&seed); + int mat = pick_mat(&seed); + + // debugging + //printf("E = %lf mat = %d\n", p_energy, mat); + + double macro_xs_vector[5] = {0}; + + // Perform macroscopic Cross Section Lookup + calculate_macro_xs( + p_energy, // Sampled neutron energy (in lethargy) + mat, // Sampled material type index neutron is in + in.n_isotopes, // Total number of isotopes in simulation + in.n_gridpoints, // Number of gridpoints per isotope in simulation + SD.num_nucs, // 1-D array with number of nuclides per material + SD.concs, // Flattened 2-D array with concentration of each nuclide in each material + SD.unionized_energy_array, // 1-D Unionized energy array + SD.index_grid, // Flattened 2-D grid holding indices into nuclide grid for each unionized energy level + SD.nuclide_grid, // Flattened 2-D grid holding energy levels and XS_data for all nuclides in simulation + SD.mats, // Flattened 2-D array with nuclide indices defining composition of each type of material + macro_xs_vector, // 1-D array with result of the macroscopic cross section (5 different reaction channels) + in.grid_type, // Lookup type (nuclide, hash, or unionized) + in.hash_bins, // Number of hash bins used (if using hash lookup type) + SD.max_num_nucs // Maximum number of nuclides present in any material + ); + + // For verification, and to prevent the compiler from optimizing + // all work out, we interrogate the returned macro_xs_vector array + // to find its maximum value index, then increment the verification + // value by that index. In this implementation, we prevent thread + // contention by using an OMP reduction on the verification value. + // For accelerators, a different approach might be required + // (e.g., atomics, reduction of thread-specific values in large + // array via CUDA thrust, etc). + double max = -1.0; + int max_idx = 0; + for(int j = 0; j < 5; j++ ) + { + if( macro_xs_vector[j] > max ) + { + max = macro_xs_vector[j]; + max_idx = j; + } + } + verification[i] = max_idx+1; + } + + // Reduce validation hash on the host + unsigned long long validation_hash = 0; + for( int i = 0; i < in.lookups; i++ ) + validation_hash += verification[i]; + + return validation_hash; +} + +// Calculates the microscopic cross section for a given nuclide & energy +void calculate_micro_xs( double p_energy, int nuc, long n_isotopes, + long n_gridpoints, + double * egrid, int * index_data, + NuclideGridPoint * nuclide_grids, + long idx, double * xs_vector, int grid_type, int hash_bins ){ + // Variables + double f; + NuclideGridPoint * low, * high; + + // If using only the nuclide grid, we must perform a binary search + // to find the energy location in this particular nuclide's grid. + if( grid_type == NUCLIDE ) + { + // Perform binary search on the Nuclide Grid to find the index + idx = grid_search_nuclide( n_gridpoints, p_energy, &nuclide_grids[nuc*n_gridpoints], 0, n_gridpoints-1); + + // pull ptr from nuclide grid and check to ensure that + // we're not reading off the end of the nuclide's grid + if( idx == n_gridpoints - 1 ) + low = &nuclide_grids[nuc*n_gridpoints + idx - 1]; + else + low = &nuclide_grids[nuc*n_gridpoints + idx]; + } + else if( grid_type == UNIONIZED) // Unionized Energy Grid - we already know the index, no binary search needed. + { + // pull ptr from energy grid and check to ensure that + // we're not reading off the end of the nuclide's grid + if( index_data[idx * n_isotopes + nuc] == n_gridpoints - 1 ) + low = &nuclide_grids[nuc*n_gridpoints + index_data[idx * n_isotopes + nuc] - 1]; + else + low = &nuclide_grids[nuc*n_gridpoints + index_data[idx * n_isotopes + nuc]]; + } + else // Hash grid + { + // load lower bounding index + int u_low = index_data[idx * n_isotopes + nuc]; + + // Determine higher bounding index + int u_high; + if( idx == hash_bins - 1 ) + u_high = n_gridpoints - 1; + else + u_high = index_data[(idx+1)*n_isotopes + nuc] + 1; + + // Check edge cases to make sure energy is actually between these + // Then, if things look good, search for gridpoint in the nuclide grid + // within the lower and higher limits we've calculated. + double e_low = nuclide_grids[nuc*n_gridpoints + u_low].energy; + double e_high = nuclide_grids[nuc*n_gridpoints + u_high].energy; + int lower; + if( p_energy <= e_low ) + lower = 0; + else if( p_energy >= e_high ) + lower = n_gridpoints - 1; + else + lower = grid_search_nuclide( n_gridpoints, p_energy, &nuclide_grids[nuc*n_gridpoints], u_low, u_high); + + if( lower == n_gridpoints - 1 ) + low = &nuclide_grids[nuc*n_gridpoints + lower - 1]; + else + low = &nuclide_grids[nuc*n_gridpoints + lower]; + } + + high = low + 1; + + // calculate the re-useable interpolation factor + f = (high->energy - p_energy) / (high->energy - low->energy); + + // Total XS + xs_vector[0] = high->total_xs - f * (high->total_xs - low->total_xs); + + // Elastic XS + xs_vector[1] = high->elastic_xs - f * (high->elastic_xs - low->elastic_xs); + + // Absorbtion XS + xs_vector[2] = high->absorbtion_xs - f * (high->absorbtion_xs - low->absorbtion_xs); + + // Fission XS + xs_vector[3] = high->fission_xs - f * (high->fission_xs - low->fission_xs); + + // Nu Fission XS + xs_vector[4] = high->nu_fission_xs - f * (high->nu_fission_xs - low->nu_fission_xs); + + //test + /* + if( omp_get_thread_num() == 0 ) + { + printf("Lookup: Energy = %lf, nuc = %d\n", p_energy, nuc); + printf("e_h = %lf e_l = %lf\n", high->energy , low->energy); + printf("xs_h = %lf xs_l = %lf\n", high->elastic_xs, low->elastic_xs); + printf("total_xs = %lf\n\n", xs_vector[1]); + } + */ + +} + +// Calculates macroscopic cross section based on a given material & energy +void calculate_macro_xs( double p_energy, int mat, long n_isotopes, + long n_gridpoints, int * num_nucs, + double * concs, + double * egrid, int * index_data, + NuclideGridPoint * nuclide_grids, + int * mats, + double * macro_xs_vector, int grid_type, int hash_bins, int max_num_nucs ){ + int p_nuc; // the nuclide we are looking up + long idx = -1; + double conc; // the concentration of the nuclide in the material + + // cleans out macro_xs_vector + for( int k = 0; k < 5; k++ ) + macro_xs_vector[k] = 0; + + // If we are using the unionized energy grid (UEG), we only + // need to perform 1 binary search per macroscopic lookup. + // If we are using the nuclide grid search, it will have to be + // done inside of the "calculate_micro_xs" function for each different + // nuclide in the material. + if( grid_type == UNIONIZED ) + idx = grid_search( n_isotopes * n_gridpoints, p_energy, egrid); + else if( grid_type == HASH ) + { + double du = 1.0 / hash_bins; + idx = p_energy / du; + } + + // Once we find the pointer array on the UEG, we can pull the data + // from the respective nuclide grids, as well as the nuclide + // concentration data for the material + // Each nuclide from the material needs to have its micro-XS array + // looked up & interpolatied (via calculate_micro_xs). Then, the + // micro XS is multiplied by the concentration of that nuclide + // in the material, and added to the total macro XS array. + // (Independent -- though if parallelizing, must use atomic operations + // or otherwise control access to the xs_vector and macro_xs_vector to + // avoid simulataneous writing to the same data structure) + for( int j = 0; j < num_nucs[mat]; j++ ) + { + double xs_vector[5]; + p_nuc = mats[mat*max_num_nucs + j]; + conc = concs[mat*max_num_nucs + j]; + calculate_micro_xs( p_energy, p_nuc, n_isotopes, + n_gridpoints, egrid, index_data, + nuclide_grids, idx, xs_vector, grid_type, hash_bins ); + for( int k = 0; k < 5; k++ ) + macro_xs_vector[k] += xs_vector[k] * conc; + } + + //test + /* + for( int k = 0; k < 5; k++ ) + printf("Energy: %lf, Material: %d, XSVector[%d]: %lf\n", + p_energy, mat, k, macro_xs_vector[k]); + */ +} + + +// binary search for energy on unionized energy grid +// returns lower index +long grid_search( long n, double quarry, double * A) +{ + long lowerLimit = 0; + long upperLimit = n-1; + long examinationPoint; + long length = upperLimit - lowerLimit; + + while( length > 1 ) + { + examinationPoint = lowerLimit + ( length / 2 ); + + if( A[examinationPoint] > quarry ) + upperLimit = examinationPoint; + else + lowerLimit = examinationPoint; + + length = upperLimit - lowerLimit; + } + + return lowerLimit; +} + +// binary search for energy on nuclide energy grid +long grid_search_nuclide( long n, double quarry, NuclideGridPoint * A, long low, long high) +{ + long lowerLimit = low; + long upperLimit = high; + long examinationPoint; + long length = upperLimit - lowerLimit; + + while( length > 1 ) + { + examinationPoint = lowerLimit + ( length / 2 ); + + if( A[examinationPoint].energy > quarry ) + upperLimit = examinationPoint; + else + lowerLimit = examinationPoint; + + length = upperLimit - lowerLimit; + } + + return lowerLimit; +} + +// picks a material based on a probabilistic distribution +int pick_mat( uint64_t * seed ) +{ + // I have a nice spreadsheet supporting these numbers. They are + // the fractions (by volume) of material in the core. Not a + // *perfect* approximation of where XS lookups are going to occur, + // but this will do a good job of biasing the system nonetheless. + + // Also could be argued that doing fractions by weight would be + // a better approximation, but volume does a good enough job for now. + + double dist[12]; + dist[0] = 0.140; // fuel + dist[1] = 0.052; // cladding + dist[2] = 0.275; // cold, borated water + dist[3] = 0.134; // hot, borated water + dist[4] = 0.154; // RPV + dist[5] = 0.064; // Lower, radial reflector + dist[6] = 0.066; // Upper reflector / top plate + dist[7] = 0.055; // bottom plate + dist[8] = 0.008; // bottom nozzle + dist[9] = 0.015; // top nozzle + dist[10] = 0.025; // top of fuel assemblies + dist[11] = 0.013; // bottom of fuel assemblies + + double roll = LCG_random_double(seed); + + // makes a pick based on the distro + for( int i = 0; i < 12; i++ ) + { + double running = 0; + for( int j = i; j > 0; j-- ) + running += dist[j]; + if( roll < running ) + return i; + } + + return 0; +} + +double LCG_random_double(uint64_t * seed) +{ + // LCG parameters + const uint64_t m = 9223372036854775808ULL; // 2^63 + const uint64_t a = 2806196910506780709ULL; + const uint64_t c = 1ULL; + *seed = (a * (*seed) + c) % m; + return (double) (*seed) / (double) m; + //return ldexp(*seed, -63); + +} + +uint64_t fast_forward_LCG(uint64_t seed, uint64_t n) +{ + // LCG parameters + const uint64_t m = 9223372036854775808ULL; // 2^63 + uint64_t a = 2806196910506780709ULL; + uint64_t c = 1ULL; + + n = n % m; + + uint64_t a_new = 1; + uint64_t c_new = 0; + + while(n > 0) + { + if(n & 1) + { + a_new *= a; + c_new = c_new * a + c; + } + c *= (a + 1); + a *= a; + + n >>= 1; + } + + return (a_new * seed + c_new) % m; + +} + diff --git a/openacc/XSbench_header.h b/openacc/XSbench_header.h new file mode 100644 index 00000000..1140c9c0 --- /dev/null +++ b/openacc/XSbench_header.h @@ -0,0 +1,131 @@ +#ifndef __XSBENCH_HEADER_H__ +#define __XSBENCH_HEADER_H__ + +#include +#include +#include +#include +#include +#include +#include +#ifdef _OPENACC +#include +#endif +#include +#include +#include +#include + +// Papi Header +#ifdef PAPI +#include "papi.h" +#endif + +// Grid types +#define UNIONIZED 0 +#define NUCLIDE 1 +#define HASH 2 + +// Simulation types +#define HISTORY_BASED 1 +#define EVENT_BASED 2 + +// Binary Mode Type +#define NONE 0 +#define READ 1 +#define WRITE 2 + +// Starting Seed +#define STARTING_SEED 1070 + +// Structures +typedef struct{ + double energy; + double total_xs; + double elastic_xs; + double absorbtion_xs; + double fission_xs; + double nu_fission_xs; +} NuclideGridPoint; + +typedef struct{ + int nthreads; + long n_isotopes; + long n_gridpoints; + int lookups; + char * HM; + int grid_type; // 0: Unionized Grid (default) 1: Nuclide Grid + int hash_bins; + int particles; + int simulation_method; + int binary_mode; + int kernel_id; +} Inputs; + +typedef struct{ + int * num_nucs; // Length = length_num_nucs; + double * concs; // Length = length_concs + int * mats; // Length = length_mats + double * unionized_energy_array; // Length = length_unionized_energy_array + int * index_grid; // Length = length_index_grid + NuclideGridPoint * nuclide_grid; // Length = length_nuclide_grid + int length_num_nucs; + int length_concs; + int length_mats; + int length_unionized_energy_array; + long length_index_grid; + int length_nuclide_grid; + int max_num_nucs; + double * p_energy_samples; + int length_p_energy_samples; + int * mat_samples; + int length_mat_samples; +} SimulationData; + +// io.c +void logo(int version); +void center_print(const char *s, int width); +void border_print(void); +void fancy_int(long a); +Inputs read_CLI( int argc, char * argv[] ); +void print_CLI_error(void); +void print_inputs(Inputs in, int nprocs, int version); +int print_results( Inputs in, int mype, double runtime, int nprocs, unsigned long long vhash ); +void binary_write( Inputs in, SimulationData SD ); +SimulationData binary_read( Inputs in ); + +// Simulation.c +unsigned long long run_event_based_simulation(Inputs in, SimulationData SD, int mype); +unsigned long long run_history_based_simulation(Inputs in, SimulationData SD, int mype); +void calculate_micro_xs( double p_energy, int nuc, long n_isotopes, + long n_gridpoints, + double * egrid, int * index_data, + NuclideGridPoint * nuclide_grids, + long idx, double * xs_vector, int grid_type, int hash_bins ); +void calculate_macro_xs( double p_energy, int mat, long n_isotopes, + long n_gridpoints, int * num_nucs, + double * concs, + double * egrid, int * index_data, + NuclideGridPoint * nuclide_grids, + int * mats, + double * macro_xs_vector, int grid_type, int hash_bins, int max_num_nucs ); +long grid_search( long n, double quarry, double * A); +long grid_search_nuclide( long n, double quarry, NuclideGridPoint * A, long low, long high); +int pick_mat( uint64_t * seed ); +double LCG_random_double(uint64_t * seed); +uint64_t fast_forward_LCG(uint64_t seed, uint64_t n); +unsigned long long run_event_based_simulation_optimization_1(Inputs in, SimulationData SD, int mype); + +// GridInit.c +SimulationData grid_init_do_not_profile( Inputs in, int mype ); + +// XSutils.c +int NGP_compare( const void * a, const void * b ); +int double_compare(const void * a, const void * b); +size_t estimate_mem_usage( Inputs in ); + +// Materials.c +int * load_num_nucs(long n_isotopes); +int * load_mats( int * num_nucs, long n_isotopes, int * max_num_nucs ); +double * load_concs( int * num_nucs, int max_num_nucs ); +#endif diff --git a/openacc/XSutils.c b/openacc/XSutils.c new file mode 100644 index 00000000..d433ecdc --- /dev/null +++ b/openacc/XSutils.c @@ -0,0 +1,48 @@ +#include "XSbench_header.h" + +int double_compare(const void * a, const void * b) +{ + double A = *((double *) a); + double B = *((double *) b); + + if( A > B ) + return 1; + else if( A < B ) + return -1; + else + return 0; +} + +int NGP_compare(const void * a, const void * b) +{ + NuclideGridPoint A = *((NuclideGridPoint *) a); + NuclideGridPoint B = *((NuclideGridPoint *) b); + + if( A.energy > B.energy ) + return 1; + else if( A.energy < B.energy ) + return -1; + else + return 0; +} + + +size_t estimate_mem_usage( Inputs in ) +{ + size_t single_nuclide_grid = in.n_gridpoints * sizeof( NuclideGridPoint ); + size_t all_nuclide_grids = in.n_isotopes * single_nuclide_grid; + size_t size_UEG = in.n_isotopes*in.n_gridpoints*sizeof(double) + in.n_isotopes*in.n_gridpoints*in.n_isotopes*sizeof(int); + size_t size_hash_grid = in.hash_bins * in.n_isotopes * sizeof(int); + size_t memtotal; + + if( in.grid_type == UNIONIZED ) + memtotal = all_nuclide_grids + size_UEG; + else if( in.grid_type == NUCLIDE ) + memtotal = all_nuclide_grids; + else + memtotal = all_nuclide_grids + size_hash_grid; + + memtotal = ceil(memtotal / (1024.0*1024.0)); + return memtotal; +} + diff --git a/openacc/io.c b/openacc/io.c new file mode 100644 index 00000000..aa584a92 --- /dev/null +++ b/openacc/io.c @@ -0,0 +1,494 @@ +#include "XSbench_header.h" + +#ifdef MPI +#include +#endif + +// Prints program logo +void logo(int version) +{ + border_print(); + printf( + " __ __ ___________ _ \n" + " \\ \\ / // ___| ___ \\ | | \n" + " \\ V / \\ `--.| |_/ / ___ _ __ ___| |__ \n" + " / \\ `--. \\ ___ \\/ _ \\ '_ \\ / __| '_ \\ \n" + " / /^\\ \\/\\__/ / |_/ / __/ | | | (__| | | | \n" + " \\/ \\/\\____/\\____/ \\___|_| |_|\\___|_| |_| \n\n" + ); + border_print(); + center_print("Developed at Argonne National Laboratory", 79); + char v[100]; + sprintf(v, "Version: %d", version); + center_print(v, 79); + border_print(); +} + +// Prints Section titles in center of 80 char terminal +void center_print(const char *s, int width) +{ + int length = strlen(s); + int i; + for (i=0; i<=(width-length)/2; i++) { + fputs(" ", stdout); + } + fputs(s, stdout); + fputs("\n", stdout); +} + +int print_results( Inputs in, int mype, double runtime, int nprocs, + unsigned long long vhash ) +{ + // Calculate Lookups per sec + int lookups = 0; + if( in.simulation_method == HISTORY_BASED ) + lookups = in.lookups * in.particles; + else if( in.simulation_method == EVENT_BASED ) + lookups = in.lookups; + int lookups_per_sec = (int) ((double) lookups / runtime); + + // If running in MPI, reduce timing statistics and calculate average + #ifdef MPI + int total_lookups = 0; + MPI_Barrier(MPI_COMM_WORLD); + MPI_Reduce(&lookups_per_sec, &total_lookups, 1, MPI_INT, + MPI_SUM, 0, MPI_COMM_WORLD); + #endif + + int is_invalid_result = 1; + + // Print output + if( mype == 0 ) + { + border_print(); + center_print("RESULTS", 79); + border_print(); + + // Print the results + printf("NOTE: Timings are estimated -- use nvprof/nsys/iprof/rocprof for formal analysis\n"); + #ifdef MPI + printf("MPI ranks: %d\n", nprocs); + #endif + #ifdef MPI + printf("Total Lookups/s: "); + fancy_int(total_lookups); + printf("Avg Lookups/s per MPI rank: "); + fancy_int(total_lookups / nprocs); + #else + printf("Runtime: %.3lf seconds\n", runtime); + printf("Lookups: "); fancy_int(lookups); + printf("Lookups/s: "); + fancy_int(lookups_per_sec); + #endif + } + + unsigned long long large = 0; + unsigned long long small = 0; + if( in.simulation_method == EVENT_BASED ) + { + small = 945990; + large = 952131; + } + else if( in.simulation_method == HISTORY_BASED ) + { + small = 941535; + large = 954318; + } + if( strcmp(in.HM, "large") == 0 ) + { + if( vhash == large ) + is_invalid_result = 0; + } + else if( strcmp(in.HM, "small") == 0 ) + { + if( vhash == small ) + is_invalid_result = 0; + } + + if(mype == 0 ) + { + if( is_invalid_result ) + printf("Verification checksum: %llu (WARNING - INAVALID CHECKSUM!)\n", vhash); + else + printf("Verification checksum: %llu (Valid)\n", vhash); + border_print(); + } + + return is_invalid_result; +} + +void print_inputs(Inputs in, int nprocs, int version ) +{ + // Calculate Estimate of Memory Usage + int mem_tot = estimate_mem_usage( in ); + logo(version); + center_print("INPUT SUMMARY", 79); + border_print(); + printf("Programming Model: OpenACC\n"); + if( in.simulation_method == EVENT_BASED ) + printf("Simulation Method: Event Based\n"); + else + printf("Simulation Method: History Based\n"); + if( in.grid_type == NUCLIDE ) + printf("Grid Type: Nuclide Grid\n"); + else if( in.grid_type == UNIONIZED ) + printf("Grid Type: Unionized Grid\n"); + else + printf("Grid Type: Hash\n"); + + printf("Materials: %d\n", 12); + printf("H-M Benchmark Size: %s\n", in.HM); + printf("Total Nuclides: %ld\n", in.n_isotopes); + printf("Gridpoints (per Nuclide): "); + fancy_int(in.n_gridpoints); + if( in.grid_type == HASH ) + { + printf("Hash Bins: "); + fancy_int(in.hash_bins); + } + if( in.grid_type == UNIONIZED ) + { + printf("Unionized Energy Gridpoints: "); + fancy_int(in.n_isotopes*in.n_gridpoints); + } + if( in.simulation_method == HISTORY_BASED ) + { + printf("Particle Histories: "); fancy_int(in.particles); + printf("XS Lookups per Particle: "); fancy_int(in.lookups); + } + printf("Total XS Lookups: "); fancy_int(in.lookups); + #ifdef MPI + printf("MPI Ranks: %d\n", nprocs); + printf("Mem Usage per MPI Rank (MB): "); fancy_int(mem_tot); + #else + printf("Est. Memory Usage (MB): "); fancy_int(mem_tot); + #endif + printf("Binary File Mode: "); + if( in.binary_mode == NONE ) + printf("Off\n"); + else if( in.binary_mode == READ) + printf("Read\n"); + else + printf("Write\n"); + border_print(); + center_print("INITIALIZATION - DO NOT PROFILE", 79); + border_print(); +} + +void border_print(void) +{ + printf( + "===================================================================" + "=============\n"); +} + +// Prints comma separated integers - for ease of reading +void fancy_int( long a ) +{ + if( a < 1000 ) + printf("%ld\n",a); + + else if( a >= 1000 && a < 1000000 ) + printf("%ld,%03ld\n", a / 1000, a % 1000); + + else if( a >= 1000000 && a < 1000000000 ) + printf("%ld,%03ld,%03ld\n",a / 1000000,(a % 1000000) / 1000,a % 1000 ); + + else if( a >= 1000000000 ) + printf("%ld,%03ld,%03ld,%03ld\n", + a / 1000000000, + (a % 1000000000) / 1000000, + (a % 1000000) / 1000, + a % 1000 ); + else + printf("%ld\n",a); +} + +void print_CLI_error(void) +{ + printf("Usage: ./XSBench \n"); + printf("Options include:\n"); + printf(" -m Simulation method (history, event)\n"); + printf(" -s Size of H-M Benchmark to run (small, large, XL, XXL)\n"); + printf(" -g Number of gridpoints per nuclide (overrides -s defaults)\n"); + printf(" -G Grid search type (unionized, nuclide, hash). Defaults to unionized.\n"); + printf(" -p Number of particle histories\n"); + printf(" -l History Based: Number of Cross-section (XS) lookups per particle. Event Based: Total number of XS lookups.\n"); + printf(" -h Number of hash bins (only relevant when used with \"-G hash\")\n"); + printf(" -b Read or write all data structures to file. If reading, this will skip initialization phase. (read, write)\n"); + printf(" -k Specifies which kernel to run. 0 is baseline, 1, 2, etc are optimized variants. (0 is default.)\n"); + printf("Default is equivalent to: -m history -s large -l 34 -p 500000 -G unionized\n"); + printf("See readme for full description of default run values\n"); + exit(4); +} + +Inputs read_CLI( int argc, char * argv[] ) +{ + Inputs input; + + // defaults to the history based simulation method + input.simulation_method = HISTORY_BASED; + + // defaults to max threads on the system + input.nthreads = omp_get_num_procs(); + + // defaults to 355 (corresponding to H-M Large benchmark) + input.n_isotopes = 355; + + // defaults to 11303 (corresponding to H-M Large benchmark) + input.n_gridpoints = 11303; + + // defaults to 500,000 + input.particles = 500000; + + // defaults to 34 + input.lookups = 34; + + // default to unionized grid + input.grid_type = UNIONIZED; + + // default to unionized grid + input.hash_bins = 10000; + + // default to no binary read/write + input.binary_mode = NONE; + + // defaults to baseline kernel + input.kernel_id = 0; + + // defaults to H-M Large benchmark + input.HM = (char *) malloc( 6 * sizeof(char) ); + input.HM[0] = 'l' ; + input.HM[1] = 'a' ; + input.HM[2] = 'r' ; + input.HM[3] = 'g' ; + input.HM[4] = 'e' ; + input.HM[5] = '\0'; + + // Check if user sets these + int user_g = 0; + + int default_lookups = 1; + int default_particles = 1; + + // Collect Raw Input + for( int i = 1; i < argc; i++ ) + { + char * arg = argv[i]; + + // n_gridpoints (-g) + if( strcmp(arg, "-g") == 0 ) + { + if( ++i < argc ) + { + user_g = 1; + input.n_gridpoints = atol(argv[i]); + } + else + print_CLI_error(); + } + // Simulation Method (-m) + else if( strcmp(arg, "-m") == 0 ) + { + char * sim_type; + if( ++i < argc ) + sim_type = argv[i]; + else + print_CLI_error(); + + if( strcmp(sim_type, "history") == 0 ) + input.simulation_method = HISTORY_BASED; + else if( strcmp(sim_type, "event") == 0 ) + { + input.simulation_method = EVENT_BASED; + // Also resets default # of lookups + if( default_lookups && default_particles ) + { + input.lookups = input.lookups * input.particles; + input.particles = 0; + } + } + else + print_CLI_error(); + } + // lookups (-l) + else if( strcmp(arg, "-l") == 0 ) + { + if( ++i < argc ) + { + input.lookups = atoi(argv[i]); + default_lookups = 0; + } + else + print_CLI_error(); + } + // hash bins (-h) + else if( strcmp(arg, "-h") == 0 ) + { + if( ++i < argc ) + input.hash_bins = atoi(argv[i]); + else + print_CLI_error(); + } + // particles (-p) + else if( strcmp(arg, "-p") == 0 ) + { + if( ++i < argc ) + { + input.particles = atoi(argv[i]); + default_particles = 0; + } + else + print_CLI_error(); + } + // HM (-s) + else if( strcmp(arg, "-s") == 0 ) + { + if( ++i < argc ) + input.HM = argv[i]; + else + print_CLI_error(); + } + // grid type (-G) + else if( strcmp(arg, "-G") == 0 ) + { + char * grid_type; + if( ++i < argc ) + grid_type = argv[i]; + else + print_CLI_error(); + + if( strcmp(grid_type, "unionized") == 0 ) + input.grid_type = UNIONIZED; + else if( strcmp(grid_type, "nuclide") == 0 ) + input.grid_type = NUCLIDE; + else if( strcmp(grid_type, "hash") == 0 ) + input.grid_type = HASH; + else + print_CLI_error(); + } + // binary mode (-b) + else if( strcmp(arg, "-b") == 0 ) + { + char * binary_mode; + if( ++i < argc ) + binary_mode = argv[i]; + else + print_CLI_error(); + + if( strcmp(binary_mode, "read") == 0 ) + input.binary_mode = READ; + else if( strcmp(binary_mode, "write") == 0 ) + input.binary_mode = WRITE; + else + print_CLI_error(); + } + // kernel optimization selection (-k) + else if( strcmp(arg, "-k") == 0 ) + { + if( ++i < argc ) + { + input.kernel_id = atoi(argv[i]); + } + else + print_CLI_error(); + } + else + print_CLI_error(); + } + + // Validate Input + + // Validate nthreads + if( input.nthreads < 1 ) + print_CLI_error(); + + // Validate n_isotopes + if( input.n_isotopes < 1 ) + print_CLI_error(); + + // Validate n_gridpoints + if( input.n_gridpoints < 1 ) + print_CLI_error(); + + // Validate lookups + if( input.lookups < 1 ) + print_CLI_error(); + + // Validate Hash Bins + if( input.hash_bins < 1 ) + print_CLI_error(); + + // Validate HM size + if( strcasecmp(input.HM, "small") != 0 && + strcasecmp(input.HM, "large") != 0 && + strcasecmp(input.HM, "XL") != 0 && + strcasecmp(input.HM, "XXL") != 0 ) + print_CLI_error(); + + // Set HM size specific parameters + // (defaults to large) + if( strcasecmp(input.HM, "small") == 0 ) + input.n_isotopes = 68; + else if( strcasecmp(input.HM, "XL") == 0 && user_g == 0 ) + input.n_gridpoints = 238847; // sized to make 120 GB XS data + else if( strcasecmp(input.HM, "XXL") == 0 && user_g == 0 ) + input.n_gridpoints = 238847 * 2.1; // 252 GB XS data + + // Return input struct + return input; +} + +void binary_write( Inputs in, SimulationData SD ) +{ + char * fname = "XS_data.dat"; + printf("Writing all data structures to binary file %s...\n", fname); + FILE * fp = fopen(fname, "w"); + + // Write SimulationData Object. Include pointers, even though we won't be using them. + fwrite(&SD, sizeof(SimulationData), 1, fp); + + // Write heap arrays in SimulationData Object + fwrite(SD.num_nucs, sizeof(int), SD.length_num_nucs, fp); + fwrite(SD.concs, sizeof(double), SD.length_concs, fp); + fwrite(SD.mats, sizeof(int), SD.length_mats, fp); + fwrite(SD.nuclide_grid, sizeof(NuclideGridPoint), SD.length_nuclide_grid, fp); + fwrite(SD.index_grid, sizeof(int), SD.length_index_grid, fp); + fwrite(SD.unionized_energy_array, sizeof(double), SD.length_unionized_energy_array, fp); + + fclose(fp); +} + +SimulationData binary_read( Inputs in ) +{ + SimulationData SD; + + char * fname = "XS_data.dat"; + printf("Reading all data structures from binary file %s...\n", fname); + + FILE * fp = fopen(fname, "r"); + assert(fp != NULL); + + // Read SimulationData Object. Include pointers, even though we won't be using them. + fread(&SD, sizeof(SimulationData), 1, fp); + + // Allocate space for arrays on heap + SD.num_nucs = (int *) malloc(SD.length_num_nucs * sizeof(int)); + SD.concs = (double *) malloc(SD.length_concs * sizeof(double)); + SD.mats = (int *) malloc(SD.length_mats * sizeof(int)); + SD.nuclide_grid = (NuclideGridPoint *) malloc(SD.length_nuclide_grid * sizeof(NuclideGridPoint)); + SD.index_grid = (int *) malloc( SD.length_index_grid * sizeof(int)); + SD.unionized_energy_array = (double *) malloc( SD.length_unionized_energy_array * sizeof(double)); + + // Read heap arrays into SimulationData Object + fread(SD.num_nucs, sizeof(int), SD.length_num_nucs, fp); + fread(SD.concs, sizeof(double), SD.length_concs, fp); + fread(SD.mats, sizeof(int), SD.length_mats, fp); + fread(SD.nuclide_grid, sizeof(NuclideGridPoint), SD.length_nuclide_grid, fp); + fread(SD.index_grid, sizeof(int), SD.length_index_grid, fp); + fread(SD.unionized_energy_array, sizeof(double), SD.length_unionized_energy_array, fp); + + fclose(fp); + + return SD; +}