From db54e844903410a6f2563024016982e699a9c549 Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Tue, 2 Apr 2019 08:36:30 -0700 Subject: [PATCH 1/5] Change internal iteration-distance vars from 'step' to 'stride' to avoid confusion with [time-]step dimension. --- README.md | 8 +- src/common/common_utils.cpp | 2 +- src/kernel/lib/auto_tuner.cpp | 10 +- src/kernel/lib/auto_tuner.hpp | 2 +- src/kernel/lib/context.cpp | 158 ++++++++++++++++---------------- src/kernel/lib/indices.hpp | 13 ++- src/kernel/lib/settings.cpp | 4 +- src/kernel/lib/setup.cpp | 2 +- src/kernel/lib/stencil_calc.cpp | 46 +++++----- src/kernel/lib/stencil_calc.hpp | 4 +- utils/bin/gen_loops.pl | 20 ++-- 11 files changed, 137 insertions(+), 132 deletions(-) diff --git a/README.md b/README.md index d4cccee7..8db88365 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # YASK--Yet Another Stencil Kernel * New YASK users may want to start with the [YASK tutorial](docs/YASK-tutorial.pdf). -* Existing YASK users may want to jump to the [backward-compatibility notices](#backward-compatibility-notices). +* Users with existing YASK-based code may want to jump to the [backward-compatibility notices](#backward-compatibility-notices). ## Overview YASK is a framework to rapidly create high-performance stencil code including optimizations and features such as @@ -9,7 +9,7 @@ YASK is a framework to rapidly create high-performance stencil code including op * Multi-level OpenMP parallelism to exploit multiple cores and threads, * Scaling to multiple sockets and nodes via MPI with overlapped communication and compute, and * Spatial tiling with automatically-tuned block sizes, -* Temporal tiling to further increase cache locality, +* Temporal tiling in multiple dimensions to further increase cache locality, * APIs for C++ and Python: [API documentation](https://rawgit.com/intel/yask/api-docs/html/index.html). YASK contains a domain-specific compiler to convert scalar stencil code to SIMD-optimized code for Intel(R) Xeon Phi(TM) and Intel(R) Xeon(R) processors. @@ -68,9 +68,9 @@ This changed the old behavior of `make` defaulting to `snb` architecture and `bi Those options are still available to override the host-based default. * Version 2.16.03 moved the position of the log-file name to the last column in the CSV output of `utils/bin/yask_log_to_csv.pl`. * Version 2.15.04 required a call to `yc_grid::set_dynamic_step_alloc(true)` to allow changing the -allocation in the step (time) dimension for grid variables created at YASK compile-time. +allocation in the step (time) dimension at run-time for grid variables created at YASK compile-time. * Version 2.15.02 required all "misc" indices to be yask-compiler-time constants. -* Version 2.14.05 changed the meaning of temporal sizes so that 0 means never do temporal blocking and 1 allows blocking within a single time-step for multi-pack solutions. The behavior of the default settings have not changed. +* Version 2.14.05 changed the meaning of temporal sizes so that 0 means never do temporal blocking and 1 allows blocking within a single time-step for multi-pack solutions. The default setting is 0, which keeps the old behavior. * Version 2.13.06 changed the default behavior of the performance-test utility (`yask.sh`) to run trials for a given amount of time instead of a given number of steps. As of version 2.13.08, use the `-trial_time` option to specify the number of seconds to run. To force a specific number of trials as in previous versions, use the `-trial_steps` option. 
* Version 2.13.02 required some changes in perf statistics due to step (temporal) conditions. Both text output and `yk_stats` APIs affected. * Version 2.12.00 removed the long-deprecated `==` operator for asserting equality between a grid point and an equation. Use `EQUALS` instead. diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index 20452324..09a9b942 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -43,7 +43,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". - const string version = "2.19.03"; + const string version = "2.19.04"; string yask_get_version_string() { return version; diff --git a/src/kernel/lib/auto_tuner.cpp b/src/kernel/lib/auto_tuner.cpp index 4d084bc6..792d7432 100644 --- a/src/kernel/lib/auto_tuner.cpp +++ b/src/kernel/lib/auto_tuner.cpp @@ -107,13 +107,13 @@ namespace yask { // Determine number of steps to run. // If wave-fronts are enabled, run a max number of these steps. idx_t step_dir = dims->_step_dir; // +/- 1. - idx_t step_t = min(max(wf_steps, idx_t(1)), +AutoTuner::max_step_t) * step_dir; + idx_t stride_t = min(max(wf_steps, idx_t(1)), +AutoTuner::max_stride_t) * step_dir; // Run time-steps until AT converges. - for (idx_t t = 0; ; t += step_t) { + for (idx_t t = 0; ; t += stride_t) { - // Run step_t time-step(s). - run_solution(t, t + step_t - step_dir); + // Run stride_t time-step(s). + run_solution(t, t + stride_t - step_dir); // AT done on this rank? if (!is_auto_tuner_enabled()) @@ -325,7 +325,7 @@ namespace yask { auto dmax = outer_sizes()[dname]; // Determine distance of GD neighbors. - auto dist = dmin; // step by cluster size. + auto dist = dmin; // stride by cluster size. dist = max(dist, min_dist); dist *= radius; diff --git a/src/kernel/lib/auto_tuner.hpp b/src/kernel/lib/auto_tuner.hpp index 8176183e..69ff8eea 100644 --- a/src/kernel/lib/auto_tuner.hpp +++ b/src/kernel/lib/auto_tuner.hpp @@ -79,7 +79,7 @@ namespace yask { bool in_warmup = true; public: - static constexpr idx_t max_step_t = 4; + static constexpr idx_t max_stride_t = 4; AutoTuner(StencilContext* context, KernelSettings* settings, diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index e6ff059f..7b0273f2 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -42,16 +42,16 @@ namespace yask { // Determine step dir from order of first/last. idx_t step_dir = (last_step_index >= first_step_index) ? 1 : -1; - // Find begin, step and end in step-dim. + // Find begin, stride and end in step-dim. idx_t begin_t = first_step_index; - idx_t step_t = step_dir; // always +/- 1 for ref run. - assert(step_t); + idx_t stride_t = step_dir; // always +/- 1 for ref run. + assert(stride_t); idx_t end_t = last_step_index + step_dir; // end is beyond last. // backward? - if (step_t < 0) { - begin_t = end_t + step_t; - end_t = step_t; + if (stride_t < 0) { + begin_t = end_t + stride_t; + end_t = stride_t; } // Begin & end tuples. @@ -101,22 +101,22 @@ namespace yask { exchange_halos(); // Number of iterations to get from begin_t, stopping before end_t, - // stepping by step_t. + // jumping by stride_t. const idx_t num_t = abs(end_t - begin_t); for (idx_t index_t = 0; index_t < num_t; index_t++) { // This value of index_t steps from start_t to stop_t-1. - const idx_t start_t = begin_t + (index_t * step_t); - const idx_t stop_t = (step_t > 0) ? 
- min(start_t + step_t, end_t) : - max(start_t + step_t, end_t); + const idx_t start_t = begin_t + (index_t * stride_t); + const idx_t stop_t = (stride_t > 0) ? + min(start_t + stride_t, end_t) : + max(start_t + stride_t, end_t); // Set indices that will pass through generated code // because the step loop is coded here. rank_idxs.index[step_posn] = index_t; rank_idxs.start[step_posn] = start_t; rank_idxs.stop[step_posn] = stop_t; - rank_idxs.step[step_posn] = step_t; + rank_idxs.stride[step_posn] = stride_t; // Loop thru bundles. We ignore bundle packs here // because packing bundles is an optional optimizations. @@ -146,9 +146,9 @@ namespace yask { // Indices needed for the generated misc loops. Will normally be a // copy of rank_idxs except when updating scratch-grids. ScanIndices misc_idxs = sg->adjust_span(scratch_grid_idx, rank_idxs); - misc_idxs.step.setFromConst(1); // ensure unit step. + misc_idxs.stride.setFromConst(1); // ensure unit stride. - // Define misc-loop function. Since step is always 1, we + // Define misc-loop function. Since stride is always 1, we // ignore misc_stop. If point is in sub-domain for this // bundle, then evaluate the reference scalar code. // TODO: fix domain of scratch grids. @@ -198,16 +198,16 @@ namespace yask { // Determine step dir from order of first/last. idx_t step_dir = (last_step_index >= first_step_index) ? 1 : -1; - // Find begin, step and end in step-dim. + // Find begin, stride and end in step-dim. idx_t begin_t = first_step_index; - // Step-size in step-dim is number of region steps. + // Stride-size in step-dim is number of region steps. // Then, it is multipled by +/-1 to get proper direction. - idx_t step_t = max(wf_steps, idx_t(1)) * step_dir; - assert(step_t); + idx_t stride_t = max(wf_steps, idx_t(1)) * step_dir; + assert(stride_t); idx_t end_t = last_step_index + step_dir; // end is beyond last. - // Begin, end, step tuples. + // Begin, end, stride tuples. // Based on overall bounding box, which includes // any needed extensions for wave-fronts. IdxTuple begin(stencil_dims); @@ -216,14 +216,14 @@ namespace yask { IdxTuple end(stencil_dims); end.setVals(ext_bb.bb_end, false); end[step_posn] = end_t; - IdxTuple step(stencil_dims); - step.setVals(opts->_region_sizes, false); // step by region sizes. - step[step_posn] = step_t; + IdxTuple stride(stencil_dims); + stride.setVals(opts->_region_sizes, false); // stride by region sizes. + stride[step_posn] = stride_t; TRACE_MSG("run_solution: [" << begin.makeDimValStr() << " ... " << end.makeDimValStr() << ") by " << - step.makeDimValStr()); + stride.makeDimValStr()); if (!rank_bb.bb_valid) THROW_YASK_EXCEPTION("Error: run_solution() called without calling prepare_solution() first"); if (ext_bb.bb_size < 1) { @@ -274,16 +274,16 @@ namespace yask { } // If original region covered entire rank in a dim, set - // step size to ensure only one step is taken. + // stride size to ensure only one stride is taken. DOMAIN_VAR_LOOP(i, j) { if (opts->_region_sizes[i] >= opts->_rank_sizes[i]) - step[i] = end[i] - begin[i]; + stride[i] = end[i] - begin[i]; } TRACE_MSG("run_solution: after adjustment for " << num_wf_shifts << " wave-front shift(s): [" << begin.makeDimValStr() << " ... " << end.makeDimValStr() << ") by " << - step.makeDimValStr()); + stride.makeDimValStr()); // At this point, 'begin' and 'end' should describe the *max* range // needed in the domain for this rank for the first time step. 
At @@ -296,7 +296,7 @@ namespace yask { ScanIndices rank_idxs(*dims, true, &rank_domain_offsets); rank_idxs.begin = begin; rank_idxs.end = end; - rank_idxs.step = step; + rank_idxs.stride = stride; // Make sure threads are set properly for a region. set_region_threads(); @@ -305,22 +305,22 @@ namespace yask { exchange_halos(); // Number of iterations to get from begin_t to end_t-1, - // stepping by step_t. - const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(step_t)); + // jumping by stride_t. + const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(stride_t)); for (idx_t index_t = 0; index_t < num_t; index_t++) { // This value of index_t steps from start_t to stop_t-1. - const idx_t start_t = begin_t + (index_t * step_t); - const idx_t stop_t = (step_t > 0) ? - min(start_t + step_t, end_t) : - max(start_t + step_t, end_t); + const idx_t start_t = begin_t + (index_t * stride_t); + const idx_t stop_t = (stride_t > 0) ? + min(start_t + stride_t, end_t) : + max(start_t + stride_t, end_t); idx_t this_num_t = abs(stop_t - start_t); // Set indices that will pass through generated code. rank_idxs.index[step_posn] = index_t; rank_idxs.start[step_posn] = start_t; rank_idxs.stop[step_posn] = stop_t; - rank_idxs.step[step_posn] = step_t; + rank_idxs.stride[step_posn] = stride_t; // Start timer for auto-tuner. _at.timer.start(); @@ -589,25 +589,25 @@ namespace yask { region_idxs.initFromOuter(rank_idxs); // Time range. - // When doing WF rank tiling, this loop will step through + // When doing WF rank tiling, this loop will stride through // several time-steps in each region. - // When also doing TB, it will step by the block steps. + // When also doing TB, it will stride by the block strides. idx_t begin_t = region_idxs.begin[step_posn]; idx_t end_t = region_idxs.end[step_posn]; idx_t step_dir = (end_t >= begin_t) ? 1 : -1; - idx_t step_t = max(tb_steps, idx_t(1)) * step_dir; - assert(step_t); - const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(step_t)); + idx_t stride_t = max(tb_steps, idx_t(1)) * step_dir; + assert(stride_t); + const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(stride_t)); // Time loop. idx_t region_shift_num = 0; for (idx_t index_t = 0; index_t < num_t; index_t++) { // This value of index_t steps from start_t to stop_t-1. - const idx_t start_t = begin_t + (index_t * step_t); - const idx_t stop_t = (step_t > 0) ? - min(start_t + step_t, end_t) : - max(start_t + step_t, end_t); + const idx_t start_t = begin_t + (index_t * stride_t); + const idx_t stop_t = (stride_t > 0) ? + min(start_t + stride_t, end_t) : + max(start_t + stride_t, end_t); // Set step indices that will pass through generated code. region_idxs.index[step_posn] = index_t; @@ -638,10 +638,10 @@ namespace yask { continue; } - // Steps within a region are based on pack block sizes. + // Strides within a region are based on pack block sizes. auto& settings = bp->getActiveSettings(); - region_idxs.step = settings._block_sizes; - region_idxs.step[step_posn] = step_t; + region_idxs.stride = settings._block_sizes; + region_idxs.stride[step_posn] = stride_t; // Groups in region loops are based on block-group sizes. region_idxs.group_size = settings._block_group_sizes; @@ -659,7 +659,7 @@ namespace yask { // If there is only one blk in a region, make sure // this blk fills this whole region. 
if (settings._block_sizes[i] >= settings._region_sizes[i]) - region_idxs.step[i] = region_idxs.end[i] - region_idxs.begin[i]; + region_idxs.stride[i] = region_idxs.end[i] - region_idxs.begin[i]; } // Only need to loop through the span of the region if it is @@ -695,10 +695,10 @@ namespace yask { // calc_block() is called. BundlePackPtr bp; - // Steps within a region are based on rank block sizes. + // Strides within a region are based on rank block sizes. auto& settings = *opts; - region_idxs.step = settings._block_sizes; - region_idxs.step[step_posn] = step_t; + region_idxs.stride = settings._block_sizes; + region_idxs.stride[step_posn] = stride_t; // Groups in region loops are based on block-group sizes. region_idxs.group_size = settings._block_group_sizes; @@ -717,9 +717,9 @@ namespace yask { DOMAIN_VAR_LOOP(i, j) { - // If original blk covered entire region, reset step. + // If original blk covered entire region, reset stride. if (settings._block_sizes[i] >= settings._region_sizes[i]) - region_idxs.step[i] = region_idxs.end[i] - region_idxs.begin[i]; + region_idxs.stride[i] = region_idxs.end[i] - region_idxs.begin[i]; } // To tesselate n-D domain space, we use n+1 distinct @@ -839,9 +839,9 @@ namespace yask { idx_t begin_t = block_idxs.begin[step_posn]; idx_t end_t = block_idxs.end[step_posn]; idx_t step_dir = (end_t >= begin_t) ? 1 : -1; - idx_t step_t = max(tb_steps, idx_t(1)) * step_dir; - assert(step_t); - const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(step_t)); + idx_t stride_t = max(tb_steps, idx_t(1)) * step_dir; + assert(stride_t); + const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(stride_t)); // If TB is not being used, just process the given pack. // No need for a time loop. @@ -849,7 +849,7 @@ namespace yask { // calc_region() when not using TB. if (tb_steps == 0) { assert(bp); - assert(abs(step_t) == 1); + assert(abs(stride_t) == 1); assert(abs(end_t - begin_t) == 1); assert(num_t == 1); @@ -858,10 +858,10 @@ namespace yask { block_idxs.start[step_posn] = begin_t; block_idxs.stop[step_posn] = end_t; - // Steps within a block are based on pack mini-block sizes. + // Strides within a block are based on pack mini-block sizes. auto& settings = bp->getActiveSettings(); - block_idxs.step = settings._mini_block_sizes; - block_idxs.step[step_posn] = step_t; + block_idxs.stride = settings._mini_block_sizes; + block_idxs.stride[step_posn] = stride_t; // Groups in block loops are based on mini-block-group sizes. block_idxs.group_size = settings._mini_block_group_sizes; @@ -898,10 +898,10 @@ namespace yask { block_idxs.start[step_posn] = begin_t; block_idxs.stop[step_posn] = end_t; - // Steps within a block are based on rank mini-block sizes. + // Strides within a block are based on rank mini-block sizes. auto& settings = *opts; - block_idxs.step = settings._mini_block_sizes; - block_idxs.step[step_posn] = step_dir; + block_idxs.stride = settings._mini_block_sizes; + block_idxs.stride[step_posn] = step_dir; // Groups in block loops are based on mini-block-group sizes. block_idxs.group_size = settings._mini_block_group_sizes; @@ -923,16 +923,16 @@ namespace yask { auto width = region_idxs.stop[i] - region_idxs.start[i]; adj_block_idxs.end[i] += width; - // If original MB covers a whole block, reset step. + // If original MB covers a whole block, reset stride. 
if (settings._mini_block_sizes[i] >= settings._block_sizes[i]) - adj_block_idxs.step[i] = adj_block_idxs.end[i] - adj_block_idxs.begin[i]; + adj_block_idxs.stride[i] = adj_block_idxs.end[i] - adj_block_idxs.begin[i]; } TRACE_MSG("calc_block: phase " << phase << ", adjusted block [" << adj_block_idxs.begin.makeValStr(nsdims) << " ... " << adj_block_idxs.end.makeValStr(nsdims) << ") with mini-block stride " << - adj_block_idxs.step.makeValStr(nsdims)); + adj_block_idxs.stride.makeValStr(nsdims)); // Loop thru shapes. for (idx_t shape = 0; shape < nshapes; shape++) { @@ -1005,24 +1005,24 @@ namespace yask { mini_block_idxs.initFromOuter(adj_block_idxs); // Time range. - // No more temporal blocks below mini-blocks, so we always step + // No more temporal blocks below mini-blocks, so we always stride // by +/- 1. idx_t begin_t = mini_block_idxs.begin[step_posn]; idx_t end_t = mini_block_idxs.end[step_posn]; idx_t step_dir = (end_t >= begin_t) ? 1 : -1; - idx_t step_t = 1 * step_dir; // +/- 1. - assert(step_t); - const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(step_t)); + idx_t stride_t = 1 * step_dir; // +/- 1. + assert(stride_t); + const idx_t num_t = CEIL_DIV(abs(end_t - begin_t), abs(stride_t)); // Time loop. idx_t shift_num = 0; for (idx_t index_t = 0; index_t < num_t; index_t++) { // This value of index_t steps from start_t to stop_t-1. - const idx_t start_t = begin_t + (index_t * step_t); - const idx_t stop_t = (step_t > 0) ? - min(start_t + step_t, end_t) : - max(start_t + step_t, end_t); + const idx_t start_t = begin_t + (index_t * stride_t); + const idx_t stop_t = (stride_t > 0) ? + min(start_t + stride_t, end_t) : + max(start_t + stride_t, end_t); TRACE_MSG("calc_mini_block: phase " << phase << ", shape " << shape << ", in step " << start_t); @@ -1060,11 +1060,11 @@ namespace yask { if (region_thread_idx == 0) bp->start_timers(); - // Steps within a mini-blk are based on sub-blk sizes. + // Strides within a mini-blk are based on sub-blk sizes. // This will get overridden later if thread binding is enabled. auto& settings = bp->getActiveSettings(); - mini_block_idxs.step = settings._sub_block_sizes; - mini_block_idxs.step[step_posn] = step_t; + mini_block_idxs.stride = settings._sub_block_sizes; + mini_block_idxs.stride[step_posn] = stride_t; // Groups in mini-blk loops are based on sub-block-group sizes. mini_block_idxs.group_size = settings._sub_block_group_sizes; @@ -1925,7 +1925,7 @@ namespace yask { void StencilContext::mark_grids_dirty(const BundlePackPtr& sel_bp, idx_t start, idx_t stop) { STATE_VARS(this); - idx_t step = (start > stop) ? -1 : 1; + idx_t stride = (start > stop) ? -1 : 1; map> grids_done; // Stencil bundle packs. @@ -1936,14 +1936,14 @@ namespace yask { continue; // Each input step. - for (idx_t t = start; t != stop; t += step) { + for (idx_t t = start; t != stop; t += stride) { // Each bundle in this pack. for (auto* sb : *bp) { // Get output step for this bundle, if any. // For many stencils, this will be t+1 or - // t-1 if stepping backward. + // t-1 if striding backward. idx_t t_out = 0; if (!sb->get_output_step_index(t, t_out)) continue; diff --git a/src/kernel/lib/indices.hpp b/src/kernel/lib/indices.hpp index 7bc4cf36..9f2bd9c5 100644 --- a/src/kernel/lib/indices.hpp +++ b/src/kernel/lib/indices.hpp @@ -346,13 +346,18 @@ namespace yask { // Input values; not modified. Indices begin, end; // first and end (beyond last) range of each index. - Indices step; // step value within range. - Indices align; // alignment of steps after first one. 
+ Indices stride; // distance between indices within [begin .. end). + Indices align; // alignment of indices after first one. Indices align_ofs; // adjustment for alignment (see below). Indices group_size; // proximity grouping within range. - // Alignment: when possible, start positions after the first - // in each dim will be aligned such that ((start - align_ofs) % align) == 0. + // Alignment: + // First 'start' index is always at 'begin'. + // Subsequent indices are at 'begin' + 'stride', 'begin' + 2*'stride', etc. if 'align'==1. + // If 'align'>1, subsequent indices will be aligned such that + // (('start' - 'align_ofs') % 'align') == 0. + // Last 'start' index is always < 'end'. + // Last 'stop' index always == 'end'. // Output values; set once for entire range. Indices num_indices; // number of indices in each dim. diff --git a/src/kernel/lib/settings.cpp b/src/kernel/lib/settings.cpp index 0e8f4dff..d3e937e1 100644 --- a/src/kernel/lib/settings.cpp +++ b/src/kernel/lib/settings.cpp @@ -639,9 +639,9 @@ namespace yask { bt = max(bt, idx_t(0)); mbt = max(mbt, idx_t(0)); if (!rt) - rt = bt; // Default region steps to block steps. + rt = bt; // Default region steps == block steps. if (!mbt) - mbt = bt; // Default mini-blk steps to block steps. + mbt = bt; // Default mini-blk steps == block steps. // Determine num regions. // Also fix up region sizes as needed. diff --git a/src/kernel/lib/setup.cpp b/src/kernel/lib/setup.cpp index 0100dd7e..ac3563fd 100644 --- a/src/kernel/lib/setup.cpp +++ b/src/kernel/lib/setup.cpp @@ -42,7 +42,7 @@ namespace yask { ndims(NUM_STENCIL_DIMS), begin(idx_t(0), ndims), end(idx_t(0), ndims), - step(idx_t(1), ndims), + stride(idx_t(1), ndims), align(idx_t(1), ndims), align_ofs(idx_t(0), ndims), group_size(idx_t(1), ndims), diff --git a/src/kernel/lib/stencil_calc.cpp b/src/kernel/lib/stencil_calc.cpp index 575851dc..8372de75 100644 --- a/src/kernel/lib/stencil_calc.cpp +++ b/src/kernel/lib/stencil_calc.cpp @@ -44,7 +44,7 @@ namespace yask { TRACE_MSG("calc_mini_block('" << get_name() << "'): [" << mini_block_idxs.begin.makeValStr(nsdims) << " ... " << mini_block_idxs.end.makeValStr(nsdims) << ") by " << - mini_block_idxs.step.makeValStr(nsdims) << + mini_block_idxs.stride.makeValStr(nsdims) << " by region thread " << region_thread_idx); assert(!is_scratch()); @@ -146,28 +146,28 @@ namespace yask { DOMAIN_VAR_LOOP(i, j) { // If binding threads to sub-blocks and this is the - // binding dim, set step size and alignment + // binding dim, set stride size and alignment // granularity to the slab width. Setting the // alignment keeps slabs aligned between packs. if (bind_threads && i == bind_posn) { - adj_mb_idxs.step[i] = bind_slab_pts; + adj_mb_idxs.stride[i] = bind_slab_pts; adj_mb_idxs.align[i] = bind_slab_pts; } // If original [or auto-tuned] sub-block covers - // entire mini-block, set step size to full width. + // entire mini-block, set stride size to full width. // Also do this when binding and this is not the // binding dim. else if ((settings._sub_block_sizes[i] >= settings._mini_block_sizes[i]) || bind_threads) - adj_mb_idxs.step[i] = adj_mb_idxs.end[i] - adj_mb_idxs.begin[i]; + adj_mb_idxs.stride[i] = adj_mb_idxs.end[i] - adj_mb_idxs.begin[i]; } TRACE_MSG("calc_mini_block('" << get_name() << "'): " << " for reqd bundle '" << sg->get_name() << "': [" << adj_mb_idxs.begin.makeValStr(nsdims) << " ... 
" << adj_mb_idxs.end.makeValStr(nsdims) << ") by " << - adj_mb_idxs.step.makeValStr(nsdims) << + adj_mb_idxs.stride.makeValStr(nsdims) << " by region thread " << region_thread_idx << " and block thread " << block_thread_idx); @@ -225,12 +225,12 @@ namespace yask { ScanIndices misc_idxs(*dims, true, 0); misc_idxs.initFromOuter(mini_block_idxs); - // Step sizes and alignment are one element. - misc_idxs.step.setFromConst(1); + // Stride sizes and alignment are one element. + misc_idxs.stride.setFromConst(1); misc_idxs.align.setFromConst(1); // Define misc-loop function. - // Since step is always 1, we ignore misc_idxs.stop. + // Since stride is always 1, we ignore misc_idxs.stop. #define MISC_FN(pt_idxs) do { \ calc_scalar(region_thread_idx, pt_idxs.start); \ } while(0) @@ -472,10 +472,10 @@ namespace yask { ") by region thread " << region_thread_idx << " and block thread " << block_thread_idx); - // Step sizes are based on cluster lengths (in vector units). - // The step in the inner loop is hard-coded in the generated code. + // Stride sizes are based on cluster lengths (in vector units). + // The stride in the inner loop is hard-coded in the generated code. DOMAIN_VAR_LOOP(i, j) { - norm_sub_block_idxs.step[i] = dims->_cluster_mults[j]; // N vecs. + norm_sub_block_idxs.stride[i] = dims->_cluster_mults[j]; // N vecs. } // Define the function called from the generated loops to simply @@ -520,9 +520,9 @@ namespace yask { normalize_indices(sub_block_vidxs.end, norm_sub_block_idxs.end); norm_sub_block_idxs.stop = norm_sub_block_idxs.end; - // Step sizes are one vector. - // The step in the inner loop is hard-coded in the generated code. - norm_sub_block_idxs.step.setFromConst(1); + // Stride sizes are one vector. + // The stride in the inner loop is hard-coded in the generated code. + norm_sub_block_idxs.stride.setFromConst(1); // Also normalize the *full* vector indices to determine if // we need a mask at each vector index. @@ -539,7 +539,7 @@ namespace yask { // should be used only around the outside of the inner block of // clusters. Then, call the loop-of-vectors function // w/appropriate mask. See the mask diagrams above that show - // how the masks are ANDed together. Since step is always 1, we + // how the masks are ANDed together. Since stride is always 1, we // ignore loop_idxs.stop. #define CALC_INNER_LOOP(loop_idxs) \ bool ok = false; \ @@ -573,8 +573,8 @@ namespace yask { // global rather than normalized as in the cluster and vector loops. ScanIndices misc_idxs(sub_block_idxs); - // Step sizes and alignment are one element. - misc_idxs.step.setFromConst(1); + // Stride sizes and alignment are one element. + misc_idxs.stride.setFromConst(1); misc_idxs.align.setFromConst(1); TRACE_MSG("calc_sub_block_vec: using scalar code for [" << @@ -587,7 +587,7 @@ namespace yask { " and block thread " << block_thread_idx); // Define misc-loop function. This is called at each point in - // the sub-block. Since step is always 1, we ignore + // the sub-block. Since stride is always 1, we ignore // misc_idxs.stop. TODO: handle more efficiently: do one slab // for inner-peel and one for outer-peel, calculate masks, and // call vector code. @@ -682,7 +682,7 @@ namespace yask { // If this bundle is updating scratch grid(s), // expand begin & end of 'idxs' by sizes of halos. - // Step indices may also change. + // Stride indices may also change. // NB: it is not necessary that the domain of each grid // is the same as the span of 'idxs'. 
However, it should be // at least that large to ensure that grid is able to hold @@ -739,11 +739,11 @@ namespace yask { assert(adj_idxs.begin[i] >= gp->get_first_rank_alloc_index(posn)); assert(adj_idxs.end[i] <= gp->get_last_rank_alloc_index(posn) + 1); - // If existing step is >= whole tile, adjust it also. + // If existing stride is >= whole tile, adjust it also. idx_t width = idxs.end[i] - idxs.begin[i]; - if (idxs.step[i] >= width) { + if (idxs.stride[i] >= width) { idx_t adj_width = adj_idxs.end[i] - adj_idxs.begin[i]; - adj_idxs.step[i] = adj_width; + adj_idxs.stride[i] = adj_width; } } } diff --git a/src/kernel/lib/stencil_calc.hpp b/src/kernel/lib/stencil_calc.hpp index 165cc4dc..a6465b76 100644 --- a/src/kernel/lib/stencil_calc.hpp +++ b/src/kernel/lib/stencil_calc.hpp @@ -225,7 +225,7 @@ namespace yask { // Calculate a series of cluster results within an inner loop. // All indices start at 'start_idxs'. Inner loop iterates to - // 'stop_inner' by 'step_inner'. + // 'stop_inner' by 'stride_inner'. // Indices must be rank-relative. // Indices must be normalized, i.e., already divided by VLEN_*. virtual void @@ -245,7 +245,7 @@ namespace yask { // Calculate a series of vector results within an inner loop. // All indices start at 'start_idxs'. Inner loop iterates to - // 'stop_inner' by 'step_inner'. + // 'stop_inner' by 'stride_inner'. // Indices must be rank-relative. // Indices must be normalized, i.e., already divided by VLEN_*. // Each vector write is masked by 'write_mask'. diff --git a/utils/bin/gen_loops.pl b/utils/bin/gen_loops.pl index dcf6a142..ad519071 100755 --- a/utils/bin/gen_loops.pl +++ b/utils/bin/gen_loops.pl @@ -86,8 +86,8 @@ sub beginVar { sub endVar { return inVar("end", @_); } -sub stepVar { - return inVar("step", @_); +sub strideVar { + return inVar("stride", @_); } sub alignVar { return inVar("align", @_); @@ -206,7 +206,7 @@ ($$$) if ($pass == 0) { my $bvar = beginVar($dim); my $evar = endVar($dim); - my $svar = stepVar($dim); + my $svar = strideVar($dim); my $avar = alignVar($dim); my $aovar = alignOfsVar($dim); my $aavar = adjAlignVar($dim); @@ -226,12 +226,12 @@ ($$$) # abvar = round_down_flr(20 - 15, 4) + 15 = 4 + 15 = 19. push @$code, - " // Alignment must be less than or equal to step size.", + " // Alignment must be less than or equal to stride size.", " const $itype $aavar = std::min($avar, $svar);", " // Aligned beginning point such that ($bvar - $svar) < $abvar <= $bvar.", " const $itype $abvar = yask::round_down_flr($bvar - $aovar, $aavar) + $aovar;", - " // Number of iterations to get from $abvar to (but not including) $evar, stepping by $svar.". - " This value is rounded up because the last iteration may cover fewer than $svar steps.", + " // Number of iterations to get from $abvar to (but not including) $evar, striding by $svar.". + " This value is rounded up because the last iteration may cover fewer than $svar strides.", " const $itype $nvar = yask::ceil_idiv_flr($evar - $abvar, $svar);"; # For grouped loops. @@ -482,7 +482,7 @@ ($$$$) } } - # start and stop vars based on individual begin, end, step, and index vars. + # start and stop vars based on individual begin, end, stride, and index vars. for my $dim (@$loopDims) { my $divar = indexVar($dim); my $stvar = startVar($dim); @@ -490,7 +490,7 @@ ($$$$) my $bvar = beginVar($dim); my $abvar = alignBeginVar($dim); my $evar = endVar($dim); - my $svar = stepVar($dim); + my $svar = strideVar($dim); push @$code, " // This value of $divar covers ".dimStr($dim)." 
from $stvar to (but not including) $spvar.", " idx_t $stvar = std::max($abvar + ($divar * $svar), $bvar);", @@ -982,8 +982,8 @@ () " This struct contains the following 'Indices' elements:\n", " 'begin': [in] first index to scan in each dim.\n", " 'end': [in] value beyond last index to scan in each dim.\n", - " 'step': [in] space between each scan point in each dim.\n", - " 'align': [in] alignment of steps after first one.\n", + " 'stride': [in] distance between each scan point in each dim.\n", + " 'align': [in] alignment of strides after first one.\n", " 'align_ofs': [in] value to subtract from 'start' before applying alignment.\n", " 'group_size': [in] min size of each group of points visisted first in a multi-dim loop.\n", " 'start': [out] set to first scan point in called function(s) in inner loop(s).\n", From c9afdb37377fc8b1178c263bdd90fb3e108d2c55 Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Thu, 4 Apr 2019 13:41:24 -0700 Subject: [PATCH 2/5] Track and check step-var indices. Closes #194. --- README.md | 1 + include/yask_compiler_api.hpp | 8 +- include/yk_grid_api.hpp | 300 +++++++++++++--------- include/yk_solution_api.hpp | 4 +- src/common/common_utils.cpp | 2 +- src/kernel/Makefile | 20 +- src/kernel/lib/context.cpp | 66 ++--- src/kernel/lib/context.hpp | 8 +- src/kernel/lib/generic_grids.hpp | 2 +- src/kernel/lib/grid_apis.cpp | 76 ++++-- src/kernel/lib/realv_grids.cpp | 78 ++++-- src/kernel/lib/realv_grids.hpp | 103 +++++--- src/kernel/tests/yask_kernel_api_test.cpp | 12 +- src/kernel/tests/yask_kernel_api_test.py | 10 +- 14 files changed, 420 insertions(+), 270 deletions(-) diff --git a/README.md b/README.md index 8db88365..96c824ac 100644 --- a/README.md +++ b/README.md @@ -58,6 +58,7 @@ YASK contains a domain-specific compiler to convert scalar stencil code to SIMD- for functional testing if you don't have native support for any given instruction set. ### Backward-compatibility notices, including changes in default behavior: +* Version 2.20.00 added checking of the step-dimension index value in the `yk_grid::get_element()` and similar APIs. Previously, invalid values silently "wrapped" around to valid ones. Now, the step index must be valid when reading, and the valid step indices are updated when writing. * Version 2.19.01 turned off multi-pass tuning by default. Enable with `-auto_tune_each_pass`. * Version 2.18.03 allowed the default radius to be stencil-specific and changed the names of example stencil "9axis" to "3axis_with_diags". * Version 2.18.00 added the ability to specify the global-domain size, and it will calculate the local-domain sizes from it. diff --git a/include/yask_compiler_api.hpp b/include/yask_compiler_api.hpp index bf3be52f..09cd528d 100644 --- a/include/yask_compiler_api.hpp +++ b/include/yask_compiler_api.hpp @@ -182,7 +182,7 @@ namespace yask { /** C++ initializer-list version with same semantics as new_grid(const std::string& name, const std::vector& dims). - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to the new \ref yc_grid object. */ virtual yc_grid_ptr @@ -223,7 +223,7 @@ namespace yask { /** C++ initializer-list version with same semantics as new_scratch_grid(const std::string& name, const std::vector& dims). - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to the new \ref yc_grid object. 
*/ virtual yc_grid_ptr @@ -492,7 +492,7 @@ namespace yask { /** C++ initializer-list version with same semantics as new_grid_point(std::vector index_exprs). - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to AST node used to read or write from point in grid. */ virtual yc_grid_point_node_ptr new_grid_point(const std::initializer_list& index_exprs) = 0; @@ -523,7 +523,7 @@ namespace yask { /** C++ initializer-list version with same semantics as new_relative_grid_point(std::vector dim_offsets). - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to AST node used to read or write from point in grid. */ virtual yc_grid_point_node_ptr new_relative_grid_point(const std::initializer_list& dim_offsets) = 0; diff --git a/include/yk_grid_api.hpp b/include/yk_grid_api.hpp index bf98e520..03d7b9a3 100644 --- a/include/yk_grid_api.hpp +++ b/include/yk_grid_api.hpp @@ -43,7 +43,8 @@ namespace yask { /// A run-time data container. /** - "Grid" is a generic term for any n-dimensional array. A 0-dim grid + A run-time "grid variable" (often referred to simply as a "grid", which is a bit of a misnomer) + is a generic term for any n-dimensional array. A 0-dim grid is a scalar, a 1-dim grid is an array, etc. A run-time grid contains data, unlike yc_grid, a compile-time grid variable. @@ -60,13 +61,27 @@ namespace yask { These may be returned via yk_solution::get_misc_dim_names() if they were defined in the YASK compiler, or they may be any other name that is not a step or domain dimension. - In the step dimension, there is no fixed domain size, and no - specified first or last index. - However, there is an allocation size, which is the number of values in the step - dimension that are stored in memory. - Step-dimension indices "wrap-around" within this allocation to reuse memory. - For example, if the step dimension is "t", and the t-dimension allocation size is 3, - then t=-2, t=0, t=3, t=6, ..., t=303, etc. would all alias to the same addresses in memory. + In the step dimension, there is no fixed first or last index. + However, there is a finite allocation size, which is the number of + values in the step dimension that are stored in memory. The valid + indices in the step dimension are always consecutive and change based + on what was last written to the grid. For example: If a grid `A` has + an allocation size of two (2) in the `t` step dimension, its initial + valid `t` indices are 0 and 1. Calling `A->get_element({0, x})` or + `A->get_element({1, x})` would return a value from `A` assuming `x` + is a valid index, but `A->get_element({2, x})` would cause a run-time + exception. Let's say the YASK solution defines `A(t+1, x) EQUALS + (A(t, x) + A(t, x+1))/2`. Calling yk_solution::run_solution(1) means + that `A(2, x)` would be defined for all `x` in the domain because + `t+1 == 2` on the left-hand-side of the equation. Thus, the new + valid `t` indices in `A` would be 1 and 2, and `A(0, x)` is no longer + stored in memory because the allocation size is only 2. Then, + calling `A->get_element({1, x})` or `A->get_element({2, x})` would + succeed and `A->get_element({0, x})` would fail. Calling APIs that + set values in a grid such as set_element() will also update the valid + step index range. 
The current valid indices in the step dimension + can be retrieved via yk_grid::get_first_valid_step_index() and + yk_grid::get_last_valid_step_index(). In each domain dimension, grid sizes include the following components: @@ -108,9 +123,11 @@ namespace yask { Data in these overlapped areas are exchanged as needed during stencil application to maintain a consistent values as if there was only one rank. - In each miscellaneous dimension, there is only an allocation size, - and there is no wrap-around as in the step dimension. - Each index must be between its first and last allowed value. + In each miscellaneous dimension, there is no padding or halos. + There is a fixed allocation size, and + each index must be between its first and last valid value. + The valid miscellaneous indices may be retrieved via + yk_grid::get_first_misc_index() and yk_grid::get_last_misc_index(). All sizes are expressed in numbers of elements. Each element may be a 4-byte (single precision) @@ -129,19 +146,6 @@ namespace yask { via yk_grid::alloc_storage(). - **[Advanced]** Storage for a specific grid may be shared with another grid with existing storage via yk_grid::share_storage(). - - @note The domain index arguments to the \ref yk_grid functions that require indices - are *always* relative to the overall problem; they are *not* relative to the current rank. - The first and last overall-problem index that lies within a rank can be - retrieved via yk_solution::get_first_rank_domain_index() and - yk_solution::get_last_rank_domain_index(), respectively. - The first and last accessible index that lies within a rank for a given grid can be - retrieved via yk_grid::get_first_rank_alloc_index() and - yk_grid::get_last_rank_alloc_index(), respectively. - Also, index arguments are always inclusive. - Specifically, for functions that return or require a "last" index, that - index indicates the last one in the relevant range, i.e., *not* one past the last value - (this is more like Fortran and Perl than Python and Lisp). */ class yk_grid { public: @@ -184,6 +188,79 @@ namespace yask { virtual bool is_dim_used(const std::string& dim) const =0; + /// Get the first valid index in this rank in the specified dimension. + /** + This is a convenience function that provides the first possible + index in any grid dimension regardless of the dimension type. + It is equivalent to + get_first_rank_alloc_index(dim) when `dim` is + a domain dimension, get_first_misc_index(dim) + for a misc dimension, and get_first_valid_step_index() + for the step dimension. + @note This function should be called only *after* calling prepare_solution() + because prepare_solution() assigns this rank's position in the problem domain. + @returns the first valid index. + */ + virtual idx_t + get_first_valid_index(const std::string& dim + /**< [in] Name of dimension to get. Must be one of + the names from get_dim_names(). */ ) const =0; + + /// Get the last index in this rank in the specified dimension. + /** + This is a convenience function that provides the last possible + index in any grid dimension regardless of the dimension type. + It is equivalent to + get_last_rank_alloc_index(dim) when `dim` is + a domain dimension, get_last_misc_index(dim) + for a misc dimension, and get_last_valid_step_index() + for the step dimension. + @note This function should be called only *after* calling prepare_solution() + because prepare_solution() assigns this rank's position in the problem domain. + @returns the last valid index. 
+        */
+        virtual idx_t
+        get_last_valid_index(const std::string& dim
+                             /**< [in] Name of dimension to get.  Must be one of
+                                the names from get_dim_names(). */ ) const =0;
+
+        /// Get the number of elements allocated in the specified dimension.
+        /**
+           For the domain dimensions, this includes the rank-domain and padding sizes.
+           See the "Detailed Description" for \ref yk_grid for information on grid sizes.
+           For any dimension `dim`, `get_alloc_size(dim) ==
+           get_last_valid_index(dim) - get_first_valid_index(dim) + 1`.
+           @returns allocation in number of elements (not bytes).
+        */
+        virtual idx_t
+        get_alloc_size(const std::string& dim
+                       /**< [in] Name of dimension to get.  Must be one of
+                          the names from get_dim_names(). */ ) const =0;
+
+        /// Get the first valid index in the step dimension.
+        /**
+           The valid step indices in a grid are updated by calling yk_solution::run_solution()
+           or one of the element-setting API functions.
+           Equivalent to get_first_valid_index(dim), where `dim` is the step dimension.
+           @returns the first index in the step dimension that can be used in one of the
+           element-getting API functions.
+           This grid must use the step index.
+        */
+        virtual idx_t
+        get_first_valid_step_index() const =0;
+
+        /// Get the last valid index in the step dimension.
+        /**
+           The valid step indices in a grid are updated by calling yk_solution::run_solution()
+           or one of the element-setting API functions.
+           Equivalent to get_last_valid_index(dim), where `dim` is the step dimension.
+           @returns the last index in the step dimension that can be used in one of the
+           element-getting API functions.
+           This grid must use the step index.
+        */
+        virtual idx_t
+        get_last_valid_step_index() const =0;
+
         /// Get the domain size for this rank.
         /**
            @returns The same value as yk_solution::get_rank_domain_size() if
@@ -322,21 +399,10 @@ namespace yask {
                                Must be one of the names from
                                yk_solution::get_domain_dim_names(). */ ) const =0;

-        /// Get the storage allocation in the specified dimension.
-        /**
-           For the step dimension, this is the specified allocation and
-           does not typically depend on the number of steps evaluated.
-           For the non-step dimensions, this includes the domain and padding sizes.
-           See the "Detailed Description" for \ref yk_grid for information on grid sizes.
-           @returns allocation in number of elements (not bytes).
-        */
-        virtual idx_t
-        get_alloc_size(const std::string& dim
-                       /**< [in] Name of dimension to get. */ ) const =0;
-
         /// Get the first index of a specified miscellaneous dimension.
         /**
-           @returns the first allowed index in a non-step and non-domain dimension.
+           Equivalent to get_first_valid_index(dim), where `dim` is a misc dimension.
+           @returns the first valid index in a non-step and non-domain dimension.
         */
         virtual idx_t
         get_first_misc_index(const std::string& dim
@@ -345,48 +411,44 @@ namespace yask {

         /// Get the last index of a specified miscellaneous dimension.
         /**
-           @returns the last allowed index in a non-step and non-domain dimension.
+           Equivalent to get_last_valid_index(dim), where `dim` is a misc dimension.
+           @returns the last valid index in a non-step and non-domain dimension.
         */
         virtual idx_t
         get_last_misc_index(const std::string& dim
                             /**< [in] Name of dimension to get.  Must be one of the names from
                                yk_solution::get_misc_dim_names(). */ ) const =0;

-        /// Determine whether an element at the given indices is allocated in this rank.
+        /// Determine whether the given indices refer to an accessible element in this rank.
         /**
            Provide indices in a list in the same order returned by get_dim_names().
-           Indices are relative to the *overall* problem domain.
-           @returns `true` if index values fall within the allocated space as returned by
-           get_first_rank_alloc_index() and get_last_rank_alloc_index() for
-           each dimension; `false` otherwise.
+           Domain indices are relative to the *overall* problem domain.
+           @returns `true` if index values fall within the range returned by
+           get_first_valid_index(dim) and get_last_valid_index(dim) for each dimension
+           `dim` in the grid; `false` otherwise.
         */
         virtual bool
-        is_element_allocated(const std::vector<idx_t>& indices
-                             /**< [in] List of indices, one for each grid dimension. */ ) const =0;
+        are_indices_valid(const std::vector<idx_t>& indices
+                          /**< [in] List of indices, one for each grid dimension. */ ) const =0;

 #ifndef SWIG
-        /// Determine whether an element at the given indices is allocated in this rank.
+        /// Determine whether the given indices refer to an accessible element in this rank.
         /**
-           Provide indices in a list in the same order returned by get_dim_names().
-           Indices are relative to the *overall* problem domain.
-           @note This version is not available (or needed) in SWIG-based APIs, e.g., Python.
-           @returns `true` if index values fall within the allocated space as returned by
-           get_first_rank_alloc_index() and get_last_rank_alloc_index() for
-           each dimension; `false` otherwise.
+           See are_indices_valid().
         */
         virtual bool
-        is_element_allocated(const std::initializer_list<idx_t>& indices
-                             /**< [in] List of indices, one for each grid dimension. */ ) const =0;
+        are_indices_valid(const std::initializer_list<idx_t>& indices
+                          /**< [in] List of indices, one for each grid dimension. */ ) const =0;
 #endif

         /// Read the value of one element in this grid.
         /**
            Provide indices in a list in the same order returned by get_dim_names().
            Indices are relative to the *overall* problem domain.
-           Index values must fall within the allocated space as returned by
-           get_first_rank_alloc_index() and get_last_rank_alloc_index() for
-           each dimension.
-           @returns value in grid at given multi-dimensional location.
+           Index values must fall between the values returned by
+           get_first_valid_index() and get_last_valid_index(), inclusive,
+           for each dimension in the grid.
+           @returns value in grid at given indices.
         */
         virtual double
         get_element(const std::vector<idx_t>& indices
@@ -395,16 +457,8 @@ namespace yask {
 #ifndef SWIG
         /// Read the value of one element in this grid.
         /**
-           Provide indices in a list in the same order returned by get_dim_names().
-           Indices are relative to the *overall* problem domain.
-           Index values must fall within the allocated space as returned by
-           get_first_rank_alloc_index() and get_last_rank_alloc_index() for
-           each dimension.
-           @note The return value is a double-precision floating-point value, but
-           it will be converted from a single-precision if
-           yk_solution::get_element_bytes() returns 4.
-           @note This version is not available (or needed) in SWIG-based APIs, e.g., Python.
-           @returns value in grid at given multi-dimensional location.
+           See get_element().
+           @returns value in grid at given indices.
         */
         virtual double
         get_element(const std::initializer_list<idx_t>& indices
@@ -415,14 +469,19 @@ namespace yask {
         /**
            Provide indices in a list in the same order returned by get_dim_names().
            Indices are relative to the *overall* problem domain.
-           Index values must fall within the allocated space as returned by
-           get_first_rank_alloc_index() and get_last_rank_alloc_index() for
-           each dimension.
+ If the grid uses the step dimension, the value of the step index + will be used to update the current valid step indices in the grid. + If `strict_indices` is `false` and any non-step index values + are invalid as defined by are_indices_valid(), + the API will have no effect and return zero (0). + If `strict_indices` is `true` and any non-step index values + are invalid, the API will throw an exception. @note The parameter value is a double-precision floating-point value, but it will be converted to single-precision if yk_solution::get_element_bytes() returns 4. If storage has not been allocated for this grid, this will have no effect. - @returns Number of elements set. + @returns Number of elements set, which will be one (1) if the indices + are valid and zero (0) if they are not. */ virtual idx_t set_element(double val /**< [in] Element in grid will be set to this. */, @@ -436,17 +495,7 @@ namespace yask { #ifndef SWIG /// Set the value of one element in this grid. /** - Provide the number of indices equal to the number of dimensions in the grid. - Indices beyond that will be ignored. - Indices are relative to the *overall* problem domain. - If any index values fall outside of the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension, this will have no effect. - @note The parameter value is a double-precision floating-point value, but - it will be converted to single-precision if - yk_solution::get_element_bytes() returns 4. - If storage has not been allocated for this grid, this will have no effect. - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + See set_element(). @returns Number of elements set. */ virtual idx_t @@ -459,21 +508,22 @@ namespace yask { in no change to grid. */ ) =0; #endif - /// Write elements within specified subset of this grid into a buffer. + /// Copy elements within specified subset of this grid into a buffer. /** Reads all elements from `first_indices` to `last_indices` in each dimension and writes them to consecutive memory locations in the buffer. - Indices in the buffer progress in row-major order. + Indices in the buffer progress in row-major order, i.e., + traditional C-language layout. The buffer pointed to must contain the number of bytes equal to yk_solution::get_element_bytes() multiplied by the number of elements in the specified slice. Since the reads proceed in row-major order, the last index is "unit-stride" in the buffer. + Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. - Index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension. + Index values must fall between the values returned by + get_first_valid_index() and get_last_valid_index(), inclusive. @returns Number of elements read. */ virtual idx_t @@ -488,9 +538,8 @@ namespace yask { /** Provide indices in a list in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. - Index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension if `strict_indices` is set to true. + Index values must fall between the values returned by + get_first_valid_index() and get_last_valid_index(), inclusive. 
Updates are OpenMP atomic, meaning that this function can be called by several OpenMP threads without causing a race condition. @note The parameter value is a double-precision floating-point value, but @@ -511,19 +560,7 @@ namespace yask { #ifndef SWIG /// Atomically add to the value of one grid element. /** - Provide the number of indices equal to the number of dimensions in the grid. - Indices beyond that will be ignored. - Indices are relative to the *overall* problem domain. - Index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension if `strict_indices` is set to true. - Updates are OpenMP atomic, meaning that this function can be called by - several OpenMP threads without causing a race condition. - @note The parameter value is a double-precision floating-point value, but - it will be converted to single-precision if - yk_solution::get_element_bytes() returns 4. - If storage has not been allocated for this grid, this will have no effect. - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + See add_to_element(). @returns Number of elements set. */ virtual idx_t @@ -555,9 +592,9 @@ namespace yask { specified value. Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. - Index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension. + Index values must fall between the values returned by + get_first_valid_index() and get_last_valid_index(), inclusive, + if `strict_indices` is `true`. If storage has not been allocated for this grid, this will have no effect. @returns Number of elements set. */ @@ -586,9 +623,8 @@ namespace yask { in the buffer. Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. - Index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for - each dimension. + Index values must fall between the values returned by + get_first_valid_index() and get_last_valid_index(), inclusive. If storage has not been allocated for this grid, this will have no effect. @returns Number of elements written. */ @@ -610,8 +646,8 @@ namespace yask { `first_target_indices` and `last_target_indices` in each dimension. Provide indices in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. - Accessed index values must fall within the allocated space as returned by - get_first_rank_alloc_index() and get_last_rank_alloc_index() for + Index values must fall between the values returned by + get_first_valid_index() and get_last_valid_index(), inclusive, for each dimension in both grids. @returns Number of elements copied. */ @@ -641,8 +677,7 @@ namespace yask { #ifndef SWIG /// Format the indices for pretty-printing. /** - Provide indices in a list in the same order returned by get_dim_names(). - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + See format_indices(). @returns A string containing the grid name and the index values. */ virtual std::string @@ -816,8 +851,8 @@ namespace yask { /// **[Advanced]** Set the first index of a specified miscellaneous dimension. /** - Sets the first allowed index in a non-step and non-domain dimension. 
- After calling this function, the last allowed index will be the first index + Sets the first valid index in a non-step and non-domain dimension. + After calling this function, the last valid index will be the first index as set by this function plus the allocation size set by set_alloc_size() minus one. */ @@ -828,12 +863,10 @@ namespace yask { idx_t idx /**< [in] New value for first index. May be negative. */ ) =0; - /// **[Advanced]** Get the first accessible index in this grid in this rank in the specified dimension. + /// **[Advanced]** Get the first accessible index in this grid in this rank in the specified domain dimension. /** - This returns the first *overall* index allowed in this grid. - This element may be in the domain, left halo, or extra left padding area. - This function is only for checking the legality of an index. - @returns First allowed index in this grid. + Equivalent to get_first_valid_index(dim), where `dim` is a domain dimension. + @returns First valid index in this grid. */ virtual idx_t get_first_rank_alloc_index(const std::string& dim @@ -841,12 +874,10 @@ namespace yask { Must be one of the names from yk_solution::get_domain_dim_names(). */ ) const =0; - /// **[Advanced]** Get the last accessible index in this grid in this rank in the specified dimension. + /// **[Advanced]** Get the last accessible index in this grid in this rank in the specified domain dimension. /** - This returns the last *overall* index allowed in this grid. - This element may be in the domain, right halo, or extra right padding area. - This function is only for checking the legality of an index. - @returns Last allowed index in this grid. + Equivalent to get_last_valid_index(dim), where `dim` is a domain dimension. + @returns Last valid index in this grid. */ virtual idx_t get_last_rank_alloc_index(const std::string& dim @@ -1020,6 +1051,23 @@ namespace yask { /**< [in] Name of dimension to get. Must be one of the names from yk_solution::get_domain_dim_names(). */ ) const =0; + + /// **[Deprecated]** Use are_indices_valid() instead. + virtual bool + is_element_allocated(const std::vector& indices + /**< [in] List of indices, one for each grid dimension. */ ) const { + return are_indices_valid(indices); + } + +#ifndef SWIG + /// **[Deprecated]** Use are_indices_valid() instead. + virtual bool + is_element_allocated(const std::initializer_list& indices + /**< [in] List of indices, one for each grid dimension. */ ) const { + return are_indices_valid(indices); + } +#endif + }; /** @}*/ diff --git a/include/yk_solution_api.hpp b/include/yk_solution_api.hpp index 016d92af..222175f6 100644 --- a/include/yk_solution_api.hpp +++ b/include/yk_solution_api.hpp @@ -709,7 +709,7 @@ namespace yask { /** See documentation for the version of new_grid() with a vector of dimension names as a parameter. - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to the new grid. */ virtual yk_grid_ptr @@ -786,7 +786,7 @@ namespace yask { /** See documentation for the version of new_fixed_size_grid() with a vector of dimension names as a parameter. - @note This version is not available (or needed) in SWIG-based APIs, e.g., Python. + @note This version is not available (or needed) in the Python API. @returns Pointer to the new grid. 
*/ virtual yk_grid_ptr diff --git a/src/common/common_utils.cpp b/src/common/common_utils.cpp index 09a9b942..512781c9 100644 --- a/src/common/common_utils.cpp +++ b/src/common/common_utils.cpp @@ -43,7 +43,7 @@ namespace yask { // for numbers above 9 (at least up to 99). // Format: "major.minor.patch". - const string version = "2.19.04"; + const string version = "2.20.00"; string yask_get_version_string() { return version; diff --git a/src/kernel/Makefile b/src/kernel/Makefile index da7b628f..a5292691 100644 --- a/src/kernel/Makefile +++ b/src/kernel/Makefile @@ -781,7 +781,7 @@ py-api-no-yc: $(MAKE) $(NO_YC_MAKE_FLAGS) py-api # Validation runs for each binary. -DEF_TEST_ARGS := -stencil $(YK_STENCIL) -arch $(YK_ARCH) -v \ +DEF_TEST_ARGS := -ranks $(ranks) -stencil $(YK_STENCIL) -arch $(YK_ARCH) -v \ -trial_steps 2 -max_threads 6 -block_threads 2 -no-use_shm -overlap_comms val0 := $(DEF_TEST_ARGS) -l 48 -r 32 -b 24 -rt 0 $(EXTRA_TEST_ARGS) @@ -800,24 +800,18 @@ yk-tests: $(YK_SCRIPT) $(val2) $(YK_SCRIPT) $(val3) $(YK_SCRIPT) $(val4a) - -yk-mp-tests: - $(YK_SCRIPT) -ranks $(ranks) $(val0) - $(YK_SCRIPT) -ranks $(ranks) $(val1) - $(YK_SCRIPT) -ranks $(ranks) $(val2) - $(YK_SCRIPT) -ranks $(ranks) $(val3) - $(YK_SCRIPT) -ranks $(ranks) $(val4b) - $(YK_SCRIPT) -ranks $(ranks) $(val4c) + $(YK_SCRIPT) $(val4b) + $(YK_SCRIPT) $(val4c) # Run the default YASK compiler and kernel. yc-and-yk-test: $(YK_EXEC) $(YK_SCRIPT) - $(MAKE) yk-tests - if (( $(ranks) > 1 )); then $(MAKE) yk-mp-tests; fi + $(MAKE) ranks=1 yk-tests + if (( $(ranks) > 1 )); then $(MAKE) yk-tests; fi # Run the YASK kernel test without implicity using the YASK compiler. yk-test-no-yc: kernel-no-yc $(YK_SCRIPT) - $(MAKE) yk-tests - if (( $(ranks) > 1 )); then $(MAKE) yk-mp-tests; fi + $(MAKE) ranks=1 yk-tests + if (( $(ranks) > 1 )); then $(MAKE) yk-tests; fi # Run the kernel API tests for C++ and Python with and w/o expected exceptions. api-tests: diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index 7b0273f2..52e49ba0 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -172,7 +172,7 @@ namespace yask { // rank. This is needed because neighbors will not know what // grids are actually dirty, and all ranks must have the same // information about which grids are possibly dirty. - mark_grids_dirty(nullptr, start_t, stop_t); + update_grids(nullptr, start_t, stop_t, true); } // all bundles. @@ -183,7 +183,7 @@ namespace yask { exchange_halos(); run_time.stop(); - } + } // run_ref. // Eval stencil bundle pack(s) over grid(s) using optimized code. void StencilContext::run_solution(idx_t first_step_index, @@ -405,7 +405,7 @@ namespace yask { // same information about which grids are possibly // dirty. TODO: make this smarter to save unneeded // MPI exchanges. - mark_grids_dirty(bp, start_t, stop_t); + update_grids(bp, start_t, stop_t, true); // Do the appropriate steps for halo exchange of exterior. // TODO: exchange halo for each dim as soon as it's done. @@ -426,8 +426,8 @@ namespace yask { #include "yask_rank_loops.hpp" // Mark as dirty only if we did exterior. - if (do_mpi_left || do_mpi_right) - mark_grids_dirty(bp, start_t, stop_t); + bool mark_dirty = do_mpi_left || do_mpi_right; + update_grids(bp, start_t, stop_t, mark_dirty); // Do the appropriate steps for halo exchange depending // on 'do_mpi_*' flags. @@ -483,7 +483,7 @@ namespace yask { } // domain dims. // Mark grids dirty for all packs. 
- mark_grids_dirty(bp, start_t, stop_t); + update_grids(bp, start_t, stop_t, true); // Do the appropriate steps for halo exchange of exterior. // TODO: exchange halo for each dim as soon as it's done. @@ -504,8 +504,8 @@ namespace yask { #include "yask_rank_loops.hpp" // Mark as dirty only if we did exterior. - if (do_mpi_left || do_mpi_right) - mark_grids_dirty(bp, start_t, stop_t); + bool mark_dirty = do_mpi_left || do_mpi_right; + update_grids(bp, start_t, stop_t, mark_dirty); // Do the appropriate steps for halo exchange depending // on 'do_mpi_*' flags. @@ -1639,10 +1639,13 @@ namespace yask { continue; // Check all allocated step indices. - idx_t stop_t = 1; - if (gp->is_dim_used(step_dim)) - stop_t = gp->get_alloc_size(step_dim); - for (idx_t t = 0; t < stop_t; t++) { + // Use '0' for grids that don't use the step dim. + idx_t start_t = 0, stop_t = 1; + if (gp->is_dim_used(step_dim)) { + start_t = gp->get_first_valid_step_index(); + stop_t = gp->get_last_valid_step_index() + 1; + } + for (idx_t t = start_t; t < stop_t; t++) { // Only need to swap grids whose halos are not up-to-date // for this step. @@ -1652,13 +1655,14 @@ namespace yask { // Swap this grid. gridsToSwap[gname] = gp; - // Update last step. - lastStepsToSwap[gp] = t; - - // First? - if (firstStepsToSwap.count(gp) == 0) + // Update first step. + if (firstStepsToSwap.count(gp) == 0 || t < firstStepsToSwap[gp]) firstStepsToSwap[gp] = t; + // Update last step. + if (lastStepsToSwap.count(gp) == 0 || t > lastStepsToSwap[gp]) + lastStepsToSwap[gp] = t; + } // steps. } // grids. TRACE_MSG("exchange_halos: need to exchange halos for " << @@ -1781,9 +1785,7 @@ namespace yask { (send_vec_ok ? "with" : "without") << " vector copy into " << buf); if (send_vec_ok) - nelems = gp->get_vecs_in_slice(buf, - first, firstStepsToSwap[gp], - last, lastStepsToSwap[gp]); + nelems = gp->get_vecs_in_slice(buf, first, last); else nelems = gp->get_elements_in_slice(buf, first, last); idx_t nbytes = nelems * get_element_bytes(); @@ -1853,9 +1855,7 @@ namespace yask { (recv_vec_ok ? "with" : "without") << " vector copy from " << buf); if (recv_vec_ok) - nelems = gp->set_vecs_in_slice(buf, - first, firstStepsToSwap[gp], - last, lastStepsToSwap[gp]); + nelems = gp->set_vecs_in_slice(buf, first, last); else nelems = gp->set_elements_in_slice(buf, first, last); assert(nelems <= recvBuf.get_size()); @@ -1918,12 +1918,10 @@ namespace yask { #endif } - // Mark grids that have been written to by bundle pack 'sel_bp'. - // TODO: only mark grids that are written to in their halo-read area. - // TODO: add index for misc dim(s). - // TODO: track sub-domain of grid that is dirty. - void StencilContext::mark_grids_dirty(const BundlePackPtr& sel_bp, - idx_t start, idx_t stop) { + // Update data in grids that have been written to by bundle pack 'sel_bp'. + void StencilContext::update_grids(const BundlePackPtr& sel_bp, + idx_t start, idx_t stop, + bool mark_dirty) { STATE_VARS(this); idx_t stride = (start > stop) ? -1 : 1; map> grids_done; @@ -1952,18 +1950,20 @@ namespace yask { // scratch grids as dirty because they are never exchanged. for (auto gp : sb->outputGridPtrs) { - // Mark output step as dirty if not already done. + // Update if not already done. 
if (grids_done[gp].count(t_out) == 0) { - gp->set_dirty(true, t_out); + gp->update_valid_step(t_out); + if (mark_dirty) + gp->set_dirty(true, t_out); TRACE_MSG("grid '" << gp->get_name() << - "' marked as dirty at step " << t_out); + "' updated at step " << t_out); grids_done[gp].insert(t_out); } } } // bundles. } // steps. } // packs. - } // mark_grids_dirty(). + } // update_grids(). // Reset any locks, etc. void StencilContext::reset_locks() { diff --git a/src/kernel/lib/context.hpp b/src/kernel/lib/context.hpp index 702bf4bf..0e92009e 100644 --- a/src/kernel/lib/context.hpp +++ b/src/kernel/lib/context.hpp @@ -445,10 +445,12 @@ namespace yask { // Call MPI_Test() on all unfinished requests to promote MPI progress. void poke_halo_exchange(); - // Mark grids that have been written to by bundle pack 'sel_bp'. + // Update valid steps in grids that have been written to by bundle pack 'sel_bp'. // If sel_bp==null, use all bundles. - void mark_grids_dirty(const BundlePackPtr& sel_bp, - idx_t start, idx_t stop); + // If 'mark_dirty', also mark as needing halo exchange. + void update_grids(const BundlePackPtr& sel_bp, + idx_t start, idx_t stop, + bool mark_dirty); // Set various limits in 'idxs' based on current step in region. bool shift_region(const Indices& base_start, const Indices& base_stop, diff --git a/src/kernel/lib/generic_grids.hpp b/src/kernel/lib/generic_grids.hpp index 95c7a9c9..d6c2fb64 100644 --- a/src/kernel/lib/generic_grids.hpp +++ b/src/kernel/lib/generic_grids.hpp @@ -51,7 +51,7 @@ namespace yask { const static int _numa_unset = -999; int _numa_pref = _numa_unset; // use default from _opts. - // Note that both _dims and *_layout_base hold dimensions unless this + // Note that both _dims and *_layout_base hold sizes unless this // is a scalar. For a scalar, _dims is empty and _layout_base = 0. IdxTuple _grid_dims; // names and lengths of grid dimensions. Layout* _layout_base = 0; // memory layout. diff --git a/src/kernel/lib/grid_apis.cpp b/src/kernel/lib/grid_apis.cpp index 7bf9c1eb..8b88f80f 100644 --- a/src/kernel/lib/grid_apis.cpp +++ b/src/kernel/lib/grid_apis.cpp @@ -34,7 +34,8 @@ namespace yask { cerr << "\n*** WARNING: call to deprecated YASK API '" \ #api_name "' that will be removed in a future release ***\n" - // APIs to get info from vars. + // APIs to get info from vars: one with name of dim with a lot + // of checking, and one with index of dim with no checking. 
#define GET_GRID_API(api_name, expr, step_ok, domain_ok, misc_ok, prep_req) \ idx_t YkGridBase::api_name(const string& dim) const { \ STATE_VARS(this); \ @@ -53,13 +54,15 @@ namespace yask { auto rtn = expr; \ return rtn; \ } + GET_GRID_API(get_first_valid_index, _rank_offsets[posn] + _local_offsets[posn] - _actl_left_pads[posn], true, true, true, true) + GET_GRID_API(get_last_valid_index, _rank_offsets[posn] + _local_offsets[posn] + _domains[posn] + _actl_right_pads[posn] - 1, true, true, true, true) + GET_GRID_API(get_first_misc_index, _local_offsets[posn], false, false, true, false) + GET_GRID_API(get_last_misc_index, _local_offsets[posn] + _domains[posn] - 1, false, false, true, false) GET_GRID_API(get_rank_domain_size, _domains[posn], false, true, false, false) GET_GRID_API(get_left_pad_size, _actl_left_pads[posn], false, true, false, false) GET_GRID_API(get_right_pad_size, _actl_right_pads[posn], false, true, false, false) GET_GRID_API(get_left_halo_size, _left_halos[posn], false, true, false, false) GET_GRID_API(get_right_halo_size, _right_halos[posn], false, true, false, false) - GET_GRID_API(get_first_misc_index, _local_offsets[posn], false, false, true, false) - GET_GRID_API(get_last_misc_index, _local_offsets[posn] + _domains[posn] - 1, false, false, true, false) GET_GRID_API(get_left_extra_pad_size, _actl_left_pads[posn] - _left_halos[posn], false, true, false, false) GET_GRID_API(get_right_extra_pad_size, _actl_right_pads[posn] - _right_halos[posn], false, true, false, false) GET_GRID_API(get_alloc_size, _allocs[posn], true, true, true, false) @@ -74,8 +77,6 @@ namespace yask { GET_GRID_API(_get_vec_len, _vec_lens[posn], true, true, true, true) GET_GRID_API(_get_rank_offset, _rank_offsets[posn], true, true, true, true) GET_GRID_API(_get_local_offset, _local_offsets[posn], true, true, true, false) - GET_GRID_API(_get_first_alloc_index, _rank_offsets[posn] + _local_offsets[posn] - _actl_left_pads[posn], true, true, true, true) - GET_GRID_API(_get_last_alloc_index, _rank_offsets[posn] + _local_offsets[posn] + _domains[posn] + _actl_right_pads[posn] - 1, true, true, true, true) GET_GRID_API(get_pad_size, _actl_left_pads[posn]; DEPRECATED(get_pad_size), false, true, false, false) GET_GRID_API(get_halo_size, _left_halos[posn]; DEPRECATED(get_halo_size), false, true, false, false) @@ -260,27 +261,34 @@ namespace yask { } // API get, set, etc. 
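The new `get_first_valid_index()`/`get_last_valid_index()` entries above compute the valid range from the rank offset, local offset, domain size, and actual pad sizes. The toy calculation below merely restates that arithmetic with made-up numbers; it is not YASK code.

```cpp
// Sketch of the index arithmetic used by get_first_valid_index() and
// get_last_valid_index(); all values below are hypothetical.
#include <cstdint>
#include <iostream>
using idx_t = std::int64_t;

int main() {
    idx_t rank_offset = 128;  // where this rank's domain starts in the overall problem.
    idx_t local_offset = 0;   // offset of the domain within this rank.
    idx_t domain = 64;        // rank-domain size in this dimension.
    idx_t left_pad = 4, right_pad = 4;  // actual pad sizes.

    idx_t first_valid = rank_offset + local_offset - left_pad;                // 124
    idx_t last_valid  = rank_offset + local_offset + domain + right_pad - 1;  // 195
    std::cout << first_valid << " ... " << last_valid << "\n";
}
```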
- bool YkGridBase::is_element_allocated(const Indices& indices) const { + bool YkGridBase::are_indices_valid(const Indices& indices) const { if (!is_storage_allocated()) return false; - return checkIndices(indices, "is_element_allocated", false, false); + return checkIndices(indices, "are_indices_valid", false, true, false); } double YkGridBase::get_element(const Indices& indices) const { + STATE_VARS(this); if (!is_storage_allocated()) { THROW_YASK_EXCEPTION("Error: call to 'get_element' with no data allocated for grid '" + get_name() + "'"); } - checkIndices(indices, "get_element", true, false); + checkIndices(indices, "get_element", true, true, false); idx_t asi = get_alloc_step_index(indices); real_t val = readElem(indices, asi, __LINE__); + TRACE_MSG("get_element({" << makeIndexString(indices) << "}) on '" << + get_name() + "' returns " << val); return double(val); } idx_t YkGridBase::set_element(double val, const Indices& indices, bool strict_indices) { + STATE_VARS(this); idx_t nup = 0; if (get_raw_storage_buffer() && - checkIndices(indices, "set_element", strict_indices, false)) { + + // Don't check step index because this is a write-only API + // that updates the step index. + checkIndices(indices, "set_element", strict_indices, false, false)) { idx_t asi = get_alloc_step_index(indices); writeElem(real_t(val), indices, asi, __LINE__); nup++; @@ -288,14 +296,21 @@ namespace yask { // Set appropriate dirty flag. set_dirty_using_alloc_index(true, asi); } + TRACE_MSG("set_element(" << val << ", {" << + makeIndexString(indices) << "}, " << + strict_indices << ") on '" << + get_name() + "' returns " << nup); return nup; } idx_t YkGridBase::add_to_element(double val, const Indices& indices, bool strict_indices) { + STATE_VARS(this); idx_t nup = 0; if (get_raw_storage_buffer() && - checkIndices(indices, "add_to_element", strict_indices, false)) { + + // Check step index because this API must read before writing. + checkIndices(indices, "add_to_element", strict_indices, true, false)) { idx_t asi = get_alloc_step_index(indices); addToElem(real_t(val), indices, asi, __LINE__); nup++; @@ -303,18 +318,23 @@ namespace yask { // Set appropriate dirty flag. set_dirty_using_alloc_index(true, asi); } + TRACE_MSG("add_to_element(" << val << ", {" << + makeIndexString(indices) << "}, " << + strict_indices << ") on '" << + get_name() + "' returns " << nup); return nup; } idx_t YkGridBase::get_elements_in_slice(void* buffer_ptr, const Indices& first_indices, const Indices& last_indices) const { + STATE_VARS(this); if (!is_storage_allocated()) { THROW_YASK_EXCEPTION("Error: call to 'get_elements_in_slice' with no data allocated for grid '" + get_name() + "'"); } - checkIndices(first_indices, "get_elements_in_slice", true, false); - checkIndices(last_indices, "get_elements_in_slice", true, false); + checkIndices(first_indices, "get_elements_in_slice", true, true, false); + checkIndices(last_indices, "get_elements_in_slice", true, true, false); // Find range. IdxTuple numElemsTuple = get_slice_range(first_indices, last_indices); @@ -331,21 +351,27 @@ namespace yask { ((real_t*)buffer_ptr)[idx] = val; return true; // keep going. 
}); - return numElemsTuple.product(); + auto nup = numElemsTuple.product(); + TRACE_MSG("get_elements_in_slice(" << buffer_ptr << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}) on '" << + get_name() + "' returns " << nup); + return nup; } idx_t YkGridBase::set_elements_in_slice_same(double val, const Indices& first_indices, const Indices& last_indices, bool strict_indices) { + STATE_VARS(this); if (!is_storage_allocated()) return 0; // 'Fixed' copy of indices. Indices first, last; checkIndices(first_indices, "set_elements_in_slice_same", - strict_indices, false, &first); + strict_indices, false, false, &first); checkIndices(last_indices, "set_elements_in_slice_same", - strict_indices, false, &last); + strict_indices, false, false, &last); // Find range. IdxTuple numElemsTuple = get_slice_range(first, last); @@ -366,15 +392,22 @@ namespace yask { // Set appropriate dirty flag(s). set_dirty_in_slice(first, last); - return numElemsTuple.product(); + auto nup = numElemsTuple.product(); + TRACE_MSG("set_elements_in_slice_same(" << val << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}, " << + strict_indices << ") on '" << + get_name() + "' returns " << nup); + return nup; } idx_t YkGridBase::set_elements_in_slice(const void* buffer_ptr, const Indices& first_indices, const Indices& last_indices) { + STATE_VARS(this); if (!is_storage_allocated()) return 0; - checkIndices(first_indices, "set_elements_in_slice", true, false); - checkIndices(last_indices, "set_elements_in_slice", true, false); + checkIndices(first_indices, "set_elements_in_slice", true, false, false); + checkIndices(last_indices, "set_elements_in_slice", true, false, false); // Find range. IdxTuple numElemsTuple = get_slice_range(first_indices, last_indices); @@ -396,7 +429,12 @@ namespace yask { // Set appropriate dirty flag(s). set_dirty_in_slice(first_indices, last_indices); - return numElemsTuple.product(); + auto nup = numElemsTuple.product(); + TRACE_MSG("set_elements_in_slice(" << buffer_ptr << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}) on '" << + get_name() + "' returns " << nup); + return nup; } } // namespace. diff --git a/src/kernel/lib/realv_grids.cpp b/src/kernel/lib/realv_grids.cpp index fa3ccdf9..1a331ae6 100644 --- a/src/kernel/lib/realv_grids.cpp +++ b/src/kernel/lib/realv_grids.cpp @@ -31,7 +31,7 @@ using namespace std; namespace yask { // Ctor. - // Important: '*ggb' is NOT yet constructed. + // Important: '*ggb' exists but is NOT yet constructed. YkGridBase::YkGridBase(KernelStateBase& stateb, GenericGridBase* ggb, const GridDimNames& dimNames) : @@ -112,8 +112,15 @@ namespace yask { void YkGridBase::set_dirty(bool dirty, idx_t step_idx) { if (_dirty_steps.size() == 0) resize(); - if (_has_step_dim) + if (_has_step_dim) { + + // Also update valid step. + if (dirty) + update_valid_step(step_idx); + + // Wrap index. step_idx = _wrap_step(step_idx); + } else step_idx = 0; set_dirty_using_alloc_index(dirty, step_idx); @@ -280,8 +287,14 @@ namespace yask { // Resize dirty flags, too. size_t old_dirty = _dirty_steps.size(); - if (old_dirty != new_dirty) - _dirty_steps.assign(new_dirty, true); // set all as dirty. + if (old_dirty != new_dirty) { + + // Resize & set all as dirty. + _dirty_steps.assign(new_dirty, true); + + // Init range. + init_valid_steps(); + } // Report changes in TRACE mode. 
if (old_allocs != new_allocs || old_dirty != new_dirty) { @@ -399,10 +412,13 @@ namespace yask { } // Make sure indices are in range. - // Side-effect: If clipped_indices is not NULL, set them to in-range if out-of-range. + // Side-effect: If clipped_indices is not NULL, + // 1) set them to in-range if out-of-range, and + // 2) normalize them if 'normalize' is 'true'. bool YkGridBase::checkIndices(const Indices& indices, - const string& fn, + const string& fn, // name for error msg. bool strict_indices, // die if out-of-range. + bool check_step, // check step index. bool normalize, // div by vec lens. Indices* clipped_indices) const { bool all_ok = true; @@ -416,31 +432,34 @@ namespace yask { *clipped_indices = indices; for (int i = 0; i < n; i++) { idx_t mbit = 1LL << i; + bool is_step_dim = _step_dim_mask & mbit; idx_t idx = indices[i]; bool ok = false; auto& dname = get_dim_name(i); - // Any step index is ok because it wraps around. - // TODO: check that it's < magic added value in wrap_index(). - if (_step_dim_mask & mbit) + // If this is the step dim and we're not checking + // it, then anything is ok. + if (is_step_dim && !check_step) ok = true; - // Within first..last indices? + // Otherwise, check range. else { - auto first_ok = _get_first_alloc_index(i); - auto last_ok = _get_last_alloc_index(i); + + // First..last indices. + auto first_ok = get_first_valid_index(i); + auto last_ok = get_last_valid_index(i); if (idx >= first_ok && idx <= last_ok) ok = true; // Handle outliers. if (!ok) { if (strict_indices) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: " + fn + ": index in dim '" + dname + - "' is " << idx << ", which is not in allocated range [" << - first_ok << "..." << last_ok << "] of grid '" + - get_name() + "'"); + THROW_YASK_EXCEPTION("Error: " + fn + ": index in dim '" + dname + + "' is " + to_string(idx) + ", which is not in allowed range [" + + to_string(first_ok) + "..." + to_string(last_ok) + + "] of grid '" + get_name() + "'"); } - + // Update the output indices. if (clipped_indices) { if (idx < first_ok) @@ -448,10 +467,9 @@ namespace yask { if (idx > last_ok) (*clipped_indices)[i] = last_ok; } + all_ok = false; } - } - if (!ok) - all_ok = false; + } // need to check. // Normalize? if (clipped_indices && normalize) { @@ -464,6 +482,26 @@ namespace yask { return all_ok; } + // Update what steps are valid. + void YkGridBase::update_valid_step(idx_t t) { + STATE_VARS(this); + if (_has_step_dim) { + + // If 't' is before first step, pull offset back. + if (t < get_first_valid_step_index()) + _local_offsets[+Indices::step_posn] = t; + + // If 't' is after last step, push offset out. + else if (t > get_last_valid_step_index()) + _local_offsets[+Indices::step_posn] = t - _domains[+Indices::step_posn] + 1; + + TRACE_MSG("update_valid_step(" << t << "): valid step(s) in '" << + get_name() << "' are now [" << get_first_valid_step_index() << + " ... " << get_last_valid_step_index() << "]"); + } + } + + // Set dirty flags between indices. void YkGridBase::set_dirty_in_slice(const Indices& first_indices, const Indices& last_indices) { diff --git a/src/kernel/lib/realv_grids.hpp b/src/kernel/lib/realv_grids.hpp index 72694e14..d527a987 100644 --- a/src/kernel/lib/realv_grids.hpp +++ b/src/kernel/lib/realv_grids.hpp @@ -77,7 +77,7 @@ namespace yask { Indices _left_halos, _right_halos; // space within pads for halo exchange | zero. Indices _left_wf_exts, _right_wf_exts; // additional halos for wave-fronts | zero. 
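The `update_valid_step()` logic added above maintains a sliding window of valid step indices by adjusting the step-dimension entry of `_local_offsets`. The stand-alone sketch below restates that window arithmetic with simplified names; `StepWindow`, `offset`, and `alloc` are illustrative stand-ins, not YASK types.

```cpp
// Stand-alone sketch of the sliding valid-step window.
#include <cstdint>
#include <iostream>
using idx_t = std::int64_t;

struct StepWindow {
    idx_t offset = 0;  // first valid step index (plays the role of _local_offsets[step]).
    idx_t alloc = 2;   // allocation size in the step dim (plays the role of _domains[step]).

    idx_t first_valid() const { return offset; }
    idx_t last_valid() const { return offset + alloc - 1; }

    // Mimics update_valid_step(t): slide the window just far enough to make 't' valid.
    void update(idx_t t) {
        if (t < first_valid())
            offset = t;              // pull window back.
        else if (t > last_valid())
            offset = t - alloc + 1;  // push window forward.
    }
};

int main() {
    StepWindow w;                    // valid steps: 0 ... 1.
    w.update(2);                     // writing step 2 => valid steps: 1 ... 2.
    std::cout << w.first_valid() << " ... " << w.last_valid() << "\n";
}
```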
Indices _rank_offsets; // offsets of this grid domain in overall problem | zero. - Indices _local_offsets; // offsets of this grid domain in this rank | first index for misc. + Indices _local_offsets; // offsets of this grid domain in this rank | first index for step or misc. Indices _allocs; // actual grid alloc in reals | same. // Sizes in vectors for sizes that are always vec lens (to avoid division). @@ -154,7 +154,8 @@ namespace yask { return false; } - // shallow copy. + // Shallow-copy GenericGrid object. + // This will copy its meta-data and share the elements. *tp = *sp; return true; } @@ -181,6 +182,29 @@ namespace yask { const GridDimNames& dimNames); virtual ~YkGridBase() { } + // Step-indices. + virtual idx_t get_first_valid_step_index() const { + if (!_has_step_dim) + THROW_YASK_EXCEPTION("Error: 'get_first_valid_step_index()' called on grid '" + + get_name() + "' that does not use the step dimension"); + return _local_offsets[+Indices::step_posn]; + } + virtual idx_t get_last_valid_step_index() const { + if (!_has_step_dim) + THROW_YASK_EXCEPTION("Error: 'get_last_valid_step_index()' called on grid '" + + get_name() + "' that does not use the step dimension"); + return _local_offsets[+Indices::step_posn] + _domains[+Indices::step_posn] - 1; + } + void update_valid_step(idx_t t); + inline void update_valid_step(const Indices& indices) { + if (_has_step_dim) + update_valid_step(indices[+Indices::step_posn]); + } + inline void init_valid_steps() { + if (_has_step_dim) + _local_offsets[+Indices::step_posn] = 0; + } + // Halo-exchange flag accessors. virtual bool is_dirty(idx_t step_idx) const; virtual void set_dirty(bool dirty, idx_t step_idx); @@ -266,7 +290,7 @@ namespace yask { // 1 => 1. // 2 => 0. - // Avoid discontinuity caused by dividing negative numbers by + // Avoid discontinuity caused by dividing negative numbers // using floored-mod. idx_t res = imod_flr(t, _domains[+Indices::step_posn]); return res; @@ -303,9 +327,10 @@ namespace yask { // Optionally fix them to be in range and return in 'fixed_indices'. // If 'normalize', make rank-relative, divide by vlen and return in 'fixed_indices'. virtual bool checkIndices(const Indices& indices, - const std::string& fn, - bool strict_indices, - bool normalize, + const std::string& fn, // name for error msg. + bool strict_indices, // die if out-of-range. + bool check_step, // check step index. + bool normalize, // div by vec lens. Indices* fixed_indices = NULL) const; // Set elements to a sequence of values using seed. @@ -392,16 +417,12 @@ namespace yask { // Possibly vectorized version of set/get_elements_in_slice(). virtual idx_t set_vecs_in_slice(const void* buffer_ptr, const Indices& first_indices, - idx_t first_alloc_step_idx, - const Indices& last_indices, - idx_t last_alloc_step_idx) { + const Indices& last_indices) { return set_elements_in_slice(buffer_ptr, first_indices, last_indices); } virtual idx_t get_vecs_in_slice(void* buffer_ptr, const Indices& first_indices, - idx_t first_alloc_step_idx, - const Indices& last_indices, - idx_t last_alloc_step_idx) const { + const Indices& last_indices) const { return get_elements_in_slice(buffer_ptr, first_indices, last_indices); } @@ -416,8 +437,6 @@ namespace yask { // they can break the usage model. // They are not protected because they are used from outside // this class hierarchy. 
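`_wrap_step()` above maps an arbitrary step index onto an allocation slot using a floored modulus so that negative indices do not cause a discontinuity. The small stand-in below for `imod_flr()` (the real helper is internal to YASK) shows the intended mapping for an allocation size of 2.

```cpp
// Sketch of the floored-mod wrapping described in _wrap_step(),
// assuming a step-allocation size of 2.
#include <cstdint>
#include <iostream>
using idx_t = std::int64_t;

idx_t imod_flr_sketch(idx_t a, idx_t b) {
    idx_t r = a % b;
    return (r < 0) ? r + b : r;  // floored (always non-negative) remainder.
}

int main() {
    for (idx_t t = -2; t <= 2; t++)
        std::cout << t << " => " << imod_flr_sketch(t, 2) << "\n";
    // Prints: -2 => 0, -1 => 1, 0 => 0, 1 => 1, 2 => 0.
}
```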
- GET_GRID_API(_get_first_alloc_index) - GET_GRID_API(_get_last_alloc_index) GET_GRID_API(_get_left_wf_ext) GET_GRID_API(_get_local_offset) GET_GRID_API(_get_rank_offset) @@ -434,6 +453,8 @@ namespace yask { SET_GRID_API(_set_right_wf_ext) // Exposed APIs. + GET_GRID_API(get_first_valid_index) + GET_GRID_API(get_last_valid_index) GET_GRID_API(get_rank_domain_size) GET_GRID_API(get_first_rank_domain_index) GET_GRID_API(get_last_rank_domain_index) @@ -482,14 +503,14 @@ namespace yask { return format_indices(indices2); } - virtual bool is_element_allocated(const Indices& indices) const; - virtual bool is_element_allocated(const GridIndices& indices) const { + virtual bool are_indices_valid(const Indices& indices) const; + virtual bool are_indices_valid(const GridIndices& indices) const { const Indices indices2(indices); - return is_element_allocated(indices2); + return are_indices_valid(indices2); } - virtual bool is_element_allocated(const std::initializer_list& indices) const { + virtual bool are_indices_valid(const std::initializer_list& indices) const { const Indices indices2(indices); - return is_element_allocated(indices2); + return are_indices_valid(indices2); } virtual double get_element(const Indices& indices) const; @@ -1008,15 +1029,13 @@ namespace yask { // Indices must be vec-normalized and rank-relative. virtual idx_t set_vecs_in_slice(const void* buffer_ptr, const Indices& first_indices, - idx_t first_alloc_step_idx, - const Indices& last_indices, - idx_t last_alloc_step_idx) { + const Indices& last_indices) { STATE_VARS(this); if (!is_storage_allocated()) return 0; Indices firstv, lastv; - checkIndices(first_indices, "set_vecs_in_slice", true, true, &firstv); - checkIndices(last_indices, "set_vecs_in_slice", true, true, &lastv); + checkIndices(first_indices, "set_vecs_in_slice", true, false, true, &firstv); + checkIndices(last_indices, "set_vecs_in_slice", true, false, true, &lastv); // Find range. IdxTuple numVecsTuple = get_slice_range(firstv, lastv); @@ -1027,17 +1046,19 @@ namespace yask { // Do step loop explicitly. auto sp = +Indices::step_posn; + idx_t first_t = 0, last_t = 0; if (_has_step_dim) { - assert(last_alloc_step_idx >= first_alloc_step_idx); - assert(first_alloc_step_idx == _wrap_step(firstv[sp])); - assert(last_alloc_step_idx == _wrap_step(lastv[sp])); + first_t = firstv[sp]; + last_t = lastv[sp]; numVecsTuple[sp] = 1; // Do one at a time. } idx_t iofs = 0; - for (idx_t t = first_alloc_step_idx; t <= last_alloc_step_idx; t++) { + for (idx_t t = first_t; t <= last_t; t++) { - // Do only this one step. + // Do only this one step in this iteration. + idx_t ti = 0; if (_has_step_dim) { + ti = _wrap_step(t); firstv[sp] = t; lastv[sp] = t; } @@ -1049,7 +1070,7 @@ namespace yask { Indices pt = firstv.addElements(ofs); real_vec_t val = ((real_vec_t*)buffer_ptr)[idx + iofs]; - writeVecNorm(val, pt, t, __LINE__); + writeVecNorm(val, pt, ti, __LINE__); return true; // keep going. 
}); iofs += numVecsTuple.product(); @@ -1063,16 +1084,14 @@ namespace yask { virtual idx_t get_vecs_in_slice(void* buffer_ptr, const Indices& first_indices, - idx_t first_alloc_step_idx, - const Indices& last_indices, - idx_t last_alloc_step_idx) const { + const Indices& last_indices) const { STATE_VARS(this); if (!is_storage_allocated()) FORMAT_AND_THROW_YASK_EXCEPTION("Error: call to 'get_vecs_in_slice' with no data allocated for grid '" << get_name()); Indices firstv, lastv; - checkIndices(first_indices, "get_vecs_in_slice", true, true, &firstv); - checkIndices(last_indices, "get_vecs_in_slice", true, true, &lastv); + checkIndices(first_indices, "get_vecs_in_slice", true, true, true, &firstv); + checkIndices(last_indices, "get_vecs_in_slice", true, true, true, &lastv); // Find range. IdxTuple numVecsTuple = get_slice_range(firstv, lastv); @@ -1084,17 +1103,19 @@ namespace yask { // Do step loop explicitly. auto sp = +Indices::step_posn; + idx_t first_t = 0, last_t = 0; if (_has_step_dim) { - assert(last_alloc_step_idx >= first_alloc_step_idx); - assert(first_alloc_step_idx == _wrap_step(firstv[sp])); - assert(last_alloc_step_idx == _wrap_step(lastv[sp])); + first_t = firstv[sp]; + last_t = lastv[sp]; numVecsTuple[sp] = 1; // Do one at a time. } idx_t iofs = 0; - for (idx_t t = first_alloc_step_idx; t <= last_alloc_step_idx; t++) { + for (idx_t t = first_t; t <= last_t; t++) { - // Do only this one step. + // Do only this one step in this iteration. + idx_t ti = 0; if (_has_step_dim) { + ti = _wrap_step(t); firstv[sp] = t; lastv[sp] = t; } @@ -1105,7 +1126,7 @@ namespace yask { size_t idx) { Indices pt = firstv.addElements(ofs); - real_vec_t val = readVecNorm(pt, t, __LINE__); + real_vec_t val = readVecNorm(pt, ti, __LINE__); ((real_vec_t*)buffer_ptr)[idx + iofs] = val; return true; // keep going. }); diff --git a/src/kernel/tests/yask_kernel_api_test.cpp b/src/kernel/tests/yask_kernel_api_test.cpp index 626e0f22..9e7fa7c9 100644 --- a/src/kernel/tests/yask_kernel_api_test.cpp +++ b/src/kernel/tests/yask_kernel_api_test.cpp @@ -131,6 +131,9 @@ int main() { // Step dim? else if (dname == soln->get_step_dim_name()) { + os << " currently-valid step index range: " << + grid->get_first_valid_step_index() << " ... " << + grid->get_last_valid_step_index() << endl; } // Misc dim? @@ -167,9 +170,10 @@ int main() { // Step dim? else if (dname == soln->get_step_dim_name()) { - // Set indices for one time-step. - first_idx = 0; - last_idx = 0; + // Set indices for valid time-steps. + first_idx = grid->get_first_valid_step_index(); + last_idx = grid->get_last_valid_step_index(); + assert(last_idx - first_idx + 1 == grid->get_alloc_size(dname)); } // Misc dim? @@ -245,7 +249,7 @@ int main() { os << "End of YASK kernel API test.\n"; return 0; } - catch (yask_exception& e) { + catch (yask_exception e) { cerr << "YASK kernel API test: " << e.get_message() << " on rank " << env->get_rank_index() << ".\n"; return 1; diff --git a/src/kernel/tests/yask_kernel_api_test.py b/src/kernel/tests/yask_kernel_api_test.py index edf573f6..432a9e81 100755 --- a/src/kernel/tests/yask_kernel_api_test.py +++ b/src/kernel/tests/yask_kernel_api_test.py @@ -215,11 +215,15 @@ def init_grid(grid, timestep) : print(" " + grid.get_name() + repr(grid.get_dim_names())) for dname in grid.get_dim_names() : if dname in soln.get_domain_dim_names() : - print(" '" + dname + "' allowed index range in this rank: " + + print(" '" + dname + "' allowed domain index range in this rank: " + repr(grid.get_first_rank_alloc_index(dname)) + " ... 
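The updated kernel-API test above now derives the first and last step indices from the grid instead of assuming step 0. A condensed C++ sketch of the same check, assuming `soln` and `grid` come from an already-prepared solution and the grid uses the step dimension:

```cpp
// Sketch mirroring the updated kernel-API test.
#include "yask_kernel_api.hpp"
#include <cassert>
#include <iostream>
using namespace yask;

void check_step_range(yk_solution_ptr soln, yk_grid_ptr grid) {
    auto sdim = soln->get_step_dim_name();
    idx_t first = grid->get_first_valid_step_index();
    idx_t last = grid->get_last_valid_step_index();
    std::cout << "currently-valid step index range: "
              << first << " ... " << last << "\n";

    // The number of valid steps always equals the step-dim allocation.
    assert(last - first + 1 == grid->get_alloc_size(sdim));
}
```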
" + repr(grid.get_last_rank_alloc_index(dname))) - elif dname in soln.get_misc_dim_names() : - print(" '" + dname + "' allowed index range: " + + elif dname == soln.get_step_dim_name() : + print(" '" + dname + "' allowed step index range: " + + repr(grid.get_first_valid_step_index()) + " ... " + + repr(grid.get_last_valid_step_index())) + else : + print(" '" + dname + "' allowed misc index range: " + repr(grid.get_first_misc_index(dname)) + " ... " + repr(grid.get_last_misc_index(dname))) From 31790314a5f1179ff061abfe8ec50ca22c4ea6f9 Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Thu, 11 Apr 2019 15:55:09 -0700 Subject: [PATCH 3/5] Replace share_storage() with fuse_grid(). Internally, add a level of indirection to grid accesses. --- README.md | 7 +- include/yk_grid_api.hpp | 236 ++++++++----- include/yk_solution_api.hpp | 43 ++- src/compiler/lib/Expr.cpp | 8 +- src/compiler/lib/YaskKernel.cpp | 32 +- src/kernel/Makefile | 2 - src/kernel/lib/alloc.cpp | 19 +- src/kernel/lib/context.cpp | 28 +- src/kernel/lib/context.hpp | 26 +- src/kernel/lib/generic_grids.cpp | 8 +- src/kernel/lib/generic_grids.hpp | 26 +- src/kernel/lib/grid_apis.cpp | 400 +++++++++++----------- src/kernel/lib/new_grid.cpp | 24 +- src/kernel/lib/realv_grids.cpp | 73 ++-- src/kernel/lib/realv_grids.hpp | 378 +++++++++++--------- src/kernel/lib/settings.hpp | 278 ++++++++------- src/kernel/lib/setup.cpp | 51 +-- src/kernel/lib/soln_apis.cpp | 19 +- src/kernel/lib/stencil_calc.cpp | 30 +- src/kernel/tests/grid_test.cpp | 32 +- src/kernel/tests/yask_kernel_api_test.cpp | 11 +- src/kernel/tests/yask_kernel_api_test.py | 2 +- src/kernel/yask.sh | 2 +- 23 files changed, 977 insertions(+), 758 deletions(-) diff --git a/README.md b/README.md index 96c824ac..18dcab8a 100644 --- a/README.md +++ b/README.md @@ -58,7 +58,12 @@ YASK contains a domain-specific compiler to convert scalar stencil code to SIMD- for functional testing if you don't have native support for any given instruction set. ### Backward-compatibility notices, including changes in default behavior: -* Version 2.20.00 added checking of the step-dimension index value in the `yk_grid::get_element()` and similar APIs. Previously, invalid values silently "wrapped" around to valid ones. Now, the step index must be valid when reading, and the valid step indices are updated when writing. +* Version 2.20.00 added checking of the step-dimension index value in the `yk_grid::get_element()` and similar APIs. +Previously, invalid values silently "wrapped" around to valid values. +Now, the step index must be valid when reading, and the valid step indices are updated when writing. +The default for all `strict_indices` API parameters is now `true` to catch more programming errors and +increase consistency of behavior between "set" and "get" APIs. +The `share_storage()` APIs have been replaced with `fuse_grids()`. * Version 2.19.01 turned off multi-pass tuning by default. Enable with `-auto_tune_each_pass`. * Version 2.18.03 allowed the default radius to be stencil-specific and changed the names of example stencil "9axis" to "3axis_with_diags". * Version 2.18.00 added the ability to specify the global-domain size, and it will calculate the local-domain sizes from it. diff --git a/include/yk_grid_api.hpp b/include/yk_grid_api.hpp index 03d7b9a3..3442007b 100644 --- a/include/yk_grid_api.hpp +++ b/include/yk_grid_api.hpp @@ -43,15 +43,18 @@ namespace yask { /// A run-time data container. 
/** - A run-time "grid variable" (often referred to simply as a "grid", which is a bit of a misnomer) - is a generic term for any n-dimensional array. A 0-dim grid - is a scalar, a 1-dim grid is an array, etc. A run-time grid contains - data, unlike yc_grid, a compile-time grid variable. + A run-time YASK variable (usually referred to simply as a "grid", + which is a bit of a misnomer) is a generic term for any n-dimensional + array. A 0-dim grid is a scalar, a 1-dim grid is an array, etc. A + run-time variable actually contains data, unlike yc_grid, a + compile-time variable. Typically, access to each grid is obtained via yk_solution::get_grid(). You may also use yk_solution::new_grid() or yk_solution::new_fixed_size_grid() if you need a grid that is not part of the pre-defined solution. + Grid Dimensions + =============== Each dimension of a grid is one of the following: - The *step* dimension, typically time ("t"), as returned from yk_solution::get_step_dim_name(). @@ -61,11 +64,17 @@ namespace yask { These may be returned via yk_solution::get_misc_dim_names() if they were defined in the YASK compiler, or they may be any other name that is not a step or domain dimension. + Step Dimensions + -------------- + The step dimension, as defined during YASK compilation, + is the dimension in which the simulation proceeds. In the step dimension, there is no fixed first or last index. However, there is a finite allocation size, which is the number of values in the step dimension that are stored in memory. The valid indices in the step dimension are always consecutive and change based - on what was last written to the grid. For example: If a grid `A` has + on what was last written to the grid. + + For example: If a grid `A` has an allocation size of two (2) in the `t` step dimension, its initial valid `t` indices are 0 and 1. Calling `A->get_element({0, x})` or `A->get_element({1, x})` would return a value from `A` assuming `x` @@ -77,12 +86,26 @@ namespace yask { valid `t` indices in `A` would be 1 and 2, and `A(0, x)` is no longer stored in memory because the allocation size is only 2. Then, calling `A->get_element({1, x})` or `A->get_element({2, x})` would - succeed and `A->get_element({0, x})` would fail. Calling APIs that - set values in a grid such as set_element() will also update the valid - step index range. The current valid indices in the step dimension - can be retrieved via yk_grid::get_first_valid_step_index() and + succeed and `A->get_element({0, x})` would fail. + + Calling APIs that set values in a grid such as set_element() will + also update the valid step index range. The current valid indices in + the step dimension can be retrieved via + yk_grid::get_first_valid_step_index() and yk_grid::get_last_valid_step_index(). + If yk_solution::set_step_wrap(true) is called, any invalid value of a + step index provided to an API will silently "wrap-around" to a valid + value by effectively adding or subtracing multiples of the allocation + size as needed. For example, if the valid step indices are 7 and 8 + for a given grid, the indices 0 and 1 will wrap-around to 8 and 7, + respectively. This is not recommended for general use because it can + hide off-by-one-type errors. However, it may be useful for + applications that need to access a grid using absolute rather than + logical step indices. + + Domain Dimensions + -------------- In each domain dimension, grid sizes include the following components: - The *domain* is the elements to which the stencils are applied. 
@@ -123,17 +146,25 @@ namespace yask { Data in these overlapped areas are exchanged as needed during stencil application to maintain a consistent values as if there was only one rank. + Miscellaneous Dimensions + -------------- In each miscellaneous dimension, there is no padding or halos. There is a fixed allocation size, and each index must be between its first and last valid value. The valid miscellaneous indices may be retrieved via yk_grid::get_first_misc_index() and yk_grid::get_last_misc_index(). + Other Details + =========== + Elements + ----------- All sizes are expressed in numbers of elements. Each element may be a 4-byte (single precision) or 8-byte (double precision) floating-point value as returned by yk_solution::get_element_bytes(). + Data Storage + ----------- Initially, a grid is not assigned any allocated storage. This is done to allow modification of domain, padding, and other allocation sizes before allocation. @@ -144,8 +175,8 @@ namespace yask { yk_solution::prepare_solution() is called. - Storage for a specific grid may be allocated before calling yk_solution::prepare_solution() via yk_grid::alloc_storage(). - - **[Advanced]** Storage for a specific grid may be shared with another grid with - existing storage via yk_grid::share_storage(). + - **[Advanced]** A grid may be merged with another grid with existing storage + via yk_grid::fuse_grids(). */ class yk_grid { public: @@ -157,13 +188,6 @@ namespace yask { */ virtual const std::string& get_name() const =0; - /// Determine whether this grid is automatically resized based on the solution. - /** - @returns `true` if this grid was created via yk_solution::new_fixed_size_grid() - or `false` otherwise. - */ - virtual bool is_fixed_size() const =0; - /// Get the number of dimensions used in this grid. /** This may include domain, step, and/or miscellaneous dimensions. @@ -188,6 +212,13 @@ namespace yask { virtual bool is_dim_used(const std::string& dim) const =0; + /// Determine whether this grid is *not* automatically resized based on the solution. + /** + @returns `true` if this grid was created via yk_solution::new_fixed_size_grid() + or `false` otherwise. + */ + virtual bool is_fixed_size() const =0; + /// Get the first valid index in this rank in the specified dimension. /** This is a convenience function that provides the first possible @@ -202,7 +233,7 @@ namespace yask { @returns the first valid index. */ virtual idx_t - get_first_valid_index(const std::string& dim + get_first_local_index(const std::string& dim /**< [in] Name of dimension to get. Must be one of the names from get_dim_names(). */ ) const =0; @@ -220,7 +251,7 @@ namespace yask { @returns the last valid index. */ virtual idx_t - get_last_valid_index(const std::string& dim + get_last_local_index(const std::string& dim /**< [in] Name of dimension to get. Must be one of the names from get_dim_names(). */ ) const =0; @@ -229,7 +260,7 @@ namespace yask { For the domain dimensions, this includes the rank-domain and padding sizes. See the "Detailed Description" for \ref yk_grid for information on grid sizes. For any dimension `dim`, `get_alloc_size(dim) == - get_last_valid_index(dim) - get_first_valid_index(dim) + 1`; + get_last_local_index(dim) - get_first_local_index(dim) + 1`; @returns allocation in number of elements (not bytes). */ virtual idx_t @@ -241,7 +272,7 @@ namespace yask { /** The valid step indices in a grid are updated by calling yk_solution::run_solution() or one of the element-setting API functions. 
- Equivalient to get_first_valid_index(dim), where `dim` is the step dimension. + Equivalient to get_first_local_index(dim), where `dim` is the step dimension. @returns the first index in the step dimension that can be used in one of the element-getting API functions. This grid must use the step index. @@ -253,7 +284,7 @@ namespace yask { /** The valid step indices in a grid are updated by calling yk_solution::run_solution() or one of the element-setting API functions. - Equivalient to get_last_valid_index(dim), where `dim` is the step dimension. + Equivalient to get_last_local_index(dim), where `dim` is the step dimension. @returns the last index in the step dimension that can be used in one of the element-getting API functions. This grid must use the step index. @@ -401,7 +432,7 @@ namespace yask { /// Get the first index of a specified miscellaneous dimension. /** - Equivalent to get_first_valid_index(dim), where `dim` is a misc dimension. + Equivalent to get_first_local_index(dim), where `dim` is a misc dimension. @returns the first valid index in a non-step and non-domain dimension. */ virtual idx_t @@ -411,7 +442,7 @@ namespace yask { /// Get the last index of a specified miscellaneous dimension. /** - Equivalent to get_last_valid_index(dim), where `dim` is a misc dimension. + Equivalent to get_last_local_index(dim), where `dim` is a misc dimension. @returns the last valid index in a non-step and non-domain dimension. */ virtual idx_t @@ -421,14 +452,14 @@ namespace yask { /// Determine whether the given indices refer to an accessible element in this rank. /** - Provide indices in a list in the same order returned by get_dim_names(). - Domain indices are relative to the *overall* problem domain. + Provide indices in a list in the same order returned by get_dim_names() for this grid. + Domain index values are relative to the *overall* problem domain. @returns `true` if index values fall within the range returned by - get_first_valid_index(dim) and get_last_valid_index(dim) for each dimension + get_first_local_index(dim) and get_last_local_index(dim) for each dimension `dim` in the grid; `false` otherwise. */ virtual bool - are_indices_valid(const std::vector& indices + are_indices_local(const std::vector& indices /**< [in] List of indices, one for each grid dimension. */ ) const =0; #ifndef SWIG @@ -437,7 +468,7 @@ namespace yask { See get_last_misc_index(). */ virtual bool - are_indices_valid(const std::initializer_list& indices + are_indices_local(const std::initializer_list& indices /**< [in] List of indices, one for each grid dimension. */ ) const =0; #endif @@ -446,7 +477,7 @@ namespace yask { Provide indices in a list in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive, + get_first_local_index() and get_last_local_index(), inclusive, for each dimension in the grid. @returns value in grid at given indices. */ @@ -472,14 +503,16 @@ namespace yask { If the grid uses the step dimension, the value of the step index will be used to update the current valid step indices in the grid. If `strict_indices` is `false` and any non-step index values - are invalid as defined by are_indices_valid(), + are invalid as defined by are_indices_local(), the API will have no effect and return zero (0). If `strict_indices` is `true` and any non-step index values are invalid, the API will throw an exception. 
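The effect of writes on the valid step window described above can be summarized in a short sketch. It assumes a grid `A` with dimensions `(t, x)`, a step-dimension allocation of 2, allocated storage, and a valid domain index `x0`; these names are illustrative, not taken from the patch.

```cpp
// Sketch of how writes slide the valid step window.
#include "yask_kernel_api.hpp"
using namespace yask;

void demo_step_window(yk_grid_ptr A, idx_t x0) {
    // Initially the valid steps might be, e.g., t = 0 and t = 1.
    idx_t t0 = A->get_first_valid_step_index();
    idx_t t1 = A->get_last_valid_step_index();

    // Reading is only allowed at currently-valid step indices.
    double v = A->get_element({t1, x0});

    // Writing at t1 + 1 slides the valid window forward by one step:
    // afterwards the valid steps are t0 + 1 ... t1 + 1, and reading at
    // the old t0 would throw.
    A->set_element(v + 1.0, {t1 + 1, x0});
}
```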
+ If storage has not been allocated for this grid, this will have no effect + and return zero (0) if `strict_indices` is `false`, + or it will throw an exception if `strict_indices` is `true`. @note The parameter value is a double-precision floating-point value, but it will be converted to single-precision if yk_solution::get_element_bytes() returns 4. - If storage has not been allocated for this grid, this will have no effect. @returns Number of elements set, which will be one (1) if the indices are valid and zero (0) if they are not. */ @@ -487,7 +520,7 @@ namespace yask { set_element(double val /**< [in] Element in grid will be set to this. */, const std::vector& indices /**< [in] List of indices, one for each grid dimension. */, - bool strict_indices = false + bool strict_indices = true /**< [in] If true, indices must be within domain or padding. If false, indices outside of domain and padding result in no change to grid. */ ) =0; @@ -502,7 +535,7 @@ namespace yask { set_element(double val /**< [in] Element in grid will be set to this. */, const std::initializer_list& indices /**< [in] List of indices, one for each grid dimension. */, - bool strict_indices = false + bool strict_indices = true /**< [in] If true, indices must be within domain or padding. If false, indices outside of domain and padding result in no change to grid. */ ) =0; @@ -523,7 +556,7 @@ namespace yask { Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive. + get_first_local_index() and get_last_local_index(), inclusive. @returns Number of elements read. */ virtual idx_t @@ -539,20 +572,22 @@ namespace yask { Provide indices in a list in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive. + get_first_local_index() and get_last_local_index(), inclusive. Updates are OpenMP atomic, meaning that this function can be called by several OpenMP threads without causing a race condition. + If storage has not been allocated for this grid, this will have no effect + and return zero (0) if `strict_indices` is `false`, + or it will throw an exception if `strict_indices` is `true`. @note The parameter value is a double-precision floating-point value, but it will be converted to single-precision if yk_solution::get_element_bytes() returns 4. - If storage has not been allocated for this grid, this will have no effect. @returns Number of elements updated. */ virtual idx_t add_to_element(double val /**< [in] This value will be added to element in grid. */, const std::vector& indices /**< [in] List of indices, one for each grid dimension. */, - bool strict_indices = false + bool strict_indices = true /**< [in] If true, indices must be within domain or padding. If false, indices outside of domain and padding result in no change to grid. */ ) =0; @@ -567,7 +602,7 @@ namespace yask { add_to_element(double val /**< [in] This value will be added to element in grid. */, const std::initializer_list& indices /**< [in] List of indices, one for each grid dimension. */, - bool strict_indices = false + bool strict_indices = true /**< [in] If true, indices must be within domain or padding. If false, indices outside of domain and padding result in no change to grid. 
*/ ) =0; @@ -577,11 +612,10 @@ namespace yask { /** Sets all allocated elements, including those in the domain and padding area to the same specified value. + If storage has not been allocated, this will have no effect. @note The parameter is a double-precision floating-point value, but it will be converted to single-precision if yk_solution::get_element_bytes() returns 4. - @note If storage has not been allocated via yk_solution::prepare_solution(), - this will have no effect. */ virtual void set_all_elements_same(double val /**< [in] All elements will be set to this. */ ) =0; @@ -593,9 +627,11 @@ namespace yask { Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive, + get_first_local_index() and get_last_local_index(), inclusive, if `strict_indices` is `true`. - If storage has not been allocated for this grid, this will have no effect. + If storage has not been allocated for this grid, this will have no effect + and return zero (0) if `strict_indices` is `false`, + or it will throw an exception if `strict_indices` is `true`. @returns Number of elements set. */ virtual idx_t @@ -604,7 +640,7 @@ namespace yask { /**< [in] List of initial indices, one for each grid dimension. */, const std::vector& last_indices /**< [in] List of final indices, one for each grid dimension. */, - bool strict_indices = false + bool strict_indices = true /**< [in] If true, indices must be within domain or padding. If false, only elements within the allocation of this grid will be set, and elements outside will be ignored. */ ) =0; @@ -624,8 +660,9 @@ namespace yask { Provide indices in two lists in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive. - If storage has not been allocated for this grid, this will have no effect. + get_first_local_index() and get_last_local_index(), inclusive. + If storage has not been allocated for this grid, this will + throw an exception. @returns Number of elements written. */ virtual idx_t @@ -647,7 +684,7 @@ namespace yask { Provide indices in the same order returned by get_dim_names(). Indices are relative to the *overall* problem domain. Index values must fall between the values returned by - get_first_valid_index() and get_last_valid_index(), inclusive, for + get_first_local_index() and get_last_local_index(), inclusive, for each dimension in both grids. @returns Number of elements copied. */ @@ -865,7 +902,7 @@ namespace yask { /// **[Advanced]** Get the first accessible index in this grid in this rank in the specified domain dimension. /** - Equivalent to get_first_valid_index(dim), where `dim` is a domain dimension. + Equivalent to get_first_local_index(dim), where `dim` is a domain dimension. @returns First valid index in this grid. */ virtual idx_t @@ -876,7 +913,7 @@ namespace yask { /// **[Advanced]** Get the last accessible index in this grid in this rank in the specified domain dimension. /** - Equivalent to get_last_valid_index(dim), where `dim` is a domain dimension. + Equivalent to get_last_local_index(dim), where `dim` is a domain dimension. @returns Last valid index in this grid. 
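The clipping behavior of `strict_indices == false` described above for `set_elements_in_slice_same()` can be shown with a small sketch; the grid layout `(t, x)` and the index arguments are hypothetical.

```cpp
// Sketch: set a sub-range of a (t, x) grid to zero, clipping any
// out-of-range indices instead of throwing.
#include "yask_kernel_api.hpp"
#include <vector>
using namespace yask;

void zero_subrange(yk_grid_ptr g, idx_t t0, idx_t x_lo, idx_t x_hi) {
    std::vector<idx_t> first = { t0, x_lo };
    std::vector<idx_t> last  = { t0, x_hi };

    // With strict_indices == false, parts of the slice outside the
    // grid's local allocation are simply ignored.
    idx_t n = g->set_elements_in_slice_same(0.0, first, last,
                                            false /* strict_indices */);
    (void)n;  // number of elements actually set.
}
```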
*/ virtual idx_t @@ -922,8 +959,6 @@ namespace yask { /** This will release storage allocated via any of the options described in the "Detailed Description" for \ref yk_grid. - If the data was shared between two or more grids, the data will - be retained by the remaining grids. */ virtual void release_storage() =0; @@ -934,8 +969,9 @@ namespace yask { must be the same: - Number of dimensions. - Name of each dimension, in the same order. + - Vector folding in each dimension. - Allocation size in each dimension. - - Rank domain size in each domain dimension. + - Rank (local) domain size in each domain dimension. - Padding size in each domain dimension. The following do not have to be identical: @@ -947,34 +983,70 @@ namespace yask { virtual bool is_storage_layout_identical(const yk_grid_ptr other) const =0; - /// **[Advanced]** Use existing data-storage from specified grid. + /// **[Advanced]** Merge this grid with another grid. /** - This is an alternative to allocating data storage via - yk_solution::prepare_solution() or alloc_storage(). - In this case, data from a grid in this or another solution will be shared with - this grid. - In order to successfully share storage, the following conditions must hold: - - The source grid must already have storage allocated. - - The two grids must have the same dimensions in the same order. - - The two grids must have the same domain sizes in all domain dimensions. - - The two grids must have the same allocation sizes in non-domain dimensions. - - The required padding size of this grid must be less than or - equal to the actual padding size of the source grid in all domain - dimensions. The required padding size of this grid will be equal to - or greater than its halo size. It is not strictly necessary that the - two grids have the same halo sizes, but that is a sufficient condition. - - Any pre-existing storage will be released before allocation as via release_storage(). - The padding size(s) of this grid will be set to that of the source grid. - After calling share_storage(), changes in one grid via set_all_elements() - or set_element() will be visible in the other grid. + After calling this API, both this grid and the `other` + grid will effectively become a reference to the same shared grid. + Any subsequent API applied to this grid or the + `other` grid will access the same data and/or + effect the same changes. + There are two categories of data associated with + a grid, and the source of each is specified independently: + - The _meta-data_ includes the name, dimensions, sizes, etc., + i.e., everything about the grid apart from the storage. + If `use_meta_data_from_other` is `true`, the resulting shared + grid will use the meta-data from the `other` grid; + if `use_meta_data_from_other` is `false`, the resulting shared grid + will use the meta-data from this grid. + - The _storage_ holds the actual values of the data elements + if storage has been allocated. + If `use_storage_from_other` is `true`, the resulting shared + grid will use the storage from the `other` grid; + if `use_storage_from_other` is `false`, the resulting shared grid + will use the storage from this grid. + + Implications: + - If `use_meta_data_from_other` and `use_storage_from_other` + are both `false`, this grid remains unaltered, and the + `other` grid becomes a reference to this grid. + - If `use_meta_data_from_other` and `use_storage_from_other` + are both `true`, the `other` grid remains unaltered, and this + grid becomes a reference to the `other` grid. 
+ - If `use_meta_data_from_other` and `use_storage_from_other` + are different, and if + and the source storage is already allocated, the size of the + source storage must match that required by the source + meta-data. In other words, the value of + yk_grid::get_num_storage_bytes() must return the same value from + both grids prior to fusing. + - The storage of the resulting shared grid will be + allocated or unallocated depending on that of the source grid. + Any pre-existing storage in the non-source grid will be released. + - After fusing, any API applied to the shared grid via this + grid or the `other` grid will be visible to both, including + release_storage(). + + To ensure that the kernels created by the YASK compiler work + properly, if either this grid and/or the `other` grid is used in + a kernel and its meta-data is being replaced, the dimensions and + fold-lengths must remain unchanged or an exception will the + thrown. It is the responsibility of the API programmer to ensure + that the storage, local domain sizes, halos, etc. of the grid + are set to be compatible with the solution before calling + yk_solution::run_solution(). See allocation options and more information about grid sizes in the "Detailed Description" for \ref yk_grid. */ virtual void - share_storage(yk_grid_ptr source - /**< [in] Grid from which storage will be shared. */) =0; + fuse_grids(yk_grid_ptr other + /**< [in] Grid to be merged with this grid. */, + bool use_meta_data_from_other + /**< [in] If `true`, use meta-data from `other` grid; + if `false`, use meta-data from this grid. */, + bool use_storage_from_other + /**< [in] If `true`, use element storage from `other` grid; + if `false`, use storage from this grid. */) =0; /// **[Advanced]** Get pointer to raw data storage buffer. /** @@ -1052,22 +1124,28 @@ namespace yask { Must be one of the names from yk_solution::get_domain_dim_names(). */ ) const =0; - /// **[Deprecated]** Use are_indices_valid() instead. + /// **[Deprecated]** Use are_indices_local() instead. virtual bool is_element_allocated(const std::vector& indices /**< [in] List of indices, one for each grid dimension. */ ) const { - return are_indices_valid(indices); + return are_indices_local(indices); } #ifndef SWIG - /// **[Deprecated]** Use are_indices_valid() instead. + /// **[Deprecated]** Use are_indices_local() instead. virtual bool is_element_allocated(const std::initializer_list& indices /**< [in] List of indices, one for each grid dimension. */ ) const { - return are_indices_valid(indices); + return are_indices_local(indices); } #endif + /// **[Deprecated]** Use fuse_grids() instead. + virtual void + share_storage(yk_grid_ptr other) { + fuse_grids(other, false, true); + } + }; /** @}*/ diff --git a/include/yk_solution_api.hpp b/include/yk_solution_api.hpp index 222175f6..dd91e785 100644 --- a/include/yk_solution_api.hpp +++ b/include/yk_solution_api.hpp @@ -683,8 +683,8 @@ namespace yask { whether or not it was defined via yc_node_factory::new_misc_index(). - Grids created via new_grid() cannot be direct inputs or outputs of stencil equations. However, data in a grid created via new_grid() - can be shared with a pre-defined grid via yk_grid::share_storage() - if and only if the sizes of all dimensions are compatible. + can be merged with a pre-defined grid via yk_grid::fuse_grids() + if the grids are compatible. If you want a grid that is not automatically resized based on the solution settings, use new_fixed_size_grid() instead. 
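The replacement of `share_storage()` by `fuse_grids()` can be illustrated with a sketch that keeps a solution grid's meta-data while adopting the storage of a user-created grid, which is the same combination the deprecated wrapper uses. The grid name "pressure" and the assumption that the two grids meet the compatibility rules listed above are illustrative only.

```cpp
// Sketch: fuse a user-created grid's storage into a solution grid,
// assuming 'soln' is a prepared solution containing a grid named
// "pressure" (hypothetical) and the grids are compatible.
#include "yask_kernel_api.hpp"
using namespace yask;

void attach_user_storage(yk_solution_ptr soln) {
    auto target = soln->get_grid("pressure");

    // Make a user grid with the same dims, then allocate its storage.
    auto user = soln->new_grid("user_pressure", target->get_dim_names());
    user->alloc_storage();

    // Equivalent to the deprecated target->share_storage(user):
    // keep the solution grid's meta-data, use the user grid's storage.
    target->fuse_grids(user,
                       false /* use_meta_data_from_other */,
                       true  /* use_storage_from_other */);
}
```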
@@ -733,12 +733,13 @@ namespace yask { The following behaviors are different from both pre-defined grids and those created via new_grid(): - Calls to set_rank_domain_size() will *not* automatically resize - the corresponding domain size in this grid--this is where the term "fixed" originates. + the corresponding local-domain size in this grid--this is where the term "fixed" applies. - In contrast, for each domain dimension of the grid, - the new grid's domain size can be changed independently of the domain - size of the application. + the new grid's local-domain size can be changed independently of the domain + size of the solution. - This grid's first domain index in this rank will be fixed at zero (0) - regardless of this rank's position. + in each domain dimension regardless of this rank's position. + In other words, this grid does not participate in "domain decomposition". - This grid's padding size will be affected only by calls to yk_grid::set_min_pad_size(), etc., i.e., *not* via yk_solution::set_min_pad_size(). @@ -760,8 +761,8 @@ namespace yask { whether or not it was defined via yc_node_factory::new_misc_index(). - Grids created via new_fixed_size_grid() cannot be direct inputs or outputs of stencil equations. However, data in a grid created via new_fixed_size_grid() - can be shared with a pre-defined grid via yk_grid::share_storage() - if and only if the sizes of all dimensions are compatible. + can be shared with a pre-defined grid via yk_grid::fuse_grids() + if the grids are compatible. @note A new grid contains only the meta-data for the grid; data storage is not yet allocated. @@ -849,15 +850,31 @@ namespace yask { apply_command_line_options(const std::string& args /**< [in] String of arguments to parse. */ ) =0; - /// **[Advanced]** Use data-storage from existing grids in specified solution. + /// **[Advanced]** Merge grid variables with another solution. /** - Calls yk_grid::share_storage() for each pair of grids that have the same name + Calls yk_grid::fuse_grids() for each pair of grids that have the same name in this solution and the source solution. - All conditions listed in yk_grid::share_storage() must hold for each pair. + All conditions listed in yk_grid::fuse_grids() must hold for each pair. */ virtual void - share_grid_storage(yk_solution_ptr source - /**< [in] Solution from which grid storage will be shared. */) =0; + fuse_grids(yk_solution_ptr source + /**< [in] Solution from which grids will be merged. */, + bool use_meta_data_from_other + /**< [in] Whether to keep meta-data from `other` solution's grids. */, + bool use_storage_from_other + /**< [in] Whether to keep element storage from `other` solution's grids. */) =0; + + /// **[Advanced]** Set whether invalid step indices alias to valid ones. + virtual void + set_step_wrap(bool do_wrap + /**< [in] Whether to allow any step index. */) =0; + + /// **[Advanced]** Get whether invalid step indices alias to valid ones. + /** + @returns Whether any step index is allowed. + */ + virtual bool + get_step_wrap() const =0; }; /// Statistics from calls to run_solution(). 
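At the solution level, the replacement for share_grid_storage() follows the
same pattern. A minimal sketch, assuming two instances of the same compiled
stencil solution configured with identical sizes (the variable names and the
exact ordering relative to prepare_solution() are illustrative only):

    #include "yask_kernel_api.hpp"
    using namespace yask;

    yk_factory kfac;
    auto env = kfac.new_env();
    auto main_soln = kfac.new_solution(env);   // owns the grid storage.
    auto aux_soln = kfac.new_solution(env);    // will alias main_soln's data.
    main_soln->prepare_solution();             // allocates main_soln's grid storage.

    // Old call: aux_soln->share_grid_storage(main_soln);
    // New call: keep aux_soln's meta-data, use storage from main_soln's grids.
    aux_soln->fuse_grids(main_soln,
                         false /* use_meta_data_from_other */,
                         true /* use_storage_from_other */);

    // Optionally let out-of-range step indices wrap onto valid ones.
    aux_soln->set_step_wrap(true);
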
diff --git a/src/compiler/lib/Expr.cpp b/src/compiler/lib/Expr.cpp index 8c209849..9b811dbb 100644 --- a/src/compiler/lib/Expr.cpp +++ b/src/compiler/lib/Expr.cpp @@ -750,10 +750,12 @@ namespace yask { } string GridPoint::getGridPtr() const { string gname = _grid->getName(); - string expr = "static_cast<_context_type::" + gname + "_type*>(_context_data->" + gname; + string expr = "(static_cast<_context_type::" + gname + "_type*>(_context_data->"; if (_grid->isScratch()) - expr += "_list[region_thread_idx].get()"; - expr += ")"; + expr += gname + "_list[region_thread_idx]"; + else + expr += gname + "_ptr"; + expr += ".get()->gbp()))"; return expr; } bool GridPoint::isGridFoldable() const { diff --git a/src/compiler/lib/YaskKernel.cpp b/src/compiler/lib/YaskKernel.cpp index b7468161..34e5fbc0 100644 --- a/src/compiler/lib/YaskKernel.cpp +++ b/src/compiler/lib/YaskKernel.cpp @@ -329,9 +329,12 @@ namespace yask { ctorCode += "\n // Grid '" + grid + "'.\n"; ctorCode += " " + grid + "_dim_names = {" + gdims.makeDimStr(", ", "\"", "\"") + "};\n"; - string initCode = " " + grid + "_ptr = std::make_shared<" + typeDef + + string gbp = grid + "_base_ptr"; + string initCode = " " + grid + "_ptr_type " + gbp + " = std::make_shared<" + typeDef + ">(*this, \"" + grid + "\", " + grid + "_dim_names);\n" - " assert(" + grid + "_ptr);\n"; + " assert(" + gbp + ");\n" + " " + grid + "_ptr = std::make_shared(" + gbp + ");\n" + " assert(" + grid + "_ptr->gbp());\n"; // Grid vars. if (gp->isScratch()) { @@ -342,9 +345,9 @@ namespace yask { } else { - // Actual grid ptr declaration. - os << " " << ptrTypeDef << " " << grid << "_ptr;\n" << - " " << typeDef << "* " << grid << ";\n"; + // Grid ptr declaration. + // Default ctor gives null ptr. + os << " YkGridPtr " << grid << "_ptr;\n"; } // Alloc-setting code. @@ -376,7 +379,7 @@ namespace yask { int oval = 0; if (dtype == STEP_INDEX) { aval = gp->getStepDimSize(); - initCode += " " + grid + "_ptr->_set_dynamic_step_alloc(" + + initCode += " " + grid + "_base_ptr->_set_dynamic_step_alloc(" + (gp->is_dynamic_step_alloc() ? "true" : "false") + ");\n"; } else { @@ -401,7 +404,7 @@ namespace yask { } // dims. // Allow dynamic misc alloc setting if not interleaved. - initCode += " " + grid + "_ptr->_set_dynamic_misc_alloc(" + + initCode += " " + grid + "_base_ptr->_set_dynamic_misc_alloc(" + (_settings._innerMisc ? "false" : "true") + ");\n"; @@ -410,8 +413,7 @@ namespace yask { // Grid init. ctorCode += initCode; - ctorCode += " " + grid + " = " + grid + "_ptr.get();\n"; - ctorCode += " addGrid(" + grid + "_ptr, "; + ctorCode += " addGrid(" + grid + "_ptr, true, "; if (_eqBundles.getOutputGrids().count(gp)) ctorCode += "true /* is an output grid */"; else @@ -423,9 +425,9 @@ namespace yask { else { scratchCode += " " + grid + "_list.clear();\n" " for (int i = 0; i < num_threads; i++) {\n" - " " + ptrTypeDef + " " + grid + "_ptr;\n" + + " YkGridPtr " + grid + "_ptr;\n" + initCode + - " " + grid + "_ptr->set_scratch(true);\n" + + " " + grid + "_base_ptr->set_scratch(true);\n" + " " + grid + "_list.push_back(" + grid + "_ptr);\n" " }\n"; } @@ -441,8 +443,8 @@ namespace yask { newGridCode += "\n // Scalar grids.\n"; if (!firstGrid) newGridCode += " else"; - newGridCode += " if (dims == " + grid + "_dim_names) gp = std::make_shared<" + - typeDef + ">(*this, name, dims);\n"; + newGridCode += " if (dims == " + grid + "_dim_names)\n" + " gp = std::make_shared<" + typeDef + ">(*this, name, dims);\n"; } } // grids. @@ -467,9 +469,9 @@ namespace yask { // New-grid method. 
os << "\n // Make a new grid iff its dims match any in the stencil.\n" " // Returns pointer to the new grid or nullptr if no match.\n" - " virtual YkGridPtr newStencilGrid(const std::string& name," + " virtual GridBasePtr newStencilGrid(const std::string& name," " const GridDimNames& dims) {\n" - " YkGridPtr gp;\n" << + " GridBasePtr gp;\n" << newGridCode << " return gp;\n" " } // newStencilGrid\n"; diff --git a/src/kernel/Makefile b/src/kernel/Makefile index a5292691..fbdc71d9 100644 --- a/src/kernel/Makefile +++ b/src/kernel/Makefile @@ -591,13 +591,11 @@ kernel: $(YK_EXEC) $(YK_SCRIPT) $(MAKE_REPORT_FILE) echo "*** Run command:" $(YK_SCRIPT) "-stencil" $(YK_STENCIL) "-arch" $(YK_ARCH) "[options]" $(YK_LIB): $(YK_OBJS) $(YK_EXT_OBJS) - - rm -f $(MAKE_REPORT_FILE) $(MKDIR) $(dir $@) $(CXX_PREFIX) $(YK_CXX) $(YK_CXXFLAGS) -shared -o $@ $^ $(YK_LIBS) @ls -l $@ $(YK_EXEC): yask_main.cpp $(YK_LIB) - - rm -f $(MAKE_REPORT_FILE) $(MKDIR) $(dir $@) $(CXX_PREFIX) $(YK_LD) $(YK_CXXFLAGS) $< $(YK_LFLAGS) -o $@ $(YK_LIBS) @ls -l $@ diff --git a/src/kernel/lib/alloc.cpp b/src/kernel/lib/alloc.cpp index ab30a158..9d7ef7e1 100644 --- a/src/kernel/lib/alloc.cpp +++ b/src/kernel/lib/alloc.cpp @@ -170,6 +170,7 @@ namespace yask { if (!gp) continue; auto& gname = gp->get_name(); + auto& gb = gp->gb(); // Grid data. // Don't alloc if already done. @@ -181,7 +182,7 @@ namespace yask { auto p = _grid_data_buf[numa_pref]; assert(p); gp->set_storage(p, npbytes[numa_pref]); - os << gp->make_info_string() << endl; + os << gb.make_info_string() << endl; } // Determine padded size (also offset to next location). @@ -212,7 +213,7 @@ namespace yask { // Otherwise, just print existing grid info. else if (pass == 1) - os << gp->make_info_string() << endl; + os << gb.make_info_string() << endl; } // Reset the counters @@ -294,9 +295,8 @@ namespace yask { // by considering my rank's right side data and vice-versa. // Thus, all ranks must have consistent data that contribute // to these calculations. - for (auto gp : gridPtrs) { - if (!gp || gp->is_scratch() || gp->is_fixed_size()) - continue; + for (auto& gp : origGridPtrs) { + auto& gb = gp->gb(); auto& gname = gp->get_name(); bool grid_vec_ok = vec_ok; @@ -444,8 +444,8 @@ namespace yask { // Begin/end vars to indicate what part // of main grid to read from or write to based on // the current neighbor being processed. - IdxTuple copy_begin = gp->get_allocs(); - IdxTuple copy_end = gp->get_allocs(); // one past last! + IdxTuple copy_begin = gb.get_allocs(); + IdxTuple copy_end = gb.get_allocs(); // one past last! // Adjust along domain dims in this grid. for (auto& dim : domain_dims.getDims()) { @@ -532,7 +532,7 @@ namespace yask { // Sizes of buffer in all dims of this grid. // Also, set begin/end value for non-domain dims. - IdxTuple buf_sizes = gp->get_allocs(); + IdxTuple buf_sizes = gb.get_allocs(); bool buf_vec_ok = grid_vec_ok; for (auto& dname : gp->get_dim_names()) { idx_t dsize = 1; @@ -851,6 +851,7 @@ namespace yask { assert(gp); auto& gname = gp->get_name(); int numa_pref = gp->get_numa_preferred(); + auto& gb = gp->gb(); // Loop through each domain dim. for (auto& dim : domain_dims.getDims()) { @@ -878,7 +879,7 @@ namespace yask { auto p = _scratch_data_buf[numa_pref]; assert(p); gp->set_storage(p, npbytes[numa_pref]); - TRACE_MSG(gp->make_info_string()); + TRACE_MSG(gb.make_info_string()); } // Determine size used (also offset to next location). 
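The code-generation and allocation changes above all follow one refactoring:
each grid now lives behind two objects, a concrete YkGridBase subclass that
holds the meta-data and storage, and a thin YkGridImpl handle (held via
YkGridPtr) that implements the public yk_grid API. A schematic of the pattern
the emitted constructor code now follows, using placeholder names for a grid
'x' and its generated type, and assuming YkGridPtr is a shared pointer to the
YkGridImpl wrapper defined later in this patch:

    // Build the underlying grid object (meta-data plus storage management)...
    GridBasePtr x_base_ptr =
        std::make_shared<Grid_x_type>(*this, "x", x_dim_names);
    x_base_ptr->_set_dynamic_step_alloc(false);

    // ...then wrap it in the thin API handle and register it with the context.
    YkGridPtr x_ptr = std::make_shared<YkGridImpl>(x_base_ptr);
    addGrid(x_ptr, true /* is_orig */, true /* is an output grid */);

Run-time code then reaches the underlying object through the handle, e.g.,
gp->gb().make_info_string() as in the alloc.cpp hunks above.
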
diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index 52e49ba0..4b8eb44f 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -1484,9 +1484,10 @@ namespace yask { assert(sv); // Get ptr to the scratch grid for this thread. - auto gp = sv->at(thread_idx); + auto& gp = sv->at(thread_idx); assert(gp); - assert(gp->is_scratch()); + auto& gb = gp->gb(); + assert(gb.is_scratch()); // i: index for stencil dims, j: index for domain dims. DOMAIN_VAR_LOOP(i, j) { @@ -1495,7 +1496,7 @@ namespace yask { auto& dname = dim.getName(); // Is this dim used in this grid? - int posn = gp->get_dim_posn(dname); + int posn = gb.get_dim_posn(dname); if (posn >= 0) { // Set rank offset of grid based on starting point of rank. @@ -1533,7 +1534,9 @@ namespace yask { idx_t errs = 0; for (size_t gi = 0; gi < gridPtrs.size(); gi++) { TRACE_MSG("Grid '" << ref.gridPtrs[gi]->get_name() << "'..."); - errs += gridPtrs[gi]->compare(ref.gridPtrs[gi].get()); + auto& gb = gridPtrs[gi]->gb(); + auto* rgbp = ref.gridPtrs[gi]->gbp(); + errs += gb.compare(rgbp); } return errs; @@ -1628,9 +1631,10 @@ namespace yask { // Loop thru all grids. for (auto& gp : gridPtrs) { + auto& gb = gp->gb(); // Don't swap scratch grids. - if (gp->is_scratch()) + if (gb.is_scratch()) continue; // Only need to swap grids that have any MPI buffers. @@ -1649,7 +1653,7 @@ namespace yask { // Only need to swap grids whose halos are not up-to-date // for this step. - if (!gp->is_dirty(t)) + if (!gb.is_dirty(t)) continue; // Swap this grid. @@ -1708,7 +1712,8 @@ namespace yask { for (auto gtsi : gridsToSwap) { gi++; auto& gname = gtsi.first; - auto gp = gtsi.second; + auto& gp = gtsi.second; + auto& gb = gp->gb(); auto& grid_mpi_data = mpiData.at(gname); MPI_Request* grid_recv_reqs = grid_mpi_data.recv_reqs.data(); MPI_Request* grid_send_reqs = grid_mpi_data.send_reqs.data(); @@ -1895,8 +1900,8 @@ namespace yask { // Mark grids as up-to-date when done. for (idx_t si = firstStepsToSwap[gp]; si <= lastStepsToSwap[gp]; si++) { - if (gp->is_dirty(si)) { - gp->set_dirty(false, si); + if (gb.is_dirty(si)) { + gb.set_dirty(false, si); TRACE_MSG("exchange_halos: grid '" << gname << "' marked as clean at step-index " << si); } @@ -1949,12 +1954,13 @@ namespace yask { // Output grids for this bundle. NB: don't need to mark // scratch grids as dirty because they are never exchanged. for (auto gp : sb->outputGridPtrs) { + auto& gb = gp->gb(); // Update if not already done. if (grids_done[gp].count(t_out) == 0) { - gp->update_valid_step(t_out); + gb.update_valid_step(t_out); if (mark_dirty) - gp->set_dirty(true, t_out); + gb.set_dirty(true, t_out); TRACE_MSG("grid '" << gp->get_name() << "' updated at step " << t_out); grids_done[gp].insert(t_out); diff --git a/src/kernel/lib/context.hpp b/src/kernel/lib/context.hpp index 0e92009e..10fe92a9 100644 --- a/src/kernel/lib/context.hpp +++ b/src/kernel/lib/context.hpp @@ -217,11 +217,15 @@ namespace yask { // which they should be evaluated within a step. BundlePackList stPacks; - // All non-scratch grids. + // All non-scratch grids, including those created by APIs. GridPtrs gridPtrs; GridPtrMap gridMap; - // Only grids that are updated by the stencils. + // Only grids defined by the YASK compiler. + GridPtrs origGridPtrs; + GridPtrMap origGridMap; + + // Only grids defined by the YASK compiler that are updated by the stencils. 
GridPtrs outputGridPtrs; GridPtrMap outputGridMap; @@ -306,7 +310,7 @@ namespace yask { AutoTuner& getAT() { return _at; } // Add a new grid to the containers. - virtual void addGrid(YkGridPtr gp, bool is_output); + virtual void addGrid(YkGridPtr gp, bool is_orig, bool is_output); virtual void addScratch(GridPtrs& scratch_vec) { scratchVecs.push_back(&scratch_vec); } @@ -481,8 +485,8 @@ namespace yask { // Make a new grid iff its dims match any in the stencil. // Returns pointer to the new grid or nullptr if no match. - virtual YkGridPtr newStencilGrid (const std::string & name, - const GridDimNames & dims) =0; + virtual GridBasePtr newStencilGrid (const std::string & name, + const GridDimNames & dims) =0; // Make a new grid with 'name' and 'dims'. // Set sizes if 'sizes' is non-null. @@ -578,7 +582,9 @@ namespace yask { virtual void run_solution(idx_t step_index) { run_solution(step_index, step_index); } - virtual void share_grid_storage(yk_solution_ptr source); + virtual void fuse_grids(yk_solution_ptr other, + bool use_meta_data_from_other, + bool use_storage_from_other); // APIs that access settings. virtual void set_overall_domain_size(const std::string& dim, idx_t size); @@ -596,6 +602,14 @@ namespace yask { virtual idx_t get_num_ranks(const std::string& dim) const; virtual idx_t get_rank_index(const std::string& dim) const; virtual std::string apply_command_line_options(const std::string& args); + virtual bool get_step_wrap() const { + STATE_VARS(this); + return opts->_step_wrap; + } + virtual void set_step_wrap(bool do_wrap) { + STATE_VARS(this); + opts->_step_wrap = do_wrap; + } virtual bool set_default_numa_preferred(int numa_node) { STATE_VARS(this); #ifdef USE_NUMA diff --git a/src/kernel/lib/generic_grids.cpp b/src/kernel/lib/generic_grids.cpp index 9a263ec2..dad88685 100644 --- a/src/kernel/lib/generic_grids.cpp +++ b/src/kernel/lib/generic_grids.cpp @@ -74,16 +74,16 @@ namespace yask { // Make some descriptive info. string GenericGridBase::make_info_string(const string& elem_name) const { stringstream oss; + oss << "'" << _name << "' "; if (_grid_dims.getNumDims() == 0) oss << "scalar"; else - oss << _grid_dims.getNumDims() << "-D grid (" << + oss << _grid_dims.getNumDims() << "-D var (" << _grid_dims.makeDimValStr(" * ") << ")"; - oss << " '" << _name << "'"; if (_elems) - oss << " with data at " << _elems << " containing "; + oss << " with storage at " << _elems << " containing "; else - oss << " with data not yet allocated for "; + oss << " with storage not yet allocated for "; oss << makeByteStr(get_num_bytes()) << " (" << makeNumStr(get_num_elems()) << " " << elem_name << " element(s) of " << diff --git a/src/kernel/lib/generic_grids.hpp b/src/kernel/lib/generic_grids.hpp index d6c2fb64..5a773003 100644 --- a/src/kernel/lib/generic_grids.hpp +++ b/src/kernel/lib/generic_grids.hpp @@ -75,20 +75,6 @@ namespace yask { virtual ~GenericGridBase() { } - // Get state info. - KernelStatePtr& get_state() { - assert(_state); - return _state; - } - const KernelStatePtr& get_state() const { - assert(_state); - return _state; - } - std::ostream& get_ostr() const { - STATE_VARS(this); - return os; - } - // Perform default allocation. // For other options, // programmer should call get_num_elems() or get_num_bytes() and @@ -115,8 +101,10 @@ namespace yask { #endif } - // Access dims of this grid. - const IdxTuple& get_dims() const { return _grid_dims; } + // Access dims of this grid (not necessarily same as solution dims). 
+ const IdxTuple& get_dims() const { + return _grid_dims; + } // Get number of elements. virtual idx_t get_num_elems() const { @@ -199,6 +187,12 @@ namespace yask { // 'base' should provide get_num_bytes() bytes at offset bytes. virtual void set_storage(std::shared_ptr& base, size_t offset); + // Share storage from another grid. + virtual void share_storage(const GenericGridBase* src) { + _base = src->_base; + _elems = src->_elems; + } + // Check for equality, assuming same layout. // Return number of mismatches greater than epsilon. virtual idx_t count_diffs(const GenericGridBase* ref, diff --git a/src/kernel/lib/grid_apis.cpp b/src/kernel/lib/grid_apis.cpp index 8b88f80f..2ef7a731 100644 --- a/src/kernel/lib/grid_apis.cpp +++ b/src/kernel/lib/grid_apis.cpp @@ -37,66 +37,73 @@ namespace yask { // APIs to get info from vars: one with name of dim with a lot // of checking, and one with index of dim with no checking. #define GET_GRID_API(api_name, expr, step_ok, domain_ok, misc_ok, prep_req) \ - idx_t YkGridBase::api_name(const string& dim) const { \ - STATE_VARS(this); \ + idx_t YkGridImpl::api_name(const string& dim) const { \ + STATE_VARS(gbp()); \ dims->checkDimType(dim, #api_name, step_ok, domain_ok, misc_ok); \ - int posn = get_dim_posn(dim, true, #api_name); \ + int posn = gb().get_dim_posn(dim, true, #api_name); \ idx_t mbit = 1LL << posn; \ - if (prep_req && _rank_offsets[posn] < 0) \ + if (prep_req && gb()._rank_offsets[posn] < 0) \ THROW_YASK_EXCEPTION("Error: '" #api_name "()' called on grid '" + \ get_name() + "' before calling 'prepare_solution()'"); \ auto rtn = expr; \ return rtn; \ } \ - idx_t YkGridBase::api_name(int posn) const { \ - STATE_VARS(this); \ + idx_t YkGridImpl::api_name(int posn) const { \ + STATE_VARS(gbp()); \ idx_t mbit = 1LL << posn; \ auto rtn = expr; \ return rtn; \ } - GET_GRID_API(get_first_valid_index, _rank_offsets[posn] + _local_offsets[posn] - _actl_left_pads[posn], true, true, true, true) - GET_GRID_API(get_last_valid_index, _rank_offsets[posn] + _local_offsets[posn] + _domains[posn] + _actl_right_pads[posn] - 1, true, true, true, true) - GET_GRID_API(get_first_misc_index, _local_offsets[posn], false, false, true, false) - GET_GRID_API(get_last_misc_index, _local_offsets[posn] + _domains[posn] - 1, false, false, true, false) - GET_GRID_API(get_rank_domain_size, _domains[posn], false, true, false, false) - GET_GRID_API(get_left_pad_size, _actl_left_pads[posn], false, true, false, false) - GET_GRID_API(get_right_pad_size, _actl_right_pads[posn], false, true, false, false) - GET_GRID_API(get_left_halo_size, _left_halos[posn], false, true, false, false) - GET_GRID_API(get_right_halo_size, _right_halos[posn], false, true, false, false) - GET_GRID_API(get_left_extra_pad_size, _actl_left_pads[posn] - _left_halos[posn], false, true, false, false) - GET_GRID_API(get_right_extra_pad_size, _actl_right_pads[posn] - _right_halos[posn], false, true, false, false) - GET_GRID_API(get_alloc_size, _allocs[posn], true, true, true, false) - GET_GRID_API(get_first_rank_domain_index, _rank_offsets[posn], false, true, false, true) - GET_GRID_API(get_last_rank_domain_index, _rank_offsets[posn] + _domains[posn] - 1, false, true, false, true) - GET_GRID_API(get_first_rank_halo_index, _rank_offsets[posn] - _left_halos[posn], false, true, false, true) - GET_GRID_API(get_last_rank_halo_index, _rank_offsets[posn] + _domains[posn] + _right_halos[posn] - 1, false, true, false, true) - GET_GRID_API(get_first_rank_alloc_index, _rank_offsets[posn] + _local_offsets[posn] - 
_actl_left_pads[posn], false, true, false, true) - GET_GRID_API(get_last_rank_alloc_index, _rank_offsets[posn] + _local_offsets[posn] + _domains[posn] + _actl_right_pads[posn] - 1, false, true, false, true) - GET_GRID_API(_get_left_wf_ext, _left_wf_exts[posn], true, true, true, false) - GET_GRID_API(_get_right_wf_ext, _right_wf_exts[posn], true, true, true, false) - GET_GRID_API(_get_vec_len, _vec_lens[posn], true, true, true, true) - GET_GRID_API(_get_rank_offset, _rank_offsets[posn], true, true, true, true) - GET_GRID_API(_get_local_offset, _local_offsets[posn], true, true, true, false) - - GET_GRID_API(get_pad_size, _actl_left_pads[posn]; DEPRECATED(get_pad_size), false, true, false, false) - GET_GRID_API(get_halo_size, _left_halos[posn]; DEPRECATED(get_halo_size), false, true, false, false) - GET_GRID_API(get_extra_pad_size, _actl_left_pads[posn] - _left_halos[posn]; DEPRECATED(get_extra_pad_size), false, true, false, false) + + // Internal APIs. + GET_GRID_API(_get_left_wf_ext, gb()._left_wf_exts[posn], true, true, true, false) + GET_GRID_API(_get_right_wf_ext, gb()._right_wf_exts[posn], true, true, true, false) + GET_GRID_API(_get_vec_len, gb()._vec_lens[posn], true, true, true, true) + GET_GRID_API(_get_rank_offset, gb()._rank_offsets[posn], true, true, true, true) + GET_GRID_API(_get_local_offset, gb()._local_offsets[posn], true, true, true, false) + + // Exposed APIs. + GET_GRID_API(get_first_local_index, gb().get_first_local_index(posn), true, true, true, true) + GET_GRID_API(get_last_local_index, gb().get_last_local_index(posn), true, true, true, true) + GET_GRID_API(get_first_misc_index, gb()._local_offsets[posn], false, false, true, false) + GET_GRID_API(get_last_misc_index, gb()._local_offsets[posn] + gb()._domains[posn] - 1, false, false, true, false) + GET_GRID_API(get_rank_domain_size, gb()._domains[posn], false, true, false, false) + GET_GRID_API(get_left_pad_size, gb()._actl_left_pads[posn], false, true, false, false) + GET_GRID_API(get_right_pad_size, gb()._actl_right_pads[posn], false, true, false, false) + GET_GRID_API(get_left_halo_size, gb()._left_halos[posn], false, true, false, false) + GET_GRID_API(get_right_halo_size, gb()._right_halos[posn], false, true, false, false) + GET_GRID_API(get_left_extra_pad_size, gb()._actl_left_pads[posn] - gb()._left_halos[posn], false, true, false, false) + GET_GRID_API(get_right_extra_pad_size, gb()._actl_right_pads[posn] - gb()._right_halos[posn], false, true, false, false) + GET_GRID_API(get_alloc_size, gb()._allocs[posn], true, true, true, false) + GET_GRID_API(get_first_rank_domain_index, gb()._rank_offsets[posn], false, true, false, true) + GET_GRID_API(get_last_rank_domain_index, gb()._rank_offsets[posn] + gb()._domains[posn] - 1, false, true, false, true) + GET_GRID_API(get_first_rank_halo_index, gb()._rank_offsets[posn] - gb()._left_halos[posn], false, true, false, true) + GET_GRID_API(get_last_rank_halo_index, gb()._rank_offsets[posn] + gb()._domains[posn] + + gb()._right_halos[posn] - 1, false, true, false, true) + GET_GRID_API(get_first_rank_alloc_index, gb().get_first_local_index(posn), false, true, false, true) + GET_GRID_API(get_last_rank_alloc_index, gb().get_last_local_index(posn), false, true, false, true) + + // Deprecated APIs. 
+ GET_GRID_API(get_pad_size, gb()._actl_left_pads[posn]; DEPRECATED(get_pad_size), false, true, false, false) + GET_GRID_API(get_halo_size, gb()._left_halos[posn]; DEPRECATED(get_halo_size), false, true, false, false) + GET_GRID_API(get_extra_pad_size, gb()._actl_left_pads[posn] - gb()._left_halos[posn]; + DEPRECATED(get_extra_pad_size), false, true, false, false) #undef GET_GRID_API // APIs to set vars. #define COMMA , #define SET_GRID_API(api_name, expr, step_ok, domain_ok, misc_ok) \ - void YkGridBase::api_name(const string& dim, idx_t n) { \ - STATE_VARS(this); \ + void YkGridImpl::api_name(const string& dim, idx_t n) { \ + STATE_VARS(gbp()); \ TRACE_MSG("grid '" << get_name() << "'." \ #api_name "('" << dim << "', " << n << ")"); \ dims->checkDimType(dim, #api_name, step_ok, domain_ok, misc_ok); \ - int posn = get_dim_posn(dim, true, #api_name); \ + int posn = gb().get_dim_posn(dim, true, #api_name); \ idx_t mbit = 1LL << posn; \ expr; \ } \ - void YkGridBase::api_name(int posn, idx_t n) { \ - STATE_VARS(this); \ + void YkGridImpl::api_name(int posn, idx_t n) { \ + STATE_VARS(gbp()); \ idx_t mbit = 1LL << posn; \ int dim = posn; \ expr; \ @@ -104,46 +111,45 @@ namespace yask { // These are the internal, unchecked access functions that allow // changes prohibited thru the APIs. - SET_GRID_API(_set_rank_offset, _rank_offsets[posn] = n, true, true, true) - SET_GRID_API(_set_local_offset, _local_offsets[posn] = n; - assert(imod_flr(n, _vec_lens[posn]) == 0); - _vec_local_offsets[posn] = n / _vec_lens[posn], true, true, true) - SET_GRID_API(_set_domain_size, _domains[posn] = n; resize(), true, true, true) - SET_GRID_API(_set_left_pad_size, _actl_left_pads[posn] = n; resize(), true, true, true) - SET_GRID_API(_set_right_pad_size, _actl_right_pads[posn] = n; resize(), true, true, true) - SET_GRID_API(_set_left_wf_ext, _left_wf_exts[posn] = n; resize(), true, true, true) - SET_GRID_API(_set_right_wf_ext, _right_wf_exts[posn] = n; resize(), true, true, true) - SET_GRID_API(_set_alloc_size, _domains[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_rank_offset, gb()._rank_offsets[posn] = n, true, true, true) + SET_GRID_API(_set_local_offset, gb()._local_offsets[posn] = n; + assert(imod_flr(n, gb()._vec_lens[posn]) == 0); + gb()._vec_local_offsets[posn] = n / gb()._vec_lens[posn], true, true, true) + SET_GRID_API(_set_domain_size, gb()._domains[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_left_pad_size, gb()._actl_left_pads[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_right_pad_size, gb()._actl_right_pads[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_left_wf_ext, gb()._left_wf_exts[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_right_wf_ext, gb()._right_wf_exts[posn] = n; resize(), true, true, true) + SET_GRID_API(_set_alloc_size, gb()._domains[posn] = n; resize(), true, true, true) // These are the safer ones used in the APIs. 
- SET_GRID_API(set_left_halo_size, _left_halos[posn] = n; resize(), false, true, false) - SET_GRID_API(set_right_halo_size, _right_halos[posn] = n; resize(), false, true, false) - SET_GRID_API(set_halo_size, _left_halos[posn] = _right_halos[posn] = n; resize(), false, true, false) - SET_GRID_API(set_alloc_size, _domains[posn] = n; resize(), - _is_dynamic_step_alloc, _fixed_size, _is_dynamic_misc_alloc) - SET_GRID_API(set_left_min_pad_size, _req_left_pads[posn] = n; resize(), false, true, false) - SET_GRID_API(set_right_min_pad_size, _req_right_pads[posn] = n; resize(), false, true, false) - SET_GRID_API(set_min_pad_size, _req_left_pads[posn] = _req_right_pads[posn] = n; resize(), + SET_GRID_API(set_left_halo_size, gb()._left_halos[posn] = n; resize(), false, true, false) + SET_GRID_API(set_right_halo_size, gb()._right_halos[posn] = n; resize(), false, true, false) + SET_GRID_API(set_halo_size, gb()._left_halos[posn] = gb()._right_halos[posn] = n; resize(), false, true, false) + SET_GRID_API(set_alloc_size, gb()._domains[posn] = n; resize(), + gb()._is_dynamic_step_alloc, gb()._fixed_size, gb()._is_dynamic_misc_alloc) + SET_GRID_API(set_left_min_pad_size, gb()._req_left_pads[posn] = n; resize(), false, true, false) + SET_GRID_API(set_right_min_pad_size, gb()._req_right_pads[posn] = n; resize(), false, true, false) + SET_GRID_API(set_min_pad_size, gb()._req_left_pads[posn] = gb()._req_right_pads[posn] = n; resize(), false, true, false) SET_GRID_API(set_left_extra_pad_size, - set_left_min_pad_size(posn, _left_halos[posn] + n), false, true, false) + set_left_min_pad_size(posn, gb()._left_halos[posn] + n), false, true, false) SET_GRID_API(set_right_extra_pad_size, - set_right_min_pad_size(posn, _right_halos[posn] + n), false, true, false) + set_right_min_pad_size(posn, gb()._right_halos[posn] + n), false, true, false) SET_GRID_API(set_extra_pad_size, set_left_extra_pad_size(posn, n); set_right_extra_pad_size(posn, n), false, true, false) - SET_GRID_API(set_first_misc_index, _local_offsets[posn] = n, false, false, _is_new_grid) + SET_GRID_API(set_first_misc_index, gb()._local_offsets[posn] = n, false, false, gb()._is_user_grid) #undef COMMA #undef SET_GRID_API - bool YkGridBase::is_storage_layout_identical(const yk_grid_ptr other) const { - auto op = dynamic_pointer_cast(other); - assert(op); + bool YkGridImpl::is_storage_layout_identical(const YkGridImpl* op, + bool check_sizes) const { // Same size? - if (get_num_storage_bytes() != op->get_num_storage_bytes()) + if (check_sizes && get_num_storage_bytes() != op->get_num_storage_bytes()) return false; - // Same dims? + // Same num dims? if (get_num_dims() != op->get_num_dims()) return false; for (int i = 0; i < get_num_dims(); i++) { @@ -153,173 +159,158 @@ namespace yask { if (dname != op->get_dim_name(i)) return false; - // Same dim sizes? - if (_domains[i] != op->_domains[i]) - return false; - if (_actl_left_pads[i] != op->_actl_left_pads[i]) + // Same folding? + if (gb()._vec_lens[i] != op->gb()._vec_lens[i]) return false; - if (_actl_right_pads[i] != op->_actl_right_pads[i]) - return false; - } - return true; - } - - void YkGridBase::share_storage(yk_grid_ptr source) { - STATE_VARS(this); - auto sp = dynamic_pointer_cast(source); - assert(sp); - if (!sp->get_raw_storage_buffer()) { - THROW_YASK_EXCEPTION("Error: share_storage() called without source storage allocated"); - } - - // Determine required padding from halos. 
- Indices left_pads2 = getReqdPad(_left_halos, _left_wf_exts); - Indices right_pads2 = getReqdPad(_right_halos, _right_wf_exts); - - // NB: requirements to successful share_storage() is not as strict as - // is_storage_layout_identical(). See note on pad & halo below and API docs. - for (int i = 0; i < get_num_dims(); i++) { - auto dname = get_dim_name(i); - - // Same dims? - if (sp->get_num_dims() != get_num_dims() || - sp->get_dim_name(i) != dname) - THROW_YASK_EXCEPTION("Error: share_storage() called with incompatible grids: " + - make_info_string() + " and " + sp->make_info_string()); - - - // Check folding. - if (_vec_lens[i] != sp->_vec_lens[i]) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: attempt to share storage from grid '" << - sp->get_name() << "' of fold-length " << - sp->_vec_lens[i] << " with grid '" << get_name() << - "' of fold-length " << _vec_lens[i] << - " in '" << dname << "' dim"); - } - - // Not a domain dim? - bool is_domain = domain_dims.lookup(dname) != 0; - if (!is_domain) { - auto tas = get_alloc_size(dname); - auto sas = sp->get_alloc_size(dname); - if (tas != sas) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: attempt to share storage from grid '" << - sp->get_name() << "' of alloc-size " << sas << - " with grid '" << get_name() << "' of alloc-size " << - tas << " in '" << dname << "' dim"); - } - } - - // Domain dim. - else { - auto tdom = get_rank_domain_size(i); - auto sdom = sp->get_rank_domain_size(i); - if (tdom != sdom) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: attempt to share storage from grid '" << - sp->get_name() << "' of domain-size " << sdom << - " with grid '" << get_name() << - "' of domain-size " << tdom << " in '" << dname << "' dim"); - } - - // Halo and pad sizes don't have to be the same. - // Requirement is that halo (reqd pad) of target fits inside of pad of source. - auto spad = sp->get_left_pad_size(i); - if (left_pads2[i] > spad) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: attempt to share storage from grid '" << - sp->get_name() << "' of left padding-size " << spad << - ", which is insufficient for grid '" << get_name() << - "' requiring " << left_pads2[i] << " in '" << dname << "' dim"); - } - spad = sp->get_right_pad_size(i); - if (right_pads2[i] > spad) { - FORMAT_AND_THROW_YASK_EXCEPTION("Error: attempt to share storage from grid '" << - sp->get_name() << - "' of right padding-size " << spad << - ", which is insufficient for grid '" << get_name() << - "' requiring " << right_pads2[i] << " in '" << dname << "' dim"); - } + // Same dim sizes? + if (check_sizes) { + if (gb()._domains[i] != op->gb()._domains[i]) + return false; + if (gb()._actl_left_pads[i] != op->gb()._actl_left_pads[i]) + return false; + if (gb()._actl_right_pads[i] != op->gb()._actl_right_pads[i]) + return false; } } + return true; + } - // Copy pad sizes. - for (int i = 0; i < get_num_dims(); i++) { - auto dname = get_dim_name(i); - bool is_domain = domain_dims.lookup(dname) != 0; - if (is_domain) { - _actl_left_pads[i] = sp->_actl_left_pads[i]; - _actl_right_pads[i] = sp->_actl_right_pads[i]; + void YkGridImpl::fuse_grids(yk_grid_ptr other, + bool use_meta_data_from_other, + bool use_storage_from_other) { + STATE_VARS(gbp()); + auto op = dynamic_pointer_cast(other); + TRACE_MSG("fuse_grids(" << other.get() << ", " << use_meta_data_from_other << + ", " << use_storage_from_other << "): this=" << gb().make_info_string() << + "; other=" << op->gb().make_info_string()); + + assert(op); + YkGridImpl* md_src = use_meta_data_from_other ? 
op.get() : this; + YkGridImpl* st_src = use_storage_from_other ? op.get() : this; + + // Make sure size is compatible when replacing storage. + if (md_src != st_src && st_src->get_raw_storage_buffer() + && md_src->get_num_storage_bytes() != st_src->get_num_storage_bytes()) + THROW_YASK_EXCEPTION("Error: fuse_grids(): attempt to use allocated" + " storage of " + makeByteStr(st_src->get_num_storage_bytes()) + + " from grid '" + st_src->get_name() + "' in grid '" + + md_src->get_name() + "' that needs " + + makeByteStr(md_src->get_num_storage_bytes())); + + // Check conditions for fusing into a non-user grid. + bool force_native = false; + for (YkGridImpl* tgt : { this, op.get() }) { + if (!tgt->gb().is_user_grid()) { + force_native = true; + if (!tgt->is_storage_layout_identical(md_src, false)) + THROW_YASK_EXCEPTION("Error: fuse_grids(): attempt to replace meta-data" + " of " + tgt->gb().make_info_string() + + " used in solution with incompatible " + + md_src->gb().make_info_string()); } } - // Copy data. - release_storage(); - resize(); - if (!share_data(sp.get(), true)) { - THROW_YASK_EXCEPTION("Error: unexpected failure in data sharing"); - } + // Save ptr to source-storage grid before fusing meta-data. + GridBasePtr st_gbp = st_src->_gbp; // Shared-ptr to keep source active to end of method. + GenericGridBase* st_ggb = st_gbp->_ggb; + + // Fuse meta-data. + if (use_meta_data_from_other) + _gbp = md_src->_gbp; + else + op->_gbp = md_src->_gbp; + assert(_gbp == op->_gbp); + + // Make this grid look like a compiler-generated one if either + // of the original ones was. + if (force_native) + _gbp->set_user_grid(false); + assert(!_gbp->is_scratch()); + + // Fuse storage. + gg().share_storage(st_ggb); + + TRACE_MSG("after fuse_grids(" << other.get() << ", " << use_meta_data_from_other << + ", " << use_storage_from_other << "): this=" << gb().make_info_string() << + "; other=" << op->gb().make_info_string()); } // API get, set, etc. 
- bool YkGridBase::are_indices_valid(const Indices& indices) const { + bool YkGridImpl::are_indices_local(const Indices& indices) const { if (!is_storage_allocated()) return false; - return checkIndices(indices, "are_indices_valid", false, true, false); + return gb().checkIndices(indices, "are_indices_local", false, true, false); } - double YkGridBase::get_element(const Indices& indices) const { - STATE_VARS(this); - if (!is_storage_allocated()) { - THROW_YASK_EXCEPTION("Error: call to 'get_element' with no data allocated for grid '" + + double YkGridImpl::get_element(const Indices& indices) const { + STATE_VARS(gbp()); + TRACE_MSG("get_element({" << gb().makeIndexString(indices) << "}) on " << + gb().make_info_string()); + if (!is_storage_allocated()) + THROW_YASK_EXCEPTION("Error: call to 'get_element' with no storage allocated for grid '" + get_name() + "'"); - } - checkIndices(indices, "get_element", true, true, false); - idx_t asi = get_alloc_step_index(indices); - real_t val = readElem(indices, asi, __LINE__); - TRACE_MSG("get_element({" << makeIndexString(indices) << "}) on '" << + gb().checkIndices(indices, "get_element", true, true, false); + idx_t asi = gb().get_alloc_step_index(indices); + real_t val = gb().readElem(indices, asi, __LINE__); + TRACE_MSG("get_element({" << gb().makeIndexString(indices) << "}) on '" << get_name() + "' returns " << val); return double(val); } - idx_t YkGridBase::set_element(double val, + idx_t YkGridImpl::set_element(double val, const Indices& indices, bool strict_indices) { - STATE_VARS(this); + STATE_VARS(gbp()); + TRACE_MSG("set_element(" << val << ", {" << + gb().makeIndexString(indices) << "}, " << + strict_indices << ") on " << + gb().make_info_string()); idx_t nup = 0; + if (!get_raw_storage_buffer() && strict_indices) + THROW_YASK_EXCEPTION("Error: call to 'set_element' with no storage allocated for grid '" + + get_name() + "'"); if (get_raw_storage_buffer() && // Don't check step index because this is a write-only API // that updates the step index. - checkIndices(indices, "set_element", strict_indices, false, false)) { - idx_t asi = get_alloc_step_index(indices); - writeElem(real_t(val), indices, asi, __LINE__); + gb().checkIndices(indices, "set_element", strict_indices, false, false)) { + idx_t asi = gb().get_alloc_step_index(indices); + gb().writeElem(real_t(val), indices, asi, __LINE__); nup++; // Set appropriate dirty flag. - set_dirty_using_alloc_index(true, asi); + gb().set_dirty_using_alloc_index(true, asi); } TRACE_MSG("set_element(" << val << ", {" << - makeIndexString(indices) << "}, " << + gb().makeIndexString(indices) << "}, " << strict_indices << ") on '" << get_name() + "' returns " << nup); return nup; } - idx_t YkGridBase::add_to_element(double val, + idx_t YkGridImpl::add_to_element(double val, const Indices& indices, bool strict_indices) { - STATE_VARS(this); + STATE_VARS(gbp()); + TRACE_MSG("add_to_element(" << val << ", {" << + gb().makeIndexString(indices) << "}, " << + strict_indices << ") on " << + gb().make_info_string()); idx_t nup = 0; + if (!get_raw_storage_buffer() && strict_indices) + THROW_YASK_EXCEPTION("Error: call to 'add_to_element' with no storage allocated for grid '" + + get_name() + "'"); if (get_raw_storage_buffer() && // Check step index because this API must read before writing. 
- checkIndices(indices, "add_to_element", strict_indices, true, false)) { - idx_t asi = get_alloc_step_index(indices); - addToElem(real_t(val), indices, asi, __LINE__); + gb().checkIndices(indices, "add_to_element", strict_indices, true, false)) { + idx_t asi = gb().get_alloc_step_index(indices); + gb().addToElem(real_t(val), indices, asi, __LINE__); nup++; // Set appropriate dirty flag. - set_dirty_using_alloc_index(true, asi); + gb().set_dirty_using_alloc_index(true, asi); } TRACE_MSG("add_to_element(" << val << ", {" << - makeIndexString(indices) << "}, " << + gb().makeIndexString(indices) << "}, " << strict_indices << ") on '" << get_name() + "' returns " << nup); return nup; @@ -329,10 +320,13 @@ namespace yask { const Indices& first_indices, const Indices& last_indices) const { STATE_VARS(this); - if (!is_storage_allocated()) { - THROW_YASK_EXCEPTION("Error: call to 'get_elements_in_slice' with no data allocated for grid '" + - get_name() + "'"); - } + TRACE_MSG("get_elements_in_slice(" << buffer_ptr << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}) on " << + make_info_string()); + if (_ggb->get_storage() == 0) + THROW_YASK_EXCEPTION("Error: call to 'get_elements_in_slice' with no storage allocated for grid '" + + _ggb->get_name() + "'"); checkIndices(first_indices, "get_elements_in_slice", true, true, false); checkIndices(last_indices, "get_elements_in_slice", true, true, false); @@ -355,7 +349,7 @@ namespace yask { TRACE_MSG("get_elements_in_slice(" << buffer_ptr << ", {" << makeIndexString(first_indices) << "}, {" << makeIndexString(last_indices) << "}) on '" << - get_name() + "' returns " << nup); + _ggb->get_name() + "' returns " << nup); return nup; } idx_t YkGridBase::set_elements_in_slice_same(double val, @@ -363,8 +357,17 @@ namespace yask { const Indices& last_indices, bool strict_indices) { STATE_VARS(this); - if (!is_storage_allocated()) + TRACE_MSG("set_elements_in_slice_same(" << val << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}, " << + strict_indices << ") on " << + make_info_string()); + if (_ggb->get_storage() == 0) { + if (strict_indices) + THROW_YASK_EXCEPTION("Error: call to 'set_elements_in_slice_same' with no storage allocated for grid '" + + _ggb->get_name() + "'"); return 0; + } // 'Fixed' copy of indices. 
Indices first, last; @@ -397,15 +400,20 @@ namespace yask { makeIndexString(first_indices) << "}, {" << makeIndexString(last_indices) << "}, " << strict_indices << ") on '" << - get_name() + "' returns " << nup); + _ggb->get_name() + "' returns " << nup); return nup; } idx_t YkGridBase::set_elements_in_slice(const void* buffer_ptr, const Indices& first_indices, const Indices& last_indices) { STATE_VARS(this); - if (!is_storage_allocated()) - return 0; + TRACE_MSG("set_elements_in_slice(" << buffer_ptr << ", {" << + makeIndexString(first_indices) << "}, {" << + makeIndexString(last_indices) << "}) on " << + make_info_string()); + if (_ggb->get_storage() == 0) + THROW_YASK_EXCEPTION("Error: call to 'set_elements_in_slice' with no storage allocated for grid '" + + _ggb->get_name() + "'"); checkIndices(first_indices, "set_elements_in_slice", true, false, false); checkIndices(last_indices, "set_elements_in_slice", true, false, false); @@ -433,7 +441,7 @@ namespace yask { TRACE_MSG("set_elements_in_slice(" << buffer_ptr << ", {" << makeIndexString(first_indices) << "}, {" << makeIndexString(last_indices) << "}) on '" << - get_name() + "' returns " << nup); + _ggb->get_name() + "' returns " << nup); return nup; } diff --git a/src/kernel/lib/new_grid.cpp b/src/kernel/lib/new_grid.cpp index 3adadc54..f94435d5 100644 --- a/src/kernel/lib/new_grid.cpp +++ b/src/kernel/lib/new_grid.cpp @@ -46,7 +46,7 @@ namespace yask { // First, try to make a grid that matches the layout in // the stencil. - YkGridPtr gp = newStencilGrid(name, gdims); + GridBasePtr gp = newStencilGrid(name, gdims); // No match. if (!gp) { @@ -121,8 +121,14 @@ namespace yask { // Mark as non-resizable if sizes provided. gp->set_fixed_size(got_sizes); + // Mark as created via API. + gp->set_user_grid(true); + + // Wrap with a Yk grid. + YkGridPtr ygp = make_shared(gp); + // Add to context. - addGrid(gp, false); // mark as non-output grid. + addGrid(ygp, false, false); // mark as non-orig, non-output grid. // Set sizes as provided. if (got_sizes) { @@ -131,18 +137,18 @@ namespace yask { auto& gdim = gdims[i]; // Domain size. - gp->_set_domain_size(i, sizes->at(i)); + ygp->_set_domain_size(i, sizes->at(i)); // Pads. // Set via both 'extra' and 'min'; larger result will be used. if (domain_dims.lookup(gdim)) { - gp->set_extra_pad_size(i, opts->_extra_pad_sizes[gdim]); - gp->set_min_pad_size(i, opts->_min_pad_sizes[gdim]); + ygp->set_extra_pad_size(i, opts->_extra_pad_sizes[gdim]); + ygp->set_min_pad_size(i, opts->_min_pad_sizes[gdim]); } // Offsets. - gp->_set_rank_offset(i, 0); - gp->_set_local_offset(i, 0); + ygp->_set_rank_offset(i, 0); + ygp->_set_local_offset(i, 0); } } @@ -150,8 +156,6 @@ namespace yask { else update_grid_info(); - // Mark as created via API. - gp->set_new_grid(true); - return gp; + return ygp; } } // namespace yask. diff --git a/src/kernel/lib/realv_grids.cpp b/src/kernel/lib/realv_grids.cpp index 1a331ae6..f46c3be4 100644 --- a/src/kernel/lib/realv_grids.cpp +++ b/src/kernel/lib/realv_grids.cpp @@ -93,7 +93,7 @@ namespace yask { for (auto& d : domain_dims.getDims()) { auto& dname = d.getName(); - if (!is_dim_used(dname)) + if (!_ggb->is_dim_used(dname)) return false; } return true; @@ -165,9 +165,9 @@ namespace yask { // element of a vector. In addition, this vec-len should be the // global one, not the one for this grid to handle the case where // this grid is not vectorized. 
- for (int i = 0; i < get_num_dims(); i++) { + for (int i = 0; i < _ggb->get_num_dims(); i++) { if (mp[i] >= 1) { - auto& dname = get_dim_name(i); + auto& dname = _ggb->get_dim_name(i); auto* p = dims->_fold_pts.lookup(dname); if (p) { assert (*p >= 1); @@ -185,30 +185,30 @@ namespace yask { STATE_VARS(this); // Original size. - auto p = get_raw_storage_buffer(); + auto p = _ggb->get_storage(); IdxTuple old_allocs = get_allocs(); // Check settings. - for (int i = 0; i < get_num_dims(); i++) { + for (int i = 0; i < _ggb->get_num_dims(); i++) { if (_left_halos[i] < 0) - THROW_YASK_EXCEPTION("Error: negative left halo in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative left halo in grid '" + _ggb->get_name() + "'"); if (_right_halos[i] < 0) - THROW_YASK_EXCEPTION("Error: negative right halo in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative right halo in grid '" + _ggb->get_name() + "'"); if (_left_wf_exts[i] < 0) - THROW_YASK_EXCEPTION("Error: negative left wave-front ext in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative left wave-front ext in grid '" + _ggb->get_name() + "'"); if (_right_wf_exts[i] < 0) - THROW_YASK_EXCEPTION("Error: negative right wave-front ext in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative right wave-front ext in grid '" + _ggb->get_name() + "'"); if (_req_left_pads[i] < 0) - THROW_YASK_EXCEPTION("Error: negative left padding in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative left padding in grid '" + _ggb->get_name() + "'"); if (_req_right_pads[i] < 0) - THROW_YASK_EXCEPTION("Error: negative right padding in grid '" + get_name() + "'"); + THROW_YASK_EXCEPTION("Error: negative right padding in grid '" + _ggb->get_name() + "'"); } // Increase padding as needed and calculate new allocs. Indices new_left_pads = getReqdPad(_left_halos, _left_wf_exts); Indices new_right_pads = getReqdPad(_right_halos, _right_wf_exts); IdxTuple new_allocs(old_allocs); - for (int i = 0; i < get_num_dims(); i++) { + for (int i = 0; i < _ggb->get_num_dims(); i++) { idx_t mbit = 1LL << i; // New allocation in each dim. @@ -241,7 +241,7 @@ namespace yask { // Make inner dim an odd number of vecs. // This reportedly helps avoid some uarch aliasing. - if (!p && get_dim_name(i) == inner_dim && + if (!p && _ggb->get_dim_name(i) == inner_dim && (new_allocs[i] / _vec_lens[i]) % 2 == 0) { new_right_pads[i] += _vec_lens[i]; new_allocs[i] += _vec_lens[i]; @@ -259,7 +259,7 @@ namespace yask { // resize() on failure. if (p && old_allocs != new_allocs) { THROW_YASK_EXCEPTION("Error: attempt to change allocation size of grid '" + - get_name() + "' from " + + _ggb->get_name() + "' from " + makeIndexString(old_allocs, " * ") + " to " + makeIndexString(new_allocs, " * ") + " after storage has been allocated"); @@ -270,7 +270,7 @@ namespace yask { _actl_left_pads = new_left_pads; _actl_right_pads = new_right_pads; size_t new_dirty = 1; // default if no step dim. - for (int i = 0; i < get_num_dims(); i++) { + for (int i = 0; i < _ggb->get_num_dims(); i++) { idx_t mbit = 1LL << i; // Calc vec-len values. 
@@ -300,7 +300,7 @@ namespace yask { if (old_allocs != new_allocs || old_dirty != new_dirty) { Indices first_allocs = _rank_offsets.subElements(_actl_left_pads); Indices end_allocs = first_allocs.addElements(_allocs); - TRACE_MSG("grid '" << get_name() << "' resized from " << + TRACE_MSG("grid '" << _ggb->get_name() << "' resized from " << makeIndexString(old_allocs, " * ") << " to " << makeIndexString(new_allocs, " * ") << " at [" << makeIndexString(first_allocs) << " ... " << @@ -320,7 +320,7 @@ namespace yask { bool domain_ok, bool misc_ok) const { STATE_VARS(this); - if (!is_dim_used(dim)) + if (!_ggb->is_dim_used(dim)) THROW_YASK_EXCEPTION("Error in " + fn_name + "(): dimension '" + dim + "' not found in " + make_info_string()); dims->checkDimType(dim, fn_name, step_ok, domain_ok, misc_ok); @@ -334,14 +334,14 @@ namespace yask { STATE_VARS(this); if (!ref) { os << "** mismatch: no reference grid.\n"; - return get_num_storage_elements(); + return _allocs.product(); // total number of elements. } // Dims & sizes same? if (!_ggb->are_dims_and_sizes_same(*ref->_ggb)) { os << "** mismatch due to incompatible grids: " << make_info_string() << " and " << ref->make_info_string() << ".\n"; - return get_num_storage_elements(); + return _allocs.product(); // total number of elements. } // Quick check for errors, assuming same layout and @@ -378,8 +378,8 @@ namespace yask { // TODO: check points in outermost halo. auto& dname = pt.getDimName(i); if (domain_dims.lookup(dname)) { - auto first_ok = get_first_rank_domain_index(dname); - auto last_ok = get_last_rank_domain_index(dname); + auto first_ok = _rank_offsets[i]; + auto last_ok = first_ok + _domains[i] - 1; if (opt[i] < first_ok || opt[i] > last_ok) ok = false; } @@ -396,12 +396,12 @@ namespace yask { errs++; if (errs <= maxPrint) { if (errs < maxPrint) - os << "** mismatch at " << get_name() << + os << "** mismatch at " << _ggb->get_name() << "(" << opt.makeDimValStr() << "): " << te << " != " << re << endl; else os << "** Additional errors not printed for grid '" << - get_name() << "'.\n"; + _ggb->get_name() << "'.\n"; } } } @@ -421,8 +421,9 @@ namespace yask { bool check_step, // check step index. bool normalize, // div by vec lens. Indices* clipped_indices) const { + STATE_VARS(this); bool all_ok = true; - auto n = get_num_dims(); + auto n = _ggb->get_num_dims(); if (indices.getNumDims() != n) { FORMAT_AND_THROW_YASK_EXCEPTION("Error: '" << fn << "' called with " << indices.getNumDims() << @@ -435,19 +436,19 @@ namespace yask { bool is_step_dim = _step_dim_mask & mbit; idx_t idx = indices[i]; bool ok = false; - auto& dname = get_dim_name(i); + auto& dname = _ggb->get_dim_name(i); // If this is the step dim and we're not checking // it, then anything is ok. - if (is_step_dim && !check_step) + if (is_step_dim && (!check_step || opts->_step_wrap)) ok = true; // Otherwise, check range. else { // First..last indices. - auto first_ok = get_first_valid_index(i); - auto last_ok = get_last_valid_index(i); + auto first_ok = get_first_local_index(i); + auto last_ok = get_last_local_index(i); if (idx >= first_ok && idx <= last_ok) ok = true; @@ -457,7 +458,7 @@ namespace yask { THROW_YASK_EXCEPTION("Error: " + fn + ": index in dim '" + dname + "' is " + to_string(idx) + ", which is not in allowed range [" + to_string(first_ok) + "..." + to_string(last_ok) + - "] of grid '" + get_name() + "'"); + "] of grid '" + _ggb->get_name() + "'"); } // Update the output indices. 
@@ -488,16 +489,16 @@ namespace yask { if (_has_step_dim) { // If 't' is before first step, pull offset back. - if (t < get_first_valid_step_index()) - _local_offsets[+Indices::step_posn] = t; + if (t < get_first_local_index(step_posn)) + _local_offsets[step_posn] = t; // If 't' is after last step, push offset out. - else if (t > get_last_valid_step_index()) - _local_offsets[+Indices::step_posn] = t - _domains[+Indices::step_posn] + 1; + else if (t > get_last_local_index(step_posn)) + _local_offsets[step_posn] = t - _domains[step_posn] + 1; TRACE_MSG("update_valid_step(" << t << "): valid step(s) in '" << - get_name() << "' are now [" << get_first_valid_step_index() << - " ... " << get_last_valid_step_index() << "]"); + _ggb->get_name() << "' are now [" << get_first_local_index(step_posn) << + " ... " << get_last_local_index(step_posn) << "]"); } } @@ -535,7 +536,7 @@ namespace yask { string str; if (msg.length()) str = msg + ": "; - str += get_name() + "[" + + str += _ggb->get_name() + "[" + makeIndexString(idxs) + "] = " + to_string(eval); if (line) str += " at line " + to_string(line); diff --git a/src/kernel/lib/realv_grids.hpp b/src/kernel/lib/realv_grids.hpp index d527a987..940f3615 100644 --- a/src/kernel/lib/realv_grids.hpp +++ b/src/kernel/lib/realv_grids.hpp @@ -27,15 +27,15 @@ IN THE SOFTWARE. namespace yask { - // Underlying storage. + // Underlying storage using GenericGrids. typedef GenericGridTemplate RealElemGrid; typedef GenericGridTemplate RealVecGrid; - // Base class implementing all yk_grids. Can be used for grids - // that contain either individual elements or vectors. + // Base class implementing all yk_grid functionality. Used for + // grids that contain either individual elements or vectors. class YkGridBase : - public KernelStateBase, - public virtual yk_grid { + public KernelStateBase { + friend class YkGridImpl; // Rank and local offsets in domain dim: @@ -56,10 +56,9 @@ namespace yask { // Local offset must be a vector multiple. protected: - // Underlying storage. A GenericGrid is similar to a YkGrid, but it - // doesn't have stencil features like padding, halos, offsets, etc. - // Holds name of grid, names of dims, sizes of dims, memory layout, - // actual data, message stream. + // Underlying storage. A GenericGrid doesn't have stencil features + // like padding, halos, offsets, etc. Holds name of grid, names of + // dims, sizes of dims, memory layout, actual data. GenericGridBase* _ggb = 0; // The following masks have one bit for each dim in the grid. @@ -111,7 +110,7 @@ namespace yask { bool _is_scratch = false; // Whether this was created via an API. - bool _is_new_grid = false; + bool _is_user_grid = false; // Convenience function to format indices like // "x=5, y=3". @@ -133,37 +132,23 @@ namespace yask { bool domain_ok, bool misc_ok) const; - // Share data from source grid of type GT. - template - bool _share_data(YkGridBase* src, - bool die_on_failure) { - auto* tp = dynamic_cast(_ggb); - if (!tp) { - if (die_on_failure) - THROW_YASK_EXCEPTION("Error in share_data(): " - "target grid not of expected type (internal inconsistency)"); - return false; - } - auto* sp = dynamic_cast(src->_ggb); - if (!sp) { - if (die_on_failure) - THROW_YASK_EXCEPTION("Error in share_data(): source grid " + - src->make_info_string() + - " not of same type as target grid " + - make_info_string()); - return false; - } - - // Shallow-copy GenericGrid object. - // This will copy its meta-data and share the elements. - *tp = *sp; - return true; + // Index math. 
+ inline idx_t get_first_local_index(idx_t posn) const { + return _rank_offsets[posn] + _local_offsets[posn] - _actl_left_pads[posn]; } - - // Share data from source grid. - // Must be implemented by a concrete class - // using the templated function above. - virtual bool share_data(YkGridBase* src, bool die_on_failure) =0; + inline idx_t get_last_local_index(idx_t posn) const { + return _rank_offsets[posn] + _local_offsets[posn] + _domains[posn] + _actl_right_pads[posn] - 1; + } + + // Make sure indices are in range. + // Optionally fix them to be in range and return in 'fixed_indices'. + // If 'normalize', make rank-relative, divide by vlen and return in 'fixed_indices'. + virtual bool checkIndices(const Indices& indices, + const std::string& fn, // name for error msg. + bool strict_indices, // die if out-of-range. + bool check_step, // check step index. + bool normalize, // div by vec lens. + Indices* fixed_indices = NULL) const; // Resize or fail if already allocated. virtual void resize(); @@ -183,18 +168,6 @@ namespace yask { virtual ~YkGridBase() { } // Step-indices. - virtual idx_t get_first_valid_step_index() const { - if (!_has_step_dim) - THROW_YASK_EXCEPTION("Error: 'get_first_valid_step_index()' called on grid '" + - get_name() + "' that does not use the step dimension"); - return _local_offsets[+Indices::step_posn]; - } - virtual idx_t get_last_valid_step_index() const { - if (!_has_step_dim) - THROW_YASK_EXCEPTION("Error: 'get_last_valid_step_index()' called on grid '" + - get_name() + "' that does not use the step dimension"); - return _local_offsets[+Indices::step_posn] + _domains[+Indices::step_posn] - 1; - } void update_valid_step(idx_t t); inline void update_valid_step(const Indices& indices) { if (_has_step_dim) @@ -214,7 +187,6 @@ namespace yask { } // Resize flag accessors. - virtual bool is_fixed_size() const { return _fixed_size; } virtual void set_fixed_size(bool is_fixed) { _fixed_size = is_fixed; if (is_fixed) { @@ -223,15 +195,9 @@ namespace yask { _is_dynamic_misc_alloc = true; } } - virtual bool is_dynamic_step_alloc() const { - return _is_dynamic_step_alloc; - } virtual void _set_dynamic_step_alloc(bool is_dynamic) { _is_dynamic_step_alloc = is_dynamic; } - virtual bool is_dynamic_misc_alloc() const { - return _is_dynamic_misc_alloc; - } virtual void _set_dynamic_misc_alloc(bool is_dynamic) { _is_dynamic_misc_alloc = is_dynamic; } @@ -240,7 +206,9 @@ namespace yask { virtual bool is_domain_var() const; // Scratch accessors. - virtual bool is_scratch() const { return _is_scratch; } + virtual bool is_scratch() const { + return _is_scratch; + } virtual void set_scratch(bool is_scratch) { _is_scratch = is_scratch; if (is_scratch) @@ -248,34 +216,23 @@ namespace yask { } // New-grid accessors. - virtual bool is_new_grid() const { return _is_new_grid; } - virtual void set_new_grid(bool is_new_grid) { - _is_new_grid = is_new_grid; - if (_is_new_grid) { + virtual bool is_user_grid() const { + return _is_user_grid; + } + virtual void set_user_grid(bool is_user_grid) { + _is_user_grid = is_user_grid; + if (_is_user_grid) { _is_dynamic_step_alloc = true; _is_dynamic_misc_alloc = true; } } - // NUMA accessors. - virtual int get_numa_preferred() const { return _ggb->get_numa_pref(); } - virtual bool set_numa_preferred(int numa_node) { - return _ggb->set_numa_pref(numa_node); - } - // Lookup position by dim name. // Return -1 or die if not found, depending on flag. 
virtual int get_dim_posn(const std::string& dim, bool die_on_failure = false, const std::string& die_msg = "") const; - // Get dim name by posn. - virtual const std::string& get_dim_name(int n) const { - assert(n >= 0); - assert(n < get_num_dims()); - return _ggb->get_dim_name(n); - } - // Adjust logical time index to 0-based index // using temporal allocation size. inline idx_t _wrap_step(idx_t t) const { @@ -309,33 +266,47 @@ namespace yask { return allocs; } - // Get the messsage output stream. - virtual std::ostream& get_ostr() const { - return _ggb->get_ostr(); + // Make a human-readable description of the grid var. + virtual std::string _make_info_string() const =0; + virtual std::string make_info_string() const { + std::stringstream oss; + if (is_scratch()) oss << "scratch "; + if (is_user_grid()) oss << "user-defined "; + if (_fixed_size) oss << "fixed-size "; + oss << _make_info_string() << " and meta-data at " << + (void*)this; + return oss.str(); } - // Make a human-readable description. - virtual std::string make_info_string() const =0; - // Check for equality. // Return number of mismatches greater than epsilon. virtual idx_t compare(const YkGridBase* ref, real_t epsilon = EPSILON, int maxPrint = 20) const; - // Make sure indices are in range. - // Optionally fix them to be in range and return in 'fixed_indices'. - // If 'normalize', make rank-relative, divide by vlen and return in 'fixed_indices'. - virtual bool checkIndices(const Indices& indices, - const std::string& fn, // name for error msg. - bool strict_indices, // die if out-of-range. - bool check_step, // check step index. - bool normalize, // div by vec lens. - Indices* fixed_indices = NULL) const; - - // Set elements to a sequence of values using seed. - // Cf. set_all_elements_same(). + // Set elements. virtual void set_all_elements_in_seq(double seed) =0; + virtual void set_all_elements_same(double seed) =0; + + // Set/get_elements_in_slice(). + virtual idx_t set_elements_in_slice_same(double val, + const Indices& first_indices, + const Indices& last_indices, + bool strict_indices); + virtual idx_t set_elements_in_slice(const void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices); + virtual idx_t get_elements_in_slice(void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) const; + + // Possibly vectorized version of set/get_elements_in_slice(). + virtual idx_t set_vecs_in_slice(const void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) =0; + virtual idx_t get_vecs_in_slice(void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) const =0; // Get a pointer to one element. // Indices are relative to overall problem domain. @@ -396,34 +367,115 @@ namespace yask { const real_vec_t& val, int line) const; - // APIs not defined above. + }; + typedef std::shared_ptr GridBasePtr; + + // Implementation of yk_grid interface. Class contains no real data, + // just a pointer to the underlying data and meta-data. This allows grid + // data to be shared and moved without changing pointers. 
+ class YkGridImpl : public virtual yk_grid { + protected: + GridBasePtr _gbp; + + public: + YkGridImpl() { } + YkGridImpl(const GridBasePtr& gp) : _gbp(gp) { } + virtual ~YkGridImpl() { } + + inline void set_gbp(const GridBasePtr& gp) { + _gbp = gp; + } + inline YkGridBase& gb() { + assert(_gbp.get()); + return *(_gbp.get()); + } + inline YkGridBase& gb() const { + assert(_gbp.get()); + return *(_gbp.get()); + } + inline YkGridBase* gbp() { + return _gbp.get(); + } + inline YkGridBase* gbp() const { + return _gbp.get(); + } + inline GenericGridBase& gg() { + assert(gb()._ggb); + return *(gb()._ggb); + } + inline GenericGridBase& gg() const { + assert(gb()._ggb); + return *(gb()._ggb); + } + + // Pass-thru methods to base. + void set_all_elements_in_seq(double seed) { + gb().set_all_elements_in_seq(seed); + } + idx_t set_vecs_in_slice(const void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) { + return gb().set_vecs_in_slice(buffer_ptr, first_indices, last_indices); + } + idx_t get_vecs_in_slice(void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) const { + return gb().get_vecs_in_slice(buffer_ptr, first_indices, last_indices); + } + void resize() { + gb().resize(); + } + + // APIs. // See yask_kernel_api.hpp. virtual const std::string& get_name() const { - return _ggb->get_name(); + return gg().get_name(); } virtual bool is_dim_used(const std::string& dim) const { - return _ggb->is_dim_used(dim); + return gg().is_dim_used(dim); } virtual int get_num_dims() const { - return _ggb->get_num_dims(); + return gg().get_num_dims(); + } + virtual const std::string& get_dim_name(int n) const { + assert(n >= 0); + assert(n < get_num_dims()); + return gg().get_dim_name(n); } virtual GridDimNames get_dim_names() const { - std::vector dims; + std::vector dims(get_num_dims()); for (int i = 0; i < get_num_dims(); i++) - dims.push_back(get_dim_name(i)); + dims.at(i) = get_dim_name(i); return dims; } + virtual bool is_fixed_size() const { + return gb()._fixed_size; + } + virtual bool is_dynamic_step_alloc() const { + return gb()._is_dynamic_step_alloc; + } + virtual bool is_dynamic_misc_alloc() const { + return gb()._is_dynamic_misc_alloc; + } + virtual int get_numa_preferred() const { + return gg().get_numa_pref(); + } + virtual bool set_numa_preferred(int numa_node) { + return gg().set_numa_pref(numa_node); + } - // Possibly vectorized version of set/get_elements_in_slice(). 
- virtual idx_t set_vecs_in_slice(const void* buffer_ptr, - const Indices& first_indices, - const Indices& last_indices) { - return set_elements_in_slice(buffer_ptr, first_indices, last_indices); + virtual idx_t get_first_valid_step_index() const { + if (!gb()._has_step_dim) + THROW_YASK_EXCEPTION("Error: 'get_first_valid_step_index()' called on grid '" + + get_name() + "' that does not use the step dimension"); + return gb()._local_offsets[+Indices::step_posn]; } - virtual idx_t get_vecs_in_slice(void* buffer_ptr, - const Indices& first_indices, - const Indices& last_indices) const { - return get_elements_in_slice(buffer_ptr, first_indices, last_indices); + virtual idx_t get_last_valid_step_index() const { + if (!gb()._has_step_dim) + THROW_YASK_EXCEPTION("Error: 'get_last_valid_step_index()' called on grid '" + + get_name() + "' that does not use the step dimension"); + return gb()._local_offsets[+Indices::step_posn] + + gb()._domains[+Indices::step_posn] - 1; } #define GET_GRID_API(api_name) \ @@ -453,8 +505,8 @@ namespace yask { SET_GRID_API(_set_right_wf_ext) // Exposed APIs. - GET_GRID_API(get_first_valid_index) - GET_GRID_API(get_last_valid_index) + GET_GRID_API(get_first_local_index) + GET_GRID_API(get_last_local_index) GET_GRID_API(get_rank_domain_size) GET_GRID_API(get_first_rank_domain_index) GET_GRID_API(get_last_rank_domain_index) @@ -491,7 +543,7 @@ namespace yask { #undef SET_GRID_API virtual std::string format_indices(const Indices& indices) const { - std::string str = get_name() + "(" + makeIndexString(indices) + ")"; + std::string str = get_name() + "(" + gb().makeIndexString(indices) + ")"; return str; } virtual std::string format_indices(const GridIndices& indices) const { @@ -503,14 +555,14 @@ namespace yask { return format_indices(indices2); } - virtual bool are_indices_valid(const Indices& indices) const; - virtual bool are_indices_valid(const GridIndices& indices) const { + virtual bool are_indices_local(const Indices& indices) const; + virtual bool are_indices_local(const GridIndices& indices) const { const Indices indices2(indices); - return are_indices_valid(indices2); + return are_indices_local(indices2); } - virtual bool are_indices_valid(const std::initializer_list& indices) const { + virtual bool are_indices_local(const std::initializer_list& indices) const { const Indices indices2(indices); - return are_indices_valid(indices2); + return are_indices_local(indices2); } virtual double get_element(const Indices& indices) const; @@ -524,7 +576,9 @@ namespace yask { } virtual idx_t get_elements_in_slice(void* buffer_ptr, const Indices& first_indices, - const Indices& last_indices) const; + const Indices& last_indices) const { + return gb().get_elements_in_slice(buffer_ptr, first_indices, last_indices); + } virtual idx_t get_elements_in_slice(void* buffer_ptr, const GridIndices& first_indices, const GridIndices& last_indices) const { @@ -563,11 +617,15 @@ namespace yask { return add_to_element(val, indices2, strict_indices); } - virtual void set_all_elements_same(double val) =0; + virtual void set_all_elements_same(double val) { + gb().set_all_elements_same(val); + } virtual idx_t set_elements_in_slice_same(double val, const Indices& first_indices, const Indices& last_indices, - bool strict_indices); + bool strict_indices) { + return gb().set_elements_in_slice_same(val, first_indices, last_indices, strict_indices); + } virtual idx_t set_elements_in_slice_same(double val, const GridIndices& first_indices, const GridIndices& last_indices, @@ -579,7 +637,9 @@ 
namespace yask { virtual idx_t set_elements_in_slice(const void* buffer_ptr, const Indices& first_indices, - const Indices& last_indices); + const Indices& last_indices) { + return gb().set_elements_in_slice(buffer_ptr, first_indices, last_indices); + } virtual idx_t set_elements_in_slice(const void* buffer_ptr, const GridIndices& first_indices, const GridIndices& last_indices) { @@ -589,28 +649,40 @@ namespace yask { } virtual void alloc_storage() { - _ggb->default_alloc(); - get_ostr() << make_info_string() << std::endl; + STATE_VARS(gbp()); + gg().default_alloc(); + os << gb().make_info_string() << std::endl; } virtual void release_storage() { - _ggb->release_storage(); + STATE_VARS(gbp()); + TRACE_MSG("release_storage(): " << gb().make_info_string()); + gg().release_storage(); + TRACE_MSG("after release_storage(): " << gb().make_info_string()); } - virtual void share_storage(yk_grid_ptr source); virtual bool is_storage_allocated() const { - return _ggb->get_storage() != 0; + return gg().get_storage() != 0; } virtual idx_t get_num_storage_bytes() const { - return idx_t(_ggb->get_num_bytes()); + return idx_t(gg().get_num_bytes()); } virtual idx_t get_num_storage_elements() const { - return _allocs.product(); - } - virtual bool is_storage_layout_identical(const yk_grid_ptr other) const; + return gb()._allocs.product(); + } + virtual bool is_storage_layout_identical(const YkGridImpl* other, + bool check_sizes) const; + virtual bool is_storage_layout_identical(const yk_grid_ptr other) const { + auto op = std::dynamic_pointer_cast(other); + assert(op); + return is_storage_layout_identical(op.get(), true); + } + virtual void fuse_grids(yk_grid_ptr other, + bool use_meta_data_from_other, + bool use_storage_from_other); virtual void* get_raw_storage_buffer() { - return _ggb->get_storage(); + return gg().get_storage(); } virtual void set_storage(std::shared_ptr base, size_t offset) { - _ggb->set_storage(base, offset); + gg().set_storage(base, offset); } }; @@ -624,11 +696,6 @@ namespace yask { typedef GenericGrid _grid_type; _grid_type _data; - // Share data from source grid. - virtual bool share_data(YkGridBase* src, bool die_on_failure) { - return _share_data<_grid_type>(src, die_on_failure); - } - public: YkElemGrid(KernelStateBase& state, std::string name, @@ -645,7 +712,7 @@ namespace yask { } // Make a human-readable description. - virtual std::string make_info_string() const { + virtual std::string _make_info_string() const { return _data.make_info_string("FP"); } @@ -664,7 +731,7 @@ namespace yask { idx_t alloc_step_idx, bool checkBounds=true) const final { STATE_VARS_CONST(this); - TRACE_MEM_MSG(get_name() << "." << "YkElemGrid::getElemPtr(" << + TRACE_MEM_MSG(_data.get_name() << "." << "YkElemGrid::getElemPtr(" << idxs.makeValStr(get_num_dims()) << ")"); const auto n = _data.get_num_dims(); Indices adj_idxs(n); @@ -722,6 +789,17 @@ namespace yask { return e; } + // Non-vectorized fall-back versions. + virtual idx_t set_vecs_in_slice(const void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) { + return set_elements_in_slice(buffer_ptr, first_indices, last_indices); + } + virtual idx_t get_vecs_in_slice(void* buffer_ptr, + const Indices& first_indices, + const Indices& last_indices) const { + return get_elements_in_slice(buffer_ptr, first_indices, last_indices); + } }; // YkElemGrid. // YASK grid of real vectors. @@ -739,11 +817,6 @@ namespace yask { // Positions of grid dims in vector fold dims. Indices _vec_fold_posns; - // Share data from source grid. 
- virtual bool share_data(YkGridBase* src, bool die_on_failure) { - return _share_data<_grid_type>(src, die_on_failure); - } - public: YkVecGrid(KernelStateBase& stateb, const std::string& name, @@ -792,7 +865,7 @@ namespace yask { } // Make a human-readable description. - virtual std::string make_info_string() const { + virtual std::string _make_info_string() const { return _data.make_info_string("SIMD FP"); } @@ -820,7 +893,7 @@ namespace yask { idx_t alloc_step_idx, bool checkBounds=true) const final { STATE_VARS_CONST(this); - TRACE_MEM_MSG(get_name() << "." << "YkVecGrid::getElemPtr(" << + TRACE_MEM_MSG(_data.get_name() << "." << "YkVecGrid::getElemPtr(" << idxs.makeValStr(get_num_dims()) << ")"); // Use template vec lengths instead of run-time values for @@ -932,7 +1005,7 @@ namespace yask { idx_t alloc_step_idx, bool checkBounds=true) const { STATE_VARS_CONST(this); - TRACE_MEM_MSG(get_name() << "." << "YkVecGrid::getVecPtrNorm(" << + TRACE_MEM_MSG(_data.get_name() << "." << "YkVecGrid::getVecPtrNorm(" << vec_idxs.makeValStr(get_num_dims()) << ")"); static constexpr int nvls = sizeof...(_templ_vec_lens); @@ -1031,7 +1104,7 @@ namespace yask { const Indices& first_indices, const Indices& last_indices) { STATE_VARS(this); - if (!is_storage_allocated()) + if (_data.get_storage() == 0) return 0; Indices firstv, lastv; checkIndices(first_indices, "set_vecs_in_slice", true, false, true, &firstv); @@ -1086,9 +1159,9 @@ namespace yask { const Indices& first_indices, const Indices& last_indices) const { STATE_VARS(this); - if (!is_storage_allocated()) - FORMAT_AND_THROW_YASK_EXCEPTION("Error: call to 'get_vecs_in_slice' with no data allocated for grid '" << - get_name()); + if (_data.get_storage() == 0) + FORMAT_AND_THROW_YASK_EXCEPTION("Error: call to 'get_vecs_in_slice' with no storage allocated for grid '" << + _data.get_name()); Indices firstv, lastv; checkIndices(first_indices, "get_vecs_in_slice", true, true, true, &firstv); checkIndices(last_indices, "get_vecs_in_slice", true, true, true, &lastv); @@ -1137,7 +1210,6 @@ namespace yask { // Return number of writes. return n; } - }; // YkVecGrid. } // namespace. diff --git a/src/kernel/lib/settings.hpp b/src/kernel/lib/settings.hpp index dff6bbab..479700b8 100644 --- a/src/kernel/lib/settings.hpp +++ b/src/kernel/lib/settings.hpp @@ -30,9 +30,10 @@ namespace yask { // Forward defns. class StencilContext; class YkGridBase; + class YkGridImpl; // Some derivations from grid types. - typedef std::shared_ptr YkGridPtr; + typedef std::shared_ptr YkGridPtr; typedef std::set GridPtrSet; typedef std::vector GridPtrs; typedef std::map GridPtrMap; @@ -173,6 +174,132 @@ namespace yask { }; typedef std::shared_ptr DimsPtr; + // Utility to determine number of points in a "sizes" var. + inline idx_t get_num_domain_points(const IdxTuple& sizes) { + assert(sizes.getNumDims() == NUM_STENCIL_DIMS); + idx_t pts = 1; + DOMAIN_VAR_LOOP(i, j) + pts *= sizes[i]; + return pts; + } + + // Application settings to control size and perf of stencil code. Most + // of these vars can be set via cmd-line options and/or APIs. + class KernelSettings { + + protected: + + // Default sizes. + idx_t def_block = 32; // TODO: calculate this. + + // Make a null output stream. + // TODO: put this somewhere else. + yask_output_factory yof; + yask_output_ptr nullop = yof.new_null_output(); + + public: + + // Ptr to problem dimensions (NOT sizes), folding, etc. + // This is solution info from the YASK compiler. + DimsPtr _dims; + + // Sizes in elements (points). 
+ // All these tuples contain stencil dims, even the ones that + // don't strictly need them. + IdxTuple _global_sizes; // Overall problem domain sizes. + IdxTuple _rank_sizes; // This rank's domain sizes. + IdxTuple _region_sizes; // region size (used for wave-front tiling). + IdxTuple _block_group_sizes; // block-group size (only used for 'grouped' region loops). + IdxTuple _block_sizes; // block size (used for each outer thread). + IdxTuple _mini_block_group_sizes; // mini-block-group size (only used for 'grouped' block loops). + IdxTuple _mini_block_sizes; // mini-block size (used for wave-fronts in blocks). + IdxTuple _sub_block_group_sizes; // sub-block-group size (only used for 'grouped' mini-block loops). + IdxTuple _sub_block_sizes; // sub-block size (used for each nested thread). + IdxTuple _min_pad_sizes; // minimum spatial padding (including halos). + IdxTuple _extra_pad_sizes; // extra spatial padding (outside of halos). + + // MPI settings. + IdxTuple _num_ranks; // number of ranks in each dim. + IdxTuple _rank_indices; // my rank index in each dim. + bool find_loc = true; // whether my rank index needs to be calculated. + int msg_rank = 0; // rank that prints informational messages. + bool overlap_comms = true; // overlap comms with computation. + bool use_shm = false; // use shared memory if possible. + idx_t _min_exterior = 0; // minimum size of MPI exterior to calculate. + + // OpenMP settings. + int max_threads = 0; // Initial number of threads to use overall; 0=>OMP default. + int thread_divisor = 1; // Reduce number of threads by this amount. + int num_block_threads = 1; // Number of threads to use for a block. + bool bind_block_threads = false; // Bind block threads to indices. + + // Grid behavior. + bool _step_wrap = false; // Allow invalid step indices to alias to valid ones. + + // Stencil-dim posn in which to apply block-thread binding. + // TODO: make this a cmd-line parameter. + int _bind_posn = 1; + + // Tuning. + bool _do_auto_tune = false; // whether to do auto-tuning. + bool _tune_mini_blks = false; // auto-tune mini-blks instead of blks. + bool _allow_pack_tuners = false; // allow per-pack tuners when possible. + + // Debug. + bool force_scalar = false; // Do only scalar ops. + bool _trace = false; // Print verbose tracing. + + // NUMA settings. + int _numa_pref = NUMA_PREF; + int _numa_pref_max = 128; // GiB to alloc before using PMEM. + + // Ctor/dtor. + KernelSettings(DimsPtr dims, KernelEnvPtr env); + virtual ~KernelSettings() { } + + protected: + // Add options to set one domain var to a cmd-line parser. + virtual void _add_domain_option(CommandLineParser& parser, + const std::string& prefix, + const std::string& descrip, + IdxTuple& var, + bool allow_step = false); + + idx_t findNumSubsets(std::ostream& os, + IdxTuple& inner_sizes, const std::string& inner_name, + const IdxTuple& outer_sizes, const std::string& outer_name, + const IdxTuple& mults, const std::string& step_dim); + + public: + // Add options to a cmd-line parser to set the settings. + virtual void add_options(CommandLineParser& parser); + + // Print usage message. + void print_usage(std::ostream& os, + CommandLineParser& parser, + const std::string& pgmName, + const std::string& appNotes, + const std::vector& appExamples) const; + + // Make sure all user-provided settings are valid by rounding-up + // values as needed. + // Called from prepare_solution(), so it doesn't normally need to be called from user code. + // Prints informational info to 'os'. 
+ virtual void adjustSettings(std::ostream& os); + virtual void adjustSettings() { + adjustSettings(nullop->get_ostream()); + } + + // Determine if this is the first or last rank in given dim. + virtual bool is_first_rank(const std::string dim) { + return _rank_indices[dim] == 0; + } + virtual bool is_last_rank(const std::string dim) { + return _rank_indices[dim] == _num_ranks[dim] - 1; + } + }; + typedef std::shared_ptr KernelSettingsPtr; + // MPI neighbor info. class MPIInfo { @@ -434,131 +561,6 @@ namespace yask { virtual MPIBuf& getBuf(MPIBufs::BufDir bd, const IdxTuple& neighbor_offsets); }; - // Utility to determine number of points in a "sizes" var. - inline idx_t get_num_domain_points(const IdxTuple& sizes) { - assert(sizes.getNumDims() == NUM_STENCIL_DIMS); - idx_t pts = 1; - DOMAIN_VAR_LOOP(i, j) - pts *= sizes[i]; - return pts; - } - - // Application settings to control size and perf of stencil code. - class KernelSettings { - - protected: - - // Default sizes. - idx_t def_block = 32; // TODO: calculate this. - - // Make a null output stream. - // TODO: put this somewhere else. - yask_output_factory yof; - yask_output_ptr nullop = yof.new_null_output(); - - public: - - // Copy of problem dimensions (NOT sizes). - DimsPtr _dims; - - // Sizes in elements (points). - // All these tuples contain stencil dims, even the ones that - // don't strictly need them. - IdxTuple _global_sizes; // Overall problem domain sizes. - IdxTuple _rank_sizes; // This rank's domain sizes. - IdxTuple _region_sizes; // region size (used for wave-front tiling). - IdxTuple _block_group_sizes; // block-group size (only used for 'grouped' region loops). - IdxTuple _block_sizes; // block size (used for each outer thread). - IdxTuple _mini_block_group_sizes; // mini-block-group size (only used for 'grouped' block loops). - IdxTuple _mini_block_sizes; // mini-block size (used for wave-fronts in blocks). - IdxTuple _sub_block_group_sizes; // sub-block-group size (only used for 'grouped' mini-block loops). - IdxTuple _sub_block_sizes; // sub-block size (used for each nested thread). - IdxTuple _min_pad_sizes; // minimum spatial padding (including halos). - IdxTuple _extra_pad_sizes; // extra spatial padding (outside of halos). - - // MPI settings. - IdxTuple _num_ranks; // number of ranks in each dim. - IdxTuple _rank_indices; // my rank index in each dim. - bool find_loc = true; // whether my rank index needs to be calculated. - int msg_rank = 0; // rank that prints informational messages. - bool overlap_comms = true; // overlap comms with computation. - bool use_shm = false; // use shared memory if possible. - idx_t _min_exterior = 0; // minimum size of MPI exterior to calculate. - - // OpenMP settings. - int max_threads = 0; // Initial number of threads to use overall; 0=>OMP default. - int thread_divisor = 1; // Reduce number of threads by this amount. - int num_block_threads = 1; // Number of threads to use for a block. - bool bind_block_threads = false; // Bind block threads to indices. - - // Stencil-dim posn in which to apply block-thread binding. - int _bind_posn = 1; - - // Tuning. - bool _do_auto_tune = false; // whether to do auto-tuning. - bool _tune_mini_blks = false; // auto-tune mini-blks instead of blks. - bool _allow_pack_tuners = false; // allow per-pack tuners when possible. - - // Debug. - bool force_scalar = false; // Do only scalar ops. - bool _trace = false; // Print verbose tracing. - - // Prefetch distances. - // Prefetching must be enabled via YASK_PREFETCH_L[12] macros. 
- int _prefetch_L1_dist = 1; - int _prefetch_L2_dist = 2; - - // NUMA settings. - int _numa_pref = NUMA_PREF; - int _numa_pref_max = 128; // GiB to alloc before using PMEM. - - // Ctor/dtor. - KernelSettings(DimsPtr dims, KernelEnvPtr env); - virtual ~KernelSettings() { } - - protected: - // Add options to set one domain var to a cmd-line parser. - virtual void _add_domain_option(CommandLineParser& parser, - const std::string& prefix, - const std::string& descrip, - IdxTuple& var, - bool allow_step = false); - - idx_t findNumSubsets(std::ostream& os, - IdxTuple& inner_sizes, const std::string& inner_name, - const IdxTuple& outer_sizes, const std::string& outer_name, - const IdxTuple& mults, const std::string& step_dim); - - public: - // Add options to a cmd-line parser to set the settings. - virtual void add_options(CommandLineParser& parser); - - // Print usage message. - void print_usage(std::ostream& os, - CommandLineParser& parser, - const std::string& pgmName, - const std::string& appNotes, - const std::vector& appExamples) const; - - // Make sure all user-provided settings are valid by rounding-up - // values as needed. - // Called from prepare_solution(), so it doesn't normally need to be called from user code. - // Prints informational info to 'os'. - virtual void adjustSettings(std::ostream& os); - virtual void adjustSettings() { - adjustSettings(nullop->get_ostream()); - } - - // Determine if this is the first or last rank in given dim. - virtual bool is_first_rank(const std::string dim) { - return _rank_indices[dim] == 0; - } - virtual bool is_last_rank(const std::string dim) { - return _rank_indices[dim] == _num_ranks[dim] - 1; - } - }; - typedef std::shared_ptr KernelSettingsPtr; - // A collection of solution meta-data whose ownership is shared between // various objects. struct KernelState { @@ -567,10 +569,10 @@ namespace yask { // Output stream for messages. yask_output_ptr _debug; - // Env. + // Environment (mostly MPI). KernelEnvPtr _env; - // Command-line and env parameters. + // User settings. KernelSettingsPtr _opts; bool _use_pack_tuners = false; @@ -587,18 +589,18 @@ namespace yask { // TODO: move to Dims. int _outer_posn = -1; // -1 => not set. - // MPI info. + // MPI neighbor info. MPIInfoPtr _mpiInfo; }; typedef std::shared_ptr KernelStatePtr; // Macro to define and set commonly-needed state vars efficiently. - // 'parent_p' is pointer to object containing 'KernelStatePtr _state'. + // '_ksbp' is pointer to a 'KernelStateBase' object. // '*_posn' vars are positions in stencil_dims. -#define STATE_VARS0(parent_p, pfx) \ - pfx auto* pp = parent_p; \ - assert(pp); \ - pfx auto* state = pp->_state.get(); \ +#define STATE_VARS0(_ksbp, pfx) \ + pfx auto* ksbp = _ksbp; \ + assert(ksbp); \ + pfx auto* state = ksbp->_state.get(); \ assert(state); \ assert(state->_debug.get()); \ auto& os = state->_debug.get()->get_ostream(); \ @@ -623,8 +625,8 @@ namespace yask { assert(step_posn == +Indices::step_posn); \ constexpr int outer_posn = 1; \ const int inner_posn = state->_inner_posn -#define STATE_VARS(parent_p) STATE_VARS0(parent_p,) -#define STATE_VARS_CONST(parent_p) STATE_VARS0(parent_p, const) +#define STATE_VARS(_ksbp) STATE_VARS0(_ksbp,) +#define STATE_VARS_CONST(_ksbp) STATE_VARS0(_ksbp, const) // A base class containing a shared pointer to a kernel state. // Used to ensure that the shared state object stays allocated when @@ -667,10 +669,6 @@ namespace yask { // Set debug output to cout if my_rank == msg_rank // or a null stream otherwise. 
std::ostream& set_ostr(); - std::ostream& get_ostr() const { - STATE_VARS(this); - return os; - } // Set number of threads w/o using thread-divisor. // Return number of threads. diff --git a/src/kernel/lib/setup.cpp b/src/kernel/lib/setup.cpp index ac3563fd..98b2f4fd 100644 --- a/src/kernel/lib/setup.cpp +++ b/src/kernel/lib/setup.cpp @@ -529,38 +529,44 @@ namespace yask { // Reset max halos to zero. max_halos = dims->_domain_dims; - // Loop through each non-scratch grid. - for (auto gp : gridPtrs) { - assert(gp); - - // Ignore manually-sized grid. - if (gp->is_fixed_size()) - continue; - - // Loop through each domain dim. - for (auto& dim : domain_dims.getDims()) { - auto& dname = dim.getName(); + // Loop through each domain dim. + for (auto& dim : domain_dims.getDims()) { + auto& dname = dim.getName(); + + // Each non-scratch grid. + for (auto gp : gridPtrs) { + assert(gp); + if (!gp->is_dim_used(dname)) + continue; - if (gp->is_dim_used(dname)) { + // Don't resize manually-sized grid. + if (!gp->is_fixed_size()) { // Rank domains. gp->_set_domain_size(dname, opts->_rank_sizes[dname]); - + // Pads. // Set via both 'extra' and 'min'; larger result will be used. gp->set_extra_pad_size(dname, opts->_extra_pad_sizes[dname]); gp->set_min_pad_size(dname, opts->_min_pad_sizes[dname]); - + // Offsets. gp->_set_rank_offset(dname, rank_domain_offsets[dname]); gp->_set_local_offset(dname, 0); - - // Update max halo across grids, used for temporal angles. - max_halos[dname] = max(max_halos[dname], gp->get_left_halo_size(dname)); - max_halos[dname] = max(max_halos[dname], gp->get_right_halo_size(dname)); } } - } // grids. + + // Each grid used in the solution. + for (auto gp : origGridPtrs) { + assert(gp); + if (!gp->is_dim_used(dname)) + continue; + + // Update max halo across grids, used for temporal angles. + max_halos[dname] = max(max_halos[dname], gp->get_left_halo_size(dname)); + max_halos[dname] = max(max_halos[dname], gp->get_right_halo_size(dname)); + } + } // Calculate wave-front shifts. // See the wavefront diagram in run_solution() for description @@ -635,18 +641,13 @@ namespace yask { // back to the grids. It's useful to store this redundant info // in the grids, because there it's indexed by grid dims instead // of domain dims. This makes it faster to do grid indexing. - for (auto gp : gridPtrs) { + for (auto gp : origGridPtrs) { assert(gp); - // Ignore manually-sized grid. - if (gp->is_fixed_size()) - continue; - // Loop through each domain dim. for (auto& dim : domain_dims.getDims()) { auto& dname = dim.getName(); if (gp->is_dim_used(dname)) { - // Set extensions to be the same as the global ones. gp->_set_left_wf_ext(dname, left_wf_exts[dname]); gp->_set_right_wf_ext(dname, right_wf_exts[dname]); diff --git a/src/kernel/lib/soln_apis.cpp b/src/kernel/lib/soln_apis.cpp index e726a9f8..2be73a43 100644 --- a/src/kernel/lib/soln_apis.cpp +++ b/src/kernel/lib/soln_apis.cpp @@ -54,10 +54,12 @@ namespace yask { GET_SOLN_API(get_rank_index, opts->_rank_indices[dim], false, true, false, true) #undef GET_SOLN_API - // The grid sizes updated any time these settings are changed. + // The grid sizes are updated any time these settings are changed. #define SET_SOLN_API(api_name, expr, step_ok, domain_ok, misc_ok, reset_prep) \ void StencilContext::api_name(const string& dim, idx_t n) { \ STATE_VARS(this); \ + TRACE_MSG("solution '" << get_name() << "'." 
\ + #api_name "('" << dim << "', " << n << ")"); \ dims->checkDimType(dim, #api_name, step_ok, domain_ok, misc_ok); \ expr; \ update_grid_info(); \ @@ -279,6 +281,7 @@ namespace yask { // Dealloc grids, etc. void StencilContext::end_solution() { STATE_VARS(this); + TRACE_MSG("end_solution()..."); // Final halo exchange (usually not needed). exchange_halos(); @@ -298,7 +301,9 @@ namespace yask { set_max_threads(); } - void StencilContext::share_grid_storage(yk_solution_ptr source) { + void StencilContext::fuse_grids(yk_solution_ptr source, + bool use_meta_data_from_other, + bool use_storage_from_other) { auto sp = dynamic_pointer_cast(source); assert(sp); @@ -307,7 +312,7 @@ namespace yask { auto si = sp->gridMap.find(gname); if (si != sp->gridMap.end()) { auto sgp = si->second; - gp->share_storage(sgp); + gp->fuse_grids(sgp, use_meta_data_from_other, use_storage_from_other); } } } @@ -337,7 +342,7 @@ namespace yask { } // Add a new grid to the containers. - void StencilContext::addGrid(YkGridPtr gp, bool is_output) { + void StencilContext::addGrid(YkGridPtr gp, bool is_orig, bool is_output) { STATE_VARS(this); assert(gp); auto& gname = gp->get_name(); @@ -348,6 +353,12 @@ namespace yask { gridPtrs.push_back(gp); gridMap[gname] = gp; + // Add to orig list and map if 'is_orig'. + if (is_orig) { + origGridPtrs.push_back(gp); + origGridMap[gname] = gp; + } + // Add to output list and map if 'is_output'. if (is_output) { outputGridPtrs.push_back(gp); diff --git a/src/kernel/lib/stencil_calc.cpp b/src/kernel/lib/stencil_calc.cpp index 8372de75..3c08afc5 100644 --- a/src/kernel/lib/stencil_calc.cpp +++ b/src/kernel/lib/stencil_calc.cpp @@ -701,9 +701,10 @@ namespace yask { assert(sv); // Get the one for this thread. - auto gp = sv->at(region_thread_idx); + auto& gp = sv->at(region_thread_idx); assert(gp); - assert(gp->is_scratch()); + auto& gb = gp->gb(); + assert(gb.is_scratch()); // i: index for stencil dims, j: index for domain dims. DOMAIN_VAR_LOOP(i, j) { @@ -711,7 +712,7 @@ namespace yask { auto& dname = dim.getName(); // Is this dim used in this grid? - int posn = gp->get_dim_posn(dname); + int posn = gb.get_dim_posn(dname); if (posn >= 0) { // Get halos, which need to be written to for @@ -729,13 +730,13 @@ namespace yask { // Make sure grid covers index bounds. TRACE_MSG("adjust_span: mini-blk [" << - idxs.begin[i] << "..." << - idxs.end[i] << ") adjusted to [" << - adj_idxs.begin[i] << "..." << - adj_idxs.end[i] << ") within scratch-grid '" << - gp->get_name() << "' allocated [" << - gp->get_first_rank_alloc_index(posn) << "..." << - gp->get_last_rank_alloc_index(posn) << "] in dim '" << dname << "'"); + idxs.begin[i] << "..." << + idxs.end[i] << ") adjusted to [" << + adj_idxs.begin[i] << "..." << + adj_idxs.end[i] << ") within scratch-grid '" << + gp->get_name() << "' allocated [" << + gp->get_first_rank_alloc_index(posn) << "..." << + gp->get_last_rank_alloc_index(posn) << "] in dim '" << dname << "'"); assert(adj_idxs.begin[i] >= gp->get_first_rank_alloc_index(posn)); assert(adj_idxs.end[i] <= gp->get_last_rank_alloc_index(posn) + 1); @@ -793,8 +794,7 @@ namespace yask { // Calc the work stats. // Requires MPI barriers! void BundlePack::init_work_stats() { - ostream& os = _context->get_ostr(); - auto& env = _context->get_env(); + STATE_VARS(this); num_reads_per_step = 0; num_writes_per_step = 0; @@ -879,7 +879,8 @@ namespace yask { // Classify vars. GridPtrs idvars, imvars, odvars, omvars, iodvars, iomvars; // i[nput], o[utput], d[omain], m[isc]. 
for (auto gp : sg->inputGridPtrs) { - bool isdom = gp->is_domain_var(); + auto& gb = gp->gb(); + bool isdom = gb.is_domain_var(); auto& ogps = sg->outputGridPtrs; bool isout = find(ogps.begin(), ogps.end(), gp) != ogps.end(); if (isout) { @@ -895,7 +896,8 @@ namespace yask { } } for (auto gp : sg->outputGridPtrs) { - bool isdom = gp->is_domain_var(); + auto& gb = gp->gb(); + bool isdom = gb.is_domain_var(); auto& igps = sg->inputGridPtrs; bool isin = find(igps.begin(), igps.end(), gp) != igps.end(); if (!isin) { diff --git a/src/kernel/tests/grid_test.cpp b/src/kernel/tests/grid_test.cpp index 00770eac..5bdf55ef 100644 --- a/src/kernel/tests/grid_test.cpp +++ b/src/kernel/tests/grid_test.cpp @@ -70,12 +70,14 @@ int main(int argc, char** argv) { os << "0-D test...\n"; GridDimNames gdims; string name = "test grid"; - YkGridPtr g0 = make_shared>(*context, name, gdims); + auto gb0 = make_shared>(*context, name, gdims); + YkGridPtr g0 = make_shared(gb0); g0->alloc_storage(); - os << g0->make_info_string() << endl; - YkGridPtr g1 = make_shared>(*context, name, gdims); + os << gb0->make_info_string() << endl; + auto gb1 = make_shared>(*context, name, gdims); + YkGridPtr g1 = make_shared(gb1); g1->alloc_storage(); - os << g1->make_info_string() << endl; + os << gb1->make_info_string() << endl; double val = 3.14; os << "Testing with " << val << endl; @@ -91,8 +93,10 @@ int main(int argc, char** argv) { os << "3-D test...\n"; GridDimNames gdims = {"x", "y", "z"}; string name = "test grid"; - YkGridPtr g3 = make_shared>(*context, name, gdims); - YkGridPtr g3f = make_shared>(*context, name, gdims); + auto gb3 = make_shared>(*context, name, gdims); + YkGridPtr g3 = make_shared(gb3); + auto gb3f = make_shared>(*context, name, gdims); + YkGridPtr g3f = make_shared(gb3f); int i = 0; int min_pad = 3; for (auto dname : gdims) { @@ -103,21 +107,21 @@ int main(int argc, char** argv) { i++; } g3->alloc_storage(); - os << g3->make_info_string() << endl; + os << gb3->make_info_string() << endl; g3f->alloc_storage(); - os << g3f->make_info_string() << endl; + os << gb3f->make_info_string() << endl; os << "Copying seq of vals\n"; - g3->set_all_elements_in_seq(1.0); - auto sizes = g3->get_allocs(); + gb3->set_all_elements_in_seq(1.0); + auto sizes = gb3->get_allocs(); sizes.visitAllPointsInParallel([&](const IdxTuple& pt, size_t idx) { IdxTuple pt2 = pt; for (auto dname : gdims) pt2[dname] += g3->get_first_rank_alloc_index(dname); Indices ipt(pt2); - auto val = g3->readElem(ipt, 0, __LINE__); - g3f->writeElem(val, ipt, 0, __LINE__); + auto val = gb3->readElem(ipt, 0, __LINE__); + gb3f->writeElem(val, ipt, 0, __LINE__); return true; }); os << "Checking seq of vals\n"; @@ -128,8 +132,8 @@ int main(int argc, char** argv) { pt2[dname] += g3->get_first_rank_alloc_index(dname); Indices ipt(pt2); ipt.addConst(-min_pad); - auto val = g3->readElem(ipt, 0, __LINE__); - auto valf = g3f->readElem(ipt, 0, __LINE__); + auto val = gb3->readElem(ipt, 0, __LINE__); + auto valf = gb3f->readElem(ipt, 0, __LINE__); assert(val == valf); return true; }); diff --git a/src/kernel/tests/yask_kernel_api_test.cpp b/src/kernel/tests/yask_kernel_api_test.cpp index 9e7fa7c9..087a51b8 100644 --- a/src/kernel/tests/yask_kernel_api_test.cpp +++ b/src/kernel/tests/yask_kernel_api_test.cpp @@ -192,18 +192,19 @@ int main() { // Init the values using the indices created above. double val = 2.0; - idx_t nset = grid->set_elements_in_slice_same(val, first_indices, last_indices); + bool strict_indices = false; // because first/last_indices are global. 
+ idx_t nset = grid->set_elements_in_slice_same(val, first_indices, last_indices, strict_indices); os << " " << nset << " element(s) set in sub-range from " << grid->format_indices(first_indices) << " to " << grid->format_indices(last_indices) << ".\n"; - if (grid->is_element_allocated(first_indices)) { + if (grid->are_indices_local(first_indices)) { auto val2 = grid->get_element(first_indices); os << " first element == " << val2 << ".\n"; assert(val2 == val); } else os << " first element NOT in rank.\n"; - if (grid->is_element_allocated(last_indices)) { + if (grid->are_indices_local(last_indices)) { auto val2 = grid->get_element(last_indices); os << " last element == " << val2 << ".\n"; assert(val2 == val); @@ -215,12 +216,12 @@ int main() { nset = grid->add_to_element(1.0, first_indices); nset += grid->add_to_element(3.0, last_indices); os << " " << nset << " element(s) updated.\n"; - if (grid->is_element_allocated(first_indices)) { + if (grid->are_indices_local(first_indices)) { auto val2 = grid->get_element(first_indices); os << " first element == " << val2 << ".\n"; assert(val2 == val + 1.0); } - if (grid->is_element_allocated(last_indices)) { + if (grid->are_indices_local(last_indices)) { auto val2 = grid->get_element(last_indices); os << " last element == " << val2 << ".\n"; assert(val2 == val + 3.0); diff --git a/src/kernel/tests/yask_kernel_api_test.py b/src/kernel/tests/yask_kernel_api_test.py index 432a9e81..dcb57d85 100755 --- a/src/kernel/tests/yask_kernel_api_test.py +++ b/src/kernel/tests/yask_kernel_api_test.py @@ -296,7 +296,7 @@ def init_grid(grid, timestep) : print("Set " + repr(nset) + " element(s) in rank " + repr(env.get_rank_index())) # Init the values within the small cube. - nset = grid.set_elements_in_slice_same(0.5, first_indices, last_indices) + nset = grid.set_elements_in_slice_same(0.5, first_indices, last_indices, False) print("Set " + repr(nset) + " element(s) in rank " + repr(env.get_rank_index())) # Print the initial contents of the grid. diff --git a/src/kernel/yask.sh b/src/kernel/yask.sh index fae81f27..bf1f02fc 100755 --- a/src/kernel/yask.sh +++ b/src/kernel/yask.sh @@ -317,7 +317,7 @@ else fi # Commands to capture some important system status and config info for benchmark documentation. -config_cmds="uname -a; sleep 1; uptime; sed '/^$/q' /proc/cpuinfo; lscpu; $dump /proc/cmdline; $dump /proc/meminfo; free -gt; numactl -H; ulimit -a" +config_cmds="sleep 1; uptime; lscpu; sed '/^$/q' /proc/cpuinfo; uname -a; $dump /etc/system-release; $dump /proc/cmdline; $dump /proc/meminfo; free -gt; numactl -H; ulimit -a" # Command sequence to be run in a shell. # Captures From 4029c1537592a17336b65f84e88daa3c8ad73a52 Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Fri, 12 Apr 2019 11:29:13 -0700 Subject: [PATCH 4/5] Simplify the fuse_grids() API. 
---
 README.md                       |  5 ++-
 include/yk_grid_api.hpp         | 75 ++++++++-------------------
 include/yk_solution_api.hpp     |  6 +--
 src/compiler/lib/YaskKernel.cpp |  2 +-
 src/kernel/lib/context.cpp      |  2 +-
 src/kernel/lib/context.hpp      |  6 +--
 src/kernel/lib/grid_apis.cpp    | 60 +++++++++-----------------
 src/kernel/lib/new_grid.cpp     |  2 +-
 src/kernel/lib/realv_grids.hpp  |  4 +-
 src/kernel/lib/setup.cpp        | 24 +++++------
 src/kernel/lib/soln_apis.cpp    | 13 +++---
 11 files changed, 63 insertions(+), 136 deletions(-)

diff --git a/README.md b/README.md
index 18dcab8a..9515ced2 100644
--- a/README.md
+++ b/README.md
@@ -60,10 +60,11 @@ YASK contains a domain-specific compiler to convert scalar stencil code to SIMD-
 ### Backward-compatibility notices, including changes in default behavior:
 * Version 2.20.00 added checking of the step-dimension index value in the `yk_grid::get_element()` and similar APIs.
 Previously, invalid values silently "wrapped" around to valid values.
-Now, the step index must be valid when reading, and the valid step indices are updated when writing.
+Now, by default, the step index must be valid when reading, and the valid step indices are updated when writing.
+The old behavior of silent index wrapping may be restored via `set_step_wrap(true)`.
 The default for all `strict_indices` API parameters is now `true` to catch more programming
 errors and increase consistency of behavior between "set" and "get" APIs.
-The `share_storage()` APIs have been replaced with `fuse_grids()`.
+Also, the advanced `share_storage()` APIs have been replaced with `fuse_grids()`.
 * Version 2.19.01 turned off multi-pass tuning by default. Enable with `-auto_tune_each_pass`.
 * Version 2.18.03 allowed the default radius to be stencil-specific and changed the names of example stencil "9axis" to "3axis_with_diags".
 * Version 2.18.00 added the ability to specify the global-domain size, and it will calculate the local-domain sizes from it.
diff --git a/include/yk_grid_api.hpp b/include/yk_grid_api.hpp
index 3442007b..de99d234 100644
--- a/include/yk_grid_api.hpp
+++ b/include/yk_grid_api.hpp
@@ -985,68 +985,33 @@ namespace yask {
 
         /// **[Advanced]** Merge this grid with another grid.
         /**
-           After calling this API, both this grid and the `other`
-           grid will effectively become a reference to the same shared grid.
+           After calling this API, this grid
+           will effectively become another reference to the `source` grid.
            Any subsequent API applied to this grid or the
-           `other` grid will access the same data and/or
+           `source` grid will access the same data and/or
           effect the same changes.
-           There are two categories of data associated with
-           a grid, and the source of each is specified independently:
-           - The _meta-data_ includes the name, dimensions, sizes, etc.,
-           i.e., everything about the grid apart from the storage.
-           If `use_meta_data_from_other` is `true`, the resulting shared
-           grid will use the meta-data from the `other` grid;
-           if `use_meta_data_from_other` is `false`, the resulting shared grid
-           will use the meta-data from this grid.
-           - The _storage_ holds the actual values of the data elements
-           if storage has been allocated.
-           If `use_storage_from_other` is `true`, the resulting shared
-           grid will use the storage from the `other` grid;
-           if `use_storage_from_other` is `false`, the resulting shared grid
-           will use the storage from this grid.
-
-           Implications:
-           If `use_meta_data_from_other` and `use_storage_from_other`
-           are both `false`, this grid remains unaltered, and the
-           `other` grid becomes a reference to this grid.
-           If `use_meta_data_from_other` and `use_storage_from_other`
-           are both `true`, the `other` grid remains unaltered, and this
-           grid becomes a reference to the `other` grid.
-           If `use_meta_data_from_other` and `use_storage_from_other`
-           are different, and if
-           and the source storage is already allocated, the size of the
-           source storage must match that required by the source
-           meta-data. In other words, the value of
-           yk_grid::get_num_storage_bytes() must return the same value from
-           both grids prior to fusing.
-           The storage of the resulting shared grid will be
+
+           Storage implications:
+           - The storage of this grid will become
           allocated or unallocated depending on that of the source grid.
-           Any pre-existing storage in the non-source grid will be released.
-           - After fusing, any API applied to the shared grid via this
-           grid or the `other` grid will be visible to both, including
-           release_storage().
+           Any pre-existing storage in this grid will be released.
+           - After fusing, calling release_storage() on this grid
+           or the `source` grid will apply to both.
 
           To ensure that the kernels created by the YASK compiler work
-           properly, if either this grid and/or the `other` grid is used in
-           a kernel and its meta-data is being replaced, the dimensions and
-           fold-lengths must remain unchanged or an exception will the
-           thrown. It is the responsibility of the API programmer to ensure
-           that the storage, local domain sizes, halos, etc. of the grid
-           are set to be compatible with the solution before calling
-           yk_solution::run_solution().
+           properly, if this grid is used in a kernel, the dimensions and
+           fold-lengths of the `source` grid must be identical or an
+           exception will be thrown. If the `source` grid is a fixed-size
+           grid, the storage, local domain sizes, halos, etc. of the grid
+           are set to be compatible with the solution. Otherwise,
+           yk_solution::prepare_solution() will throw an exception.
 
           See allocation options and more information about grid sizes
           in the "Detailed Description" for \ref yk_grid.
        */
        virtual void
-        fuse_grids(yk_grid_ptr other
-                   /**< [in] Grid to be merged with this grid. */,
-                   bool use_meta_data_from_other
-                   /**< [in] If `true`, use meta-data from `other` grid;
-                      if `false`, use meta-data from this grid. */,
-                   bool use_storage_from_other
-                   /**< [in] If `true`, use element storage from `other` grid;
-                      if `false`, use storage from this grid. */) =0;
+        fuse_grids(yk_grid_ptr source
+                   /**< [in] Grid to be merged with this grid. */) =0;
 
        /// **[Advanced]** Get pointer to raw data storage buffer.
        /**
@@ -1140,12 +1105,6 @@ namespace yask {
        }
 #endif
 
-        /// **[Deprecated]** Use fuse_grids() instead.
-        virtual void
-        share_storage(yk_grid_ptr other) {
-            fuse_grids(other, false, true);
-        }
-
    };
 
    /** @}*/
diff --git a/include/yk_solution_api.hpp b/include/yk_solution_api.hpp
index dd91e785..913a0b29 100644
--- a/include/yk_solution_api.hpp
+++ b/include/yk_solution_api.hpp
@@ -858,11 +858,7 @@ namespace yask {
     */
    virtual void
    fuse_grids(yk_solution_ptr source
-               /**< [in] Solution from which grids will be merged. */,
-               bool use_meta_data_from_other
-               /**< [in] Whether to keep meta-data from `other` solution's grids. */,
-               bool use_storage_from_other
-               /**< [in] Whether to keep element storage from `other` solution's grids. */) =0;
+               /**< [in] Solution from which grids will be merged. 
*/) =0; /// **[Advanced]** Set whether invalid step indices alias to valid ones. virtual void diff --git a/src/compiler/lib/YaskKernel.cpp b/src/compiler/lib/YaskKernel.cpp index 34e5fbc0..a7ea6eeb 100644 --- a/src/compiler/lib/YaskKernel.cpp +++ b/src/compiler/lib/YaskKernel.cpp @@ -460,7 +460,7 @@ namespace yask { os << "\n // Create grids (but do not allocate data in them).\n" << ctorCode << "\n // Update grids with context info.\n" - " update_grid_info();\n"; + " update_grid_info(false);\n"; // end of ctor. os << " } // ctor" << endl; diff --git a/src/kernel/lib/context.cpp b/src/kernel/lib/context.cpp index 4b8eb44f..02a415a3 100644 --- a/src/kernel/lib/context.cpp +++ b/src/kernel/lib/context.cpp @@ -74,7 +74,7 @@ namespace yask { opts->_mini_block_sizes.setValsSame(0); opts->_sub_block_sizes.setValsSame(0); opts->adjustSettings(); - update_grid_info(); + update_grid_info(true); // Copy these settings to packs and realloc scratch grids. for (auto& sp : stPacks) diff --git a/src/kernel/lib/context.hpp b/src/kernel/lib/context.hpp index 10fe92a9..3160eef3 100644 --- a/src/kernel/lib/context.hpp +++ b/src/kernel/lib/context.hpp @@ -361,7 +361,7 @@ namespace yask { // Set grid sizes and offsets. // This should be called anytime a setting or offset is changed. - virtual void update_grid_info(); + virtual void update_grid_info(bool force); // Set temporal blocking data. // This should be called anytime a block size is changed. @@ -582,9 +582,7 @@ namespace yask { virtual void run_solution(idx_t step_index) { run_solution(step_index, step_index); } - virtual void fuse_grids(yk_solution_ptr other, - bool use_meta_data_from_other, - bool use_storage_from_other); + virtual void fuse_grids(yk_solution_ptr other); // APIs that access settings. virtual void set_overall_domain_size(const std::string& dim, idx_t size); diff --git a/src/kernel/lib/grid_apis.cpp b/src/kernel/lib/grid_apis.cpp index 2ef7a731..b93be574 100644 --- a/src/kernel/lib/grid_apis.cpp +++ b/src/kernel/lib/grid_apis.cpp @@ -176,64 +176,42 @@ namespace yask { return true; } - void YkGridImpl::fuse_grids(yk_grid_ptr other, - bool use_meta_data_from_other, - bool use_storage_from_other) { + void YkGridImpl::fuse_grids(yk_grid_ptr src) { STATE_VARS(gbp()); - auto op = dynamic_pointer_cast(other); - TRACE_MSG("fuse_grids(" << other.get() << ", " << use_meta_data_from_other << - ", " << use_storage_from_other << "): this=" << gb().make_info_string() << - "; other=" << op->gb().make_info_string()); - + auto op = dynamic_pointer_cast(src); + TRACE_MSG("fuse_grids(" << src.get() << "): this=" << gb().make_info_string() << + "; source=" << op->gb().make_info_string()); assert(op); - YkGridImpl* md_src = use_meta_data_from_other ? op.get() : this; - YkGridImpl* st_src = use_storage_from_other ? op.get() : this; - - // Make sure size is compatible when replacing storage. - if (md_src != st_src && st_src->get_raw_storage_buffer() - && md_src->get_num_storage_bytes() != st_src->get_num_storage_bytes()) - THROW_YASK_EXCEPTION("Error: fuse_grids(): attempt to use allocated" - " storage of " + makeByteStr(st_src->get_num_storage_bytes()) + - " from grid '" + st_src->get_name() + "' in grid '" + - md_src->get_name() + "' that needs " + - makeByteStr(md_src->get_num_storage_bytes())); + auto* sp = op.get(); + assert(!_gbp->is_scratch()); // Check conditions for fusing into a non-user grid. 
        bool force_native = false;
-        for (YkGridImpl* tgt : { this, op.get() }) {
-            if (!tgt->gb().is_user_grid()) {
-                force_native = true;
-                if (!tgt->is_storage_layout_identical(md_src, false))
-                    THROW_YASK_EXCEPTION("Error: fuse_grids(): attempt to replace meta-data"
-                                         " of " + tgt->gb().make_info_string() +
-                                         " used in solution with incompatible " +
-                                         md_src->gb().make_info_string());
-            }
+        if (!gb().is_user_grid()) {
+            force_native = true;
+            if (!is_storage_layout_identical(sp, false))
+                THROW_YASK_EXCEPTION("Error: fuse_grids(): attempt to replace meta-data"
+                                     " of " + gb().make_info_string() +
+                                     " used in solution with incompatible " +
+                                     sp->gb().make_info_string());
        }
 
        // Save ptr to source-storage grid before fusing meta-data.
-        GridBasePtr st_gbp = st_src->_gbp; // Shared-ptr to keep source active to end of method.
+        GridBasePtr st_gbp = sp->_gbp; // Shared-ptr to keep source active to end of method.
        GenericGridBase* st_ggb = st_gbp->_ggb;
 
        // Fuse meta-data.
-        if (use_meta_data_from_other)
-            _gbp = md_src->_gbp;
-        else
-            op->_gbp = md_src->_gbp;
-        assert(_gbp == op->_gbp);
-
-        // Make this grid look like a compiler-generated one if either
-        // of the original ones was.
+        _gbp = sp->_gbp;
+
+        // Tag grid as a non-user grid if the original one was.
        if (force_native)
            _gbp->set_user_grid(false);
-        assert(!_gbp->is_scratch());
 
        // Fuse storage.
        gg().share_storage(st_ggb);
 
-        TRACE_MSG("after fuse_grids(" << other.get() << ", " << use_meta_data_from_other <<
-                  ", " << use_storage_from_other << "): this=" << gb().make_info_string() <<
-                  "; other=" << op->gb().make_info_string());
+        TRACE_MSG("after fuse_grids: this=" << gb().make_info_string() <<
+                  "; source=" << op->gb().make_info_string());
    }
 
    // API get, set, etc.
diff --git a/src/kernel/lib/new_grid.cpp b/src/kernel/lib/new_grid.cpp
index f94435d5..baee3fea 100644
--- a/src/kernel/lib/new_grid.cpp
+++ b/src/kernel/lib/new_grid.cpp
@@ -154,7 +154,7 @@ namespace yask {
 
        // Set sizes based on solution settings.
        else
-            update_grid_info();
+            update_grid_info(false);
 
        return ygp;
    }
diff --git a/src/kernel/lib/realv_grids.hpp b/src/kernel/lib/realv_grids.hpp
index 940f3615..4bf93fdd 100644
--- a/src/kernel/lib/realv_grids.hpp
+++ b/src/kernel/lib/realv_grids.hpp
@@ -675,9 +675,7 @@ namespace yask {
            assert(op);
            return is_storage_layout_identical(op.get(), true);
        }
-        virtual void fuse_grids(yk_grid_ptr other,
-                                bool use_meta_data_from_other,
-                                bool use_storage_from_other);
+        virtual void fuse_grids(yk_grid_ptr other);
        virtual void* get_raw_storage_buffer() {
            return gg().get_storage();
        }
diff --git a/src/kernel/lib/setup.cpp b/src/kernel/lib/setup.cpp
index 98b2f4fd..26e2f0f8 100644
--- a/src/kernel/lib/setup.cpp
+++ b/src/kernel/lib/setup.cpp
@@ -517,9 +517,9 @@ namespace yask {
    // Set non-scratch grid sizes and offsets based on settings.
    // Set wave-front settings.
    // This should be called anytime a setting or rank offset is changed.
-    void StencilContext::update_grid_info() {
+    void StencilContext::update_grid_info(bool force) {
        STATE_VARS(this);
-        TRACE_MSG("update_grid_info()...");
+        TRACE_MSG("update_grid_info(" << force << ")...");
 
        // If we haven't finished constructing the context, it's too early
        // to do this.
@@ -538,9 +538,12 @@ namespace yask {
                assert(gp);
                if (!gp->is_dim_used(dname))
                    continue;
+                auto& gb = gp->gb();
 
-                // Don't resize manually-sized grid.
-                if (!gp->is_fixed_size()) {
+                // Don't resize manually-sized grid
+                // unless it is a solution grid and 'force' is 'true'. 
+ if (!gp->is_fixed_size() || + (!gb.is_user_grid() && force)) { // Rank domains. gp->_set_domain_size(dname, opts->_rank_sizes[dname]); @@ -554,17 +557,12 @@ namespace yask { gp->_set_rank_offset(dname, rank_domain_offsets[dname]); gp->_set_local_offset(dname, 0); } - } - - // Each grid used in the solution. - for (auto gp : origGridPtrs) { - assert(gp); - if (!gp->is_dim_used(dname)) - continue; // Update max halo across grids, used for temporal angles. - max_halos[dname] = max(max_halos[dname], gp->get_left_halo_size(dname)); - max_halos[dname] = max(max_halos[dname], gp->get_right_halo_size(dname)); + if (!gb.is_user_grid()) { + max_halos[dname] = max(max_halos[dname], gp->get_left_halo_size(dname)); + max_halos[dname] = max(max_halos[dname], gp->get_right_halo_size(dname)); + } } } diff --git a/src/kernel/lib/soln_apis.cpp b/src/kernel/lib/soln_apis.cpp index 2be73a43..4ca599a3 100644 --- a/src/kernel/lib/soln_apis.cpp +++ b/src/kernel/lib/soln_apis.cpp @@ -62,7 +62,7 @@ namespace yask { #api_name "('" << dim << "', " << n << ")"); \ dims->checkDimType(dim, #api_name, step_ok, domain_ok, misc_ok); \ expr; \ - update_grid_info(); \ + update_grid_info(false); \ if (reset_prep) rank_bb.bb_valid = ext_bb.bb_valid = false; \ } SET_SOLN_API(set_rank_index, opts->_rank_indices[dim] = n; @@ -132,8 +132,9 @@ namespace yask { opts->adjustSettings(os); // Set offsets in grids and find WF extensions - // based on the grids' halos. - update_grid_info(); + // based on the grids' halos. Force setting + // the size of all solution grids. + update_grid_info(true); // Determine bounding-boxes for all bundles. // This must be done after finding WF extensions. @@ -301,9 +302,7 @@ namespace yask { set_max_threads(); } - void StencilContext::fuse_grids(yk_solution_ptr source, - bool use_meta_data_from_other, - bool use_storage_from_other) { + void StencilContext::fuse_grids(yk_solution_ptr source) { auto sp = dynamic_pointer_cast(source); assert(sp); @@ -312,7 +311,7 @@ namespace yask { auto si = sp->gridMap.find(gname); if (si != sp->gridMap.end()) { auto sgp = si->second; - gp->fuse_grids(sgp, use_meta_data_from_other, use_storage_from_other); + gp->fuse_grids(sgp); } } } From 4cbb00dd9066d7a42c740a0ea48496d39133d792 Mon Sep 17 00:00:00 2001 From: "chuck.yount" Date: Fri, 12 Apr 2019 15:32:12 -0700 Subject: [PATCH 5/5] Update compiler info. --- README.md | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 9515ced2..dc32ab35 100644 --- a/README.md +++ b/README.md @@ -25,11 +25,20 @@ YASK contains a domain-specific compiler to convert scalar stencil code to SIMD- for multi-socket and multi-node operation or Intel(R) Parallel Studio XE Composer Edition for C++ Linux for single-socket only - (2018 or later; 2019 or later recommended and required when using g++ 8 or later). - Building a YASK kernel with the Gnu compiler is possible, but only useful - for functional testing. The performance - of the kernel built from the Gnu compiler has been observed to be up to 7x lower - than the same kernel built using the Intel compiler. + (2018 or later; 2019.3 or later recommended). + * There was an issue in Intel(R) MPI versions 2019.1 and 2019.2 that + caused the application to crash when allocating very + large shared-memory (shm) regions, so those + versions are not recommended when using the `-use_shm` feature. + This issue was resolved in MPI version 2019.3. 
+ * If you are using g++ version 8.x or later, Intel(R) C++ version 2019.x or later + is required. + * Building a YASK kernel with the Gnu C++ compiler is possible. + Limited testing with g++ 8.2.0 shows the "iso3dfd" kernel + runs about 30% slower compared to the same kernel built with + the Intel C++ compiler. + Older Gnu C++ compilers can produce kernels that run + many times slower. * Gnu C++ compiler, g++ (4.9.0 or later; 8.2.0 or later recommended). * Linux libraries `librt` and `libnuma`. * Perl (5.010 or later).