Skip to content

Commit

Permalink
Merge pull request #281 from intel/develop
Browse files Browse the repository at this point in the history
Develop
  • Loading branch information
chuckyount authored Sep 7, 2023
2 parents b57fda6 + e89fdab commit 76791f4
Show file tree
Hide file tree
Showing 11 changed files with 71 additions and 56 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,11 +48,14 @@ and Intel(R) graphics processors.
to make the generated code easier for humans to read.
You'll get a warning when running `make` if one of these doesn't exist.
Everything will still work, but the generated code will be difficult to read.
Reading the generated code is only necessary for debug or curiosity.
Reading the generated code is only necessary for debug, performance analysis, etc.
* SWIG (4.0.0 or later):
http://www.swig.org, for creating the Python interface.
* Python 3 (3.6.1 or later):
https://www.python.org/downloads, for creating and using the Python interface.
Included with Intel(R) oneAPI HPC Toolkit.
* Python `numpy` package for running Python interface tests.
Included with Intel(R) oneAPI HPC Toolkit.
* Doxygen (1.9.0 or later):
https://www.doxygen.nl, for creating updated API documentation.
If you're not changing the API documentation, you can view the existing documentation
Expand Down
Binary file modified docs/YASK-tutorial.pdf
Binary file not shown.
9 changes: 6 additions & 3 deletions src/common/common_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ namespace yask {
// for numbers above 9 (at least up to 99).

// Format: "major.minor.patch[-alpha|-beta]".
const string version = "4.04.04";
const string version = "4.04.05";

string yask_get_version_string() {
return version;
Expand All @@ -57,10 +57,12 @@ namespace yask {

// Return num with SI multiplier and "iB" suffix,
// e.g., 412KiB.
// Use only for storage bytes, e.g., not for
// rates like bytes/sec.
string make_byte_str(size_t nbytes)
{
if (!is_suffix_print_enabled)
return to_string(nbytes);
return to_string(nbytes) + " Bytes";

ostringstream os;
double num = double(nbytes);
Expand Down Expand Up @@ -89,7 +91,8 @@ namespace yask {
}

// Return num with SI multiplier, e.g. "3.14M".
// Use this one for rates, etc.
// Use this one for printing any number that is
// not number of storage bytes.
string make_num_str(idx_t num) {
if (!is_suffix_print_enabled || (num > -1000 && num < 1000))
return to_string(num);
Expand Down
8 changes: 4 additions & 4 deletions src/common/tuple.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -202,8 +202,8 @@ namespace yask {
size_t prev_size = 1;

// Loop thru dims.
int start_dim = _first_inner ? 0 : size()-1;
int end_dim = _first_inner ? size() : -1;
int start_dim = _first_inner ? 0 : get_num_dims()-1;
int end_dim = _first_inner ? get_num_dims() : -1;
int step_dim = _first_inner ? 1 : -1;
for (int di = start_dim; di != end_dim; di += step_dim) {
auto& i = _q.at(di);
Expand Down Expand Up @@ -242,8 +242,8 @@ namespace yask {
size_t prev_size = 1;

// Loop thru dims.
int start_dim = _first_inner ? 0 : size()-1;
int stop_dim = _first_inner ? size() : -1;
int start_dim = _first_inner ? 0 : get_num_dims()-1;
int stop_dim = _first_inner ? get_num_dims() : -1;
int step_dim = _first_inner ? 1 : -1;
for (int di = start_dim; di != stop_dim; di += step_dim) {
auto& i = _q.at(di);
Expand Down
4 changes: 2 additions & 2 deletions src/compiler/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ YC_CXX := $(CXX)
YC_CXXOPT ?= -O2
YC_CXXDBG ?= -g
YC_CXXFLAGS :=
YC_CXXFLAGS_API := -std=c++17
YC_CXXFLAGS_API += -Wall -Wno-unknown-pragmas -Wno-unused-variable
YC_CXXWARN := -Wall -Wno-unknown-pragmas -Wno-unused-variable
YC_CXXFLAGS_API := -std=c++17 $(YC_CXXWARN)
YC_CXX_INCFLAGS := $(addprefix -I,$(YC_INC_DIRS))
YC_CXX_INCFLAGS_API := $(addprefix -I,$(INC_DIR))

Expand Down
38 changes: 20 additions & 18 deletions src/kernel/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -31,20 +31,23 @@
YASK_BASE ?= $(abspath ../..)
include $(YASK_BASE)/src/common/common.mk

# Initial default settings for the YASK kernel library.
# Default settings for invoking the YASK compiler.
# These can be overridden on the 'make' command-line.
# See src/common/common.mk for more setting vars.
numa ?= 1
allow_new_var_types ?= 1
streaming_stores ?= 0
use_rcp ?= 0
use_ptrs ?= 1
use_safe_ptrs ?= 0
outer_domain_layout ?= 0
inner_misc_layout ?= 1
first_inner ?= 1
early_loads ?= 0
min_buffer_len ?= 1

# Default settings for the YASK kernel library.
# These can be overridden on the 'make' command-line.
# See src/common/common.mk for more setting vars.
numa ?= 1
allow_new_var_types ?= 1
streaming_stores ?= 0
use_rcp ?= 0
trace ?= 0
trace_mem ?= 0
check ?= 0
Expand All @@ -56,9 +59,7 @@ check ?= 0

# YASK compiler settings for offload.
ifeq ($(offload),1)
pfd_l1 := 0
pfd_l2 := 0
inner_loop_dim := 1
inner_loop_dim := 1
outer_domain_layout := 1
endif

Expand Down Expand Up @@ -87,7 +88,7 @@ ifeq ($(TARGET),knl)
VEC_MACROS += USE_RCP28
endif
MACROS += NUMA_PREF=1
pfd_l1 ?= 1
pfd_l1 := 1

else ifeq ($(TARGET),avx512)

Expand Down Expand Up @@ -156,18 +157,18 @@ endif
ifneq ($(step_dim),)
YC_FLAGS += -step-dim $(step_dim)
endif
ifneq ($(pfd_l1),)
YC_FLAGS += -l1-prefetch-dist $(pfd_l1)
endif
ifneq ($(pfd_l2),)
YC_FLAGS += -l2-prefetch-dist $(pfd_l2)
endif
ifneq ($(inner_loop_dim),)
YC_FLAGS += -inner-loop-dim $(inner_loop_dim)
endif
ifneq ($(min_buffer_len),)
YC_FLAGS += -min-buffer-len $(min_buffer_len)
endif
ifneq ($(pfd_l1),)
YC_FLAGS += -l1-prefetch-dist $(pfd_l1)
endif
ifneq ($(pfd_l2),)
YC_FLAGS += -l2-prefetch-dist $(pfd_l2)
endif

# Stencil compiler flags that are boolean.
ifeq ($(use_ptrs),1)
Expand Down Expand Up @@ -351,7 +352,7 @@ endif
# Compiler-specific settings.

# Create a compiler invocation to test for macro settings.
YK_CXX_TEST := $(YK_CXX)
YK_CXX_TEST := $(YK_CXXCMD)
cxx_is_llvm_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_LLVM_COMPILER)
cxx_is_clang := $(call MACRO_DEF,$(YK_CXX_TEST),__clang__)
cxx_is_intel := $(call MACRO_DEF,$(YK_CXX_TEST),__INTEL_COMPILER)
Expand Down Expand Up @@ -512,7 +513,8 @@ NANO_BLOCK_LOOP_ORDER ?= DOMAIN_LOOP_DIMS
ifeq ($(offload),1)
NANO_BLOCK_LOOP_OMP ?= omp target teams distribute thread_limit(thread_limit) device(KernelEnv::_omp_devn)
NANO_BLOCK_LOOP_FLAGS += -omp '$(NANO_BLOCK_LOOP_OMP)'
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) omp loop($(NANO_BLOCK_LOOP_ORDER)) { }
NANO_BLOCK_LOOP_MODS += omp
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) loop($(NANO_BLOCK_LOOP_ORDER)) { }
else
NANO_BLOCK_LOOP_CODE := $(NANO_BLOCK_LOOP_MODS) loop($(NANO_BLOCK_LOOP_ORDER)) { }
endif
Expand Down
43 changes: 20 additions & 23 deletions src/kernel/lib/alloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,10 +103,14 @@ namespace yask {
else if (numa_available() != -1) {
numa_set_bind_policy(0);
if (numa_pref >= 0 && numa_pref <= numa_max_node())
numa_alloc_onnode(nbytes, numa_pref);
p = numa_alloc_onnode(nbytes, numa_pref);
else
numa_alloc_local(nbytes);
p = numa_alloc_local(nbytes);
// Interleaved not available.

if (!p)
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
" using numa-node (or policy) " + to_string(numa_pref));
}
else
THROW_YASK_EXCEPTION("explicit NUMA policy allocation is not available");
Expand Down Expand Up @@ -158,15 +162,6 @@ namespace yask {

#endif // not USE_NUMA_POLICY_LIB.

#else
THROW_YASK_EXCEPTION("NUMA allocation is not enabled; build with numa=1");
#endif // USE_NUMA.

// Should not get here w/null p; throw exception.
if (!p)
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
" using numa-node (or policy) " + to_string(numa_pref));

// Check alignment.
if ((size_t(p) & (CACHELINE_BYTES - 1)) != 0)
FORMAT_AND_THROW_YASK_EXCEPTION("NUMA-allocated " << p << " is not " <<
Expand All @@ -177,6 +172,10 @@ namespace yask {

// Return as a char* as required for shared_ptr ctor.
return static_cast<char*>(p);

#else
THROW_YASK_EXCEPTION("NUMA allocation is not enabled; build with numa=1");
#endif // USE_NUMA.
}

// Reverse numa_alloc().
Expand Down Expand Up @@ -217,38 +216,36 @@ namespace yask {

void *p = 0;

#ifdef USE_OFFLOAD
THROW_YASK_EXCEPTION("mapping offload device memory to shm not yet supported; "
"use '-no-use_shm' option");

// Allocate using MPI shm.
#ifdef USE_MPI
#elif defined(USE_MPI)
assert(shm_comm);
assert(shm_win);
MPI_Info win_info;
MPI_Info_create(&win_info);
MPI_Info_set(win_info, "alloc_shared_noncontig", "true");
MPI_Win_allocate_shared(nbytes, 1, win_info, *shm_comm, &p, shm_win);
MPI_Info_free(&win_info);
MPI_Win_lock_all(0, *shm_win);
#else
THROW_YASK_EXCEPTION("MPI shm allocation is not enabled; build with mpi=1");
#endif

if (!p)
THROW_YASK_EXCEPTION("cannot allocate " + make_byte_str(nbytes) +
" using MPI shm");
MPI_Info_free(&win_info);
MPI_Win_lock_all(0, *shm_win);

// Check alignment.
if ((size_t(p) & (CACHELINE_BYTES - 1)) != 0)
FORMAT_AND_THROW_YASK_EXCEPTION("MPI shm-allocated " << p << " is not " <<
CACHELINE_BYTES << "-byte aligned");

#ifdef USE_OFFLOAD
THROW_YASK_EXCEPTION("mapping offload device memory to shm not yet supported; "
"use '-no-use_shm'");
#endif

// Cannot typically use huge pages for shm, so not calling set_huge().

// Return as a char* as required for shared_ptr ctor.
return static_cast<char*>(p);
#else
THROW_YASK_EXCEPTION("MPI shm allocation is not enabled; build with mpi=1");
#endif
}

// Reverse shm_alloc().
Expand Down
2 changes: 0 additions & 2 deletions src/kernel/lib/auto_tuner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -431,8 +431,6 @@ namespace yask {
}
} // beyond next neighbor of center.
} // while(true) search for new setting to try.

THROW_YASK_EXCEPTION("(internal fault) exited from infinite loop");
} // eval.

// Apply best settings if avail, and adjust other settings.
Expand Down
2 changes: 2 additions & 0 deletions src/kernel/lib/indices.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -544,13 +544,15 @@ namespace yask {
static_assert(std::is_trivially_copyable<Indices>::value,
"Needed for OpenMP offload");

#if 0
// Define OMP reductions on Indices.
#pragma omp declare reduction(min_idxs : Indices : \
omp_out = omp_out.min_elements(omp_in) ) \
initializer (omp_priv = omp_orig)
#pragma omp declare reduction(max_idxs : Indices : \
omp_out = omp_out.max_elements(omp_in) ) \
initializer (omp_priv = omp_orig)
#endif

// Layout base class.
// This class hierarchy is NOT virtual.
Expand Down
4 changes: 4 additions & 0 deletions src/kernel/yask.sh
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ while true; do
echo " -v"
echo " Shortcut for the following options:"
echo " $val"
echo " Adds '/tests' to path of log_dir."
echo " If you want to override any of these values, place them after '-v'."
echo " -show_arch"
echo " Print the default architecture string and exit."
Expand Down Expand Up @@ -357,6 +358,9 @@ dump="head -v -n -0"
# Init log file.
: ${logfile:=yask.$stencil.$arch.$exe_host.n$nnodes.r$nranks.`date +%Y-%m-%d_%H-%M-%S`_p$$.log}
# If a log directory was specified, place the log file inside it.
if [[ -n "$logdir" ]]; then
# When doval is 1 (presumably set by the '-v' validation shortcut -- confirm),
# redirect logs to a 'tests' sub-directory of the requested log directory.
# NOTE(review): creation of this directory is not visible in this excerpt.
if [[ $doval == 1 ]]; then
logdir="$logdir/tests"
fi
logfile="$logdir/$logfile"
fi
echo "Writing log to '$logfile'."
Expand Down
12 changes: 9 additions & 3 deletions utils/bin/gen_loops.pl
Original file line number Diff line number Diff line change
Expand Up @@ -961,8 +961,14 @@ ($)
# use OpenMP on next loop.
elsif (lc $tok eq 'omp') {

$features |= $bOmpPar;
print "info: using OpenMP on following loop(s).\n";
if ($OPT{omp} !~ /\w/) {
warn "info: ignoring OpenMP loop modifier because '-omp' argument is empty.\n";
}

else {
$features |= $bOmpPar;
print "info: using OpenMP on following loop(s).\n";
}
}

# generate manual-scheduling optimizations in next loop.
Expand Down Expand Up @@ -1238,7 +1244,7 @@ ()
" tiled: generate tiled scan within a >1D loop.\n",
" serpentine: generate reverse scan when enclosing loop index is odd.*\n",
" square_wave: generate 2D square-wave scan for two innermost dims of >1D loop.*\n",
" * Do not use these modifiers for YASK rank or block loops because they must\n",
" * Do not use these modifiers for YASK block or Mega-block loops because they must\n",
" execute with strictly-increasing indices when using temporal tiling.\n",
" Also, do not combine these modifiers with 'tiled' or 'manual'.\n",
"A 'ScanIndices' type must be defined in C++ code prior to including the generated code.\n",
Expand Down

0 comments on commit 76791f4

Please sign in to comment.