Skip to content

Commit

Permalink
Merge branch 'sub-domains'
Browse files Browse the repository at this point in the history
  • Loading branch information
chuckyount committed Feb 9, 2017
2 parents e6798b4 + 9134b36 commit fa0d613
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 49 deletions.
22 changes: 10 additions & 12 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@
#
# real_bytes: FP precision: 4=float, 8=double.
#
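# fold: How to fold vectors (x*y*z), i.e., how many elements each
# vector spans in each dimension; e.g., x=4,y=4,z=1 packs
# 4*4*1 = 16 elements into one vector.
# Vectorization in dimensions perpendicular to the inner loop
# (defined by BLOCK_LOOP_CODE below) often works well.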
#
# cluster: How many folded vectors to evaluate simultaneously.
#
# eqs: comma-separated name=substr pairs used to group
# grid update equations into sets.
#
Expand Down Expand Up @@ -103,7 +109,7 @@ MACROS += MAX_EXCH_DIST=0
radius ?= 2
cluster ?= x=2

else ifeq ($(findstring awp,$(stencil)),awp)
else ifneq ($(findstring awp,$(stencil)),)
eqs ?= velocity=vel,stress=str
time_alloc ?= 1
def_block_size ?= 32
Expand Down Expand Up @@ -205,17 +211,14 @@ def_rank_size ?= 128
def_block_size ?= 64
def_pad ?= 1

# How to fold vectors (x*y*z).
# Vectorization in dimensions perpendicular to the inner loop
# (defined by BLOCK_LOOP_CODE below) often works well.

ifneq ($(findstring INTRIN512,$(MACROS)),) # 512 bits.

ifeq ($(real_bytes),4)
fold ?= x=4,y=4,z=1
else
fold ?= x=4,y=2,z=1
endif
cluster ?= x=1

else # not 512 bits.

Expand All @@ -224,14 +227,9 @@ fold ?= x=8
else
fold ?= x=4
endif
cluster ?= z=2

cluster ?= y=2

endif # 512 bits.

# How many vectors to compute at once (unrolling factor in
# each dimension).
cluster ?= x=1,y=1,z=1
endif # not 512 bits.

# More build flags.
ifeq ($(mpi),1)
Expand Down
Binary file modified docs/YASK-intro.pdf
Binary file not shown.
4 changes: 2 additions & 2 deletions stencil-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ while true; do
echo "The sh_prefix command is used to prefix a sub-shell."
echo "The exe_prefix command is used to prefix the executable (set to 'true' to avoid actual run)."
echo "If -host <hostname> is given, 'ssh <hostname>' will be pre-pended to the sh_prefix command."
echo "If -ranks <N> is given, 'mpirun -n <N>' is pre-pended to the exe_prefix command,"
echo "If -ranks <N> is given, 'mpirun -n <N> -ppn <N>' is pre-pended to the exe_prefix command,"
echo " and -nrx <N> is passed to the executable;"
echo " use -exe_prefix <command> explicitly if a different MPI command is needed,"
echo " and/or override -nrx as needed."
Expand Down Expand Up @@ -127,7 +127,7 @@ fi

# MPI
if [[ -n "$nranks" ]]; then
exe_prefix="mpirun -n $nranks $exe_prefix"
exe_prefix="mpirun -n $nranks -ppn $nranks $exe_prefix"
envs="$envs I_MPI_PRINT_VERSION=1 I_MPI_DEBUG=5"
fi

Expand Down
56 changes: 21 additions & 35 deletions stencil-tuner.pl
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ sub usage {
" -makePrefix=<CMD> Prefix make command with <CMD>.\n".
" -makeArgs=<ARGS> Pass additional <ARGS> to make command.\n".
" -runArgs=<ARGS> Pass additional <ARGS> to stencil-run command.\n".
" -ranks=<N> Number of ranks to use on host.\n".
" -ranks=<N> Number of ranks to use on host (x-dimension only).\n".
"\nstencil options:\n".
" -stencil=<NAME> Specify stencil: iso3dfd, 3axis, 9axis, 3plane, cube, ave, awp, ... (required).\n".
" -dp|-sp Specify FP precision (default is DP for 'ave' stencil, SP for others).\n".
Expand Down Expand Up @@ -396,7 +396,6 @@ sub usage {
# list of possible block-loop templates.
# D0..D3 will get replaced by bv..bz, but not necessarily in that order.
# modifiers 'pipeline' & 'prefetch' will be removed if not enabled.
# modifier 'omp' will be replaced w/'crew' if crew is used.
# modifier placeholder 'PATH' will be removed or changed as selected.
# this is the loop taken by each OpenMP task.
my @blockLoops =
Expand Down Expand Up @@ -551,7 +550,6 @@ sub usage {
[ -$maxPfdl2, $maxPfdl2, 1, 'pfdl2' ],

# other build options.
[ 0, 0, 1, 'crew' ], # crew enabled?
[ 0, 100, 1, 'exprSize' ], # expression-size threshold.
[ 0, $#schedules, 1, 'ompSchedule' ], # OMP for schedule.

Expand Down Expand Up @@ -807,30 +805,21 @@ ($$$)
while (<CMD>) {
push @cmdOut, $_;

if (/time-dim-size:\s*(\d+)/i) {
$timeDim = $1;
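# Count spatial grids from the per-grid allocation messages:
# a 4D grid adds its time-dimension size (t=N); a 3D grid adds one.
# E.g.,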
# 4D (t=1 * x=8 * y=1 * z=1) 'vel_x' data is at 0x7fce08200000: 1.176K element(s) of 4 byte(s) each, 147 vector(s), 4.59375KiB.
# 3D (x=8 * y=1 * z=1) 'lambda' data is at 0x7fce0820f880: 600 element(s) of 4 byte(s) each, 75 vector(s), 2.34375KiB.
if (/^\s*4D.*t=(\d+)/) {
$numSpatialGrids += $1;
}
elsif (/num grids:\s*(\d+)/i) {
$numGrids = $1;
}
elsif (/num grids to be updated:\s*(\d+)/i) {
$numUpdatedGrids = $1;
elsif (/^\s*3D.*x=/) {
$numSpatialGrids += 1;
}
}
close CMD;
if (!$timeDim || !$numGrids || !$numUpdatedGrids) {
if (!$numSpatialGrids) {
map { print ">> $_"; } @cmdOut;

die "error: could not determine time-dimension size from '$cmd'.\n"
if !$timeDim;
die "error: could not determine number of grids from '$cmd'.\n"
if !$numGrids;
die "error: could not determine number of updated grids from '$cmd'.\n"
if !$numUpdatedGrids;
die "error: no grids defined in '$cmd'.\n";
}

# calculate number of spatial grids.
$numSpatialGrids = ($numGrids - $numUpdatedGrids) + ($numUpdatedGrids * $timeDim);
print "Determined that $numSpatialGrids spatial grids are allocated.\n";
}

Expand Down Expand Up @@ -1060,7 +1049,7 @@ ($$$$$$$)
if ($N == 1) {

# keep best rate.
if (defined $secs) {
if (defined $secs && $secs > 0) {
my $rate = $pts / $secs;
if (!defined $bestRate || $rate > $bestRate) {
print "new best rate is $rate pts/sec.\n";
Expand Down Expand Up @@ -1213,7 +1202,6 @@ sub fitness {
my @cvs = readHashes($h, 'c', 1); # in vectors, not in points!
my @ps = readHashes($h, 'p', 0);
my $fold = readHash($h, 'fold', 1);
my $crew = readHash($h, 'crew', 1);
my $exprSize = readHash($h, 'exprSize', 1);
my $thread_divisor_exp = readHash($h, 'thread_divisor_exp', 0);
my $bthreads_exp = readHash($h, 'bthreads_exp', 0);
Expand All @@ -1234,10 +1222,6 @@ sub fitness {
# block loops.
my $blockCode = makeLoopCode($h, 'block', 'b', 'v', \@blockLoops);
$blockCode =~ s/\bpipeline\b//g if !$pipe;
if ($crew) {
$blockCode =~ s/\bomp\b/crew/g; # replace omp w/crew.
$crew = 0 unless $blockCode =~ /\bcrew\b/; # disable crew if not found.
}
if ($pfdl1 > 0 && $pfdl2 > 0) {
$blockCode =~ s/\bprefetch\b/prefetch(L1,L2)/g;
} elsif ($pfdl1 > 0) {
Expand Down Expand Up @@ -1404,18 +1388,21 @@ sub fitness {
}

# other vars.
$mvars .= " omp_schedule=$scheduleStr crew=$crew expr_size=$exprSize";
$mvars .= " omp_schedule=$scheduleStr expr_size=$exprSize";
$mvars .= " mpi=1" if $nranks > 1;

# how to make.
my $makeCmd = getMakeCmd($macros, $mvars);

# how to run.
my $runCmd = getRunCmd();
$runCmd .= " -thread_divisor ".(1 << $thread_divisor_exp)." -block_threads ".(1 << $bthreads_exp);
my $runCmd = getRunCmd(); # shell command plus any extra args.
$runCmd .= " -ranks $nranks" if $nranks > 1;
my $args = ""; # exe args.
$args .= " -thread_divisor ".(1 << $thread_divisor_exp);
$args .= " -block_threads ".(1 << $bthreads_exp);

# sizes.
my $args = "-dn $vars";
$args .= " -dn $vars" if $vars > 1;
$args .= " -dx $ds[0] -dy $ds[1] -dz $ds[2]";
$args .= " -rx $rs[0] -ry $rs[1] -rz $rs[2]";
$args .= " -bx $bs[0] -by $bs[1] -bz $bs[2]";
Expand All @@ -1429,10 +1416,9 @@ sub fitness {

# various commands.
my $testCmd = "$runCmd -v"; # validation on a small problem size.
my $simCmd = "$runCmd -t 1 -dt 1 $args"; # simulation w/1 trial & 1 step.
$runCmd .= " -ranks $nranks" if $nranks > 1; # add MPI args for short and long runs.
my $shortRunCmd = "$runCmd -t 1 -dt $shortIters $args"; # fast run for 'upper-bound' time.
my $longRunCmd = "$runCmd -t $longTrials -dt $longIters $args"; # normal run w/more trials.
my $simCmd = "$runCmd $args -t 1 -dt 1"; # simulation w/1 trial & 1 step.
my $shortRunCmd = "$runCmd $args -t 1 -dt $shortIters"; # fast run for 'upper-bound' time.
my $longRunCmd = "$runCmd $args -t $longTrials -dt $longIters"; # normal run w/more trials.
my $cleanCmd = "make clean";

# add kill command to prevent runaway code.
Expand Down

0 comments on commit fa0d613

Please sign in to comment.