Skip to content

Commit

Permalink
Merge pull request #162 from andromeda/aec-b-bug-fixes
Browse files Browse the repository at this point in the history
AEC Bug fixes
  • Loading branch information
angelhof authored Mar 18, 2021
2 parents 58ffe04 + a7e10e9 commit e5f56ec
Show file tree
Hide file tree
Showing 1,681 changed files with 351 additions and 70 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
[submodule "parser/libdash"]
path = compiler/parser/libdash
url = git@github.com:angelhof/libdash.git
url = https://github.com/angelhof/libdash/
143 changes: 84 additions & 59 deletions compiler/gather_results.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
parser.add_argument('--all',
action='store_true',
help='generates all plots')
parser.add_argument('--debug',
action='store_true',
help='prints debugging info')

args = parser.parse_args()

Expand All @@ -28,6 +31,7 @@
print("See command usage with --help.")
exit(0)


SMALL_SIZE = 16
MEDIUM_SIZE = 18
BIGGER_SIZE = 20
Expand All @@ -52,6 +56,8 @@
BIG_UNIX50_RESULTS = "../evaluation/results/unix50_16_10737418240/"
COARSE_UNIX50_RESULTS = "../evaluation/results/unix50-naive/"

## Create the plots directory
os.makedirs("../evaluation/plots", exist_ok=True)

all_experiments = ["minimal_grep",
"minimal_sort",
Expand Down Expand Up @@ -118,7 +124,8 @@
"distr_auto_split": "split",
"distr_no_task_par_eager": "blocking-eager",
"distr_no_eager": "no-eager",
"distr_auto_split_fan_in_fan_out": "no-aux-cat-split"}
"distr_auto_split_fan_in_fan_out": "no-aux-cat-split",
"pash": "split"}

all_line_plots = ["split",
"mini-split",
Expand Down Expand Up @@ -223,7 +230,8 @@ def __rtruediv__(self, other):
return NotImplemented

if(self.value == 0):
print("Division by zero")
if(args.debug):
print("Division by zero")
return 0

## TODO: Change that to Result too
Expand Down Expand Up @@ -276,7 +284,8 @@ def safe_zero_div(a, b):
if(a is None or b is None):
return None
elif(b == 0):
print("WARNING: Division by zero")
if(args.debug):
print("WARNING: Division by zero")
return 0
else:
return a / b
Expand Down Expand Up @@ -375,9 +384,9 @@ def runtime_config_from_suffix(suffix):
runtime = suffix_to_runtime_config[suffix.split(".")[0]]
return runtime

def sequential_experiment_exec_time(prefix, scaleup_number):
def sequential_experiment_exec_time(prefix, scaleup_number, suffix='seq.time'):
config = Config(pash=False)
value = read_total_time('{}{}_seq.time'.format(prefix, scaleup_number))
value = read_total_time('{}{}_{}'.format(prefix, scaleup_number, suffix))
description = "execution time"
script_name = script_name_from_prefix(prefix)
result = Result(script_name, config, value, description)
Expand Down Expand Up @@ -440,6 +449,16 @@ def collect_distr_experiment_speedup_with_compilation(prefix, scaleup_numbers, s
# return (result_vec, compile_distr_speedups)
return (distr_speedups, compile_distr_speedups, seq_number)

## This function just collects the execution time for a specific suffix
def collect_experiment_scaleup_times_simple(prefix, scaleup_numbers, suffix='distr.time'):
    """Return only the parallel (distr) timings for one experiment.

    Thin wrapper over collect_experiment_scaleup_times: forwards
    ``prefix``, ``scaleup_numbers`` and ``suffix`` unchanged and keeps the
    middle element of the three-element result, dropping the sequential
    and compilation entries.
    """
    all_times = collect_experiment_scaleup_times(prefix, scaleup_numbers, suffix=suffix)
    ## Unpack as a triple so an unexpected result shape still fails loudly.
    _seq, distr_times, _compile = all_times
    return distr_times

def collect_non_pash_experiment_scaleup_times(prefix, scaleup_numbers, suffix):
    """Collect one sequential_experiment_exec_time result per scaleup number.

    For every ``n`` in ``scaleup_numbers`` (in order) this calls
    sequential_experiment_exec_time(prefix, n, suffix), which reads the
    timing file named ``{prefix}{n}_{suffix}``, and returns the results
    as a list.
    """
    times = []
    for width in scaleup_numbers:
        times.append(sequential_experiment_exec_time(prefix, width, suffix))
    return times


def collect_experiment_command_number(prefix, suffix, scaleup_numbers):
command_numbers = [read_distr_command_number('{}{}_{}'.format(prefix, n, suffix))
Expand Down Expand Up @@ -556,61 +575,58 @@ def plot_scaleup_lines(experiment, all_scaleup_numbers, all_speedup_results, cus

return lines, best_result, no_eager_distr_speedup

def plot_sort_with_baseline(results_dir):
def plot_sort_with_baseline(results_dir, small=True):

all_scaleup_numbers = [2, 4, 8, 16, 32, 64]
sort_prefix = '{}/sort_'.format(results_dir)
baseline_sort_prefix = '{}/baseline_sort/baseline_sort_'.format(results_dir)
baseline_sort_opt_prefix = '{}/baseline_sort/baseline_sort_opt_'.format(results_dir)

## Collect all sort numbers
seq_number, distr_numbers, _ = collect_experiment_scaleup_times(sort_prefix, all_scaleup_numbers)
sort_distr_speedup = [safe_zero_div(seq_number, t) for i, t in enumerate(distr_numbers)]
# sort_distr_speedup = collect_distr_experiment_speedup(sort_prefix, all_scaleup_numbers)
baseline_sort_distr_speedup = collect_baseline_experiment_speedups(baseline_sort_prefix,
[1] + [num*2
for num in all_scaleup_numbers],
seq_number)
# baseline_sort_opt_distr_speedup = collect_baseline_experiment_speedups(baseline_sort_opt_prefix,
# [1] + all_scaleup_numbers[1:],
# seq_numbers[0])

# output_diff = check_output_diff_correctness(prefix, all_scaleup_numbers)
if(small):
infix="small_"
all_scaleup_numbers = [2, 16]
else:
infix=""
all_scaleup_numbers = [2, 4, 8, 16, 32, 64]

sort_prefix = '{}/baseline_sort/baseline_sort_{}'.format(results_dir, infix)
double_scaleup_numbers = [2 * num for num in all_scaleup_numbers]

pash_times = collect_non_pash_experiment_scaleup_times(sort_prefix,
all_scaleup_numbers,
suffix='pash.time')

pash_no_eager_times = collect_non_pash_experiment_scaleup_times(sort_prefix,
all_scaleup_numbers,
suffix='pash_no_eager.time')

sort_par_times = collect_non_pash_experiment_scaleup_times(sort_prefix,
double_scaleup_numbers,
suffix='parallel.time')

sort_times = collect_non_pash_experiment_scaleup_times(sort_prefix,
[2],
suffix='seq.time')
sort_time = sort_times[0]
# print(pash_times)
# print(sort_par_times)
# print(sort_time)

sort_distr_speedup = [safe_zero_div(sort_time, t) for t in pash_times]
no_eager_distr_speedup = [safe_zero_div(sort_time, t) for t in pash_no_eager_times]
baseline_sort_distr_speedup = [safe_zero_div(sort_time, t) for t in sort_par_times]

fig, ax = plt.subplots()

## Plot speedup
ax.set_ylabel('Speedup')
ax.set_xlabel('--width')
ax.plot(all_scaleup_numbers, sort_distr_speedup, '-o', linewidth=0.5, label='Pash')
## Add the no eager times if they exist
# try:
# no_task_par_eager_distr_speedup = collect_distr_experiment_speedup(sort_prefix,
# all_scaleup_numbers,
# 'distr_no_task_par_eager.time')
# ax.plot(all_scaleup_numbers, no_task_par_eager_distr_speedup, '-p', linewidth=0.5, label='Pash - Blocking Eager')
# except ValueError:
# pass

try:
no_eager_distr_speedup = collect_distr_experiment_speedup(sort_prefix,
all_scaleup_numbers,
'distr_no_eager.time')
ax.plot(all_scaleup_numbers, no_eager_distr_speedup, '-^', linewidth=0.5, label='Pash - No Eager')
except ValueError:
pass

ax.plot(all_scaleup_numbers, baseline_sort_distr_speedup[1:], '-p', linewidth=0.5, label='sort --parallel')
# ax.plot(all_scaleup_numbers, baseline_sort_opt_distr_speedup[1:], '-', linewidth=0.5, label='sort --parallel -S 30%')

ax.plot(all_scaleup_numbers, no_eager_distr_speedup, '-^', linewidth=0.5, label='Pash - No Eager')
ax.plot(all_scaleup_numbers, baseline_sort_distr_speedup, '-p', linewidth=0.5, label='sort --parallel')

plt.xticks(all_scaleup_numbers[1:])
plt.legend(loc='lower right')
# plt.title("Comparison with sort --parallel")


plt.tight_layout()
plt.savefig(os.path.join('../evaluation/plots', "sort_baseline_comparison_scaleup.pdf"),bbox_inches='tight')
plt.savefig(os.path.join('../evaluation/plots', "sort_baseline_{}comparison_scaleup.pdf".format(infix)),bbox_inches='tight')


def collect_format_input_size(experiment):
Expand Down Expand Up @@ -837,11 +853,15 @@ def aggregate_unix50_results(all_results, scaleup_numbers):
return avg_distr_results

def compute_and_print_aggrs(individual_results, absolute_seq_times_s):
mean = sum(individual_results) / len(individual_results)
median = statistics.median(individual_results)
geo_mean = math.exp(np.log(individual_results).sum() / len(individual_results))
mean = safe_zero_div(sum(individual_results), len(individual_results))
if (len(individual_results) > 0):
median = statistics.median(individual_results)
else:
median = 0
geo_mean = math.exp(safe_zero_div(np.log(individual_results).sum(),
len(individual_results)))
weighted_res = [i*a for i, a in zip(individual_results, absolute_seq_times_s)]
weighted_avg = sum(weighted_res) / sum(absolute_seq_times_s)
weighted_avg = safe_zero_div(sum(weighted_res), sum(absolute_seq_times_s))
print(" Mean:", mean)
print(" Median:", median)
print(" Geometric Mean:", geo_mean)
Expand Down Expand Up @@ -1045,7 +1065,10 @@ def plot_unix50_avg_speedup(all_results, scaleup_numbers, filename):

def collect_all_unix50_results(unix50_results_dir, scaleup_numbers=[2, 4, 8, 16], suffix='distr.time'):

files = [f for f in os.listdir(unix50_results_dir)]
try:
files = [f for f in os.listdir(unix50_results_dir)]
except:
files = []
# print(files)
pipeline_numbers = sorted(list(set([f.split('_')[2] for f in files])))
# print(pipeline_numbers)
Expand All @@ -1062,15 +1085,16 @@ def collect_all_unix50_results(unix50_results_dir, scaleup_numbers=[2, 4, 8, 16]

return (all_results, fan_in_fan_out_results)

def collect_unix50_scaleup_times(all_results, scaleup=[2,4,8,16], small_prefix=""):
def collect_unix50_scaleup_times(all_results, scaleup=[2,4,8,16], small_prefix="", scatter=True):

# print(all_results)

for parallelism in scaleup:
make_unix50_bar_chart(all_results, scaleup, parallelism, small_prefix=small_prefix)
make_unix50_scatter_plot(all_results, scaleup, parallelism)
if(scatter):
make_unix50_scatter_plot(all_results, scaleup, parallelism)

plot_unix50_avg_speedup(all_results, scaleup, "unix50_throughput_scaleup.pdf")
# plot_unix50_avg_speedup(all_results, scaleup, "unix50_throughput_scaleup.pdf")


# def collect_unix50_coarse_scaleup_times(all_results):
Expand Down Expand Up @@ -1148,9 +1172,9 @@ def plot_tiling_experiments(fig, gs, experiments, all_experiment_results, all_sc

def print_aggregates(prefix, averages, no_eager_averages):
## Print average, geo-mean
one_liner_averages = [sum(res)/len(res) for res in averages]
all_no_eager_averages = [sum(res)/len(res) for res in no_eager_averages]
geo_means = [math.exp(np.log(res).sum() / len(res))
one_liner_averages = [safe_zero_div(sum(res), len(res)) for res in averages]
all_no_eager_averages = [safe_zero_div(sum(res), len(res)) for res in no_eager_averages]
geo_means = [math.exp(safe_zero_div(np.log(res).sum(), len(res)))
for res in averages]
print(prefix, "One-liners Aggregated results:")
print(" |-- Averages:", one_liner_averages)
Expand Down Expand Up @@ -1666,9 +1690,10 @@ def any_wrong(correctness, experiment, line_plots):
if args.eurosys2021:
generate_tables(experiments, results_dir=SMALL_RESULTS, table_suffix="-small", small=True)
generate_tables(experiments)
collect_unix50_scaleup_times(small_unix50_results, scaleup=[4], small_prefix="_1GB")
collect_unix50_scaleup_times(big_unix50_results, scaleup=[16], small_prefix="_10GB")
plot_sort_with_baseline(RESULTS)
collect_unix50_scaleup_times(small_unix50_results, scaleup=[4], small_prefix="_1GB", scatter=False)
collect_unix50_scaleup_times(big_unix50_results, scaleup=[16], small_prefix="_10GB", scatter=False)
plot_sort_with_baseline(RESULTS, small=True)
plot_sort_with_baseline(RESULTS, small=False)

## Legacy plots
if args.all:
Expand Down
6 changes: 3 additions & 3 deletions docs/eval.md
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,7 @@ Annotations can be thought of as defining a bidirectional correspondence between
Since command behaviors (and correspondence) can change based on their arguments, annotations contain a sequence of predicates.
Each predicate is accompanied by information that instantiates the correspondence between a command and a dataflow node.
Annotations for about 60 popular commands are stored in [./annotations](../annotations)encoded as JSON.
Annotations for about 60 popular commands are stored in [./annotations](../annotations) encoded as JSON.
These average about 14 lines per annotation, for a total of 846 lines of annotations.
Below we present two example annotations for `chmod` and `cut`.
Expand Down Expand Up @@ -487,7 +487,7 @@ To plot the results from any of the above experiments, do the following:
```sh
cd $PASH_TOP/compiler
python3 gather_results.py
python3 gather_results.py --eurosys2021
```
This will create plots for all invocations of `evaluation/eurosys/execute_eurosys_one_liners.sh`, one for each flag.
Expand Down Expand Up @@ -526,7 +526,7 @@ To plot the results from any of the above experiments, do the following:
```sh
cd $PASH_TOP/compiler
python3 gather_results.py
python3 gather_results.py --eurosys2021
```
This will create plots for both "1GB --width 4" and for "10GB --width 16".
Expand Down
20 changes: 15 additions & 5 deletions evaluation/eurosys/execute_baseline_sort.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ while getopts 'slh' opt; do
case $opt in
s) evaluation_level=1 ;;
l) evaluation_level=2 ;;
h) echo "There are three possible execution levels:"
h) echo "There are two possible execution levels:"
echo "option -s: Small input | --width 2, 16"
echo "option -l: Big input | -- width 2, 4, 8, 16, 32, 64"
exit 0 ;;
Expand Down Expand Up @@ -69,17 +69,27 @@ for n_in in "${n_inputs[@]}"; do
export $(cut -d= -f1 $env_file)

p_n_in="$(( $n_in * 2 ))"
experiment="baseline_sort_${p_n_in}"
experiment="baseline_sort_${intermediary_prefix}${p_n_in}"
echo "Executing sort with parallel flag for parallelism: ${p_n_in}"
{ time /bin/bash $sort_parallel_script "${p_n_in}" > /tmp/seq_output ; } 2> >(tee "${results}${experiment}_${intermediary_prefix}seq.time" >&2)
{ time /bin/bash $sort_parallel_script "${p_n_in}" > /tmp/seq_output ; } 2> >(tee "${results}${experiment}_parallel.time" >&2)

echo "Generating input and intermediary scripts... be patient..."
python3 "$PASH_TOP/evaluation/generate_microbenchmark_intermediary_scripts.py" \
$microbenchmarks_dir "sort" $n_in $intermediary_dir $env_suffix

exec_script="${intermediary_dir}sort_${n_in}_seq.sh"
experiment="baseline_sort_${n_in}"
experiment="baseline_sort_${intermediary_prefix}${n_in}"

if [ "$n_in" -eq 2 ]; then
echo "Executing sort with bash"
{ time /bin/bash $exec_script ; } 1> /tmp/bash_output 2> >(tee "${results}${experiment}_seq.time" >&2)
fi

echo "Executing pash (no eager) on sort with --width ${n_in}"
{ time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time --no_eager $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash_no_eager.time" >&2)
diff -s /tmp/seq_output /tmp/pash_output | head

echo "Executing pash on sort with --width ${n_in}"
{ time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_${intermediary_prefix}pash.time" >&2)
{ time $PASH_TOP/pa.sh -w "${n_in}" --log_file /tmp/pash_log --output_time $exec_script ; } 1> /tmp/pash_output 2> >(tee "${results}${experiment}_pash.time" >&2)
diff -s /tmp/seq_output /tmp/pash_output | head
done
2 changes: 1 addition & 1 deletion evaluation/eurosys/execute_web_index_dish_evaluation.sh
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ while getopts 'slh' opt; do
case $opt in
s) input_number=1000 ;;
l) input_number=100000 ;;
h) echo "There are three possible execution levels:"
h) echo "There are two possible execution levels:"
echo "option -s: 1,000 urls (about 1.5 minutes in bash)"
echo "option -l: 100,000 urls (a couple hours in bash) (EuroSys evaluation)"
exit 0 ;;
Expand Down
1 change: 1 addition & 0 deletions evaluation/microbenchmarks/sort_env_small.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
IN=$PASH_TOP/evaluation/scripts/input/1G.txt
3 changes: 2 additions & 1 deletion evaluation/plots/.gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
!*.pdf
*.pdf
*.tex
1 change: 1 addition & 0 deletions evaluation/results/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.time
1 change: 1 addition & 0 deletions evaluation/results_archive/plots/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
!*.pdf
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
16 changes: 16 additions & 0 deletions evaluation/results_archive/plots/microbenchmarks-tablesmall.tex
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
\begin{tabular*}{\textwidth}{l @{\extracolsep{\fill}} llllllll}
\toprule
Script ~&~ Structure & Input &Seq. Time & \multicolumn{2}{l}{\#Nodes(16, 64)} &\multicolumn{2}{l}{Compile Time (16, 64)} & Highlights \\
\midrule
nfa-regex ~&~ $3\times\tsta$ & 1~GB & 7m59.843s & \todo{X} & \todo{X} & 0.000s & 0.000s & complex NFA regex \\
sort ~&~ $\tsta, \tpur$ & 10~GB & 2m6.756s & \todo{X} & \todo{X} & 0.000s & 0.000s & \tti{sort}ing \\
top-n ~&~ $2\times\tsta, 4\times\tpur$ & 10~GB & 7m6.963s & \todo{X} & \todo{X} & 0.000s & 0.000s & double \tti{sort}, \tti{uniq} reduction \\
wf ~&~ $3\times\tsta, 3\times\tpur$ & 10~GB & 2m10.746s & \todo{X} & \todo{X} & 0.000s & 0.000s & double \tti{sort}, \tti{uniq} reduction \\
spell ~&~ $4\times\tsta, 3\times\tpur$ & 3~GB & 46.520s & 195 & \todo{X} & 0.324s & 0.000s & comparisons (\tti{comm}) \\
difference ~&~ $2\times\tsta, 3\times\tpur$ & 10~GB & 2m35.132s & \todo{X} & \todo{X} & 0.000s & 0.000s & non-parallelizable \tti{diff}ing \\
bi-grams ~&~ $3\times\tsta, 3\times\tpur$ & 3~GB & 1m11.240s & 284 & \todo{X} & 0.301s & 0.000s & stream shifting and merging \\
set-difference ~&~ $5\times\tsta, 2\times\tpur$ & 10~GB & 5m2.111s & \todo{X} & \todo{X} & 0.000s & 0.000s & two pipelines merging to a \tti{comm} \\
sort-sort ~&~ $\tsta, 2\times\tpur$ & 10~GB & 3m8.657s & 154 & \todo{X} & 0.327s & 0.000s & parallelizable \tpur after \tpur \\
shortest-scripts ~&~ $5\times\tsta, 2\times\tpur$ & 85~MB & 3m4.233s & \todo{X} & \todo{X} & 0.000s & 0.000s & long \tsta pipeline ending with \tpur \\
\bottomrule
\end{tabular*}
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

real 0m18.506s
user 0m2.786s
sys 0m2.997s
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

real 0m20.549s
user 0m3.399s
sys 0m2.741s
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

real 0m46.010s
user 3m31.077s
sys 0m8.524s
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

real 0m53.170s
user 2m22.789s
sys 0m5.766s
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@

real 0m0.708s
user 0m1.636s
sys 0m0.197s
Files ../evaluation//intermediary//set-diff_seq_output and ../evaluation//intermediary//set-diff_gnu_parallel_output are identical
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit e5f56ec

Please sign in to comment.