From 32ce1be4406161c6e95524531f107cccc3f280e0 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Wed, 1 Nov 2023 15:06:18 +0000 Subject: [PATCH 1/2] Add benchmark plot script --- benchmark/bench_bmat8.cpp | 15 ++-- benchmark/bench_epu8.cpp | 58 ++++++++-------- etc/bench_plot.py | 139 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 172 insertions(+), 40 deletions(-) create mode 100755 etc/bench_plot.py diff --git a/benchmark/bench_bmat8.cpp b/benchmark/bench_bmat8.cpp index ebd4960a..dd7efccc 100644 --- a/benchmark/bench_bmat8.cpp +++ b/benchmark/bench_bmat8.cpp @@ -57,8 +57,7 @@ class Fix_BMat8 { pair_sample; // not const, transpose2 is in place }; -TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", - "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Row space size", "[BMat8][000]") { BENCHMARK_MEM_FN(row_space_size_ref, sample); BENCHMARK_MEM_FN(row_space_size_bitset, sample); BENCHMARK_MEM_FN(row_space_size_incl1, sample); @@ -66,14 +65,13 @@ TEST_CASE_METHOD(Fix_BMat8, "Row space size benchmarks 1000 BMat8", BENCHMARK_MEM_FN(row_space_size, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose benchmarks 1000 BMat8", "[BMat8][000]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose", "[BMat8][000]") { BENCHMARK_MEM_FN(transpose, sample); BENCHMARK_MEM_FN(transpose_mask, sample); BENCHMARK_MEM_FN(transpose_maskd, sample); } -TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR_EQ(transpose, pair_sample); BENCHMARK_MEM_FN_PAIR_EQ(transpose_mask, pair_sample); BENCHMARK_MEM_FN_PAIR_EQ(transpose_maskd, pair_sample); @@ -86,16 +84,13 @@ TEST_CASE_METHOD(Fix_BMat8, "Transpose pairs benchmarks 1000 BMat8", }; } -TEST_CASE_METHOD(Fix_BMat8, "Inclusion of row spaces benchmarks 1000 BMat8", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Row spaces inclusion", "[BMat8][002]") { BENCHMARK_MEM_FN_PAIR(row_space_included_ref, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included_bitset, pair_sample); BENCHMARK_MEM_FN_PAIR(row_space_included, pair_sample); } -TEST_CASE_METHOD(Fix_BMat8, - "Inclusion of row spaces benchmarks 1000 BMat8 by pairs", - "[BMat8][002]") { +TEST_CASE_METHOD(Fix_BMat8, "Pair row space inclusion", "[BMat8][002]") { BENCHMARK("rotating pairs implementation") { for (auto &pair : pair_sample) { auto res = BMat8::row_space_included2(pair.first, pair.second, diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index b60124fc..91c426a4 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -52,11 +52,11 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; static constexpr const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, - 9, 8, 11, 10, 13, 12, 15, 14}; + 9, 8, 11, 10, 13, 12, 15, 14}; static constexpr const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, - 7, 10, 9, 12, 11, 14, 13, 15}; + 7, 10, 9, 12, 11, 14, 13, 15}; static constexpr const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, - 0, FF, 0, FF, 0, FF, 0, FF}; + 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { b = HPCombi::permuted(a, even); @@ -79,7 +79,7 @@ inline epu8 insertion_sort(epu8 p) { return p; } -inline epu8 radix_sort(epu8 p) { +__attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { auto &a = HPCombi::as_array(p); std::array stat{}; for (int i = 0; i < 16; i++) @@ -111,7 +111,7 @@ static const epu8 bla = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; } // namespace -TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { +TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda | perms", std_sort, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", std_sort, Fix_epu8::perms); @@ -147,7 +147,6 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); } - TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { BENCHMARK_FREE_FN_PAIR(HPCombi::permuted_ref, pairs); BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); @@ -155,21 +154,18 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { /* int Bench_hsum() { - myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_ref2_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_ref3_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - - myBench("hsum_ref_nolmbd", HPCombi::horiz_sum_ref, sample.perms); - myBench("hsum_gen_nolmbd", HPCombi::horiz_sum_gen, sample.perms); - myBench("hsum_sum4_nolmbd", HPCombi::horiz_sum4, sample.perms); - myBench("hsum_sum3_nolmbd", HPCombi::horiz_sum3, sample.perms); - - MYBENCH("hsum_ref_lmbd", HPCombi::horiz_sum_ref, sample.perms); - MYBENCH("hsum_gen_lmbd", HPCombi::horiz_sum_gen, sample.perms); - MYBENCH("hsum_sum4_lmbd", HPCombi::horiz_sum4, sample.perms); - MYBENCH("hsum_sum3_lmbd", HPCombi::horiz_sum3, sample.perms); - return 0; +TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { + BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_sum3, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda ", horiz_sum_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda ", horiz_sum3, Fix_epu8::perms); } +/* // ################################################################################## int Bench_psum() { @@ -213,8 +209,9 @@ int Bench_pmax() { myBench("pmax_ref3_nolmbd", HPCombi::partial_max_ref, sample.perms); myBench("pmax_ref_nolmbd", HPCombi::partial_max_ref, sample.perms); - // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, sample.perms); - myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, sample.perms); + // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, +sample.perms); myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, +sample.perms); MYBENCH("pmax_ref_lmbd", HPCombi::partial_max_ref, sample.perms); // MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); @@ -248,8 +245,9 @@ int Bench_pmin() { myBench("pmin_ref3_nolmbd", HPCombi::partial_min_ref, sample.perms); myBench("pmin_ref_nolmbd", HPCombi::partial_min_ref, sample.perms); - // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, sample.perms); - myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, sample.perms); + // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, +sample.perms); myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, +sample.perms); MYBENCH("pmin_ref_lmbd", HPCombi::partial_min_ref, sample.perms); // MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); @@ -282,17 +280,17 @@ int Bench_eval() { ################################################################################## int Bench_first_diff() { MYBENCH2("firstDiff_ref_lmbd", HPCombi::first_diff_ref, sample.perms); - MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, sample.perms); - MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, sample.perms); - return 0; + MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, +sample.perms); MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, +sample.perms); return 0; } // ################################################################################## int Bench_last_diff() { MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); - MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, sample.perms); - MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, sample.perms); - return 0; + MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, +sample.perms); MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, +sample.perms); return 0; } */ } // namespace HPCombi diff --git a/etc/bench_plot.py b/etc/bench_plot.py new file mode 100755 index 00000000..f619bf42 --- /dev/null +++ b/etc/bench_plot.py @@ -0,0 +1,139 @@ +#!/usr/bin/env python3 + +import os +import re +import statistics as stats +import sys + +import matplotlib +import numpy as np +from bs4 import BeautifulSoup +from matplotlib import pyplot as plt + +# This file should be from libsemigroups/etc + +matplotlib.rcParams["mathtext.fontset"] = "stix" +matplotlib.rcParams["font.family"] = "STIXGeneral" + +color = [ + (238 / 255, 20 / 255, 135 / 255), + (0 / 255, 221 / 255, 164 / 255), + (86 / 255, 151 / 255, 209 / 255), + (249 / 255, 185 / 255, 131 / 255), + (150 / 255, 114 / 255, 196 / 255), +] + +# Filenames should be: name.something.xml -> name.png + + +def normalize_xml(xml_fnam): + with open(xml_fnam, "r") as f: + xml = f.read() + xml = re.sub("<", "<", xml) + with open(xml_fnam, "w") as f: + f.write(xml) + + +def xml_stdout_get(xml, name): + try: + return xml.find("StdOut").find(name)["value"] + except (KeyError, TypeError, AttributeError): + return None + + +def time_unit(Y): + time_units = ("microseconds", "milliseconds", "seconds") + index = 0 + + while all(y > 1000 for y in Y) and index < len(time_units): + index += 1 + Y = [y / 1000 for y in Y] + return time_units[index], Y + +def add_plot(xml_fnam, num_bars=4): + global color; + current_bar = 0 + Y = [] + Y_for_comparison = None + labels = [] + + xml = BeautifulSoup(open(xml_fnam, "r"), "xml") + total_cols = 0 + xticks_label = [] + xticks_pos = [] + for x, test_case in enumerate(xml.find_all("TestCase")): + results = test_case.find_all("BenchmarkResults") + Y = ( + np.array([float(x.find("mean")["value"]) for x in results]) / 1 + ) # times in nanoseconds + X = np.arange(total_cols + 1, total_cols + len(Y) + 1, 1) + xticks_label.append(("\n" * (x % 2)) + test_case["name"]) + xticks_pos.append(total_cols + 1 + (len(Y) / 2) - 0.5) + bars = plt.bar( + X, + Y, + 1, + align="center", + color=color[:len(Y)], + ) + total_cols += len(Y) + 1 + plt.yscale("log", nonpositive="clip") + plt.ylabel("Time in ns") + plt.xticks(xticks_pos, xticks_label) + # plt.legend(loc="upper right") + + # print(Y) + # width = 1 + + + # plt.axhline( + # stats.mean(Y), + # color=color[current_bar], + # linestyle="--", + # lw=1, + # xmin=0.01, + # xmax=0.99, + # ) + + # current_bar += 1 + # if current_bar == num_bars - 1: + # Ys = zip(*sorted(zip(*Ys))) + # for i, Y in enumerate(Ys): + # X = np.arange(i, num_bars * len(Y), num_bars) + # bars = plt.bar( + # X, + # Y, + # width, + # align="center", + # color=color[i], + # label=labels[i], + # ) + # plt.xticks( + # np.arange(1, num_bars * (len(X) + 1), num_bars * 20), + # np.arange(0, len(X) + num_bars - 1, 20), + # ) + # plt.xlabel("Test case") + # plt.ylabel("Time (relative)") + # plt.legend(loc="upper left") + +def check_filename(xml_fnam): + if len(xml_fnam.split(".")) < 2: + raise ValueError( + f"expected filename of form x.xml found {xml_fnam}" + ) + + +from sys import argv + +args = sys.argv[1:] + +for x in args: + check_filename(x) + # TODO more arg checks +for x in args: + add_plot(x) +xml_fnam = args[0] +png_fnam = "".join(xml_fnam.split(".")[:-1]) + ".png" +print("Writing {} . . .".format(png_fnam)) +plt.savefig(png_fnam, format="png", dpi=300) +sys.exit(0) From 949e41c31fa82efa4cf23b1c95822600d2b81271 Mon Sep 17 00:00:00 2001 From: "James D. Mitchell" Date: Wed, 1 Nov 2023 15:12:23 +0000 Subject: [PATCH 2/2] Finalize bench_epu8 --- benchmark/bench_epu8.cpp | 210 +++++++++++++++++++-------------------- 1 file changed, 104 insertions(+), 106 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 91c426a4..a577ac2b 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -31,17 +31,17 @@ namespace { struct RoundsMask { constexpr RoundsMask() : arr() { - for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) - arr[i] = HPCombi::sorting_rounds[i] < HPCombi::epu8id; + for (unsigned i = 0; i < sorting_rounds.size(); ++i) + arr[i] = sorting_rounds[i] < epu8id; } - epu8 arr[HPCombi::sorting_rounds.size()]; + epu8 arr[sorting_rounds.size()]; }; const auto rounds_mask = RoundsMask(); inline epu8 sort_pair(epu8 a) { - for (unsigned i = 0; i < HPCombi::sorting_rounds.size(); ++i) { - epu8 minab, maxab, b = HPCombi::permuted(a, HPCombi::sorting_rounds[i]); + for (unsigned i = 0; i < sorting_rounds.size(); ++i) { + epu8 minab, maxab, b = permuted(a, sorting_rounds[i]); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(minab, maxab, rounds_mask.arr[i]); @@ -59,11 +59,11 @@ inline epu8 sort_odd_even(epu8 a) { 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { - b = HPCombi::permuted(a, even); + b = permuted(a, even); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(minab, maxab, mask); - b = HPCombi::permuted(a, odd); + b = permuted(a, odd); minab = simde_mm_min_epi8(a, b); maxab = simde_mm_max_epi8(a, b); a = simde_mm_blendv_epi8(maxab, minab, mask); @@ -72,7 +72,7 @@ inline epu8 sort_odd_even(epu8 a) { } inline epu8 insertion_sort(epu8 p) { - auto &a = HPCombi::as_array(p); + auto &a = as_array(p); for (int i = 0; i < 16; i++) for (int j = i; j > 0 && a[j] < a[j - 1]; j--) std::swap(a[j], a[j - 1]); @@ -80,7 +80,7 @@ inline epu8 insertion_sort(epu8 p) { } __attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { - auto &a = HPCombi::as_array(p); + auto &a = as_array(p); std::array stat{}; for (int i = 0; i < 16; i++) stat[a[i]]++; @@ -92,18 +92,18 @@ __attribute__((always_inline)) inline epu8 radix_sort(epu8 p) { } inline epu8 std_sort(epu8 &p) { - auto &ar = HPCombi::as_array(p); + auto &ar = as_array(p); std::sort(ar.begin(), ar.end()); return p; } inline epu8 arr_sort(epu8 &p) { - auto &ar = HPCombi::as_array(p); - return HPCombi::from_array(HPCombi::sorted_vect(ar)); + auto &ar = as_array(p); + return from_array(sorted_vect(ar)); } inline epu8 gen_sort(epu8 p) { - HPCombi::as_VectGeneric(p).sort(); + as_VectGeneric(p).sort(); return p; } @@ -133,8 +133,8 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda | perms", sort_pair, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda | perms", sort_pair, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda | perms", HPCombi::sorted, Fix_epu8::perms); - BENCHMARK_LAMBDA("| lambda | perms", HPCombi::sorted, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda | perms", sorted, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda | perms", sorted, Fix_epu8::perms); // lambda function is needed for inlining @@ -144,7 +144,7 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda | vects", insertion_sort, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", sort_odd_even, Fix_epu8::vects); BENCHMARK_LAMBDA("| lambda | vects", sort_pair, Fix_epu8::vects); - BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); + BENCHMARK_LAMBDA("| lambda | vects", sorted, Fix_epu8::vects); } TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { @@ -165,132 +165,130 @@ TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda ", horiz_sum4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda ", horiz_sum3, Fix_epu8::perms); } + +TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { + + BENCHMARK_FREE_FN("| no lambda", partial_sums_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_sums_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_sums_round, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_sums_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_sums_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms); +} /* // -################################################################################## -int Bench_psum() { - myBench("psum_ref1_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_ref2_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_ref3_nolmbd", HPCombi::partial_sums_ref, sample.perms); - - myBench("psum_ref_nolmbd", HPCombi::partial_sums_ref, sample.perms); - myBench("psum_gen_nolmbd", HPCombi::partial_sums_gen, sample.perms); - myBench("psum_rnd_nolmbd", HPCombi::partial_sums_round, sample.perms); - - MYBENCH("psum_ref_lmbd", HPCombi::partial_sums_ref, sample.perms); - MYBENCH("psum_gen_lmbd", HPCombi::partial_sums_gen, sample.perms); - MYBENCH("psum_rnd_lmbd", HPCombi::partial_sums_round, sample.perms); - return 0; -} // ################################################################################## int Bench_hmax() { - myBench("hmax_ref1_nolmbd", HPCombi::horiz_max_ref, sample.perms); - myBench("hmax_ref2_nolmbd", HPCombi::horiz_max_ref, sample.perms); - myBench("hmax_ref3_nolmbd", HPCombi::horiz_max_ref, sample.perms); - - myBench("hmax_ref_nolmbd", HPCombi::horiz_max_ref, sample.perms); - // myBench("hmax_gen_nolmbd", HPCombi::horiz_max_gen, sample.perms); - myBench("hmax_max4_nolmbd", HPCombi::horiz_max4, sample.perms); - myBench("hmax_max3_nolmbd", HPCombi::horiz_max3, sample.perms); - - MYBENCH("hmax_ref_lmbd", HPCombi::horiz_max_ref, sample.perms); - // MYBENCH("hmax_gen_lmbd", HPCombi::horiz_max_gen, sample.perms); - MYBENCH("hmax_max4_lmbd", HPCombi::horiz_max4, sample.perms); - MYBENCH("hmax_max3_lmbd", HPCombi::horiz_max3, sample.perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max4, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max3, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", horiz_max_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", horiz_max_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_max4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_pmax() { - myBench("pmax_ref1_nolmbd", HPCombi::partial_max_ref, sample.perms); - myBench("pmax_ref2_nolmbd", HPCombi::partial_max_ref, sample.perms); - myBench("pmax_ref3_nolmbd", HPCombi::partial_max_ref, sample.perms); - - myBench("pmax_ref_nolmbd", HPCombi::partial_max_ref, sample.perms); - // myBench("pmax_gen_nolmbd", HPCombi::partial_max_gen, -sample.perms); myBench("pmax_rnd_nolmbd", HPCombi::partial_max_round, -sample.perms); - - MYBENCH("pmax_ref_lmbd", HPCombi::partial_max_ref, sample.perms); - // MYBENCH("pmax_gen_lmbd", HPCombi::partial_max_gen, sample.perms); - MYBENCH("pmax_rnd_lmbd", HPCombi::partial_max_round, sample.perms); - return 0; + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_round, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_max_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_max_gen, +Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_round, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_hmin() { - myBench("hmin_ref1_nolmbd", HPCombi::horiz_min_ref, sample.perms); - myBench("hmin_ref2_nolmbd", HPCombi::horiz_min_ref, sample.perms); - myBench("hmin_ref3_nolmbd", HPCombi::horiz_min_ref, sample.perms); - - myBench("hmin_ref_nolmbd", HPCombi::horiz_min_ref, sample.perms); - // myBench("hmin_gen_nolmbd", HPCombi::horiz_min_gen, sample.perms); - myBench("hmin_min4_nolmbd", HPCombi::horiz_min4, sample.perms); - myBench("hmin_min3_nolmbd", HPCombi::horiz_min3, sample.perms); - - MYBENCH("hmin_ref_lmbd", HPCombi::horiz_min_ref, sample.perms); - // MYBENCH("hmin_gen_lmbd", HPCombi::horiz_min_gen, sample.perms); - MYBENCH("hmin_min4_lmbd", HPCombi::horiz_min4, sample.perms); - MYBENCH("hmin_min3_lmbd", HPCombi::horiz_min3, sample.perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min3, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", horiz_min_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", horiz_min_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_min4, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_pmin() { - myBench("pmin_ref1_nolmbd", HPCombi::partial_min_ref, sample.perms); - myBench("pmin_ref2_nolmbd", HPCombi::partial_min_ref, sample.perms); - myBench("pmin_ref3_nolmbd", HPCombi::partial_min_ref, sample.perms); - - myBench("pmin_ref_nolmbd", HPCombi::partial_min_ref, sample.perms); - // myBench("pmin_gen_nolmbd", HPCombi::partial_min_gen, -sample.perms); myBench("pmin_rnd_nolmbd", HPCombi::partial_min_round, -sample.perms); - - MYBENCH("pmin_ref_lmbd", HPCombi::partial_min_ref, sample.perms); - // MYBENCH("pmin_gen_lmbd", HPCombi::partial_min_gen, sample.perms); - MYBENCH("pmin_rnd_lmbd", HPCombi::partial_min_round, sample.perms); - return 0; + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, +Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round, +Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", partial_min_ref, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_min_gen, +Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_round, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_eval() { - myBench("eval_ref1_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_ref2_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_ref3_nolmbd", HPCombi::eval16_ref, sample.perms); - - myBench("eval_ref_nolmbd", HPCombi::eval16_ref, sample.perms); - myBench("eval_gen_nolmbd", HPCombi::eval16_gen, sample.perms); - myBench("eval_popcnt_nolmbd", HPCombi::eval16_popcount, sample.perms); - myBench("eval_arr_nolmbd", HPCombi::eval16_arr, sample.perms); - myBench("eval_cycle_nolmbd", HPCombi::eval16_cycle, sample.perms); - - MYBENCH("eval_ref_lmbd", HPCombi::eval16_ref, sample.perms); - MYBENCH("eval_gen_lmbd", HPCombi::eval16_gen, sample.perms); - MYBENCH("eval_popcnt_lmbd", HPCombi::eval16_popcount, sample.perms); - MYBENCH("eval_arr_lmbd", HPCombi::eval16_arr, sample.perms); - MYBENCH("eval_cycle_lmbd", HPCombi::eval16_cycle, sample.perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + + BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_arr, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", eval16_cycle, Fix_epu8::perms); + + BENCHMARK_LAMBDA("| lambda", eval16_ref, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_popcount, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_arr, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms); return 0; } // ################################################################################## int Bench_first_diff() { - MYBENCH2("firstDiff_ref_lmbd", HPCombi::first_diff_ref, sample.perms); - MYBENCH2("firstDiff_cmpstr_lmbd", HPCombi::first_diff_cmpstr, -sample.perms); MYBENCH2("firstDiff_mask_lmbd", HPCombi::first_diff_mask, -sample.perms); return 0; + MYBENCH2("firstDiff_ref_lmbd", first_diff_ref, Fix_epu8::perms); + MYBENCH2("firstDiff_cmpstr_lmbd", first_diff_cmpstr, +Fix_epu8::perms); MYBENCH2("firstDiff_mask_lmbd", first_diff_mask, +Fix_epu8::perms); return 0; } // ################################################################################## int Bench_last_diff() { - MYBENCH2("lastDiff_ref_lmbd", HPCombi::last_diff_ref, sample.perms); - MYBENCH2("lastDiff_cmpstr_lmbd", HPCombi::last_diff_cmpstr, -sample.perms); MYBENCH2("lastDiff_mask_lmbd", HPCombi::last_diff_mask, -sample.perms); return 0; + MYBENCH2("lastDiff_ref_lmbd", last_diff_ref, Fix_epu8::perms); + MYBENCH2("lastDiff_cmpstr_lmbd", last_diff_cmpstr, +Fix_epu8::perms); MYBENCH2("lastDiff_mask_lmbd", last_diff_mask, +Fix_epu8::perms); return 0; } */ } // namespace HPCombi