diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index a577ac2b..91ae319c 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -152,8 +152,6 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); } -/* -int Bench_hsum() { TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms); @@ -176,90 +174,59 @@ TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") { BENCHMARK_LAMBDA("| lambda", partial_sums_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms); } -/* -// -// -################################################################################## -int Bench_hmax() { +TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max4, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max3, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_max3, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max_ref, Fix_epu8::perms); // BENCHMARK_LAMBDA("| lambda", horiz_max_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_pmax() { + +TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_round, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", partial_max_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_max_round, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_ref, Fix_epu8::perms); - // BENCHMARK_LAMBDA("| lambda", partial_max_gen, -Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_round, -Fix_epu8::perms); return 0; + // BENCHMARK_LAMBDA("| lambda", partial_max_gen, Fix_epu8::perms); + BENCHMARK_LAMBDA("| lambda", partial_max_round, Fix_epu8::perms); } -// -################################################################################## -int Bench_hmin() { - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); +TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); - - BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min3, -Fix_epu8::perms); + // BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min4, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", horiz_min3, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min_ref, Fix_epu8::perms); // BENCHMARK_LAMBDA("| lambda", horiz_min_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min4, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_pmin() { - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); +TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms); - // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, -Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round, -Fix_epu8::perms); - + // BENCHMARK_FREE_FN("| no lambda", partial_min_gen, Fix_epu8::perms); + BENCHMARK_FREE_FN("| no lambda", partial_min_round, Fix_epu8::perms); + // BENCHMARK_LAMBDA("| lambda", partial_min_gen, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_ref, Fix_epu8::perms); - // BENCHMARK_LAMBDA("| lambda", partial_min_gen, -Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_round, -Fix_epu8::perms); return 0; + BENCHMARK_LAMBDA("| lambda", partial_min_round, Fix_epu8::perms); } -// -################################################################################## -int Bench_eval() { - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); - +TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][000]") { BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms); @@ -271,24 +238,21 @@ int Bench_eval() { BENCHMARK_LAMBDA("| lambda", eval16_popcount, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", eval16_arr, Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms); - return 0; } -// -################################################################################## -int Bench_first_diff() { - MYBENCH2("firstDiff_ref_lmbd", first_diff_ref, Fix_epu8::perms); - MYBENCH2("firstDiff_cmpstr_lmbd", first_diff_cmpstr, -Fix_epu8::perms); MYBENCH2("firstDiff_mask_lmbd", first_diff_mask, -Fix_epu8::perms); return 0; +TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][000]") { + BENCHMARK_LAMBDA2("| lambda", first_diff_ref, Fix_epu8::pairs); +#ifdef SIMDE_X86_SSE4_2_NATIVE + BENCHMARK_LAMBDA2("| lambda", first_diff_cmpstr, Fix_epu8::pairs); +#endif + BENCHMARK_LAMBDA2("| lambda", first_diff_mask, Fix_epu8::pairs); } -// -################################################################################## -int Bench_last_diff() { - MYBENCH2("lastDiff_ref_lmbd", last_diff_ref, Fix_epu8::perms); - MYBENCH2("lastDiff_cmpstr_lmbd", last_diff_cmpstr, -Fix_epu8::perms); MYBENCH2("lastDiff_mask_lmbd", last_diff_mask, -Fix_epu8::perms); return 0; -} */ +TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") { + BENCHMARK_LAMBDA2("| lambda", last_diff_ref, Fix_epu8::pairs); +#ifdef SIMDE_X86_SSE4_2_NATIVE + BENCHMARK_LAMBDA2("| lambda", last_diff_cmpstr, Fix_epu8::pairs); +#endif + BENCHMARK_LAMBDA2("| lambda", last_diff_mask, Fix_epu8::pairs); +} } // namespace HPCombi diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 8d2db5d0..79f642f5 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -60,19 +60,16 @@ std::vector rand_transf(int sz) { std::vector> make_pair_sample(size_t sz) { std::vector> res{}; for (size_t i = 0; i < sz; i++) { - res.push_back(std::make_pair(HPCombi::random_epu8(15), - HPCombi::random_epu8(15))); + res.emplace_back(HPCombi::random_epu8(15), HPCombi::random_epu8(15)); } return res; } class Fix_epu8 { public: - Fix_epu8() : vects(rand_epu8(size)), - transf(rand_transf(size)), - perms(rand_perms(size)), - pairs(make_pair_sample(size)) - {} + Fix_epu8() + : vects(rand_epu8(size)), transf(rand_transf(size)), + perms(rand_perms(size)), pairs(make_pair_sample(size)) {} ~Fix_epu8() {} const std::vector vects; const std::vector transf; diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp index db4841cf..4cde8e0c 100644 --- a/benchmark/bench_main.hpp +++ b/benchmark/bench_main.hpp @@ -38,34 +38,39 @@ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ - for (auto &pair : sample) { \ - volatile auto val = pair.first.mem_fn(pair.second); \ +#define BENCHMARK_LAMBDA2(msg, free_fn, sample) \ + BENCHMARK(#free_fn " " msg) { \ + auto lambda__xxx = [](auto const &x, auto const &y) { \ + return free_fn(x, y); \ + }; \ + for (auto [x, y] : sample) { \ + volatile auto dummy = lambda__xxx(x, y); \ } \ return true; \ }; -#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ - BENCHMARK(#free_fn) { \ +#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ + BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - volatile auto val = free_fn(pair.first, pair.second); \ + volatile auto val = pair.first.mem_fn(pair.second); \ } \ return true; \ }; -#define BENCHMARK_FREE_FN_PAIR_EQ(mem_fn, sample) \ +#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ BENCHMARK(#mem_fn) { \ for (auto &pair : sample) { \ - REQUIRE(free_fn(pair.first) == free_fn(pair.second)); \ + auto val = \ + std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \ + REQUIRE(val.first == val.second); \ } \ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ +#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ + BENCHMARK(#free_fn) { \ for (auto &pair : sample) { \ - REQUIRE(pair.first.mem_fn() == pair.second.mem_fn()); \ + volatile auto val = free_fn(pair.first, pair.second); \ } \ return true; \ }; diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index be83c3b4..70d5d140 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -215,10 +215,11 @@ inline epu8 sort8_perm(epu8 &a) noexcept { inline epu8 random_epu8(uint16_t bnd) { epu8 res; - std::random_device rd; - std::default_random_engine e1(rd()); + static std::random_device rd; + static std::default_random_engine e1(rd()); std::uniform_int_distribution uniform_dist(0, bnd - 1); + for (size_t i = 0; i < 16; i++) res[i] = uniform_dist(e1); return res;