Skip to content

Commit

Permalink
Finalize bench_epu8
Browse files Browse the repository at this point in the history
  • Loading branch information
james-d-mitchell committed Nov 1, 2023
1 parent d8359e7 commit f14a337
Show file tree
Hide file tree
Showing 4 changed files with 57 additions and 90 deletions.
102 changes: 33 additions & 69 deletions benchmark/bench_epu8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") {
BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs);
}

/*
int Bench_hsum() {
TEST_CASE_METHOD(Fix_epu8, "hsum", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", horiz_sum_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_sum_gen, Fix_epu8::perms);
Expand All @@ -176,90 +174,59 @@ TEST_CASE_METHOD(Fix_epu8, "partial sums", "[Epu8][000]") {
BENCHMARK_LAMBDA("| lambda", partial_sums_gen, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", partial_sums_round, Fix_epu8::perms);
}
/*
//

//
##################################################################################
int Bench_hmax() {
TEST_CASE_METHOD(Fix_epu8, "horiz max", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms);

BENCHMARK_FREE_FN("| no lambda", horiz_max_ref, Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", horiz_max_gen,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max4,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_max3,
Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", horiz_max_gen, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_max4, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_max3, Fix_epu8::perms);

BENCHMARK_LAMBDA("| lambda", horiz_max_ref, Fix_epu8::perms);
// BENCHMARK_LAMBDA("| lambda", horiz_max_gen, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", horiz_max4, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", horiz_max3, Fix_epu8::perms);
return 0;
}
//
##################################################################################
int Bench_pmax() {

TEST_CASE_METHOD(Fix_epu8, "partial max", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms);

BENCHMARK_FREE_FN("| no lambda", partial_max_ref, Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", partial_max_gen,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_max_round,
Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", partial_max_gen, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_max_round, Fix_epu8::perms);

BENCHMARK_LAMBDA("| lambda", partial_max_ref, Fix_epu8::perms);
// BENCHMARK_LAMBDA("| lambda", partial_max_gen,
Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_max_round,
Fix_epu8::perms); return 0;
// BENCHMARK_LAMBDA("| lambda", partial_max_gen, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", partial_max_round, Fix_epu8::perms);
}

//
##################################################################################
int Bench_hmin() {
BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms);
TEST_CASE_METHOD(Fix_epu8, "horiz min", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_min_ref, Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", horiz_min_gen,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min4,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", horiz_min3,
Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", horiz_min_gen, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_min4, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", horiz_min3, Fix_epu8::perms);

BENCHMARK_LAMBDA("| lambda", horiz_min_ref, Fix_epu8::perms);
// BENCHMARK_LAMBDA("| lambda", horiz_min_gen, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", horiz_min4, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", horiz_min3, Fix_epu8::perms);
return 0;
}
//
##################################################################################
int Bench_pmin() {
BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms);

TEST_CASE_METHOD(Fix_epu8, "partial min", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", partial_min_ref, Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", partial_min_gen,
Fix_epu8::perms); BENCHMARK_FREE_FN("| no lambda", partial_min_round,
Fix_epu8::perms);
// BENCHMARK_FREE_FN("| no lambda", partial_min_gen, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", partial_min_round, Fix_epu8::perms);
// BENCHMARK_LAMBDA("| lambda", partial_min_gen, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", partial_min_ref, Fix_epu8::perms);
// BENCHMARK_LAMBDA("| lambda", partial_min_gen,
Fix_epu8::perms); BENCHMARK_LAMBDA("| lambda", partial_min_round,
Fix_epu8::perms); return 0;
BENCHMARK_LAMBDA("| lambda", partial_min_round, Fix_epu8::perms);
}

//
##################################################################################
int Bench_eval() {
BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms);
TEST_CASE_METHOD(Fix_epu8, "eval16", "[Epu8][000]") {
BENCHMARK_FREE_FN("| no lambda", eval16_ref, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", eval16_gen, Fix_epu8::perms);
BENCHMARK_FREE_FN("| no lambda", eval16_popcount, Fix_epu8::perms);
Expand All @@ -271,24 +238,21 @@ int Bench_eval() {
BENCHMARK_LAMBDA("| lambda", eval16_popcount, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", eval16_arr, Fix_epu8::perms);
BENCHMARK_LAMBDA("| lambda", eval16_cycle, Fix_epu8::perms);
return 0;
}

//
##################################################################################
int Bench_first_diff() {
MYBENCH2("firstDiff_ref_lmbd", first_diff_ref, Fix_epu8::perms);
MYBENCH2("firstDiff_cmpstr_lmbd", first_diff_cmpstr,
Fix_epu8::perms); MYBENCH2("firstDiff_mask_lmbd", first_diff_mask,
Fix_epu8::perms); return 0;
// Benchmarks the first_diff implementations (ref, cmpstr, mask) on the
// fixture's `pairs` sample. Each BENCHMARK_LAMBDA2 line wraps the named
// two-argument free function in a generic lambda and calls it once per pair.
// The cmpstr variant is only compiled when SIMDE_X86_SSE4_2_NATIVE is defined
// (presumably because it needs native SSE4.2 support -- confirm).
TEST_CASE_METHOD(Fix_epu8, "first diff", "[Epu8][000]") {
BENCHMARK_LAMBDA2("| lambda", first_diff_ref, Fix_epu8::pairs);
#ifdef SIMDE_X86_SSE4_2_NATIVE
BENCHMARK_LAMBDA2("| lambda", first_diff_cmpstr, Fix_epu8::pairs);
#endif
BENCHMARK_LAMBDA2("| lambda", first_diff_mask, Fix_epu8::pairs);
}

//
##################################################################################
int Bench_last_diff() {
MYBENCH2("lastDiff_ref_lmbd", last_diff_ref, Fix_epu8::perms);
MYBENCH2("lastDiff_cmpstr_lmbd", last_diff_cmpstr,
Fix_epu8::perms); MYBENCH2("lastDiff_mask_lmbd", last_diff_mask,
Fix_epu8::perms); return 0;
} */
// Benchmarks the last_diff implementations (ref, cmpstr, mask) on the
// fixture's `pairs` sample. Each BENCHMARK_LAMBDA2 line wraps the named
// two-argument free function in a generic lambda and calls it once per pair.
// The cmpstr variant is only compiled when SIMDE_X86_SSE4_2_NATIVE is defined
// (presumably because it needs native SSE4.2 support -- confirm).
TEST_CASE_METHOD(Fix_epu8, "last diff", "[Epu8][000]") {
BENCHMARK_LAMBDA2("| lambda", last_diff_ref, Fix_epu8::pairs);
#ifdef SIMDE_X86_SSE4_2_NATIVE
BENCHMARK_LAMBDA2("| lambda", last_diff_cmpstr, Fix_epu8::pairs);
#endif
BENCHMARK_LAMBDA2("| lambda", last_diff_mask, Fix_epu8::pairs);
}
} // namespace HPCombi
11 changes: 4 additions & 7 deletions benchmark/bench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,19 +60,16 @@ std::vector<epu8> rand_transf(int sz) {
std::vector<std::pair<epu8, epu8>> make_pair_sample(size_t sz) {
std::vector<std::pair<epu8, epu8>> res{};
for (size_t i = 0; i < sz; i++) {
res.push_back(std::make_pair(HPCombi::random_epu8(15),
HPCombi::random_epu8(15)));
res.emplace_back(HPCombi::random_epu8(15), HPCombi::random_epu8(15));
}
return res;
}

class Fix_epu8 {
public:
Fix_epu8() : vects(rand_epu8(size)),
transf(rand_transf(size)),
perms(rand_perms(size)),
pairs(make_pair_sample(size))
{}
Fix_epu8()
: vects(rand_epu8(size)), transf(rand_transf(size)),
perms(rand_perms(size)), pairs(make_pair_sample(size)) {}
~Fix_epu8() {}
const std::vector<epu8> vects;
const std::vector<epu8> transf;
Expand Down
29 changes: 17 additions & 12 deletions benchmark/bench_main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,34 +38,39 @@
return true; \
};

#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \
BENCHMARK(#mem_fn) { \
for (auto &pair : sample) { \
volatile auto val = pair.first.mem_fn(pair.second); \
// BENCHMARK_LAMBDA2(msg, free_fn, sample)
//
// Declares a benchmark named "<free_fn> <msg>" that wraps the two-argument
// callable `free_fn` in a generic lambda and invokes it once for every pair
// in `sample` (a range of pair-like elements).
//
// The pairs are bound by const reference so that no per-element copy is made
// inside the timed region, matching the by-reference iteration used by the
// other *_PAIR benchmark macros. Each result is stored in a volatile local so
// the call is not optimised away.
#define BENCHMARK_LAMBDA2(msg, free_fn, sample)                                \
    BENCHMARK(#free_fn " " msg) {                                              \
        auto lambda__xxx = [](auto const &x, auto const &y) {                  \
            return free_fn(x, y);                                              \
        };                                                                     \
        for (auto const &[x, y] : sample) {                                    \
            volatile auto dummy = lambda__xxx(x, y);                           \
        }                                                                      \
        return true;                                                           \
    };

#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \
BENCHMARK(#free_fn) { \
#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \
BENCHMARK(#mem_fn) { \
for (auto &pair : sample) { \
volatile auto val = free_fn(pair.first, pair.second); \
volatile auto val = pair.first.mem_fn(pair.second); \
} \
return true; \
};

#define BENCHMARK_FREE_FN_PAIR_EQ(mem_fn, sample) \
#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \
BENCHMARK(#mem_fn) { \
for (auto &pair : sample) { \
REQUIRE(free_fn(pair.first) == free_fn(pair.second)); \
auto val = \
std::make_pair(pair.first.mem_fn(), pair.second.mem_fn()); \
REQUIRE(val.first == val.second); \
} \
return true; \
};

#define BENCHMARK_MEM_FN_PAIR_EQ(mem_fn, sample) \
BENCHMARK(#mem_fn) { \
#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \
BENCHMARK(#free_fn) { \
for (auto &pair : sample) { \
REQUIRE(pair.first.mem_fn() == pair.second.mem_fn()); \
volatile auto val = free_fn(pair.first, pair.second); \
} \
return true; \
};
5 changes: 3 additions & 2 deletions include/hpcombi/epu_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,11 @@ inline epu8 sort8_perm(epu8 &a) noexcept {

inline epu8 random_epu8(uint16_t bnd) {
epu8 res;
std::random_device rd;

std::default_random_engine e1(rd());
static std::random_device rd;
static std::default_random_engine e1(rd());
std::uniform_int_distribution<int> uniform_dist(0, bnd - 1);

for (size_t i = 0; i < 16; i++)
res[i] = uniform_dist(e1);
return res;
Expand Down

0 comments on commit f14a337

Please sign in to comment.