From e9c12e84bfb461ff79a07027b601de6f635910dc Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Wed, 1 Nov 2023 15:11:26 +0000 Subject: [PATCH] Add missing copy constructors + improved benchmark --- benchmark/bench_epu8.cpp | 12 +++++++--- benchmark/bench_fixture.hpp | 16 +++++++++++-- benchmark/bench_main.hpp | 6 ++--- include/hpcombi/epu.hpp | 2 ++ include/hpcombi/epu_impl.hpp | 8 +++++++ include/hpcombi/vect16.hpp | 2 ++ include/hpcombi/vect_generic.hpp | 8 +++++-- tests/test_epu.cpp | 19 ++++++++------- tests/test_main.hpp | 40 ++++++++++++++++++++++++-------- 9 files changed, 84 insertions(+), 29 deletions(-) diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index db17a42b..b60124fc 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -51,11 +51,11 @@ inline epu8 sort_pair(epu8 a) { inline epu8 sort_odd_even(epu8 a) { const uint8_t FF = 0xff; - static const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, + static constexpr const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; - static const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, + static constexpr const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 15}; - static const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, + static constexpr const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF, 0, FF}; epu8 b, minab, maxab; for (unsigned i = 0; i < 8; ++i) { @@ -147,6 +147,12 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") { BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects); } + +TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") { + BENCHMARK_FREE_FN_PAIR(HPCombi::permuted_ref, pairs); + BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs); +} + /* int Bench_hsum() { myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms); diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 5141cce6..8d2db5d0 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -22,7 +22,7 @@ using HPCombi::epu8; -constexpr uint_fast64_t size = 1000; +constexpr uint_fast64_t size = 10; // constexpr uint_fast64_t repeat = 100; std::vector rand_epu8(size_t sz) { @@ -57,15 +57,27 @@ std::vector rand_transf(int sz) { return res; } +std::vector> make_pair_sample(size_t sz) { + std::vector> res{}; + for (size_t i = 0; i < sz; i++) { + res.push_back(std::make_pair(HPCombi::random_epu8(15), + HPCombi::random_epu8(15))); + } + return res; +} + class Fix_epu8 { public: Fix_epu8() : vects(rand_epu8(size)), transf(rand_transf(size)), - perms(rand_perms(size)) {} + perms(rand_perms(size)), + pairs(make_pair_sample(size)) + {} ~Fix_epu8() {} const std::vector vects; const std::vector transf; const std::vector perms; + const std::vector> pairs; }; #endif // BENCH_FIXTURE diff --git a/benchmark/bench_main.hpp b/benchmark/bench_main.hpp index fcb49f35..3d8cb90b 100644 --- a/benchmark/bench_main.hpp +++ b/benchmark/bench_main.hpp @@ -48,10 +48,10 @@ return true; \ }; -#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \ - BENCHMARK(#mem_fn) { \ +#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \ + BENCHMARK(#free_fn) { \ for (auto &pair : sample) { \ - volatile auto val = pair.first.mem_fn(pair.second); \ + volatile auto val = free_fn(pair.first, pair.second); \ } \ return true; \ }; diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp index 69988adf..5f1fbe88 100644 --- a/include/hpcombi/epu.hpp +++ b/include/hpcombi/epu.hpp @@ -172,6 +172,8 @@ inline bool equal(epu8 a, epu8 b) noexcept { /** Non equality of #HPCombi::epu8 */ inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); } +/** Permuting a #HPCombi::epu8 */ +inline epu8 permuted_ref(epu8 a, epu8 b) noexcept; /** Permuting a #HPCombi::epu8 */ inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index c7369458..be83c3b4 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -49,6 +49,14 @@ namespace HPCombi { /// Implementation part for inline functions ////////////////////////////////// /////////////////////////////////////////////////////////////////////////////// +/** Permuting a #HPCombi::epu8 */ +inline epu8 permuted_ref(epu8 a, epu8 b) noexcept { + epu8 res; + for (uint64_t i = 0; i < 16; i++) + res[i] = a[b[i] & 0xF]; + return res; +} + // Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp index d0e5930b..49c3e760 100644 --- a/include/hpcombi/vect16.hpp +++ b/include/hpcombi/vect16.hpp @@ -33,6 +33,8 @@ struct alignas(16) Vect16 { epu8 v; Vect16() = default; + constexpr Vect16(const Vect16 &v) = default; + constexpr Vect16(epu8 x) : v(x) {} Vect16(std::initializer_list il, uint8_t def = 0) : v(Epu8(il, def)) {} diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp index 36f309d0..5e1e1558 100644 --- a/include/hpcombi/vect_generic.hpp +++ b/include/hpcombi/vect_generic.hpp @@ -47,6 +47,8 @@ template struct VectGeneric { array v; VectGeneric() = default; + constexpr VectGeneric(const VectGeneric &v) = default; + VectGeneric(const std::array &_v) : v(_v) {} // NOLINT VectGeneric(std::initializer_list il, Expo def = 0) { HPCOMBI_ASSERT(il.size() <= Size); @@ -106,8 +108,10 @@ template struct VectGeneric { VectGeneric permuted(const VectGeneric &u) const { VectGeneric res; - for (uint64_t i = 0; i < Size; i++) - res[i] = v[u[i]]; + for (uint64_t i = 0; i < Size; i++) { + if (u[i] < Size) + res[i] = v[u[i]]; + } return res; } diff --git a/tests/test_epu.cpp b/tests/test_epu.cpp index 6b31f0ef..2079a60f 100644 --- a/tests/test_epu.cpp +++ b/tests/test_epu.cpp @@ -200,6 +200,7 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") { epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}), Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15})); } +TEST_AGREES2_FUN_EPU8(Fix, permuted, permuted_ref, v, "[Epu8][011]") TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") { CHECK_THAT(shifted_left(P01), Equals(P10)); @@ -441,10 +442,10 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") { Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168, 175, 182, 189, 196, 203})); } -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_round, v, +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") { CHECK(horiz_max_ref(zero) == 0); @@ -485,9 +486,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") { CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") { CHECK(horiz_min_ref(zero) == 0); @@ -529,9 +530,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") { CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") -TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]") +TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][030]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); diff --git a/tests/test_main.hpp b/tests/test_main.hpp index 475e3fba..3c93e6a2 100644 --- a/tests/test_main.hpp +++ b/tests/test_main.hpp @@ -23,32 +23,34 @@ #include #include -#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK(fun(p) == ref(p)); \ + CHECK(p.fun() == p.ref()); \ } \ } -#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ - CHECK_THAT(fun(p), Equals(ref(p))); \ + CHECK(fun(p) == ref(p)); \ } \ } -#define TEST_AGREES(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES2(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ - for (auto p : vct) { \ - CHECK(p.fun() == p.ref()); \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK(p1.fun(p2) == p1.ref(p2)); \ + } \ } \ } -#define TEST_AGREES2(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES2_FUN(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p1 : vct) { \ for (auto p2 : vct) { \ - CHECK(p1.fun(p2) == p1.ref(p2)); \ + CHECK(fun(p1, p2) == ref(p1, p2)); \ } \ } \ } @@ -60,13 +62,31 @@ } \ } -#define TEST_AGREES_EPU8_FUN(fixture, ref, fun, vct, tags) \ +#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \ TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ for (auto p : vct) { \ CHECK_THAT(fun(p), Equals(ref(p))); \ } \ } +#define TEST_AGREES2_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK_THAT(p1.fun(p2), Equals(p1.ref(p2))); \ + } \ + } \ + } + +#define TEST_AGREES2_FUN_EPU8(fixture, ref, fun, vct, tags) \ + TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \ + for (auto p1 : vct) { \ + for (auto p2 : vct) { \ + CHECK_THAT(fun(p1, p2), Equals(ref(p1, p2))); \ + } \ + } \ + } + struct Equals : Catch::Matchers::MatcherGenericBase { Equals(HPCombi::epu8 v) : v(v) {}