Skip to content

Commit

Permalink
Add missing copy constructors + improved benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
hivert committed Nov 1, 2023
1 parent 4309f36 commit e9c12e8
Show file tree
Hide file tree
Showing 9 changed files with 84 additions and 29 deletions.
12 changes: 9 additions & 3 deletions benchmark/bench_epu8.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,11 @@ inline epu8 sort_pair(epu8 a) {

inline epu8 sort_odd_even(epu8 a) {
const uint8_t FF = 0xff;
static const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6,
static constexpr const epu8 even = {1, 0, 3, 2, 5, 4, 7, 6,
9, 8, 11, 10, 13, 12, 15, 14};
static const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8,
static constexpr const epu8 odd = {0, 2, 1, 4, 3, 6, 5, 8,
7, 10, 9, 12, 11, 14, 13, 15};
static const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF,
static constexpr const epu8 mask = {0, FF, 0, FF, 0, FF, 0, FF,
0, FF, 0, FF, 0, FF, 0, FF};
epu8 b, minab, maxab;
for (unsigned i = 0; i < 8; ++i) {
Expand Down Expand Up @@ -147,6 +147,12 @@ TEST_CASE_METHOD(Fix_epu8, "Sorting", "[Perm16][000]") {
BENCHMARK_LAMBDA("| lambda | vects", HPCombi::sorted, Fix_epu8::vects);
}


// Benchmark case run on the Fix_epu8 fixture: times both the reference
// implementation HPCombi::permuted_ref and HPCombi::permuted over the
// fixture's `pairs` sample.
TEST_CASE_METHOD(Fix_epu8, "Permuting", "[Epu8][001]") {
BENCHMARK_FREE_FN_PAIR(HPCombi::permuted_ref, pairs);
BENCHMARK_FREE_FN_PAIR(HPCombi::permuted, pairs);
}

/*
int Bench_hsum() {
myBench("hsum_ref1_nolmbd", HPCombi::horiz_sum_ref, sample.perms);
Expand Down
16 changes: 14 additions & 2 deletions benchmark/bench_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@

using HPCombi::epu8;

constexpr uint_fast64_t size = 1000;
constexpr uint_fast64_t size = 10;
// constexpr uint_fast64_t repeat = 100;

std::vector<epu8> rand_epu8(size_t sz) {
Expand Down Expand Up @@ -57,15 +57,27 @@ std::vector<epu8> rand_transf(int sz) {
return res;
}

std::vector<std::pair<epu8, epu8>> make_pair_sample(size_t sz) {
std::vector<std::pair<epu8, epu8>> res{};
for (size_t i = 0; i < sz; i++) {
res.push_back(std::make_pair(HPCombi::random_epu8(15),
HPCombi::random_epu8(15)));
}
return res;
}

class Fix_epu8 {
public:
Fix_epu8() : vects(rand_epu8(size)),
transf(rand_transf(size)),
perms(rand_perms(size)) {}
perms(rand_perms(size)),
pairs(make_pair_sample(size))
{}
~Fix_epu8() {}
const std::vector<epu8> vects;
const std::vector<epu8> transf;
const std::vector<epu8> perms;
const std::vector<std::pair<epu8, epu8>> pairs;
};

#endif // BENCH_FIXTURE
6 changes: 3 additions & 3 deletions benchmark/bench_main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,10 @@
return true; \
};

#define BENCHMARK_MEM_FN_PAIR(mem_fn, sample) \
BENCHMARK(#mem_fn) { \
#define BENCHMARK_FREE_FN_PAIR(free_fn, sample) \
BENCHMARK(#free_fn) { \
for (auto &pair : sample) { \
volatile auto val = pair.first.mem_fn(pair.second); \
volatile auto val = free_fn(pair.first, pair.second); \
} \
return true; \
};
2 changes: 2 additions & 0 deletions include/hpcombi/epu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,8 @@ inline bool equal(epu8 a, epu8 b) noexcept {
/** Non equality of #HPCombi::epu8 */
inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }

/** Permuting a #HPCombi::epu8 */
inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
/** Permuting a #HPCombi::epu8 */
inline epu8 permuted(epu8 a, epu8 b) noexcept {
return simde_mm_shuffle_epi8(a, b);
Expand Down
8 changes: 8 additions & 0 deletions include/hpcombi/epu_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,14 @@ namespace HPCombi {
/// Implementation part for inline functions //////////////////////////////////
///////////////////////////////////////////////////////////////////////////////

/** Reference implementation of the permutation of a #HPCombi::epu8.
 *
 * For each position @c i in 0..15 the result holds @c a[b[i] & 0xF]; only the
 * low four bits of every entry of @p b are used as an index into @p a.
 */
inline epu8 permuted_ref(epu8 a, epu8 b) noexcept {
    epu8 out;
    for (uint64_t pos = 0; pos < 16; ++pos) {
        // Mask the index so that the lookup stays within 0..15.
        const auto src = b[pos] & 0xF;
        out[pos] = a[src];
    }
    return out;
}

// Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255)
inline uint64_t first_mask(epu8 msk, size_t bound) {
uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound)));
Expand Down
2 changes: 2 additions & 0 deletions include/hpcombi/vect16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ struct alignas(16) Vect16 {
epu8 v;

Vect16() = default;
constexpr Vect16(const Vect16 &v) = default;

constexpr Vect16(epu8 x) : v(x) {}
Vect16(std::initializer_list<uint8_t> il, uint8_t def = 0)
: v(Epu8(il, def)) {}
Expand Down
8 changes: 6 additions & 2 deletions include/hpcombi/vect_generic.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,8 @@ template <size_t Size, typename Expo = uint8_t> struct VectGeneric {
array v;

VectGeneric() = default;
constexpr VectGeneric(const VectGeneric &v) = default;

VectGeneric(const std::array<Expo, Size> &_v) : v(_v) {} // NOLINT
VectGeneric(std::initializer_list<Expo> il, Expo def = 0) {
HPCOMBI_ASSERT(il.size() <= Size);
Expand Down Expand Up @@ -106,8 +108,10 @@ template <size_t Size, typename Expo = uint8_t> struct VectGeneric {

VectGeneric permuted(const VectGeneric &u) const {
VectGeneric res;
for (uint64_t i = 0; i < Size; i++)
res[i] = v[u[i]];
for (uint64_t i = 0; i < Size; i++) {
if (u[i] < Size)
res[i] = v[u[i]];
}
return res;
}

Expand Down
19 changes: 10 additions & 9 deletions tests/test_epu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ TEST_CASE_METHOD(Fix, "Epu8::permuted", "[Epu8][011]") {
epu8{2, 2, 1, 2, 3, 6, 12, 4, 5, 16, 17, 11, 12, 13, 14, 15}),
Equals(epu8{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15}));
}
TEST_AGREES2_FUN_EPU8(Fix, permuted, permuted_ref, v, "[Epu8][011]")

TEST_CASE_METHOD(Fix, "Epu8::shifted_left", "[Epu8][012]") {
CHECK_THAT(shifted_left(P01), Equals(P10));
Expand Down Expand Up @@ -441,10 +442,10 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][029]") {
Equals(epu8{23, 28, 49, 54, 97, 133, 140, 147, 154, 161, 168,
175, 182, 189, 196, 203}));
}
TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums_round, v,
TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_gen, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums_round, v,
"[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][030]")

TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][033]") {
CHECK(horiz_max_ref(zero) == 0);
Expand Down Expand Up @@ -485,9 +486,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][038]") {
CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15)));
CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43)));
}
TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_max_ref, partial_max, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_round, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][030]")

TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][042]") {
CHECK(horiz_min_ref(zero) == 0);
Expand Down Expand Up @@ -529,9 +530,9 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][043]") {
CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev));
CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5)));
}
TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]")
TEST_AGREES_EPU8_FUN(Fix, partial_min_ref, partial_min, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_round, v, "[Epu8][030]")
TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][030]")

TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][047]") {
CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0)));
Expand Down
40 changes: 30 additions & 10 deletions tests/test_main.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,32 +23,34 @@
#include <catch2/catch_test_macros.hpp>
#include <catch2/matchers/catch_matchers_templated.hpp>

#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \
#define TEST_AGREES(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p : vct) { \
CHECK(fun(p) == ref(p)); \
CHECK(p.fun() == p.ref()); \
} \
}

#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \
#define TEST_AGREES_FUN(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p : vct) { \
CHECK_THAT(fun(p), Equals(ref(p))); \
CHECK(fun(p) == ref(p)); \
} \
}

#define TEST_AGREES(fixture, ref, fun, vct, tags) \
#define TEST_AGREES2(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p : vct) { \
CHECK(p.fun() == p.ref()); \
for (auto p1 : vct) { \
for (auto p2 : vct) { \
CHECK(p1.fun(p2) == p1.ref(p2)); \
} \
} \
}

#define TEST_AGREES2(fixture, ref, fun, vct, tags) \
#define TEST_AGREES2_FUN(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p1 : vct) { \
for (auto p2 : vct) { \
CHECK(p1.fun(p2) == p1.ref(p2)); \
CHECK(fun(p1, p2) == ref(p1, p2)); \
} \
} \
}
Expand All @@ -60,13 +62,31 @@
} \
}

#define TEST_AGREES_EPU8_FUN(fixture, ref, fun, vct, tags) \
#define TEST_AGREES_FUN_EPU8(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p : vct) { \
CHECK_THAT(fun(p), Equals(ref(p))); \
} \
}

// Defines a test case on `fixture`, named "<ref> == <fun>" and tagged `tags`,
// that iterates over every ordered pair (p1, p2) of elements of `vct` and
// checks with CHECK_THAT / Equals that the member call p1.fun(p2) gives the
// same result as p1.ref(p2).
#define TEST_AGREES2_EPU8(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p1 : vct) { \
for (auto p2 : vct) { \
CHECK_THAT(p1.fun(p2), Equals(p1.ref(p2))); \
} \
} \
}

// Defines a test case on `fixture`, named "<ref> == <fun>" and tagged `tags`,
// that iterates over every ordered pair (p1, p2) of elements of `vct` and
// checks with CHECK_THAT / Equals that the free-function call fun(p1, p2)
// gives the same result as ref(p1, p2).
#define TEST_AGREES2_FUN_EPU8(fixture, ref, fun, vct, tags) \
TEST_CASE_METHOD(fixture, #ref " == " #fun, tags) { \
for (auto p1 : vct) { \
for (auto p2 : vct) { \
CHECK_THAT(fun(p1, p2), Equals(ref(p1, p2))); \
} \
} \
}

struct Equals : Catch::Matchers::MatcherGenericBase {
Equals(HPCombi::epu8 v) : v(v) {}

Expand Down

0 comments on commit e9c12e8

Please sign in to comment.