From c298c95f7fa23c9ba7af0feb05b9d731b927d14d Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Sun, 5 Nov 2023 20:07:15 +0000 Subject: [PATCH] Preparing for xpu/perm32: - created builder.hpp for TPUBuild - epu8id and similar are now Epu8.id() - improved doc --- benchmark/bench_epu8.cpp | 4 +- benchmark/bench_fixture.hpp | 4 +- examples/pattern.cpp | 4 +- include/hpcombi/bmat8_impl.hpp | 18 ++--- include/hpcombi/builder.hpp | 115 ++++++++++++++++++++++++++++++++ include/hpcombi/epu8.hpp | 76 +++------------------ include/hpcombi/epu8_impl.hpp | 39 +++++------ include/hpcombi/perm16.hpp | 8 +-- include/hpcombi/perm16_impl.hpp | 8 +-- tests/test_epu8.cpp | 80 +++++++++++----------- tests/test_perm16.cpp | 4 +- 11 files changed, 208 insertions(+), 152 deletions(-) create mode 100644 include/hpcombi/builder.hpp diff --git a/benchmark/bench_epu8.cpp b/benchmark/bench_epu8.cpp index 68a113e2..74abb2ab 100644 --- a/benchmark/bench_epu8.cpp +++ b/benchmark/bench_epu8.cpp @@ -1,5 +1,5 @@ //****************************************************************************// -// Copyright (C) 2018 Florent Hivert , // +// Copyright (C) 2018-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -32,7 +32,7 @@ namespace { struct RoundsMask { constexpr RoundsMask() : arr() { for (unsigned i = 0; i < sorting_rounds.size(); ++i) - arr[i] = sorting_rounds[i] < epu8id; + arr[i] = sorting_rounds[i] < Epu8.id(); } epu8 arr[sorting_rounds.size()]; }; diff --git a/benchmark/bench_fixture.hpp b/benchmark/bench_fixture.hpp index 8d086551..6bc746c7 100644 --- a/benchmark/bench_fixture.hpp +++ b/benchmark/bench_fixture.hpp @@ -1,5 +1,5 @@ //****************************************************************************// -// Copyright (C) 2016 Florent Hivert , // +// Copyright (C) 2016-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -35,7 +35,7 @@ std::vector rand_epu8(size_t 
sz) { inline epu8 rand_perm() { static std::random_device rd; static std::mt19937 g(rd()); - epu8 res = HPCombi::epu8id; + epu8 res = HPCombi::Epu8.id(); auto &ar = HPCombi::as_array(res); std::shuffle(ar.begin(), ar.end(), g); return res; diff --git a/examples/pattern.cpp b/examples/pattern.cpp index 2241b23b..50656b8a 100644 --- a/examples/pattern.cpp +++ b/examples/pattern.cpp @@ -68,8 +68,8 @@ void make_subsets_of_size(int n, int k) { template epu8 extract_pattern(epu8 perm, epu8 permset) { epu8 cst = Epu8({}, Size); - epu8 res = permuted(perm, permset) | (epu8id >= cst); - res = sort_perm(res) & (epu8id < cst); + epu8 res = permuted(perm, permset) | (Epu8.id() >= cst); + res = sort_perm(res) & (Epu8.id() < cst); return res; } diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index d536a79a..eb01f386 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -254,14 +254,14 @@ namespace detail { inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept { static const epu8 bound08 = simde_mm_slli_epi32( - static_cast(epu8id), 3); // shift for *8 + static_cast(Epu8.id()), 3); // shift for *8 static const epu8 bound18 = bound08 + Epu8(0x80); for (size_t slice8 = 0; slice8 < 16; slice8++) { epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */ epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5); set0 |= (bm5 == bound08) & shft; set1 |= (bm5 == bound18) & shft; - block = simde_mm_shuffle_epi8(block, right_cycle); + block = simde_mm_shuffle_epi8(block, Epu8.right_cycle()); } } } @@ -277,7 +277,7 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept { res1 = epu8{}; for (size_t r = 0; r < 16; r++) { detail::row_space_update_bitset(block0 | block1, res0, res1); - block1 = simde_mm_shuffle_epi8(block1, right_cycle); + block1 = simde_mm_shuffle_epi8(block1, Epu8.right_cycle()); } } @@ -292,7 +292,7 @@ inline uint64_t BMat8::row_space_size_bitset() const noexcept { inline 
uint64_t BMat8::row_space_size_incl1() const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); - epu8 block = epu8id; + epu8 block = Epu8.id(); uint64_t res = 0; for (size_t r = 0; r < 16; r++) { epu8 orincl{}; @@ -308,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl1() const noexcept { inline uint64_t BMat8::row_space_size_incl() const noexcept { epu8 in = simde_mm_set_epi64x(_data, _data); - epu8 block = epu8id; + epu8 block = Epu8.id(); uint64_t res = 0; for (size_t r = 0; r < 16; r++) { epu8 orincl = ((in | block) == block) & in; @@ -466,11 +466,11 @@ inline Perm16 BMat8::right_perm_action_on_basis_ref(BMat8 bm) const { } inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept { - epu8 x = permuted(simde_mm_set_epi64x(_data, 0), epu8rev); - epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), epu8rev); + epu8 x = permuted(simde_mm_set_epi64x(_data, 0), Epu8.rev()); + epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), Epu8.rev()); // Vector ternary operator is not supported by clang. - // return (x != (epu8 {})) ? permutation_of(y, x) : epu8id; - return simde_mm_blendv_epi8(epu8id, permutation_of(y, x), x != epu8{}); + // return (x != (epu8 {})) ? permutation_of(y, x) : Epu8.id(); + return simde_mm_blendv_epi8(Epu8.id(), permutation_of(y, x), x != epu8{}); } // Not noexcept because std::ostream::operator<< isn't diff --git a/include/hpcombi/builder.hpp b/include/hpcombi/builder.hpp new file mode 100644 index 00000000..283b2785 --- /dev/null +++ b/include/hpcombi/builder.hpp @@ -0,0 +1,115 @@ +//////////////////////////////////////////////////////////////////////////////// +// Copyright (C) 2023 Florent Hivert , // +// // +// Distributed under the terms of the GNU General Public License (GPL) // +// // +// This code is distributed in the hope that it will be useful, // +// but WITHOUT ANY WARRANTY; without even the implied warranty of // +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU // +// General Public License for more details. // +// // +// The full text of the GPL is available at: // +// // +// http://www.gnu.org/licenses/ // +//////////////////////////////////////////////////////////////////////////////// + +#ifndef HPCOMBI_BUILDER_HPP_INCLUDED +#define HPCOMBI_BUILDER_HPP_INCLUDED + +namespace HPCombi { + +/** Factory class associated with a SIMD packed unsigned integer type. + * @details + * The main purpose of this class is to be able to construct in a \c constexpr + * way various instances of the \c TPU SIMD vector type. The behavior of + * an instance of \c TPUBuild is designed to mimic the behavior of \c TPU + * if it were a class: + * - calling \c operator() on an instance acts similarly to a + * class constructor, + * - calling a member function such as #id acts as a static member function. + */ +template struct TPUBuild { + + /// Type of the elements + using type_elem = typename std::remove_reference_t; + + /// Size of the elements + static constexpr size_t size_elem = sizeof(type_elem); + + /// Number of elements + static constexpr size_t size = sizeof(TPU) / size_elem; + + /// Array equivalent type + using array = std::array; + + template + static constexpr TPU make_helper(Fun f, std::index_sequence) { + static_assert(std::is_invocable_v); + return TPU{f(Is)...}; + } + + /// Construct a TPU from an \c std::initializer_list and a default value + inline constexpr TPU operator()(std::initializer_list il, + type_elem def) const { + HPCOMBI_ASSERT(il.size() <= size); + array res; + std::copy(il.begin(), il.end(), res.begin()); + std::fill(res.begin() + il.size(), res.end(), def); + return reinterpret_cast(res); + } + + /// Construct a TPU from a function giving the values at \f$0,1,\dots\f$ + template inline constexpr TPU operator()(Fun f) const { + static_assert(std::is_invocable_v); + return make_helper(f, std::make_index_sequence{}); + } + + /// Construct a constant TPU + inline constexpr TPU 
operator()(type_elem c) const { + return operator()([c](auto) { return c; }); + } + /// Explicit overload for \c int constants (converted to \c type_elem) + inline constexpr TPU operator()(int c) const { + return operator()(type_elem(c)); + } + /// Explicit overload for \c size_t constants (converted to \c type_elem) + inline constexpr TPU operator()(size_t c) const { + return operator()(type_elem(c)); + } + + /// Return the identity element of type \c TPU + constexpr TPU id() const { return operator()([](type_elem i) { return i; }); } + /// Return the reversed element of type \c TPU + constexpr TPU rev() const { + return (*this)([](type_elem i) { return size - 1 - i; }); + } + /// Left cycle \c TPU permutation + constexpr TPU left_cycle() const { + return (*this)([](type_elem i) { return (i + size - 1) % size; }); + } + /// Right cycle \c TPU permutation + constexpr TPU right_cycle() const { + return (*this)([](type_elem i) { return (i + 1) % size; }); + } + /// Left shift \c TPU, duplicating the rightmost entry + constexpr TPU left_dup() const { + return (*this)([](type_elem i) { return i == size - 1 ? size - 1 : i + 1; }); + } + /// Right shift \c TPU, duplicating the leftmost entry + constexpr TPU right_dup() const { + return (*this)([](type_elem i) { return i == 0 ? 0 : i - 1; }); + } + /// Popcount \c TPU: the ith entry contains the number of bits set in i + constexpr TPU popcount() const { + return (*this)([](type_elem i) { + return (((i & 0x01) != 0 ? 1 : 0) + ((i & 0x02) != 0 ? 1 : 0) + + ((i & 0x04) != 0 ? 1 : 0) + ((i & 0x08) != 0 ? 1 : 0) + + ((i & 0x10) != 0 ? 1 : 0) + ((i & 0x20) != 0 ? 1 : 0) + + ((i & 0x40) != 0 ? 1 : 0) + ((i & 0x80) != 0 ? 1 : 0)); + }); + } +}; + +} // namespace HPCombi + +#endif // HPCOMBI_BUILDER_HPP_INCLUDED diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index c40f7f3d..18a45f44 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -26,6 +26,7 @@ #include // for make_index_sequence, ind... 
#include "debug.hpp" // for HPCOMBI_ASSERT +#include "builder.hpp" // for TPUBuild #include "vect_generic.hpp" // for VectGeneric #include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde... @@ -49,73 +50,12 @@ static_assert(alignof(epu8) == 16, /// Currently not really used except in experiments using xpu8 = uint8_t __attribute__((vector_size(32))); -namespace detail { // Implementation detail code - -/// Factory object for various SIMD constants in particular constexpr -template struct TPUBuild { - // Type for Packed Unsigned integer (TPU) - using type_elem = typename std::remove_reference_t; - static constexpr size_t size_elem = sizeof(type_elem); - static constexpr size_t size = sizeof(TPU) / size_elem; - - using array = std::array; - - template - static constexpr TPU make_helper(Fun f, std::index_sequence) { - static_assert(std::is_invocable_v); - return TPU{f(Is)...}; - } - - inline TPU operator()(std::initializer_list il, - type_elem def) const { - HPCOMBI_ASSERT(il.size() <= size); - array res; - std::copy(il.begin(), il.end(), res.begin()); - std::fill(res.begin() + il.size(), res.end(), def); - return reinterpret_cast(res); - } - - template inline constexpr TPU operator()(Fun f) const { - static_assert(std::is_invocable_v); - return make_helper(f, std::make_index_sequence{}); - } - - inline constexpr TPU operator()(type_elem c) const { - return make_helper([c](auto) { return c; }, - std::make_index_sequence{}); - } - // explicit overloading for int constants - inline constexpr TPU operator()(int c) const { - return operator()(type_elem(c)); - } - inline constexpr TPU operator()(size_t c) const { - return operator()(type_elem(c)); - } -}; - -} // namespace detail - -// Single instance of the TPUBuild factory object -static constexpr detail::TPUBuild Epu8; - -/// The identity #HPCombi::epu8 -/// The image of i by the identity function -constexpr epu8 epu8id = Epu8([](uint8_t i) { return i; }); -/// The reverted identity #HPCombi::epu8 -constexpr epu8 
epu8rev = Epu8([](uint8_t i) { return 15 - i; }); -/// Left cycle #HPCombi::epu8 permutation -constexpr epu8 left_cycle = Epu8([](uint8_t i) { return (i + 15) % 16; }); -/// Right cycle #HPCombi::epu8 permutation -constexpr epu8 right_cycle = Epu8([](uint8_t i) { return (i + 1) % 16; }); -/// Left shift #HPCombi::epu8, duplicating the rightmost entry -constexpr epu8 left_dup = Epu8([](uint8_t i) { return i == 15 ? 15 : i + 1; }); -/// Right shift #HPCombi::epu8, duplicating the leftmost entry -constexpr epu8 right_dup = Epu8([](uint8_t i) { return i == 0 ? 0 : i - 1; }); -/// Popcount #HPCombi::epu8: the ith entry contains the number of bits set in i -constexpr epu8 popcount4 = Epu8([](uint8_t i) { - return ((i & 1) != 0 ? 1 : 0) + ((i & 2) != 0 ? 1 : 0) + - ((i & 4) != 0 ? 1 : 0) + ((i & 8) != 0 ? 1 : 0); -}); + +/** Factory object acting as a class constructor for type #HPCombi::epu8. + * see #HPCombi::TPUBuild for usage and capability + */ +constexpr TPUBuild Epu8 {}; + /** Cast a #HPCombi::epu8 to a c++ \c std::array * @@ -189,7 +129,7 @@ inline epu8 shifted_right(epu8 a) noexcept { */ inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ -inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); } +inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); } /** Vector min between two #HPCombi::epu8 0 */ inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); } diff --git a/include/hpcombi/epu8_impl.hpp b/include/hpcombi/epu8_impl.hpp index b6da4688..eeb5da64 100644 --- a/include/hpcombi/epu8_impl.hpp +++ b/include/hpcombi/epu8_impl.hpp @@ -59,11 +59,11 @@ inline epu8 permuted_ref(epu8 a, epu8 b) noexcept { // Msk is supposed to be a boolean mask (i.e. 
each entry is either 0 or 255) inline uint64_t first_mask(epu8 msk, size_t bound) { - uint64_t res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + uint64_t res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound))); return res == 0 ? 16 : (__builtin_ffsll(res) - 1); } inline uint64_t last_mask(epu8 msk, size_t bound) { - auto res = simde_mm_movemask_epi8(msk & (epu8id < Epu8(bound))); + auto res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound))); return res == 0 ? 16 : (63 - __builtin_clzll(res)); } @@ -128,7 +128,7 @@ template inline epu8 network_sort(epu8 res, std::array rounds) { for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increasing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round; epu8 b = permuted(res, round); // res = mask ? min(res,b) : max(res,b); is not accepted by clang res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask); @@ -139,10 +139,10 @@ inline epu8 network_sort(epu8 res, std::array rounds) { /// Apply a sorting network in place and return the permutation template inline epu8 network_sort_perm(epu8 &v, std::array rounds) { - epu8 res = epu8id; + epu8 res = Epu8.id(); for (auto round : rounds) { // This conditional should be optimized out by the compiler - epu8 mask = Increasing ? round < epu8id : epu8id < round; + epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round; epu8 b = permuted(v, round); epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask); v = simde_mm_blendv_epi8(v, b, cmp); @@ -231,7 +231,7 @@ inline void merge_rev(epu8 &a, epu8 &b) noexcept { b = network_sort(b, merge_rounds); } inline void merge(epu8 &a, epu8 &b) noexcept { - a = permuted(a, epu8rev); + a = permuted(a, Epu8.rev()); merge_rev(a, b); } // TODO : AVX2 version. 
@@ -453,10 +453,10 @@ inline epu8 eval16_gen(epu8 v) noexcept { return from_array(as_VectGeneric(v).eval().v); } inline epu8 eval16_cycle(epu8 v) noexcept { - epu8 res = -(epu8id == v); + epu8 res = -(Epu8.id() == v); for (int i = 1; i < 16; i++) { - v = permuted(v, left_cycle); - res -= (epu8id == v); + v = permuted(v, Epu8.left_cycle()); + res -= (Epu8.id() == v); } return res; } @@ -470,11 +470,12 @@ inline epu8 eval16_popcount(epu8 v) noexcept { } inline epu8 popcount16(epu8 v) noexcept { - return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); + return (permuted(Epu8.popcount(), v & Epu8(0x0f)) + + permuted(Epu8.popcount(), v >> 4)); } inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) && @@ -482,13 +483,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { } inline bool is_transformation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 // (v = Perm16::one() or last diff index < 16) @@ -499,22 +500,22 @@ inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); // (forall x in v, x in Perm16::one()) and // (forall x in Perm16::one(), x in v) and // (v = 
Perm16::one() or last diff index < 16) - return _mm_cmpestri(epu8id, 16, v, 16, FIRST_NON_ZERO) == 16 && - _mm_cmpestri(v, 16, epu8id, 16, FIRST_NON_ZERO) == 16 && + return _mm_cmpestri(Epu8.id(), 16, v, 16, FIRST_NON_ZERO) == 16 && + _mm_cmpestri(v, 16, Epu8.id(), 16, FIRST_NON_ZERO) == 16 && (diff == 16 || diff < k); } #endif inline bool is_permutation_sort(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); - return equal(sorted(v), epu8id) && (diff == 16 || diff < k); + uint64_t diff = last_diff(v, Epu8.id(), 16); + return equal(sorted(v), Epu8.id()) && (diff == 16 || diff < k); } inline bool is_permutation_eval(epu8 v, const size_t k) noexcept { - uint64_t diff = last_diff(v, epu8id, 16); + uint64_t diff = last_diff(v, Epu8.id(), 16); return equal(eval16(v), Epu8({}, 1)) && (diff == 16 || diff < k); } diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp index d6a02376..bd70fdb9 100644 --- a/include/hpcombi/perm16.hpp +++ b/include/hpcombi/perm16.hpp @@ -60,7 +60,7 @@ struct alignas(16) PTransf16 : public Vect16 { } //! The identity partial transformation. - static constexpr PTransf16 one() { return epu8id; } + static constexpr PTransf16 one() { return Epu8.id(); } //! The product of two partial transformations. PTransf16 operator*(const PTransf16 &p) const { return HPCombi::permuted(v, p.v) | (p.v == Epu8(0xFF)); @@ -129,7 +129,7 @@ struct Transf16 : public PTransf16 { } //! The identity transformation. - static constexpr Transf16 one() { return epu8id; } + static constexpr Transf16 one() { return Epu8.id(); } //! The product of two transformations. Transf16 operator*(const Transf16 &p) const { return HPCombi::permuted(v, p.v); @@ -159,7 +159,7 @@ struct PPerm16 : public PTransf16 { } //! The identity partial permutations. - static constexpr PPerm16 one() { return epu8id; } + static constexpr PPerm16 one() { return Epu8.id(); } //! The product of two partial perrmutations. 
PPerm16 operator*(const PPerm16 &p) const { return this->PTransf16::operator*(p); @@ -216,7 +216,7 @@ struct Perm16 : public Transf16 /* public PPerm : diamond problem */ { // being defined (see https://stackoverflow.com/questions/11928089/) // therefore we chose to have functions. //! The identity partial permutation. - static constexpr Perm16 one() { return epu8id; } + static constexpr Perm16 one() { return Epu8.id(); } //! The product of two permutations Perm16 operator*(const Perm16 &p) const { return HPCombi::permuted(v, p.v); diff --git a/include/hpcombi/perm16_impl.hpp b/include/hpcombi/perm16_impl.hpp index 3e3f96ee..368425ee 100644 --- a/include/hpcombi/perm16_impl.hpp +++ b/include/hpcombi/perm16_impl.hpp @@ -20,7 +20,7 @@ namespace HPCombi { /////////////////////////////////////////////////////////////////////////////// inline PTransf16::PTransf16(std::initializer_list il) - : Vect16(epu8id) { + : Vect16(Epu8.id()) { HPCOMBI_ASSERT(il.size() <= 16); std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin()); } @@ -43,7 +43,7 @@ inline uint32_t PTransf16::domain_bitset(bool complement) const { return simde_mm_movemask_epi8(domain_mask(complement)); } inline PTransf16 PTransf16::right_one() const { - return domain_mask(true) | epu8id; + return domain_mask(true) | Epu8.id(); } #ifdef SIMDE_X86_SSE4_2_NATIVE @@ -64,7 +64,7 @@ inline uint32_t PTransf16::image_bitset(bool complement) const { return simde_mm_movemask_epi8(image_mask(complement)); } inline PTransf16 PTransf16::left_one() const { - return image_mask(true) | epu8id; + return image_mask(true) | Epu8.id(); } inline uint32_t PTransf16::rank_ref() const { decltype(Epu8)::array tmp{}; @@ -349,7 +349,7 @@ inline epu8 Perm16::cycles_partition() const { } inline uint8_t Perm16::nb_cycles_unroll() const { - epu8 res = (epu8id == cycles_partition()); + epu8 res = (Epu8.id() == cycles_partition()); return __builtin_popcountl(simde_mm_movemask_epi8(res)); } diff --git a/tests/test_epu8.cpp 
b/tests/test_epu8.cpp index cd3ab5b7..ac689cc7 100644 --- a/tests/test_epu8.cpp +++ b/tests/test_epu8.cpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2016-2018 Florent Hivert , // +// Copyright (C) 2016-2023 Florent Hivert , // // // // Distributed under the terms of the GNU General Public License (GPL) // // // @@ -40,8 +40,8 @@ struct Fix { Pw(epu8{5, 5, 2, 9, 1, 6, 12, 4, 0, 4, 4, 4, 12, 13, 14, 15}), P5(Epu8({}, 5)), Pc(Epu8({23, 5, 21, 5, 43, 36}, 7)), // Elements should be sorted in alphabetic order here - v({zero, P01, epu8id, P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, - Pv, Pw, P5, epu8rev, Pc}), + v({zero, P01, Epu8.id(), P10, P11, P1, P112, Pa, Pb, RP, Pa1, Pa2, P51, + Pv, Pw, P5, Epu8.rev(), Pc}), av({{5, 5, 2, 5, 1, 6, 12, 4, 0, 3, 2, 11, 12, 13, 14, 15}}) {} ~Fix() = default; @@ -222,7 +222,7 @@ TEST_CASE_METHOD(Fix, "Epu8::shifted_right", "[Epu8][014]") { } TEST_CASE_METHOD(Fix, "Epu8::reverted", "[Epu8][015]") { - CHECK_THAT(reverted(epu8id), Equals(epu8rev)); + CHECK_THAT(reverted(Epu8.id()), Equals(Epu8.rev())); for (auto x : v) { CHECK_THAT(x, Equals(reverted(reverted(x)))); } @@ -246,7 +246,7 @@ TEST_CASE_METHOD(Fix, "Epu8::from_array", "[Epu8][017]") { } TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { - CHECK(is_sorted(epu8id)); + CHECK(is_sorted(Epu8.id())); CHECK( is_sorted(epu8{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15})); CHECK(is_sorted(Epu8({0, 1}, 2))); @@ -258,7 +258,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { CHECK(!is_sorted(Epu8({0, 0, 2}, 1))); CHECK(!is_sorted(Epu8({6}, 5))); - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK(is_sorted(x)); auto &refx = as_array(x); #ifndef __clang__ @@ -268,7 +268,7 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { while (std::next_permutation(refx.begin(), refx.begin() + 9)) { CHECK(!is_sorted(x)); } - x = epu8id; + x = Epu8.id(); while (std::next_permutation(refx.begin() + 8, 
refx.begin() + 16)) { CHECK(!is_sorted(x)); } @@ -285,11 +285,11 @@ TEST_CASE_METHOD(Fix, "Epu8::is_sorted", "[Epu8][018]") { TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { CHECK_THAT( sorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - Equals(epu8id)); + Equals(Epu8.id())); for (auto &x : v) { CHECK_THAT(sorted(x), IsSorted); } - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK_THAT(sorted(x), IsSorted); auto &refx = as_array(x); do { @@ -307,11 +307,11 @@ TEST_CASE_METHOD(Fix, "Epu8::sorted", "[Epu8][019]") { TEST_CASE_METHOD(Fix, "Epu8::revsorted", "[Epu8][020]") { CHECK_THAT( revsorted(epu8{0, 1, 3, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}), - Equals(epu8rev)); + Equals(Epu8.rev())); for (auto &x : v) { CHECK_THAT(reverted(revsorted(x)), IsSorted); } - epu8 x = epu8id; + epu8 x = Epu8.id(); CHECK_THAT(x, IsSorted); auto &refx = as_array(x); do { @@ -360,12 +360,12 @@ TEST_CASE_METHOD(Fix, "Epu8::sort8_perm", "[Epu8][022]") { } TEST_CASE_METHOD(Fix, "Epu8::permutation_of", "[Epu8][023]") { - CHECK_THAT(permutation_of(epu8id, epu8id), Equals(epu8id)); - CHECK_THAT(permutation_of(Pa, Pa), Equals(epu8id)); - CHECK_THAT(permutation_of(epu8rev, epu8id), Equals(epu8rev)); - CHECK_THAT(permutation_of(epu8id, epu8rev), Equals(epu8rev)); - CHECK_THAT(permutation_of(epu8rev, epu8rev), Equals(epu8id)); - CHECK_THAT(permutation_of(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of(Epu8.id(), Epu8.id()), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Pa, Pa), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Epu8.rev(), Epu8.id()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of(Epu8.id(), Epu8.rev()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of(Epu8.rev(), Epu8.rev()), Equals(Epu8.id())); + CHECK_THAT(permutation_of(Epu8.id(), RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), @@ -373,12 +373,12 @@ TEST_CASE_METHOD(Fix, "Epu8::permutation_of", 
"[Epu8][023]") { 14, 15})); } TEST_CASE_METHOD(Fix, "Epu8::permutation_of_ref", "[Epu8][024]") { - CHECK_THAT(permutation_of_ref(epu8id, epu8id), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(epu8rev, epu8id), Equals(epu8rev)); - CHECK_THAT(permutation_of_ref(epu8id, epu8rev), Equals(epu8rev)); - CHECK_THAT(permutation_of_ref(epu8rev, epu8rev), Equals(epu8id)); - CHECK_THAT(permutation_of_ref(epu8id, RP), Equals(RP)); + CHECK_THAT(permutation_of_ref(Epu8.id(), Epu8.id()), Equals(Epu8.id())); + CHECK_THAT(permutation_of_ref(Pa, Pa), Equals(Epu8.id())); + CHECK_THAT(permutation_of_ref(Epu8.rev(), Epu8.id()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of_ref(Epu8.id(), Epu8.rev()), Equals(Epu8.rev())); + CHECK_THAT(permutation_of_ref(Epu8.rev(), Epu8.rev()), Equals(Epu8.id())); + CHECK_THAT(permutation_of_ref(Epu8.id(), RP), Equals(RP)); const uint8_t FF = 0xff; CHECK_THAT((permutation_of_ref(Pv, Pv) | epu8{FF, FF, FF, FF, 0, 0, FF, 0, 0, 0, FF, 0, FF, 0, 0, 0}), @@ -434,7 +434,7 @@ TEST_CASE_METHOD(Fix, "Epu8::remove_dups", "[Epu8][026]") { TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][027]") { CHECK(horiz_sum_ref(zero) == 0); CHECK(horiz_sum_ref(P01) == 1); - CHECK(horiz_sum_ref(epu8id) == 120); + CHECK(horiz_sum_ref(Epu8.id()) == 120); CHECK(horiz_sum_ref(P10) == 1); CHECK(horiz_sum_ref(P11) == 2); CHECK(horiz_sum_ref(P1) == 16); @@ -444,7 +444,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_sum_ref", "[Epu8][027]") { CHECK(horiz_sum_ref(P51) == 90); CHECK(horiz_sum_ref(Pv) == 110); CHECK(horiz_sum_ref(P5) == 80); - CHECK(horiz_sum_ref(epu8rev) == 120); + CHECK(horiz_sum_ref(Epu8.rev()) == 120); CHECK(horiz_sum_ref(Pc) == 203); } @@ -456,12 +456,12 @@ TEST_AGREES_FUN(Fix, horiz_sum_ref, horiz_sum, v, "[Epu8][031]") TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][032]") { CHECK_THAT(partial_sums_ref(zero), Equals(zero)); CHECK_THAT(partial_sums_ref(P01), Equals(Epu8({0}, 1))); - 
CHECK_THAT(partial_sums_ref(epu8id), + CHECK_THAT(partial_sums_ref(Epu8.id()), Equals(epu8{0, 1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 66, 78, 91, 105, 120})); CHECK_THAT(partial_sums_ref(P10), Equals(P1)); CHECK_THAT(partial_sums_ref(P11), Equals(Epu8({1}, 2))); - CHECK_THAT(partial_sums_ref(P1), Equals(epu8id + Epu8({}, 1))); + CHECK_THAT(partial_sums_ref(P1), Equals(Epu8.id() + Epu8({}, 1))); CHECK_THAT(partial_sums_ref(P112), Equals(epu8{1, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30})); @@ -481,7 +481,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_sums_ref", "[Epu8][032]") { CHECK_THAT(partial_sums_ref(P5), Equals(epu8{5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80})); - CHECK_THAT(partial_sums_ref(epu8rev), + CHECK_THAT(partial_sums_ref(Epu8.rev()), Equals(epu8{15, 29, 42, 54, 65, 75, 84, 92, 99, 105, 110, 114, 117, 119, 120, 120})); CHECK_THAT(partial_sums_ref(Pc), @@ -496,7 +496,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_sums_ref, partial_sums, v, "[Epu8][035]") TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][036]") { CHECK(horiz_max_ref(zero) == 0); CHECK(horiz_max_ref(P01) == 1); - CHECK(horiz_max_ref(epu8id) == 15); + CHECK(horiz_max_ref(Epu8.id()) == 15); CHECK(horiz_max_ref(P10) == 1); CHECK(horiz_max_ref(P11) == 1); CHECK(horiz_max_ref(P1) == 1); @@ -506,7 +506,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_max_ref", "[Epu8][036]") { CHECK(horiz_max_ref(P51) == 6); CHECK(horiz_max_ref(Pv) == 15); CHECK(horiz_max_ref(P5) == 5); - CHECK(horiz_max_ref(epu8rev) == 15); + CHECK(horiz_max_ref(Epu8.rev()) == 15); CHECK(horiz_max_ref(Pc) == 43); } @@ -518,7 +518,7 @@ TEST_AGREES_FUN(Fix, horiz_max_ref, horiz_max, v, "[Epu8][040]") TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][041]") { CHECK_THAT(partial_max_ref(zero), Equals(zero)); CHECK_THAT(partial_max_ref(P01), Equals(Epu8({0}, 1))); - CHECK_THAT(partial_max_ref(epu8id), Equals(epu8id)); + CHECK_THAT(partial_max_ref(Epu8.id()), Equals(Epu8.id())); CHECK_THAT(partial_max_ref(P10), 
Equals(P1)); CHECK_THAT(partial_max_ref(P11), Equals(P1)); CHECK_THAT(partial_max_ref(P1), Equals(P1)); @@ -529,7 +529,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_max_ref", "[Epu8][041]") { CHECK_THAT(partial_max_ref(Pv), Equals(epu8{5, 5, 5, 5, 5, 6, 12, 12, 12, 12, 12, 12, 12, 13, 14, 15})); CHECK_THAT(partial_max_ref(P5), Equals(P5)); - CHECK_THAT(partial_max_ref(epu8rev), Equals(Epu8({}, 15))); + CHECK_THAT(partial_max_ref(Epu8.rev()), Equals(Epu8({}, 15))); CHECK_THAT(partial_max_ref(Pc), Equals(Epu8({23, 23, 23, 23}, 43))); } TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max_gen, v, "[Epu8][042]") @@ -539,7 +539,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_max_ref, partial_max, v, "[Epu8][044]") TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][045]") { CHECK(horiz_min_ref(zero) == 0); CHECK(horiz_min_ref(P01) == 0); - CHECK(horiz_min_ref(epu8id) == 0); + CHECK(horiz_min_ref(Epu8.id()) == 0); CHECK(horiz_min_ref(P10) == 0); CHECK(horiz_min_ref(P11) == 0); CHECK(horiz_min_ref(P1) == 1); @@ -549,7 +549,7 @@ TEST_CASE_METHOD(Fix, "Epu8::horiz_min_ref", "[Epu8][045]") { CHECK(horiz_min_ref(P51) == 1); CHECK(horiz_min_ref(Pv) == 0); CHECK(horiz_min_ref(P5) == 5); - CHECK(horiz_min_ref(epu8rev) == 0); + CHECK(horiz_min_ref(Epu8.rev()) == 0); CHECK(horiz_min_ref(Pc) == 5); } @@ -561,7 +561,7 @@ TEST_AGREES_FUN(Fix, horiz_min_ref, horiz_min, v, "[Epu8][049]") TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][050]") { CHECK_THAT(partial_min_ref(zero), Equals(zero)); CHECK_THAT(partial_min_ref(P01), Equals(zero)); - CHECK_THAT(partial_min_ref(epu8id), Equals(zero)); + CHECK_THAT(partial_min_ref(Epu8.id()), Equals(zero)); CHECK_THAT(partial_min_ref(P10), Equals(P10)); CHECK_THAT(partial_min_ref(P11), Equals(P11)); CHECK_THAT(partial_min_ref(P1), Equals(P1)); @@ -573,7 +573,7 @@ TEST_CASE_METHOD(Fix, "Epu8::partial_min_ref", "[Epu8][050]") { Equals(Epu8({5, 5, 2, 2, 1, 1, 1, 1, }, 0))); // clang-format on CHECK_THAT(partial_min_ref(P5), Equals(P5)); - 
CHECK_THAT(partial_min_ref(epu8rev), Equals(epu8rev)); + CHECK_THAT(partial_min_ref(Epu8.rev()), Equals(Epu8.rev())); CHECK_THAT(partial_min_ref(Pc), Equals(Epu8({23}, 5))); } TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min_gen, v, "[Epu8][051]") @@ -583,7 +583,7 @@ TEST_AGREES_FUN_EPU8(Fix, partial_min_ref, partial_min, v, "[Epu8][053]") TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][054]") { CHECK_THAT(eval16_ref(zero), Equals(Epu8({16}, 0))); CHECK_THAT(eval16_ref(P01), Equals(Epu8({15, 1}, 0))); - CHECK_THAT(eval16_ref(epu8id), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(Epu8.id()), Equals(Epu8({}, 1))); CHECK_THAT(eval16_ref(P10), Equals(Epu8({15, 1}, 0))); CHECK_THAT(eval16_ref(P11), Equals(Epu8({14, 2}, 0))); CHECK_THAT(eval16_ref(P1), Equals(Epu8({0, 16}, 0))); @@ -595,7 +595,7 @@ TEST_CASE_METHOD(Fix, "Epu8::eval16_ref", "[Epu8][054]") { CHECK_THAT(eval16_ref(Pv), Equals(epu8{1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1})); CHECK_THAT(eval16_ref(P5), Equals(Epu8({0, 0, 0, 0, 0, 16}, 0))); - CHECK_THAT(eval16_ref(epu8rev), Equals(Epu8({}, 1))); + CHECK_THAT(eval16_ref(Epu8.rev()), Equals(Epu8({}, 1))); CHECK_THAT(eval16_ref(Pc), Equals(Epu8({0, 0, 0, 0, 0, 2, 0, 10}, 0))); } @@ -605,8 +605,8 @@ TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_arr, v, "[Epu8][057]") TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16_gen, v, "[Epu8][058]") TEST_AGREES_FUN_EPU8(Fix, eval16_ref, eval16, v, "[Epu8][059]") -TEST_CASE("Epu8::popcount4", "[Epu8][060]") { - CHECK_THAT(popcount4, +TEST_CASE("Epu8::popcount", "[Epu8][060]") { + CHECK_THAT(Epu8.popcount(), Equals(epu8{0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4})); } diff --git a/tests/test_perm16.cpp b/tests/test_perm16.cpp index 5e8f5255..d58fd6c6 100644 --- a/tests/test_perm16.cpp +++ b/tests/test_perm16.cpp @@ -1,5 +1,5 @@ //////////////////////////////////////////////////////////////////////////////// -// Copyright (C) 2017 Florent Hivert , // +// Copyright (C) 2017-2023 Florent Hivert , // // // // 
Distributed under the terms of the GNU General Public License (GPL) // // // @@ -24,7 +24,7 @@ const uint8_t FF = 0xff; namespace { std::vector all_perms(uint8_t sz) { std::vector res{}; - epu8 x = HPCombi::epu8id; + epu8 x = HPCombi::Epu8.id(); res.push_back(x); auto &refx = HPCombi::as_array(x); #ifndef __clang__