From 8f085ec37cefb7a50e47edb443dce8f2f328e995 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Sun, 29 Oct 2023 16:58:23 +0000
Subject: [PATCH] noexcept in epu

---
 include/hpcombi/epu.hpp          | 154 +++++++++++++++----------------
 include/hpcombi/epu_impl.hpp     | 122 ++++++++++++------------
 include/hpcombi/vect_generic.hpp |   4 +-
 3 files changed, 141 insertions(+), 139 deletions(-)

diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp
index 8d24905f..3f99db02 100644
--- a/include/hpcombi/epu.hpp
+++ b/include/hpcombi/epu.hpp
@@ -120,21 +120,21 @@ constexpr epu8 popcount4 = Epu8([](uint8_t i) {
  *
  * This is usually faster for algorithm using a lot of indexed access.
  */
-inline decltype(Epu8)::array &as_array(epu8 &v) {
+inline decltype(Epu8)::array &as_array(epu8 &v) noexcept {
     return reinterpret_cast<decltype(Epu8)::array &>(v);
 }
 /** Cast a constant #HPCombi::epu8 to a C++ \c std::array
  *
  * This is usually faster for algorithm using a lot of indexed access.
  */
-inline const decltype(Epu8)::array &as_array(const epu8 &v) {
+inline const decltype(Epu8)::array &as_array(const epu8 &v) noexcept {
     return reinterpret_cast<const decltype(Epu8)::array &>(v);
 }
 /** Cast a C++ \c std::array to a #HPCombi::epu8 */
 // Passing the argument by reference triggers a segfault in gcc
 // Since vector types doesn't belongs to the standard, I didn't manage
 // to know if I'm using undefined behavior here.
-inline epu8 from_array(decltype(Epu8)::array a) {
+inline epu8 from_array(decltype(Epu8)::array a) noexcept {
     return reinterpret_cast<const epu8 &>(a);
 }
@@ -155,70 +155,70 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) {
 }
 /** Test whether all the entries of a #HPCombi::epu8 are zero */
-inline bool is_all_zero(epu8 a) { return simde_mm_testz_si128(a, a); }
+inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
 /** Test whether all the entries of a #HPCombi::epu8 are one */
-inline bool is_all_one(epu8 a) { return simde_mm_testc_si128(a, Epu8(0xFF)); }
+inline bool is_all_one(epu8 a) noexcept { return simde_mm_testc_si128(a, Epu8(0xFF)); }
 /** Equality of #HPCombi::epu8 */
-inline bool equal(epu8 a, epu8 b) {
+inline bool equal(epu8 a, epu8 b) noexcept {
     return is_all_zero(simde_mm_xor_si128(a, b));
 }
 /** Non equality of #HPCombi::epu8 */
-inline bool not_equal(epu8 a, epu8 b) { return !equal(a, b); }
+inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
 /** Permuting a #HPCombi::epu8 */
-inline epu8 permuted(epu8 a, epu8 b) { return simde_mm_shuffle_epi8(a, b); }
+inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); }
 /** Left shifted of a #HPCombi::epu8 inserting a 0
  * @warning we use the convention that the 0 entry is on the left !
  */
-inline epu8 shifted_right(epu8 a) { return simde_mm_bslli_si128(a, 1); }
+inline epu8 shifted_right(epu8 a) noexcept { return simde_mm_bslli_si128(a, 1); }
 /** Right shifted of a #HPCombi::epu8 inserting a 0
  * @warning we use the convention that the 0 entry is on the left !
*/ -inline epu8 shifted_left(epu8 a) { return simde_mm_bsrli_si128(a, 1); } +inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ -inline epu8 reverted(epu8 a) { return permuted(a, epu8rev); } +inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); } /** Vector min between two #HPCombi::epu8 0 */ -inline epu8 min(epu8 a, epu8 b) { return simde_mm_min_epu8(a, b); } +inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); } /** Vector max between two #HPCombi::epu8 0 */ -inline epu8 max(epu8 a, epu8 b) { return simde_mm_max_epu8(a, b); } +inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); } /** Testing if a #HPCombi::epu8 is sorted */ -inline bool is_sorted(epu8 a); +inline bool is_sorted(epu8 a) noexcept; /** Return a sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 sorted(epu8 a); +inline epu8 sorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 sorted8(epu8 a); +inline epu8 sorted8(epu8 a) noexcept; /** Return a reverse sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 revsorted(epu8 a); +inline epu8 revsorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half reverse sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 revsorted8(epu8 a); +inline epu8 revsorted8(epu8 a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort_perm(epu8 &a); +inline epu8 sort_perm(epu8 &a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort8_perm(epu8 &a); +inline epu8 sort8_perm(epu8 &a) noexcept; /** @class common_permutation_of * @brief Find if a vector is a permutation of one other @@ -232,16 +232,16 @@ inline epu8 sort8_perm(epu8 &a); /** @copydoc common_permutation_of @par Algorithm: uses string matching cpmestrm intrisics */ -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b); +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept; #endif /** @copydoc common_permutation_of @par Algorithm: reference implementation */ -inline epu8 permutation_of_ref(epu8 a, epu8 b); +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept; /** @copydoc common_permutation_of @par Algorithm: architecture dependent */ -inline epu8 permutation_of(epu8 a, epu8 b); +inline epu8 permutation_of(epu8 a, epu8 b) noexcept; /** A prime number good for hashing */ constexpr uint64_t prime = 0x9e3779b97f4a7bb9; @@ -262,7 +262,7 @@ inline epu8 random_epu8(uint16_t bnd); * @return the vector \c a where repeated occurrences of entries are replaced * by \c repl */ -inline epu8 remove_dups(epu8 a, uint8_t repl = 0); +inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept; /** @class common_horiz_sum * @brief Horizontal sum of a #HPCombi::epu8 @@ -279,25 +279,25 @@ inline epu8 remove_dups(epu8 a, uint8_t repl = 0); * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_sum_ref(epu8); +inline uint8_t horiz_sum_ref(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access 
* through #HPCombi::VectGeneric */ -inline uint8_t horiz_sum_gen(epu8); +inline uint8_t horiz_sum_gen(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_sum4(epu8); +inline uint8_t horiz_sum4(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_sum3(epu8); +inline uint8_t horiz_sum3(epu8) noexcept; /** @copydoc common_horiz_sum */ -inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } +inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); } /** @class common_partial_sums * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -313,20 +313,20 @@ inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_sums_ref(epu8); +inline epu8 partial_sums_ref(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_sums_gen(epu8); +inline epu8 partial_sums_gen(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_sums_round(epu8); +inline epu8 partial_sums_round(epu8) noexcept; /** @copydoc common_partial_sums */ -inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } +inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); } /** @class common_horiz_max * @brief Horizontal sum of a #HPCombi::epu8 @@ -342,25 +342,25 @@ inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_max_ref(epu8); +inline uint8_t horiz_max_ref(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_max_gen(epu8); +inline uint8_t horiz_max_gen(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_max4(epu8); +inline uint8_t horiz_max4(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_max3(epu8); +inline uint8_t horiz_max3(epu8) noexcept; /** @copydoc common_horiz_max */ -inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } +inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); } /** @class common_partial_max * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -376,20 +376,20 @@ inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_max_ref(epu8); +inline epu8 partial_max_ref(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_max_gen(epu8); +inline epu8 partial_max_gen(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_max_round(epu8); +inline epu8 partial_max_round(epu8) noexcept; /** @copydoc common_partial_max */ -inline epu8 partial_max(epu8 v) { return partial_max_round(v); } +inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); } /** @class common_horiz_min * @brief Horizontal sum of a #HPCombi::epu8 
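To make the intended use of the reductions declared above concrete, here is a minimal sketch exercising horiz_sum, partial_sums, horiz_max and partial_max. The "hpcombi/epu.hpp" include path and the main() harness are assumptions of this sketch, not part of the patch; every call shown is one of the functions made noexcept above.

#include <cassert>
#include "hpcombi/epu.hpp"

int main() {
    using namespace HPCombi;
    epu8 ones = Epu8(1);                 // constant vector: sixteen entries equal to 1
    epu8 ps = partial_sums(ones);        // prefix sums 1, 2, 3, ..., 16
    assert(ps[0] == 1 && ps[15] == 16);
    assert(horiz_sum(ones) == 16);       // horizontal sum of sixteen ones
    assert(horiz_max(ps) == 16);         // largest entry is the last prefix sum
    assert(equal(partial_max(ps), ps));  // prefix maxima of an increasing vector
    return 0;
}

Since these overloads are now noexcept, such calls can also appear inside noexcept functions of client code without weakening their own exception specification.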
@@ -405,25 +405,25 @@ inline epu8 partial_max(epu8 v) { return partial_max_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_min_ref(epu8); +inline uint8_t horiz_min_ref(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_min_gen(epu8); +inline uint8_t horiz_min_gen(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_min4(epu8); +inline uint8_t horiz_min4(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_min3(epu8); +inline uint8_t horiz_min3(epu8) noexcept; /** @copydoc common_horiz_min */ -inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } +inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); } /** @class common_partial_min * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -439,20 +439,20 @@ inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_min_ref(epu8); +inline epu8 partial_min_ref(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_min_gen(epu8); +inline epu8 partial_min_gen(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_min_round(epu8); +inline epu8 partial_min_round(epu8) noexcept; /** @copydoc common_partial_min */ -inline epu8 partial_min(epu8 v) { return partial_min_round(v); } +inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); } /** @class common_eval16 * @brief Evaluation of a #HPCombi::epu8 @@ -471,24 +471,24 @@ inline epu8 partial_min(epu8 v) { return partial_min_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 eval16_ref(epu8 v); +inline epu8 eval16_ref(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and cast to array */ -inline epu8 eval16_arr(epu8 v); +inline epu8 eval16_arr(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using cyclic shifting */ -inline epu8 eval16_cycle(epu8 v); +inline epu8 eval16_cycle(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using popcount */ -inline epu8 eval16_popcount(epu8 v); +inline epu8 eval16_popcount(epu8 v) noexcept; /** @copydoc common_eval16 */ -inline epu8 eval16(epu8 v) { return eval16_cycle(v); } +inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); } /** @class common_first_diff * @brief The first difference between two #HPCombi::epu8 @@ -512,21 +512,21 @@ inline epu8 eval16(epu8 v) { return eval16_cycle(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_first_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc 
common_first_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_first_diff */ -inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return first_diff_mask(a, b, bound); } @@ -552,53 +552,53 @@ inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_last_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc common_last_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_last_diff */ -inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return last_diff_mask(a, b, bound); } /** Lexicographic comparison between two #HPCombi::epu8 */ -inline bool less(epu8 a, epu8 b); +inline bool less(epu8 a, epu8 b) noexcept; /** Partial lexicographic comparison between two #HPCombi::epu8 * @param a, b : the vectors to compare * @param k : the bound for the lexicographic comparison * @return a positive, negative or zero int8_t depending on the result */ -inline int8_t less_partial(epu8 a, epu8 b, int k); +inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept; /** return the index of the first zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_zero(epu8 v, int bnd); +inline uint64_t first_zero(epu8 v, int bnd) noexcept; /** return the index of the last zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_zero(epu8 v, int bnd); +inline uint64_t last_zero(epu8 v, int bnd) noexcept; /** return the index of the first non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_non_zero(epu8 v, int bnd); +inline uint64_t first_non_zero(epu8 v, int bnd) noexcept; /** return the index of the last non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_non_zero(epu8 v, int bnd); +inline uint64_t last_non_zero(epu8 v, int bnd) noexcept; /** a vector popcount function */ -inline epu8 popcount16(epu8 v); +inline epu8 popcount16(epu8 v) noexcept; /** Test for partial transformation * @details @@ -615,7 +615,7 @@ inline epu8 popcount16(epu8 v); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 
4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_transformation(epu8 v, const size_t k = 16); +inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for transformation * @details @@ -632,7 +632,7 @@ inline bool is_partial_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_transformation(epu8 v, const size_t k = 16); +inline bool is_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for partial permutations * @details @@ -650,7 +650,7 @@ inline bool is_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xFF,0xFF,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_permutation(epu8 v, const size_t k = 16); +inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept; /** @class common_is_permutation * @details @@ -671,16 +671,16 @@ inline bool is_partial_permutation(epu8 v, const size_t k = 16); /** @copydoc common_is_permutation @par Algorithm: uses string matching cpmestri intrisics */ -inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16); +inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept; #endif /** @copydoc common_is_permutation @par Algorithm: sort the vector and compare to identity */ -inline bool is_permutation_sort(epu8 v, const size_t k = 16); +inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept; /** @copydoc common_is_permutation @par Algorithm: architecture dependent */ -inline bool is_permutation(epu8 v, const size_t k = 16); +inline bool is_permutation(epu8 v, const size_t k = 16) noexcept; } // namespace HPCombi diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 75501df3..d89ef989 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -59,22 +59,22 @@ inline uint64_t last_mask(epu8 msk, size_t bound) { return res == 0 ? 
16 : (63 - __builtin_clzll(res));
 }
-inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
     for (size_t i = 0; i < bound; i++)
         if (a[i] != b[i])
             return i;
     return 16;
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
     return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF));
 }
 #endif
-inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
     return first_mask(a != b, bound);
 }
-inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
     while (bound != 0) {
         --bound;
         if (a[bound] != b[bound])
@@ -83,35 +83,35 @@ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) {
     return 16;
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
     return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF));
 }
 #endif
-inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
     return last_mask(a != b, bound);
 }
-inline bool less(epu8 a, epu8 b) {
+inline bool less(epu8 a, epu8 b) noexcept {
     uint64_t diff = first_diff(a, b);
     return (diff < 16) && (a[diff] < b[diff]);
 }
-inline int8_t less_partial(epu8 a, epu8 b, int k) {
+inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept {
     uint64_t diff = first_diff(a, b, k);
     return (diff == 16) ? 0
                         : static_cast<int8_t>(a[diff]) - static_cast<int8_t>(b[diff]);
 }
-inline uint64_t first_zero(epu8 v, int bnd) {
+inline uint64_t first_zero(epu8 v, int bnd) noexcept {
     return first_mask(v == epu8{}, bnd);
 }
-inline uint64_t last_zero(epu8 v, int bnd) {
+inline uint64_t last_zero(epu8 v, int bnd) noexcept {
     return last_mask(v == epu8{}, bnd);
 }
-inline uint64_t first_non_zero(epu8 v, int bnd) {
+inline uint64_t first_non_zero(epu8 v, int bnd) noexcept {
     return first_mask(v != epu8{}, bnd);
 }
-inline uint64_t last_non_zero(epu8 v, int bnd) {
+inline uint64_t last_non_zero(epu8 v, int bnd) noexcept {
     return last_mask(v != epu8{}, bnd);
 }
@@ -182,20 +182,22 @@ constexpr std::array sorting_rounds8
 }};
 // clang-format on
-inline bool is_sorted(epu8 a) {
+inline bool is_sorted(epu8 a) noexcept {
     return simde_mm_movemask_epi8(shifted_right(a) > a) == 0;
 }
-inline epu8 sorted(epu8 a) { return network_sort(a, sorting_rounds); }
-inline epu8 sorted8(epu8 a) { return network_sort(a, sorting_rounds8); }
-inline epu8 revsorted(epu8 a) { return network_sort(a, sorting_rounds); }
-inline epu8 revsorted8(epu8 a) {
+inline epu8 sorted(epu8 a) noexcept {
+    return network_sort(a, sorting_rounds);
+}
+inline epu8 sorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); }
+inline epu8 revsorted(epu8 a) noexcept { return network_sort(a, sorting_rounds); }
+inline epu8 revsorted8(epu8 a) noexcept {
     return network_sort(a, sorting_rounds8);
 }
-inline epu8 sort_perm(epu8 &a) {
+inline epu8 sort_perm(epu8 &a) noexcept {
     return network_sort_perm(a, sorting_rounds);
 }
-inline epu8 sort8_perm(epu8 &a) {
+inline epu8 sort8_perm(epu8 &a) noexcept {
     return network_sort_perm(a, sorting_rounds8);
 }
@@ -210,7 +212,7 @@ inline epu8 random_epu8(uint16_t bnd) {
     return res;
 }
-inline epu8 remove_dups(epu8 v, uint8_t repl) {
+inline epu8 remove_dups(epu8 v,
uint8_t repl) noexcept { // Vector ternary operator is not supported by clang. // return (v != shifted_right(v) ? v : Epu8(repl); return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v)); @@ -233,7 +235,7 @@ constexpr std::array inverting_rounds{{ #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept { epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { a = permuted(a, round); @@ -244,7 +246,7 @@ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { } #endif -inline epu8 permutation_of_ref(epu8 a, epu8 b) { +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept { auto ar = as_array(a); epu8 res{}; for (size_t i = 0; i < 16; i++) { @@ -253,7 +255,7 @@ inline epu8 permutation_of_ref(epu8 a, epu8 b) { } return res; } -inline epu8 permutation_of(epu8 a, epu8 b) { +inline epu8 permutation_of(epu8 a, epu8 b) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE return permutation_of_cmpestrm(a, b); #else @@ -289,15 +291,15 @@ constexpr std::array mining_rounds{{ #undef FF -inline uint8_t horiz_sum_ref(epu8 v) { +inline uint8_t horiz_sum_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res += v[i]; return res; } -inline uint8_t horiz_sum_gen(epu8 v) { return as_VectGeneric(v).horiz_sum(); } -inline uint8_t horiz_sum4(epu8 v) { return partial_sums_round(v)[15]; } -inline uint8_t horiz_sum3(epu8 v) { +inline uint8_t horiz_sum_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_sum(); } +inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; } +inline uint8_t horiz_sum3(epu8 v) noexcept { auto sr = summing_rounds; v += permuted(v, sr[0]); v += permuted(v, sr[1]); @@ -305,32 +307,32 @@ inline uint8_t horiz_sum3(epu8 v) { return v[7] + v[15]; } -inline epu8 partial_sums_ref(epu8 v) { +inline epu8 partial_sums_ref(epu8 v) noexcept { epu8 res{}; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = res[i - 1] + v[i]; return res; } -inline epu8 partial_sums_gen(epu8 v) { +inline epu8 partial_sums_gen(epu8 v) noexcept { as_VectGeneric(v).partial_sums_inplace(); return v; } -inline epu8 partial_sums_round(epu8 v) { +inline epu8 partial_sums_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v += permuted(v, round); return v; } -inline uint8_t horiz_max_ref(epu8 v) { +inline uint8_t horiz_max_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res = std::max(res, v[i]); return res; } -inline uint8_t horiz_max_gen(epu8 v) { return as_VectGeneric(v).horiz_max(); } -inline uint8_t horiz_max4(epu8 v) { return partial_max_round(v)[15]; } -inline uint8_t horiz_max3(epu8 v) { +inline uint8_t horiz_max_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_max(); } +inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; } +inline uint8_t horiz_max3(epu8 v) noexcept { auto sr = summing_rounds; v = max(v, permuted(v, sr[0])); v = max(v, permuted(v, sr[1])); @@ -338,32 +340,32 @@ inline uint8_t horiz_max3(epu8 v) { return std::max(v[7], v[15]); } -inline epu8 partial_max_ref(epu8 v) { +inline epu8 partial_max_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::max(res[i - 1], v[i]); return res; } -inline epu8 partial_max_gen(epu8 v) { +inline epu8 partial_max_gen(epu8 v) noexcept { as_VectGeneric(v).partial_max_inplace(); return v; } -inline epu8 
partial_max_round(epu8 v) { +inline epu8 partial_max_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v = max(v, permuted(v, round)); return v; } -inline uint8_t horiz_min_ref(epu8 v) { +inline uint8_t horiz_min_ref(epu8 v) noexcept { uint8_t res = 255; for (size_t i = 0; i < 16; i++) res = std::min(res, v[i]); return res; } -inline uint8_t horiz_min_gen(epu8 v) { return as_VectGeneric(v).horiz_min(); } -inline uint8_t horiz_min4(epu8 v) { return partial_min_round(v)[15]; } -inline uint8_t horiz_min3(epu8 v) { +inline uint8_t horiz_min_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_min(); } +inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; } +inline uint8_t horiz_min3(epu8 v) noexcept { auto sr = mining_rounds; v = min(v, permuted(v, sr[0])); v = min(v, permuted(v, sr[1])); @@ -371,24 +373,24 @@ inline uint8_t horiz_min3(epu8 v) { return std::min(v[7], v[15]); } -inline epu8 partial_min_ref(epu8 v) { +inline epu8 partial_min_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::min(res[i - 1], v[i]); return res; } -inline epu8 partial_min_gen(epu8 v) { +inline epu8 partial_min_gen(epu8 v) noexcept { as_VectGeneric(v).partial_min_inplace(); return v; } -inline epu8 partial_min_round(epu8 v) { +inline epu8 partial_min_round(epu8 v) noexcept { for (epu8 round : mining_rounds) v = min(v, permuted(v, round)); return v; } -inline epu8 eval16_ref(epu8 v) { +inline epu8 eval16_ref(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) if (v[i] < 16) @@ -396,7 +398,7 @@ inline epu8 eval16_ref(epu8 v) { return res; } -inline epu8 eval16_arr(epu8 v8) { +inline epu8 eval16_arr(epu8 v8) noexcept { decltype(Epu8)::array res{}; auto v = as_array(v8); for (size_t i = 0; i < 16; i++) @@ -404,10 +406,10 @@ inline epu8 eval16_arr(epu8 v8) { res[v[i]]++; return from_array(res); } -inline epu8 eval16_gen(epu8 v) { +inline epu8 eval16_gen(epu8 v) noexcept { return from_array(as_VectGeneric(v).eval().v); } -inline epu8 eval16_cycle(epu8 v) { +inline epu8 eval16_cycle(epu8 v) noexcept { epu8 res = -(epu8id == v); for (int i = 1; i < 16; i++) { v = permuted(v, left_cycle); @@ -415,7 +417,7 @@ inline epu8 eval16_cycle(epu8 v) { } return res; } -inline epu8 eval16_popcount(epu8 v) { +inline epu8 eval16_popcount(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) { res[i] = @@ -424,11 +426,11 @@ inline epu8 eval16_popcount(epu8 v) { return res; } -inline epu8 popcount16(epu8 v) { +inline epu8 popcount16(epu8 v) noexcept { return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); } -inline bool is_partial_transformation(epu8 v, const size_t k) { +inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) @@ -436,13 +438,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) { (diff == 16 || diff < k); } -inline bool is_transformation(epu8 v, const size_t k) { +inline bool is_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } -inline bool is_partial_permutation(epu8 v, const size_t k) { +inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 @@ -453,7 +455,7 @@ inline bool 
is_partial_permutation(epu8 v, const size_t k) {
            (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) &&
            (diff == 16 || diff < k);
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline bool is_permutation_cmpestri(epu8 v, const size_t k) {
+inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept {
     uint64_t diff = last_diff(v, epu8id, 16);
     // (forall x in v, x in Perm16::one()) and
     // (forall x in Perm16::one(), x in v) and
     // (v = Perm16::one() or last diff index < 16)
@@ -464,12 +466,12 @@ inline bool is_permutation_cmpestri(epu8 v, const size_t k) {
 }
 #endif
-inline bool is_permutation_sort(epu8 v, const size_t k) {
+inline bool is_permutation_sort(epu8 v, const size_t k) noexcept {
     uint64_t diff = last_diff(v, epu8id, 16);
     return equal(sorted(v), epu8id) && (diff == 16 || diff < k);
 }
-inline bool is_permutation(epu8 v, const size_t k) {
+inline bool is_permutation(epu8 v, const size_t k) noexcept {
 #ifdef SIMDE_X86_SSE4_2_NATIVE
     return is_permutation_cmpestri(v, k);
 #else
@@ -496,19 +498,19 @@ inline std::string to_string(HPCombi::epu8 const &a) {
 }
 template <> struct equal_to<HPCombi::epu8> {
-    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const {
+    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept {
         return HPCombi::equal(lhs, rhs);
     }
 };
 template <> struct not_equal_to<HPCombi::epu8> {
-    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const {
+    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept {
         return HPCombi::not_equal(lhs, rhs);
     }
 };
 template <> struct hash<HPCombi::epu8> {
-    inline size_t operator()(HPCombi::epu8 a) const {
+    inline size_t operator()(HPCombi::epu8 a) const noexcept {
         unsigned __int128 v0 = simde_mm_extract_epi64(a, 0);
         unsigned __int128 v1 = simde_mm_extract_epi64(a, 1);
         return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64;
@@ -528,7 +530,7 @@ template <> struct less<HPCombi::epu8> {
     // but we don't care when using in std::set.
     // 10% faster than calling the lexicographic comparison operator !
     inline size_t operator()(const HPCombi::epu8 &v1,
-                             const HPCombi::epu8 &v2) const {
+                             const HPCombi::epu8 &v2) const noexcept {
         simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2);
         return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0];
     }
diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp
index 6260186a..36f309d0 100644
--- a/include/hpcombi/vect_generic.hpp
+++ b/include/hpcombi/vect_generic.hpp
@@ -166,14 +166,14 @@ template struct VectGeneric {
         return true;
     }
-    uint64_t horiz_sum() const {
+    uint64_t horiz_sum() const noexcept {
         Expo res = 0;
         for (uint64_t i = 0; i < Size; i++)
             res += v[i];
         return res;
     }
-    VectGeneric partial_sums() const {
+    VectGeneric partial_sums() const noexcept {
        auto res = *this;
        for (uint64_t i = 1; i < Size; i++)
            res[i] += res[i - 1];
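The std::equal_to, std::hash and std::less specializations touched in the epu_impl.hpp hunks above are what let epu8 act directly as a key of standard containers. A minimal usage sketch follows; the include path, the main() harness, and the assumption that epu.hpp pulls in its _impl counterpart are mine, not part of the patch.

#include <cstdio>
#include <unordered_set>
#include "hpcombi/epu.hpp"

int main() {
    using HPCombi::Epu8;
    using HPCombi::epu8;
    // Hashing and equality go through the std::hash and std::equal_to
    // specializations for epu8, which this patch marks noexcept.
    std::unordered_set<epu8> seen;
    seen.insert(Epu8(0));              // the zero vector
    seen.insert(HPCombi::epu8id);      // the identity vector 0, 1, ..., 15
    seen.insert(Epu8(0));              // duplicate of the first insert
    std::printf("%zu distinct vectors\n", seen.size());   // prints 2
    return 0;
}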