From 8f085ec37cefb7a50e47edb443dce8f2f328e995 Mon Sep 17 00:00:00 2001
From: Florent Hivert
Date: Sun, 29 Oct 2023 16:58:23 +0000
Subject: [PATCH] noexcept in epu

---
 include/hpcombi/epu.hpp          | 154 +++++++++++++++----------------
 include/hpcombi/epu_impl.hpp     | 122 ++++++++++++------------
 include/hpcombi/vect_generic.hpp |   4 +-
 3 files changed, 141 insertions(+), 139 deletions(-)

diff --git a/include/hpcombi/epu.hpp b/include/hpcombi/epu.hpp
index 8d24905f..3f99db02 100644
--- a/include/hpcombi/epu.hpp
+++ b/include/hpcombi/epu.hpp
@@ -120,21 +120,21 @@ constexpr epu8 popcount4 = Epu8([](uint8_t i) {
  *
  * This is usually faster for algorithm using a lot of indexed access.
  */
-inline decltype(Epu8)::array &as_array(epu8 &v) {
+inline decltype(Epu8)::array &as_array(epu8 &v) noexcept {
     return reinterpret_cast<decltype(Epu8)::array &>(v);
 }
 /** Cast a constant #HPCombi::epu8 to a C++ \c std::array
  *
  * This is usually faster for algorithm using a lot of indexed access.
  */
-inline const decltype(Epu8)::array &as_array(const epu8 &v) {
+inline const decltype(Epu8)::array &as_array(const epu8 &v) noexcept {
     return reinterpret_cast<const decltype(Epu8)::array &>(v);
 }
 /** Cast a C++ \c std::array to a #HPCombi::epu8 */
 // Passing the argument by reference triggers a segfault in gcc
 // Since vector types doesn't belongs to the standard, I didn't manage
 // to know if I'm using undefined behavior here.
-inline epu8 from_array(decltype(Epu8)::array a) {
+inline epu8 from_array(decltype(Epu8)::array a) noexcept {
     return reinterpret_cast<const epu8 &>(a);
 }
@@ -155,70 +155,70 @@ inline const VectGeneric<16> &as_VectGeneric(const epu8 &v) {
 }
 /** Test whether all the entries of a #HPCombi::epu8 are zero */
-inline bool is_all_zero(epu8 a) { return simde_mm_testz_si128(a, a); }
+inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
 /** Test whether all the entries of a #HPCombi::epu8 are one */
-inline bool is_all_one(epu8 a) { return simde_mm_testc_si128(a, Epu8(0xFF)); }
+inline bool is_all_one(epu8 a) noexcept { return simde_mm_testc_si128(a, Epu8(0xFF)); }
 /** Equality of #HPCombi::epu8 */
-inline bool equal(epu8 a, epu8 b) {
+inline bool equal(epu8 a, epu8 b) noexcept {
     return is_all_zero(simde_mm_xor_si128(a, b));
 }
 /** Non equality of #HPCombi::epu8 */
-inline bool not_equal(epu8 a, epu8 b) { return !equal(a, b); }
+inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
 /** Permuting a #HPCombi::epu8 */
-inline epu8 permuted(epu8 a, epu8 b) { return simde_mm_shuffle_epi8(a, b); }
+inline epu8 permuted(epu8 a, epu8 b) noexcept { return simde_mm_shuffle_epi8(a, b); }
 /** Left shifted of a #HPCombi::epu8 inserting a 0
  * @warning we use the convention that the 0 entry is on the left !
  */
-inline epu8 shifted_right(epu8 a) { return simde_mm_bslli_si128(a, 1); }
+inline epu8 shifted_right(epu8 a) noexcept { return simde_mm_bslli_si128(a, 1); }
 /** Right shifted of a #HPCombi::epu8 inserting a 0
  * @warning we use the convention that the 0 entry is on the left !
*/ -inline epu8 shifted_left(epu8 a) { return simde_mm_bsrli_si128(a, 1); } +inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); } /** Reverting a #HPCombi::epu8 */ -inline epu8 reverted(epu8 a) { return permuted(a, epu8rev); } +inline epu8 reverted(epu8 a) noexcept { return permuted(a, epu8rev); } /** Vector min between two #HPCombi::epu8 0 */ -inline epu8 min(epu8 a, epu8 b) { return simde_mm_min_epu8(a, b); } +inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); } /** Vector max between two #HPCombi::epu8 0 */ -inline epu8 max(epu8 a, epu8 b) { return simde_mm_max_epu8(a, b); } +inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); } /** Testing if a #HPCombi::epu8 is sorted */ -inline bool is_sorted(epu8 a); +inline bool is_sorted(epu8 a) noexcept; /** Return a sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 sorted(epu8 a); +inline epu8 sorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 sorted8(epu8 a); +inline epu8 sorted8(epu8 a) noexcept; /** Return a reverse sorted #HPCombi::epu8 * @details * @par Algorithm: * Uses the 9 stages sorting network #sorting_rounds */ -inline epu8 revsorted(epu8 a); +inline epu8 revsorted(epu8 a) noexcept; /** Return a #HPCombi::epu8 with the two half reverse sorted * @details * @par Algorithm: Uses a 6 stages sorting network #sorting_rounds8 */ -inline epu8 revsorted8(epu8 a); +inline epu8 revsorted8(epu8 a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort_perm(epu8 &a); +inline epu8 sort_perm(epu8 &a) noexcept; /** Sort \c this and return the sorting permutation * @details * @par Algorithm: Uses a 9 stages sorting network #sorting_rounds8 */ -inline epu8 sort8_perm(epu8 &a); +inline epu8 sort8_perm(epu8 &a) noexcept; /** @class common_permutation_of * @brief Find if a vector is a permutation of one other @@ -232,16 +232,16 @@ inline epu8 sort8_perm(epu8 &a); /** @copydoc common_permutation_of @par Algorithm: uses string matching cpmestrm intrisics */ -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b); +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept; #endif /** @copydoc common_permutation_of @par Algorithm: reference implementation */ -inline epu8 permutation_of_ref(epu8 a, epu8 b); +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept; /** @copydoc common_permutation_of @par Algorithm: architecture dependent */ -inline epu8 permutation_of(epu8 a, epu8 b); +inline epu8 permutation_of(epu8 a, epu8 b) noexcept; /** A prime number good for hashing */ constexpr uint64_t prime = 0x9e3779b97f4a7bb9; @@ -262,7 +262,7 @@ inline epu8 random_epu8(uint16_t bnd); * @return the vector \c a where repeated occurrences of entries are replaced * by \c repl */ -inline epu8 remove_dups(epu8 a, uint8_t repl = 0); +inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept; /** @class common_horiz_sum * @brief Horizontal sum of a #HPCombi::epu8 @@ -279,25 +279,25 @@ inline epu8 remove_dups(epu8 a, uint8_t repl = 0); * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_sum_ref(epu8); +inline uint8_t horiz_sum_ref(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access 
* through #HPCombi::VectGeneric */ -inline uint8_t horiz_sum_gen(epu8); +inline uint8_t horiz_sum_gen(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_sum4(epu8); +inline uint8_t horiz_sum4(epu8) noexcept; /** @copydoc common_horiz_sum * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_sum3(epu8); +inline uint8_t horiz_sum3(epu8) noexcept; /** @copydoc common_horiz_sum */ -inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } +inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); } /** @class common_partial_sums * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -313,20 +313,20 @@ inline uint8_t horiz_sum(epu8 v) { return horiz_sum3(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_sums_ref(epu8); +inline epu8 partial_sums_ref(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_sums_gen(epu8); +inline epu8 partial_sums_gen(epu8) noexcept; /** @copydoc common_partial_sums * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_sums_round(epu8); +inline epu8 partial_sums_round(epu8) noexcept; /** @copydoc common_partial_sums */ -inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } +inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); } /** @class common_horiz_max * @brief Horizontal sum of a #HPCombi::epu8 @@ -342,25 +342,25 @@ inline epu8 partial_sums(epu8 v) { return partial_sums_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_max_ref(epu8); +inline uint8_t horiz_max_ref(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_max_gen(epu8); +inline uint8_t horiz_max_gen(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_max4(epu8); +inline uint8_t horiz_max4(epu8) noexcept; /** @copydoc common_horiz_max * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_max3(epu8); +inline uint8_t horiz_max3(epu8) noexcept; /** @copydoc common_horiz_max */ -inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } +inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); } /** @class common_partial_max * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -376,20 +376,20 @@ inline uint8_t horiz_max(epu8 v) { return horiz_max4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_max_ref(epu8); +inline epu8 partial_max_ref(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_max_gen(epu8); +inline epu8 partial_max_gen(epu8) noexcept; /** @copydoc common_partial_max * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_max_round(epu8); +inline epu8 partial_max_round(epu8) noexcept; /** @copydoc common_partial_max */ -inline epu8 partial_max(epu8 v) { return partial_max_round(v); } +inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); } /** @class common_horiz_min * @brief Horizontal sum of a #HPCombi::epu8 
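To make the intended use of the reductions declared above concrete, here is a minimal sketch exercising horiz_sum, partial_sums, horiz_max and partial_max. The "hpcombi/epu.hpp" include path and the main() harness are assumptions of this sketch, not part of the patch; every call shown is one of the functions made noexcept above.

#include <cassert>
#include "hpcombi/epu.hpp"

int main() {
    using namespace HPCombi;
    epu8 ones = Epu8(1);                 // constant vector: sixteen entries equal to 1
    epu8 ps = partial_sums(ones);        // prefix sums 1, 2, 3, ..., 16
    assert(ps[0] == 1 && ps[15] == 16);
    assert(horiz_sum(ones) == 16);       // horizontal sum of sixteen ones
    assert(horiz_max(ps) == 16);         // largest entry is the last prefix sum
    assert(equal(partial_max(ps), ps));  // prefix maxima of an increasing vector
    return 0;
}

Since these overloads are now noexcept, such calls can also appear inside noexcept functions of client code without weakening their own exception specification.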
@@ -405,25 +405,25 @@ inline epu8 partial_max(epu8 v) { return partial_max_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint8_t horiz_min_ref(epu8); +inline uint8_t horiz_min_ref(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline uint8_t horiz_min_gen(epu8); +inline uint8_t horiz_min_gen(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 4-stages parallel algorithm */ -inline uint8_t horiz_min4(epu8); +inline uint8_t horiz_min4(epu8) noexcept; /** @copydoc common_horiz_min * @par Algorithm: * 3-stages parallel algorithm + indexed access */ -inline uint8_t horiz_min3(epu8); +inline uint8_t horiz_min3(epu8) noexcept; /** @copydoc common_horiz_min */ -inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } +inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); } /** @class common_partial_min * @brief Horizontal partial sum of a #HPCombi::epu8 @@ -439,20 +439,20 @@ inline uint8_t horiz_min(epu8 v) { return horiz_min4(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 partial_min_ref(epu8); +inline epu8 partial_min_ref(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access * through #HPCombi::VectGeneric */ -inline epu8 partial_min_gen(epu8); +inline epu8 partial_min_gen(epu8) noexcept; /** @copydoc common_partial_min * @par Algorithm: * 4-stages parallel algorithm */ -inline epu8 partial_min_round(epu8); +inline epu8 partial_min_round(epu8) noexcept; /** @copydoc common_partial_min */ -inline epu8 partial_min(epu8 v) { return partial_min_round(v); } +inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); } /** @class common_eval16 * @brief Evaluation of a #HPCombi::epu8 @@ -471,24 +471,24 @@ inline epu8 partial_min(epu8 v) { return partial_min_round(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline epu8 eval16_ref(epu8 v); +inline epu8 eval16_ref(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and cast to array */ -inline epu8 eval16_arr(epu8 v); +inline epu8 eval16_arr(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using cyclic shifting */ -inline epu8 eval16_cycle(epu8 v); +inline epu8 eval16_cycle(epu8 v) noexcept; /** @copydoc common_eval16 * @par Algorithm: * Vector @f$O(n)@f$ using popcount */ -inline epu8 eval16_popcount(epu8 v); +inline epu8 eval16_popcount(epu8 v) noexcept; /** @copydoc common_eval16 */ -inline epu8 eval16(epu8 v) { return eval16_cycle(v); } +inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); } /** @class common_first_diff * @brief The first difference between two #HPCombi::epu8 @@ -512,21 +512,21 @@ inline epu8 eval16(epu8 v) { return eval16_cycle(v); } * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_first_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc 
common_first_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_first_diff */ -inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return first_diff_mask(a, b, bound); } @@ -552,53 +552,53 @@ inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) { * @par Algorithm: * Reference @f$O(n)@f$ algorithm using loop and indexed access */ -inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept; #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_last_diff * @par Algorithm: * Using \c cmpestri instruction */ -inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept; #endif /** @copydoc common_last_diff * @par Algorithm: * Using vector comparison and mask */ -inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16); +inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept; /** @copydoc common_last_diff */ -inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) { +inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept { return last_diff_mask(a, b, bound); } /** Lexicographic comparison between two #HPCombi::epu8 */ -inline bool less(epu8 a, epu8 b); +inline bool less(epu8 a, epu8 b) noexcept; /** Partial lexicographic comparison between two #HPCombi::epu8 * @param a, b : the vectors to compare * @param k : the bound for the lexicographic comparison * @return a positive, negative or zero int8_t depending on the result */ -inline int8_t less_partial(epu8 a, epu8 b, int k); +inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept; /** return the index of the first zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_zero(epu8 v, int bnd); +inline uint64_t first_zero(epu8 v, int bnd) noexcept; /** return the index of the last zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_zero(epu8 v, int bnd); +inline uint64_t last_zero(epu8 v, int bnd) noexcept; /** return the index of the first non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t first_non_zero(epu8 v, int bnd); +inline uint64_t first_non_zero(epu8 v, int bnd) noexcept; /** return the index of the last non zero entry or 16 if there are none * Only index smaller than bound are taken into account. */ -inline uint64_t last_non_zero(epu8 v, int bnd); +inline uint64_t last_non_zero(epu8 v, int bnd) noexcept; /** a vector popcount function */ -inline epu8 popcount16(epu8 v); +inline epu8 popcount16(epu8 v) noexcept; /** Test for partial transformation * @details @@ -615,7 +615,7 @@ inline epu8 popcount16(epu8 v); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 
4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xff,0xff,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_transformation(epu8 v, const size_t k = 16); +inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for transformation * @details @@ -632,7 +632,7 @@ inline bool is_partial_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 2 1 4 \end{matrix}@f$ * is encoded by the array {2,0,5,2,1,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_transformation(epu8 v, const size_t k = 16); +inline bool is_transformation(epu8 v, const size_t k = 16) noexcept; /** Test for partial permutations * @details @@ -650,7 +650,7 @@ inline bool is_transformation(epu8 v, const size_t k = 16); * @f$\begin{matrix}0 1 2 3 4 5\\ 2 0 5 . . 4 \end{matrix}@f$ * is encoded by the array {2,0,5,0xFF,0xFF,4,6,7,8,9,10,11,12,13,14,15} */ -inline bool is_partial_permutation(epu8 v, const size_t k = 16); +inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept; /** @class common_is_permutation * @details @@ -671,16 +671,16 @@ inline bool is_partial_permutation(epu8 v, const size_t k = 16); /** @copydoc common_is_permutation @par Algorithm: uses string matching cpmestri intrisics */ -inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16); +inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept; #endif /** @copydoc common_is_permutation @par Algorithm: sort the vector and compare to identity */ -inline bool is_permutation_sort(epu8 v, const size_t k = 16); +inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept; /** @copydoc common_is_permutation @par Algorithm: architecture dependent */ -inline bool is_permutation(epu8 v, const size_t k = 16); +inline bool is_permutation(epu8 v, const size_t k = 16) noexcept; } // namespace HPCombi diff --git a/include/hpcombi/epu_impl.hpp b/include/hpcombi/epu_impl.hpp index 75501df3..d89ef989 100644 --- a/include/hpcombi/epu_impl.hpp +++ b/include/hpcombi/epu_impl.hpp @@ -59,22 +59,22 @@ inline uint64_t last_mask(epu8 msk, size_t bound) { return res == 0 ? 
16 : (63 - __builtin_clzll(res));
 }
-inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
     for (size_t i = 0; i < bound; i++)
         if (a[i] != b[i])
             return i;
     return 16;
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
     return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF));
 }
 #endif
-inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) {
+inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
     return first_mask(a != b, bound);
 }
-inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
     while (bound != 0) {
         --bound;
         if (a[bound] != b[bound])
@@ -83,35 +83,35 @@ inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) {
     return 16;
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
     return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF));
 }
 #endif
-inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) {
+inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
     return last_mask(a != b, bound);
 }
-inline bool less(epu8 a, epu8 b) {
+inline bool less(epu8 a, epu8 b) noexcept {
     uint64_t diff = first_diff(a, b);
     return (diff < 16) && (a[diff] < b[diff]);
 }
-inline int8_t less_partial(epu8 a, epu8 b, int k) {
+inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept {
     uint64_t diff = first_diff(a, b, k);
     return (diff == 16) ? 0
                         : static_cast<int8_t>(a[diff]) - static_cast<int8_t>(b[diff]);
 }
-inline uint64_t first_zero(epu8 v, int bnd) {
+inline uint64_t first_zero(epu8 v, int bnd) noexcept {
     return first_mask(v == epu8{}, bnd);
 }
-inline uint64_t last_zero(epu8 v, int bnd) {
+inline uint64_t last_zero(epu8 v, int bnd) noexcept {
     return last_mask(v == epu8{}, bnd);
 }
-inline uint64_t first_non_zero(epu8 v, int bnd) {
+inline uint64_t first_non_zero(epu8 v, int bnd) noexcept {
     return first_mask(v != epu8{}, bnd);
 }
-inline uint64_t last_non_zero(epu8 v, int bnd) {
+inline uint64_t last_non_zero(epu8 v, int bnd) noexcept {
     return last_mask(v != epu8{}, bnd);
 }
@@ -182,20 +182,22 @@ constexpr std::array sorting_rounds8
 }};
 // clang-format on
-inline bool is_sorted(epu8 a) {
+inline bool is_sorted(epu8 a) noexcept {
     return simde_mm_movemask_epi8(shifted_right(a) > a) == 0;
 }
-inline epu8 sorted(epu8 a) { return network_sort(a, sorting_rounds); }
-inline epu8 sorted8(epu8 a) { return network_sort(a, sorting_rounds8); }
-inline epu8 revsorted(epu8 a) { return network_sort(a, sorting_rounds); }
-inline epu8 revsorted8(epu8 a) {
+inline epu8 sorted(epu8 a) noexcept {
+    return network_sort(a, sorting_rounds);
+}
+inline epu8 sorted8(epu8 a) noexcept { return network_sort(a, sorting_rounds8); }
+inline epu8 revsorted(epu8 a) noexcept { return network_sort(a, sorting_rounds); }
+inline epu8 revsorted8(epu8 a) noexcept {
     return network_sort(a, sorting_rounds8);
 }
-inline epu8 sort_perm(epu8 &a) {
+inline epu8 sort_perm(epu8 &a) noexcept {
     return network_sort_perm(a, sorting_rounds);
 }
-inline epu8 sort8_perm(epu8 &a) {
+inline epu8 sort8_perm(epu8 &a) noexcept {
     return network_sort_perm(a, sorting_rounds8);
 }
@@ -210,7 +212,7 @@ inline epu8 random_epu8(uint16_t bnd) {
     return res;
 }
-inline epu8 remove_dups(epu8 v, uint8_t repl) {
+inline epu8 remove_dups(epu8 v,
uint8_t repl) noexcept { // Vector ternary operator is not supported by clang. // return (v != shifted_right(v) ? v : Epu8(repl); return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v)); @@ -233,7 +235,7 @@ constexpr std::array inverting_rounds{{ #define FIND_IN_VECT_COMPL \ (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK) -inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { +inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept { epu8 res = -static_cast(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT)); for (epu8 round : inverting_rounds) { a = permuted(a, round); @@ -244,7 +246,7 @@ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) { } #endif -inline epu8 permutation_of_ref(epu8 a, epu8 b) { +inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept { auto ar = as_array(a); epu8 res{}; for (size_t i = 0; i < 16; i++) { @@ -253,7 +255,7 @@ inline epu8 permutation_of_ref(epu8 a, epu8 b) { } return res; } -inline epu8 permutation_of(epu8 a, epu8 b) { +inline epu8 permutation_of(epu8 a, epu8 b) noexcept { #ifdef SIMDE_X86_SSE4_2_NATIVE return permutation_of_cmpestrm(a, b); #else @@ -289,15 +291,15 @@ constexpr std::array mining_rounds{{ #undef FF -inline uint8_t horiz_sum_ref(epu8 v) { +inline uint8_t horiz_sum_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res += v[i]; return res; } -inline uint8_t horiz_sum_gen(epu8 v) { return as_VectGeneric(v).horiz_sum(); } -inline uint8_t horiz_sum4(epu8 v) { return partial_sums_round(v)[15]; } -inline uint8_t horiz_sum3(epu8 v) { +inline uint8_t horiz_sum_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_sum(); } +inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; } +inline uint8_t horiz_sum3(epu8 v) noexcept { auto sr = summing_rounds; v += permuted(v, sr[0]); v += permuted(v, sr[1]); @@ -305,32 +307,32 @@ inline uint8_t horiz_sum3(epu8 v) { return v[7] + v[15]; } -inline epu8 partial_sums_ref(epu8 v) { +inline epu8 partial_sums_ref(epu8 v) noexcept { epu8 res{}; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = res[i - 1] + v[i]; return res; } -inline epu8 partial_sums_gen(epu8 v) { +inline epu8 partial_sums_gen(epu8 v) noexcept { as_VectGeneric(v).partial_sums_inplace(); return v; } -inline epu8 partial_sums_round(epu8 v) { +inline epu8 partial_sums_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v += permuted(v, round); return v; } -inline uint8_t horiz_max_ref(epu8 v) { +inline uint8_t horiz_max_ref(epu8 v) noexcept { uint8_t res = 0; for (size_t i = 0; i < 16; i++) res = std::max(res, v[i]); return res; } -inline uint8_t horiz_max_gen(epu8 v) { return as_VectGeneric(v).horiz_max(); } -inline uint8_t horiz_max4(epu8 v) { return partial_max_round(v)[15]; } -inline uint8_t horiz_max3(epu8 v) { +inline uint8_t horiz_max_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_max(); } +inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; } +inline uint8_t horiz_max3(epu8 v) noexcept { auto sr = summing_rounds; v = max(v, permuted(v, sr[0])); v = max(v, permuted(v, sr[1])); @@ -338,32 +340,32 @@ inline uint8_t horiz_max3(epu8 v) { return std::max(v[7], v[15]); } -inline epu8 partial_max_ref(epu8 v) { +inline epu8 partial_max_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::max(res[i - 1], v[i]); return res; } -inline epu8 partial_max_gen(epu8 v) { +inline epu8 partial_max_gen(epu8 v) noexcept { as_VectGeneric(v).partial_max_inplace(); return v; } -inline epu8 
partial_max_round(epu8 v) { +inline epu8 partial_max_round(epu8 v) noexcept { for (epu8 round : summing_rounds) v = max(v, permuted(v, round)); return v; } -inline uint8_t horiz_min_ref(epu8 v) { +inline uint8_t horiz_min_ref(epu8 v) noexcept { uint8_t res = 255; for (size_t i = 0; i < 16; i++) res = std::min(res, v[i]); return res; } -inline uint8_t horiz_min_gen(epu8 v) { return as_VectGeneric(v).horiz_min(); } -inline uint8_t horiz_min4(epu8 v) { return partial_min_round(v)[15]; } -inline uint8_t horiz_min3(epu8 v) { +inline uint8_t horiz_min_gen(epu8 v) noexcept { return as_VectGeneric(v).horiz_min(); } +inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; } +inline uint8_t horiz_min3(epu8 v) noexcept { auto sr = mining_rounds; v = min(v, permuted(v, sr[0])); v = min(v, permuted(v, sr[1])); @@ -371,24 +373,24 @@ inline uint8_t horiz_min3(epu8 v) { return std::min(v[7], v[15]); } -inline epu8 partial_min_ref(epu8 v) { +inline epu8 partial_min_ref(epu8 v) noexcept { epu8 res; res[0] = v[0]; for (size_t i = 1; i < 16; i++) res[i] = std::min(res[i - 1], v[i]); return res; } -inline epu8 partial_min_gen(epu8 v) { +inline epu8 partial_min_gen(epu8 v) noexcept { as_VectGeneric(v).partial_min_inplace(); return v; } -inline epu8 partial_min_round(epu8 v) { +inline epu8 partial_min_round(epu8 v) noexcept { for (epu8 round : mining_rounds) v = min(v, permuted(v, round)); return v; } -inline epu8 eval16_ref(epu8 v) { +inline epu8 eval16_ref(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) if (v[i] < 16) @@ -396,7 +398,7 @@ inline epu8 eval16_ref(epu8 v) { return res; } -inline epu8 eval16_arr(epu8 v8) { +inline epu8 eval16_arr(epu8 v8) noexcept { decltype(Epu8)::array res{}; auto v = as_array(v8); for (size_t i = 0; i < 16; i++) @@ -404,10 +406,10 @@ inline epu8 eval16_arr(epu8 v8) { res[v[i]]++; return from_array(res); } -inline epu8 eval16_gen(epu8 v) { +inline epu8 eval16_gen(epu8 v) noexcept { return from_array(as_VectGeneric(v).eval().v); } -inline epu8 eval16_cycle(epu8 v) { +inline epu8 eval16_cycle(epu8 v) noexcept { epu8 res = -(epu8id == v); for (int i = 1; i < 16; i++) { v = permuted(v, left_cycle); @@ -415,7 +417,7 @@ inline epu8 eval16_cycle(epu8 v) { } return res; } -inline epu8 eval16_popcount(epu8 v) { +inline epu8 eval16_popcount(epu8 v) noexcept { epu8 res{}; for (size_t i = 0; i < 16; i++) { res[i] = @@ -424,11 +426,11 @@ inline epu8 eval16_popcount(epu8 v) { return res; } -inline epu8 popcount16(epu8 v) { +inline epu8 popcount16(epu8 v) noexcept { return permuted(popcount4, (v & Epu8(0x0f))) + permuted(popcount4, v >> 4); } -inline bool is_partial_transformation(epu8 v, const size_t k) { +inline bool is_partial_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x + 1 <= 16) and // (v = Perm16::one() or last diff index < 16) @@ -436,13 +438,13 @@ inline bool is_partial_transformation(epu8 v, const size_t k) { (diff == 16 || diff < k); } -inline bool is_transformation(epu8 v, const size_t k) { +inline bool is_transformation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) && (diff == 16 || diff < k); } -inline bool is_partial_permutation(epu8 v, const size_t k) { +inline bool is_partial_permutation(epu8 v, const size_t k) noexcept { uint64_t diff = last_diff(v, epu8id, 16); // (forall x in v, x <= 15) and // (forall x < 15, multiplicity x v <= 1 @@ -453,7 +455,7 @@ inline bool 
is_partial_permutation(epu8 v, const size_t k) {
            (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) &&
            (diff == 16 || diff < k);
 }
 #ifdef SIMDE_X86_SSE4_2_NATIVE
-inline bool is_permutation_cmpestri(epu8 v, const size_t k) {
+inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept {
     uint64_t diff = last_diff(v, epu8id, 16);
     // (forall x in v, x in Perm16::one()) and
     // (forall x in Perm16::one(), x in v) and
     // (v = Perm16::one() or last diff index < 16)
@@ -464,12 +466,12 @@ inline bool is_permutation_cmpestri(epu8 v, const size_t k) {
 }
 #endif
-inline bool is_permutation_sort(epu8 v, const size_t k) {
+inline bool is_permutation_sort(epu8 v, const size_t k) noexcept {
     uint64_t diff = last_diff(v, epu8id, 16);
     return equal(sorted(v), epu8id) && (diff == 16 || diff < k);
 }
-inline bool is_permutation(epu8 v, const size_t k) {
+inline bool is_permutation(epu8 v, const size_t k) noexcept {
 #ifdef SIMDE_X86_SSE4_2_NATIVE
     return is_permutation_cmpestri(v, k);
 #else
@@ -496,19 +498,19 @@ inline std::string to_string(HPCombi::epu8 const &a) {
 }
 template <> struct equal_to<HPCombi::epu8> {
-    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const {
+    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept {
         return HPCombi::equal(lhs, rhs);
     }
 };
 template <> struct not_equal_to<HPCombi::epu8> {
-    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const {
+    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept {
         return HPCombi::not_equal(lhs, rhs);
     }
 };
 template <> struct hash<HPCombi::epu8> {
-    inline size_t operator()(HPCombi::epu8 a) const {
+    inline size_t operator()(HPCombi::epu8 a) const noexcept {
         unsigned __int128 v0 = simde_mm_extract_epi64(a, 0);
         unsigned __int128 v1 = simde_mm_extract_epi64(a, 1);
         return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64;
@@ -528,7 +530,7 @@ template <> struct less<HPCombi::epu8> {
     // but we don't care when using in std::set.
     // 10% faster than calling the lexicographic comparison operator !
     inline size_t operator()(const HPCombi::epu8 &v1,
-                             const HPCombi::epu8 &v2) const {
+                             const HPCombi::epu8 &v2) const noexcept {
         simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2);
         return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0];
     }
diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp
index 6260186a..36f309d0 100644
--- a/include/hpcombi/vect_generic.hpp
+++ b/include/hpcombi/vect_generic.hpp
@@ -166,14 +166,14 @@ template struct VectGeneric {
         return true;
     }
-    uint64_t horiz_sum() const {
+    uint64_t horiz_sum() const noexcept {
         Expo res = 0;
         for (uint64_t i = 0; i < Size; i++)
             res += v[i];
         return res;
     }
-    VectGeneric partial_sums() const {
+    VectGeneric partial_sums() const noexcept {
        auto res = *this;
        for (uint64_t i = 1; i < Size; i++)
            res[i] += res[i - 1];
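The std::equal_to, std::hash and std::less specializations touched in the epu_impl.hpp hunks above are what let epu8 act directly as a key of standard containers. A minimal usage sketch follows; the include path, the main() harness, and the assumption that epu.hpp pulls in its _impl counterpart are mine, not part of the patch.

#include <cstdio>
#include <unordered_set>
#include "hpcombi/epu.hpp"

int main() {
    using HPCombi::Epu8;
    using HPCombi::epu8;
    // Hashing and equality go through the std::hash and std::equal_to
    // specializations for epu8, which this patch marks noexcept.
    std::unordered_set<epu8> seen;
    seen.insert(Epu8(0));              // the zero vector
    seen.insert(HPCombi::epu8id);      // the identity vector 0, 1, ..., 15
    seen.insert(Epu8(0));              // duplicate of the first insert
    std::printf("%zu distinct vectors\n", seen.size());   // prints 2
    return 0;
}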