From d4a5791036b9f394e192e427d4f8dad442575d46 Mon Sep 17 00:00:00 2001 From: Florent Hivert Date: Mon, 23 Oct 2023 14:32:16 +0200 Subject: [PATCH] Replaced intrinsic by builtin for popcount --- include/bmat8_impl.hpp | 14 +++++++------- include/epu_impl.hpp | 2 +- include/perm16_impl.hpp | 8 ++++---- list_intrin.txt | 4 ++-- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/include/bmat8_impl.hpp b/include/bmat8_impl.hpp index 124f9cd5..85fed966 100644 --- a/include/bmat8_impl.hpp +++ b/include/bmat8_impl.hpp @@ -276,10 +276,10 @@ inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const { inline uint64_t BMat8::row_space_size_bitset() const { epu8 res0 {}, res1 {}; row_space_bitset(res0, res1); - return (_mm_popcnt_u64(_mm_extract_epi64(res0, 0)) + - _mm_popcnt_u64(_mm_extract_epi64(res1, 0)) + - _mm_popcnt_u64(_mm_extract_epi64(res0, 1)) + - _mm_popcnt_u64(_mm_extract_epi64(res1, 1))); + return (__builtin_popcountll(_mm_extract_epi64(res0, 0)) + + __builtin_popcountll(_mm_extract_epi64(res1, 0)) + + __builtin_popcountll(_mm_extract_epi64(res0, 1)) + + __builtin_popcountll(_mm_extract_epi64(res1, 1))); } inline uint64_t BMat8::row_space_size_incl1() const { @@ -292,7 +292,7 @@ inline uint64_t BMat8::row_space_size_incl1() const { orincl |= ((in | block) == block) & in; in = permuted(in, rotboth); } - res += _mm_popcnt_u64(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; @@ -308,7 +308,7 @@ inline uint64_t BMat8::row_space_size_incl() const { in = permuted(in, rotboth); orincl |= ((in | block) == block) & in; } - res += _mm_popcnt_u64(_mm_movemask_epi8(block == orincl)); + res += __builtin_popcountll(_mm_movemask_epi8(block == orincl)); block += Epu8(16); } return res; @@ -399,7 +399,7 @@ inline std::vector BMat8::rows() const { inline size_t BMat8::nr_rows() const { epu8 x = _mm_set_epi64x(_data, 0); - return _mm_popcnt_u64(_mm_movemask_epi8(x != epu8 {})); + return __builtin_popcountll(_mm_movemask_epi8(x != epu8 {})); } static HPCOMBI_CONSTEXPR epu8 diff --git a/include/epu_impl.hpp b/include/epu_impl.hpp index 0a2d7f40..c085abff 100644 --- a/include/epu_impl.hpp +++ b/include/epu_impl.hpp @@ -404,7 +404,7 @@ inline epu8 eval16_cycle(epu8 v) { inline epu8 eval16_popcount(epu8 v) { epu8 res{}; for (size_t i = 0; i < 16; i++) { - res[i] = _mm_popcnt_u32(_mm_movemask_epi8(v == Epu8(uint8_t(i)))); + res[i] = __builtin_popcountl(_mm_movemask_epi8(v == Epu8(uint8_t(i)))); } return res; } diff --git a/include/perm16_impl.hpp b/include/perm16_impl.hpp index 365f4f6f..5ea3af2a 100644 --- a/include/perm16_impl.hpp +++ b/include/perm16_impl.hpp @@ -76,7 +76,7 @@ inline uint32_t PTransf16::rank_ref() const { return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0)); } inline uint32_t PTransf16::rank() const { - return _mm_popcnt_u32(image_bitset()); + return __builtin_popcountl(image_bitset()); } inline epu8 PTransf16::fix_points_mask(bool complement) const { @@ -107,7 +107,7 @@ inline uint8_t PTransf16::largest_moved_point() const { } /** Returns the number of fix points of \c *this */ inline uint8_t PTransf16::nb_fix_points() const { - return _mm_popcnt_u32(fix_points_bitset()); + return __builtin_popcountl(fix_points_bitset()); } inline static HPCOMBI_CONSTEXPR uint8_t hilo_exchng_fun(uint8_t i) { @@ -307,7 +307,7 @@ inline uint8_t Perm16::nb_descents_ref() const { return res; } inline uint8_t Perm16::nb_descents() const { - return _mm_popcnt_u32(_mm_movemask_epi8(v < shifted_right(v))); + return __builtin_popcountl(_mm_movemask_epi8(v < shifted_right(v))); } inline uint8_t Perm16::nb_cycles_ref() const { @@ -338,7 +338,7 @@ inline epu8 Perm16::cycles_partition() const { inline uint8_t Perm16::nb_cycles_unroll() const { epu8 res = (epu8id == cycles_partition()); - return _mm_popcnt_u32(_mm_movemask_epi8(res)); + return __builtin_popcountl(_mm_movemask_epi8(res)); } inline bool Perm16::left_weak_leq_ref(Perm16 other) const { diff --git a/list_intrin.txt b/list_intrin.txt index df995b99..263f95d7 100644 --- a/list_intrin.txt +++ b/list_intrin.txt @@ -11,8 +11,8 @@ _mm_max_epu8;__m128i(),__m128i() _mm_min_epi8;__m128i(),__m128i() _mm_min_epu8;__m128i(),__m128i() _mm_movemask_epi8;__m128i() -_mm_popcnt_u32;1 -_mm_popcnt_u64;1 +__builtin_popcountl;1 +__builtin_popcountll;1 _mm_set_epi64x;1,1 _mm_shuffle_epi8;__m128i(),__m128i() _mm_slli_epi32;__m128i(),1