From 0882b6dd636c29013b0dac50cf36869428dfd102 Mon Sep 17 00:00:00 2001
From: serge-sans-paille
Date: Mon, 9 Sep 2024 09:19:26 +0200
Subject: [PATCH] Provide batch_bool::count() to count the number of positive value in a batch

Fix #1042
---
Reviewer notes (text between the "---" line and the first "diff --git" is
commentary, not part of the commit message):

 * Recovery: this patch was rebuilt from a copy whose line breaks had been
   collapsed and whose "<...>" spans had been dropped. The line structure
   below matches every hunk header and the diffstat (45 + 14 + 14 + 10 = 83
   added lines). The following are reconstructions -- TODO confirm against
   commit 0882b6dd636c29013b0dac50cf36869428dfd102 before applying:
     - the target of the new #include (<climits>, chosen because the added
       code uses CHAR_BIT);
     - every template parameter list and template argument list
       (<class A, class T>, <T, A>, <generic>, <emulated<N>>, <A>,
       <batch_bool_type>);
     - all indentation and blank-line placement.
   The author's e-mail address could not be recovered and is left out.

 * kernel::count (generic): reads self.mask() into a uint64_t and returns the
   number of set bits. Sizes below 14 use the multiply/mask/modulo expression
   from the linked "CountBitsSet64" entry. Otherwise __builtin_popcountg is
   used when __has_builtin reports it, else a 32-bit (size <= 32) or 64-bit
   parallel bit count from the linked "CountBitsSetParallel" entry.

 * NOTE(review): builtin_popcount is #define'd inside the function body and
   no matching #undef appears in this patch, so the macro name stays defined
   after the function. Consider an #undef after use or a small inline helper.

 * kernel::count (emulated): the whole overload sits inside "#if 0", so it is
   compiled out by this patch.

 * xsimd::count (API): calls detail::static_check_supported_config and then
   forwards to kernel::count with a default-constructed arch tag A {}.

 * test_count: expects 0 for all_false, batch_bool_type::size for all_true
   and batch_bool_type::size / 2 for half.

 .../arch/generic/xsimd_generic_logical.hpp    | 45 +++++++++++++++++++
 include/xsimd/arch/xsimd_emulated.hpp         | 14 ++++++
 include/xsimd/types/xsimd_api.hpp             | 14 ++++++
 test/test_batch_bool.cpp                      | 10 +++++
 4 files changed, 83 insertions(+)

diff --git a/include/xsimd/arch/generic/xsimd_generic_logical.hpp b/include/xsimd/arch/generic/xsimd_generic_logical.hpp
index 69d9657e1..bc25b1155 100644
--- a/include/xsimd/arch/generic/xsimd_generic_logical.hpp
+++ b/include/xsimd/arch/generic/xsimd_generic_logical.hpp
@@ -14,6 +14,8 @@
 
 #include "./xsimd_generic_details.hpp"
 
+#include <climits>
+
 namespace xsimd
 {
 
@@ -22,6 +24,49 @@ namespace xsimd
 
         using namespace types;
 
+        // count
+        template <class A, class T>
+        XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
+        {
+            uint64_t m = self.mask();
+            XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 14)
+            {
+                // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
+                return (m * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
+            }
+            else
+            {
+#if defined __has_builtin
+#if __has_builtin(__builtin_popcountg)
+#define builtin_popcount(v) __builtin_popcountg(v)
+#endif
+#endif
+
+#ifdef builtin_popcount
+                return builtin_popcount(m);
+#else
+                // FIXME: we could do better by dispatching to the appropriate
+                // popcount instruction depending on the arch...
+                XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size <= 32)
+                {
+                    uint32_t m32 = m;
+                    // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+                    m32 = m32 - ((m32 >> 1) & 0x55555555); // reuse input as temporary
+                    m32 = (m32 & 0x33333333) + ((m32 >> 2) & 0x33333333); // temp
+                    return (((m32 + (m32 >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; // count
+                }
+                else
+                {
+                    // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+                    m = m - ((m >> 1) & (uint64_t) ~(uint64_t)0 / 3); // temp
+                    m = (m & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3); // temp
+                    m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
+                    return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
+                }
+#endif
+            }
+        }
+
         // from mask
         template <class A, class T>
         XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<generic>) noexcept
diff --git a/include/xsimd/arch/xsimd_emulated.hpp b/include/xsimd/arch/xsimd_emulated.hpp
index ef7fd0191..2f4585bbb 100644
--- a/include/xsimd/arch/xsimd_emulated.hpp
+++ b/include/xsimd/arch/xsimd_emulated.hpp
@@ -230,6 +230,20 @@ namespace xsimd
             return r;
         }
 
+#if 0
+        // count
+        template <class A, typename T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
+        XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<emulated<N>>) noexcept
+        {
+            uint64_t m = x.mask();
+            // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+            m = m - ((m >> 1) & (uint64_t) ~(uint64_t)0 / 3); // temp
+            m = (m & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3); // temp
+            m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
+            return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
+        }
+#endif
+
         // store_complex
         namespace detail
         {
diff --git a/include/xsimd/types/xsimd_api.hpp b/include/xsimd/types/xsimd_api.hpp
index afa41893d..725655dd3 100644
--- a/include/xsimd/types/xsimd_api.hpp
+++ b/include/xsimd/types/xsimd_api.hpp
@@ -631,6 +631,20 @@ namespace xsimd
         return kernel::cosh<A>(x, A {});
     }
 
+    /**
+     * @ingroup batch_reducers
+     *
+     * Count the number of values set to true in the batch \c x
+     * @param x boolean or batch of boolean
+     * @return the result of the counting.
+     */
+    template <class T, class A>
+    XSIMD_INLINE size_t count(batch_bool<T, A> const& x) noexcept
+    {
+        detail::static_check_supported_config<T, A>();
+        return kernel::count<A>(x, A {});
+    }
+
     /**
      * @ingroup batch_arithmetic
      *
diff --git a/test/test_batch_bool.cpp b/test/test_batch_bool.cpp
index d28c57bb0..d36b8b994 100644
--- a/test/test_batch_bool.cpp
+++ b/test/test_batch_bool.cpp
@@ -441,6 +441,14 @@ struct batch_bool_test
         CHECK_EQ(batch_bool_type::from_mask(bool_g.interspersed.mask()).mask(), bool_g.interspersed.mask());
     }
 
+    void test_count() const
+    {
+        auto bool_g = xsimd::get_bool<batch_bool_type> {};
+        CHECK_EQ(count(bool_g.all_false), 0);
+        CHECK_EQ(count(bool_g.all_true), batch_bool_type::size);
+        CHECK_EQ(count(bool_g.half), batch_bool_type::size / 2);
+    }
+
     void test_comparison() const
     {
         auto bool_g = xsimd::get_bool<batch_bool_type> {};
@@ -485,6 +493,8 @@ TEST_CASE_TEMPLATE("[xsimd batch bool]", B, BATCH_TYPES)
 
     SUBCASE("mask") { Test.test_mask(); }
 
+    SUBCASE("count") { Test.test_count(); }
+
     SUBCASE("eq neq") { Test.test_comparison(); }
 }
 #endif