Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Provide batch_bool::count() to count the number of positive values in … #1048

Merged
merged 1 commit into from
Sep 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
45 changes: 45 additions & 0 deletions include/xsimd/arch/generic/xsimd_generic_logical.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

#include "./xsimd_generic_details.hpp"

#include <climits>

namespace xsimd
{

Expand All @@ -22,6 +24,49 @@ namespace xsimd

using namespace types;

// count
/**
 * Generic (architecture-independent) kernel counting the lanes of \c self
 * that are set to true, by counting the set bits of \c self.mask().
 *
 * The strategy is selected from the compile-time batch size:
 *  - fewer than 14 lanes: a single multiply/mask/modulo bit hack;
 *  - otherwise the compiler's generic popcount builtin when it is available;
 *  - otherwise a portable parallel bit count, on 32 bits when the batch has
 *    at most 32 lanes and on 64 bits above that.
 *
 * @param self boolean batch to reduce.
 * @return the number of bits set in \c self.mask().
 */
template <class A, class T>
XSIMD_INLINE size_t count(batch_bool<T, A> const& self, requires_arch<generic>) noexcept
{
    // NOTE(review): presumably mask() yields one bit per lane -- confirm against batch_bool.
    uint64_t m = self.mask();
    XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size < 14)
    {
        // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
        return (m * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
    }
    else
    {
        // The helper macro carries the library prefix and is undefined right
        // after its single use, so that it neither leaks into code including
        // this header nor gets confused with a user macro of the same name.
#if defined __has_builtin
#if __has_builtin(__builtin_popcountg)
#define XSIMD_BUILTIN_POPCOUNT(v) __builtin_popcountg(v)
#endif
#endif

#ifdef XSIMD_BUILTIN_POPCOUNT
        return XSIMD_BUILTIN_POPCOUNT(m);
#undef XSIMD_BUILTIN_POPCOUNT
#else
        // FIXME: we could do better by dispatching to the appropriate
        // popcount instruction depending on the arch...
        XSIMD_IF_CONSTEXPR(batch_bool<T, A>::size <= 32)
        {
            // Explicit narrowing: only the low 32 bits are counted in this branch.
            uint32_t m32 = static_cast<uint32_t>(m);
            // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
            m32 = m32 - ((m32 >> 1) & 0x55555555); // reuse input as temporary
            m32 = (m32 & 0x33333333) + ((m32 >> 2) & 0x33333333); // temp
            return (((m32 + (m32 >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24; // count
        }
        else
        {
            // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
            m = m - ((m >> 1) & (uint64_t) ~(uint64_t)0 / 3); // temp
            m = (m & (uint64_t) ~(uint64_t)0 / 15 * 3) + ((m >> 2) & (uint64_t) ~(uint64_t)0 / 15 * 3); // temp
            m = (m + (m >> 4)) & (uint64_t) ~(uint64_t)0 / 255 * 15; // temp
            return (m * ((uint64_t) ~(uint64_t)0 / 255)) >> (sizeof(uint64_t) - 1) * CHAR_BIT; // count
        }
#endif
    }
}

// from mask
template <class A, class T>
XSIMD_INLINE batch_bool<T, A> from_mask(batch_bool<T, A> const&, uint64_t mask, requires_arch<generic>) noexcept
Expand Down
14 changes: 14 additions & 0 deletions include/xsimd/arch/xsimd_emulated.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,20 @@ namespace xsimd
return r;
}

#if 0
// count
// Counts the bits set in x.mask() with the 64-bit parallel bit-count scheme
// referenced below. This definition sits inside an `#if 0` region, so it is
// currently compiled out.
template <class A, class T, size_t N = 8 * sizeof(T) * batch<T, A>::size>
XSIMD_INLINE size_t count(batch_bool<T, A> const& x, requires_arch<emulated<N>>) noexcept
{
    // https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
    const uint64_t all_ones = (uint64_t) ~(uint64_t)0;
    const uint64_t pair_mask = all_ones / 3; // 0x5555...
    const uint64_t nibble_mask = all_ones / 15 * 3; // 0x3333...
    const uint64_t byte_mask = all_ones / 255 * 15; // 0x0f0f...
    const uint64_t byte_ones = all_ones / 255; // 0x0101...

    uint64_t bits = x.mask();
    // per-pair, then per-nibble, then per-byte partial counts
    bits = bits - ((bits >> 1) & pair_mask);
    bits = (bits & nibble_mask) + ((bits >> 2) & nibble_mask);
    bits = (bits + (bits >> 4)) & byte_mask;
    // the multiplication accumulates all byte counts into the top byte
    return (bits * byte_ones) >> ((sizeof(uint64_t) - 1) * CHAR_BIT);
}
#endif

// store_complex
namespace detail
{
Expand Down
14 changes: 14 additions & 0 deletions include/xsimd/types/xsimd_api.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,20 @@ namespace xsimd
return kernel::cosh<A>(x, A {});
}

/**
 * @ingroup batch_reducers
 *
 * Reduces the boolean batch \c x to the number of its values that are true.
 * @param x batch of boolean
 * @return the number of values set to true in \c x.
 */
template <class T, class A>
XSIMD_INLINE size_t count(batch_bool<T, A> const& x) noexcept
{
    // Compile-time check of the (T, A) pair before dispatching.
    detail::static_check_supported_config<T, A>();
    // Tag object used to select the kernel overload for architecture A.
    const A arch_tag {};
    return kernel::count<A>(x, arch_tag);
}

/**
* @ingroup batch_arithmetic
*
Expand Down
10 changes: 10 additions & 0 deletions test/test_batch_bool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,14 @@ struct batch_bool_test
CHECK_EQ(batch_bool_type::from_mask(bool_g.interspersed.mask()).mask(), bool_g.interspersed.mask());
}

// Checks count() on three reference boolean batches supplied by
// xsimd::get_bool: all_false, all_true and half.
void test_count() const
{
auto bool_g = xsimd::get_bool<batch_bool_type> {};
// No value set: the count must be zero.
CHECK_EQ(count(bool_g.all_false), 0);
// Every value set: the count must equal the batch size.
CHECK_EQ(count(bool_g.all_true), batch_bool_type::size);
// The `half` batch is expected to have exactly size / 2 values set.
CHECK_EQ(count(bool_g.half), batch_bool_type::size / 2);
}

void test_comparison() const
{
auto bool_g = xsimd::get_bool<batch_bool_type> {};
Expand Down Expand Up @@ -485,6 +493,8 @@ TEST_CASE_TEMPLATE("[xsimd batch bool]", B, BATCH_TYPES)

SUBCASE("mask") { Test.test_mask(); }

SUBCASE("count") { Test.test_count(); }

SUBCASE("eq neq") { Test.test_comparison(); }
}
#endif
Loading