From 60a79fe888760fe52589f120a24da6d25748ee4e Mon Sep 17 00:00:00 2001 From: Henning Baldersheim Date: Wed, 21 Aug 2024 12:19:37 +0000 Subject: [PATCH] Use std::popcount instead of gcc __builtin_popcount --- eval/src/tests/ann/xp-lsh-nns.cpp | 2 +- eval/src/vespa/eval/eval/hamming_distance.h | 4 +++- .../searchlib/common/condensedbitvectors.cpp | 18 ++++++++---------- .../searchlib/common/condensedbitvectors.h | 4 ++-- .../vespalib/util/binary_hamming_distance.cpp | 7 ++++--- 5 files changed, 18 insertions(+), 17 deletions(-) diff --git a/eval/src/tests/ann/xp-lsh-nns.cpp b/eval/src/tests/ann/xp-lsh-nns.cpp index 76660d1b2a6a..225c1c286b0d 100644 --- a/eval/src/tests/ann/xp-lsh-nns.cpp +++ b/eval/src/tests/ann/xp-lsh-nns.cpp @@ -29,7 +29,7 @@ static inline int hash_dist(const LsMaskHash &h1, const LsMaskHash &h2) { for (size_t o = 0; o < NUM_HASH_WORDS; ++o) { uint64_t hx = h1.bits[o] ^ h2.bits[o]; hx &= (h1.mask[o] | h2.mask[o]); - cnt += __builtin_popcountl(hx); + cnt += std::popcount(hx); } return cnt; } diff --git a/eval/src/vespa/eval/eval/hamming_distance.h b/eval/src/vespa/eval/eval/hamming_distance.h index a21d48ceaa42..ef76a11d0bc8 100644 --- a/eval/src/vespa/eval/eval/hamming_distance.h +++ b/eval/src/vespa/eval/eval/hamming_distance.h @@ -2,12 +2,14 @@ #pragma once +#include + namespace vespalib::eval { inline double hamming_distance(double a, double b) { uint8_t x = (uint8_t) (int8_t) a; uint8_t y = (uint8_t) (int8_t) b; - return __builtin_popcount(x ^ y); + return std::popcount(uint8_t(x ^ y)); } } diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp index 9ce47d773860..b4278df94567 100644 --- a/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp @@ -24,10 +24,8 @@ class CondensedBitVectorT : public CondensedBitVector } } private: - static uint8_t countBits(T v) { - return ((sizeof(T)) <= 4) - ? __builtin_popcount(v) - : __builtin_popcountl(v); + static constexpr uint8_t countBits(T v) noexcept { + return std::popcount(v); } T computeMask(const KeySet & keys) const __attribute__ ((noinline)) { T mask(0); @@ -38,13 +36,13 @@ class CondensedBitVectorT : public CondensedBitVector return mask; } static const uint64_t B = 1ul; - void initializeCountVector(const KeySet & keys, CountVector & cv) const override { + void initializeCountVector(const KeySet & keys, std::span cv) const override { struct S { void operator () (uint8_t & cv, uint8_t v) { cv = v; } }; computeCountVector(computeMask(keys), cv, S()); } - void addCountVector(const KeySet & keys, CountVector & cv) const override { + void addCountVector(const KeySet & keys, std::span cv) const override { struct S { void operator () (uint8_t & cv, uint8_t v) { cv += v; } }; @@ -56,10 +54,10 @@ class CondensedBitVectorT : public CondensedBitVector } template - void computeCountVector(T mask, CountVector & cv, F func) const __attribute__((noinline)); + void computeCountVector(T mask, std::span cv, F func) const __attribute__((noinline)); template - void computeTail(T mask, CountVector & cv, F func, size_t i) const __attribute__((noinline)); + void computeTail(T mask, std::span cv, F func, size_t i) const __attribute__((noinline)); void set(Key key, uint32_t index, bool v) override { assert(key < getKeyCapacity()); @@ -92,7 +90,7 @@ class CondensedBitVectorT : public CondensedBitVector template template void -CondensedBitVectorT::computeCountVector(T mask, CountVector & cv, F func) const +CondensedBitVectorT::computeCountVector(T mask, std::span cv, F func) const { size_t i(0); const size_t UNROLL = 2; @@ -109,7 +107,7 @@ CondensedBitVectorT::computeCountVector(T mask, CountVector & cv, F func) con template template void -CondensedBitVectorT::computeTail(T mask, CountVector & cv, F func, size_t i) const +CondensedBitVectorT::computeTail(T mask, std::span cv, F func, size_t i) const { auto* v = &_v.acquire_elem_ref(0); for (; i < cv.size(); i++) { diff --git a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h index 668bf1e44671..2d914163b7f3 100644 --- a/searchlib/src/vespa/searchlib/common/condensedbitvectors.h +++ b/searchlib/src/vespa/searchlib/common/condensedbitvectors.h @@ -18,8 +18,8 @@ class CondensedBitVector virtual ~CondensedBitVector(); - virtual void initializeCountVector(const KeySet & keys, CountVector & v) const = 0; - virtual void addCountVector(const KeySet & keys, CountVector & v) const = 0; + virtual void initializeCountVector(const KeySet & keys, std::span v) const = 0; + virtual void addCountVector(const KeySet & keys, std::span v) const = 0; virtual void set(Key key, uint32_t index, bool v) = 0; virtual bool get(Key key, uint32_t index) const = 0; virtual void clearIndex(uint32_t index) = 0; diff --git a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp index 5f63925bfefe..f22fbaf20951 100644 --- a/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp +++ b/vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp @@ -1,6 +1,7 @@ // Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root. #include "binary_hamming_distance.h" #include +#include namespace vespalib { @@ -21,11 +22,11 @@ binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept { const auto *words_b = static_cast(rhs); for (; (i+UNROLL_CNT) * WORD_SZ <= sz; i += UNROLL_CNT) { for (uint8_t j=0; j < UNROLL_CNT; j++) { - sum += __builtin_popcountl(words_a[i+j] ^ words_b[i+j]); + sum += std::popcount(words_a[i+j] ^ words_b[i+j]); } } for (; (i + 1) * WORD_SZ <= sz; ++i) { - sum += __builtin_popcountl(words_a[i] ^ words_b[i]); + sum += std::popcount(words_a[i] ^ words_b[i]); } } if (__builtin_expect((i * WORD_SZ < sz), false)) { @@ -33,7 +34,7 @@ binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept { const auto *bytes_b = static_cast(rhs); for (i *= WORD_SZ; i < sz; ++i) { uint64_t xor_bits = bytes_a[i] ^ bytes_b[i]; - sum += __builtin_popcountl(xor_bits); + sum += std::popcount(xor_bits); } } return sum;