Skip to content

Commit

Permalink
Merge pull request #32207 from vespa-engine/balder/use-std-popcount
Browse files (browse the repository at this point in the history)
Use std::popcount instead of gcc __builtin_popcount
  • Loading branch information
baldersheim authored Aug 21, 2024
2 parents 8a5b405 + 60a79fe commit dd3ece9
Show file tree
Hide file tree
Showing 5 changed files with 18 additions and 17 deletions.
2 changes: 1 addition & 1 deletion eval/src/tests/ann/xp-lsh-nns.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ static inline int hash_dist(const LsMaskHash &h1, const LsMaskHash &h2) {
for (size_t o = 0; o < NUM_HASH_WORDS; ++o) {
uint64_t hx = h1.bits[o] ^ h2.bits[o];
hx &= (h1.mask[o] | h2.mask[o]);
- cnt += __builtin_popcountl(hx);
+ cnt += std::popcount(hx);
}
return cnt;
}
Expand Down
4 changes: 3 additions & 1 deletion eval/src/vespa/eval/eval/hamming_distance.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,14 @@

#pragma once

+ #include <bit>
+
namespace vespalib::eval {

inline double hamming_distance(double a, double b) {
uint8_t x = (uint8_t) (int8_t) a;
uint8_t y = (uint8_t) (int8_t) b;
- return __builtin_popcount(x ^ y);
+ return std::popcount(uint8_t(x ^ y));
}

}
18 changes: 8 additions & 10 deletions searchlib/src/vespa/searchlib/common/condensedbitvectors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@ class CondensedBitVectorT : public CondensedBitVector
}
}
private:
- static uint8_t countBits(T v) {
- return ((sizeof(T)) <= 4)
- ? __builtin_popcount(v)
- : __builtin_popcountl(v);
+ static constexpr uint8_t countBits(T v) noexcept {
+ return std::popcount(v);
}
T computeMask(const KeySet & keys) const __attribute__ ((noinline)) {
T mask(0);
Expand All @@ -38,13 +36,13 @@ class CondensedBitVectorT : public CondensedBitVector
return mask;
}
static const uint64_t B = 1ul;
- void initializeCountVector(const KeySet & keys, CountVector & cv) const override {
+ void initializeCountVector(const KeySet & keys, std::span<uint8_t> cv) const override {
struct S {
void operator () (uint8_t & cv, uint8_t v) { cv = v; }
};
computeCountVector(computeMask(keys), cv, S());
}
- void addCountVector(const KeySet & keys, CountVector & cv) const override {
+ void addCountVector(const KeySet & keys, std::span<uint8_t> cv) const override {
struct S {
void operator () (uint8_t & cv, uint8_t v) { cv += v; }
};
Expand All @@ -56,10 +54,10 @@ class CondensedBitVectorT : public CondensedBitVector
}

template <typename F>
- void computeCountVector(T mask, CountVector & cv, F func) const __attribute__((noinline));
+ void computeCountVector(T mask, std::span<uint8_t> cv, F func) const __attribute__((noinline));

template <typename F>
- void computeTail(T mask, CountVector & cv, F func, size_t i) const __attribute__((noinline));
+ void computeTail(T mask, std::span<uint8_t> cv, F func, size_t i) const __attribute__((noinline));

void set(Key key, uint32_t index, bool v) override {
assert(key < getKeyCapacity());
Expand Down Expand Up @@ -92,7 +90,7 @@ class CondensedBitVectorT : public CondensedBitVector
template <typename T>
template <typename F>
void
- CondensedBitVectorT<T>::computeCountVector(T mask, CountVector & cv, F func) const
+ CondensedBitVectorT<T>::computeCountVector(T mask, std::span<uint8_t> cv, F func) const
{
size_t i(0);
const size_t UNROLL = 2;
Expand All @@ -109,7 +107,7 @@ CondensedBitVectorT<T>::computeCountVector(T mask, CountVector & cv, F func) con
template <typename T>
template <typename F>
void
- CondensedBitVectorT<T>::computeTail(T mask, CountVector & cv, F func, size_t i) const
+ CondensedBitVectorT<T>::computeTail(T mask, std::span<uint8_t> cv, F func, size_t i) const
{
auto* v = &_v.acquire_elem_ref(0);
for (; i < cv.size(); i++) {
Expand Down
4 changes: 2 additions & 2 deletions searchlib/src/vespa/searchlib/common/condensedbitvectors.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ class CondensedBitVector

virtual ~CondensedBitVector();

- virtual void initializeCountVector(const KeySet & keys, CountVector & v) const = 0;
- virtual void addCountVector(const KeySet & keys, CountVector & v) const = 0;
+ virtual void initializeCountVector(const KeySet & keys, std::span<uint8_t> v) const = 0;
+ virtual void addCountVector(const KeySet & keys, std::span<uint8_t> v) const = 0;
virtual void set(Key key, uint32_t index, bool v) = 0;
virtual bool get(Key key, uint32_t index) const = 0;
virtual void clearIndex(uint32_t index) = 0;
Expand Down
7 changes: 4 additions & 3 deletions vespalib/src/vespa/vespalib/util/binary_hamming_distance.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
#include "binary_hamming_distance.h"
#include <cstdint>
+ #include <bit>

namespace vespalib {

Expand All @@ -21,19 +22,19 @@ binary_hamming_distance(const void *lhs, const void *rhs, size_t sz) noexcept {
const auto *words_b = static_cast<const uint64_t *>(rhs);
for (; (i+UNROLL_CNT) * WORD_SZ <= sz; i += UNROLL_CNT) {
for (uint8_t j=0; j < UNROLL_CNT; j++) {
- sum += __builtin_popcountl(words_a[i+j] ^ words_b[i+j]);
+ sum += std::popcount(words_a[i+j] ^ words_b[i+j]);
}
}
for (; (i + 1) * WORD_SZ <= sz; ++i) {
- sum += __builtin_popcountl(words_a[i] ^ words_b[i]);
+ sum += std::popcount(words_a[i] ^ words_b[i]);
}
}
if (__builtin_expect((i * WORD_SZ < sz), false)) {
const auto *bytes_a = static_cast<const uint8_t *>(lhs);
const auto *bytes_b = static_cast<const uint8_t *>(rhs);
for (i *= WORD_SZ; i < sz; ++i) {
uint64_t xor_bits = bytes_a[i] ^ bytes_b[i];
- sum += __builtin_popcountl(xor_bits);
+ sum += std::popcount(xor_bits);
}
}
return sum;
Expand Down

0 comments on commit dd3ece9

Please sign in to comment.