Skip to content

Commit

Permalink
Use bmi2 target for kth_set_bit, improve compress_coords and radix sort
Browse files Browse the repository at this point in the history
  • Loading branch information
adamant-pwn committed Nov 14, 2024
1 parent bee5f9d commit f6f706d
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 7 deletions.
2 changes: 1 addition & 1 deletion cp-algo/util/bit.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace cp_algo {
// Counts how many bits are set among the k lowest bits of x
// (equivalently: the number of set bits strictly below position k).
// NOTE(review): assumes bit_width<T> is the width of T in bits and that
// k does not exceed it -- confirm against the definition earlier in this header.
size_t order_of_bit(auto x, size_t k) {
    // k == 0 is handled separately: the shift below would equal the full
    // width of x, which is not a valid shift amount.
    if(k == 0) {
        return 0;
    }
    // Shifting left pushes every bit at position >= k out of the value,
    // so only the k lowest bits remain to be counted.
    auto const shift = bit_width<decltype(x)> - k;
    return std::popcount(x << shift);
}
// Returns the position (counted from the least significant bit, starting at 0)
// of the k-th set bit of x, where k is also counted from 0.
// If x has no set bit with index k, the deposited value is zero and
// the result is 64. k must be less than 64.
// The target attribute enables BMI2 code generation for this function only,
// so _pdep_u64 is usable without compiling the whole unit with BMI2 enabled.
[[gnu::target("bmi2")]]
size_t kth_set_bit(uint64_t x, size_t k) {
    // A single bit at index k; PDEP routes it to the k-th set bit of the mask x.
    auto const selector = 1ULL << k;
    auto const isolated = _pdep_u64(selector, x);
    // Exactly one bit (or none) survives, so its index is the trailing zero count.
    return std::countr_zero(isolated);
}
Expand Down
11 changes: 9 additions & 2 deletions cp-algo/util/compress_coords.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,17 @@
namespace cp_algo {
// coords is a range of reference_wrapper<T>
auto compress_coords(auto &coords) {
std::vector<int> original;
using T = std::decay_t<std::unwrap_reference_t<
std::ranges::range_value_t<decltype(coords)>
>>;
std::vector<T> original;
if(empty(coords)) {
return original;
}
original.reserve(size(coords));
radix_sort(coords);
int idx = -1, prev = -1;
size_t idx = -1;
T prev = ~coords.front();
for(auto &x: coords) {
if(x != prev) {
idx++;
Expand Down
8 changes: 4 additions & 4 deletions cp-algo/util/sort.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,12 @@ namespace cp_algo {
if(empty(a)) {
return;
}
auto mx = std::ranges::max(a);
with_bit_floor(size(a), [&]<size_t floor>() {
auto [mn, mx] = std::ranges::minmax(a);
with_bit_floor<1>(size(a), [&]<size_t floor>() {
constexpr int base = std::min<size_t>(floor, 1 << 16);
for(int64_t i = 1; i <= mx; i *= base) {
for(int64_t i = 1; i <= mx - mn; i *= base) {
count_sort<base>(a, [&](auto x) {
return x / i % base;
return (x - mn) / i % base;
});
}
});
Expand Down

0 comments on commit f6f706d

Please sign in to comment.