Skip to content

Commit

Permalink
15/07
Browse files Browse the repository at this point in the history
  • Loading branch information
Victorin authored and james-d-mitchell committed Oct 10, 2024
1 parent 18fc9ce commit a10dae8
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 85 deletions.
1 change: 1 addition & 0 deletions benchmark/bench_bmat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ TEST_CASE_METHOD(Fix_BMat16, "Transpose", "[BMat16][000]") {

TEST_CASE_METHOD(Fix_BMat16, "Multiplication", "[BMat16][001]") {
BENCHMARK_MEM_FN_PAIR(BMat16::operator*, pair_sample);
BENCHMARK_MEM_FN_PAIR(mult_4bmat8, pair_sample);
BENCHMARK_MEM_FN_PAIR(mult_naive, pair_sample);
BENCHMARK_MEM_FN_PAIR(mult_naive_array, pair_sample);
}
Expand Down
80 changes: 37 additions & 43 deletions include/hpcombi/bmat16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,15 @@
#include "bmat8.hpp"

#include "simde/x86/avx2.h"
// #include "simde/x86/avx512/popcnt.h"

namespace HPCombi {
using xpu16 = uint16_t __attribute__((vector_size(32)));
using xpu64 = uint64_t __attribute__((vector_size(32)));

xpu64 to_line(xpu64 vect);
xpu64 to_block(xpu64 vect);

//! Class for fast boolean matrices of dimension up to 16 x 16
//!
//! The methods for these small matrices over the boolean semiring
Expand All @@ -65,7 +69,7 @@ class BMat16 {

//! A constructor.
//!
//! This constructor initializes a BMat16 with a 256-bit register
//! This constructor initializes a matrix with a 256-bit register
//! The rows are equal to the 16 chunks, of 16 bits each,
//! of the binary representation of the matrix
explicit BMat16(xpu64 mat) noexcept :
Expand All @@ -75,9 +79,11 @@ class BMat16 {
//!
//! This constructor initializes a matrix with 4 64 bits unsigned int
explicit BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept;
// explicit BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept :
// _data{xpu64{n0, n1, n2, n3}} {}

//! A constructor.
//!
//! This constructor initializes a matrix where the rows of the matrix
//! are the vectors in \p mat.
explicit BMat16(std::vector<std::vector<bool>> const &mat) noexcept;

//! A constructor.
Expand Down Expand Up @@ -105,35 +111,25 @@ class BMat16 {

//! Returns \c true if \c this equals \p that.
//!
//! This method checks the mathematical equality of two BMat8 objects.
//! This method checks the mathematical equality of two BMat16 objects.
bool operator==(BMat16 const &that) const noexcept;

//! Returns \c true if \c this does not equal \p that
//!
//! This method checks the mathematical inequality of two BMat8 objects.
//! This method checks the mathematical inequality of two BMat16 objects.
bool operator!=(BMat16 const &that) const noexcept {
return !(*this == that);
}

//! Returns \c true if \c this is less than \p that.
//!
//! This method checks whether a BMat16 objects is less than another.
//! We order by the results of to_int() for each matrix.
bool operator<(BMat16 const &that) const noexcept;

//! Returns \c true if \c this is greater than \p that.
//!
//! This method checks whether a BMat16 objects is greater than another.
//! We order by the results of to_int() for each matrix.
// bool operator>(BMat8 const &that) const noexcept {
// return _data > that._data;
// }

// Conversion of type of storage, from blocks to lines
BMat16 to_line() const;

// Conversion of type of storage, from lines to blocks
BMat16 to_block() const;
bool operator>(BMat16 const &that) const noexcept;

//! Returns the entry in the (\p i, \p j)th position.
//!
Expand All @@ -157,45 +153,43 @@ class BMat16 {
//! Returns the bitwise or between \c this and \p that
//!
//! This method perform the bitwise operator on the matrices and
//! returns the result as a BMat16
//! returns the result as a BMat16.
BMat16 operator|(BMat16 const& that) const noexcept {
return BMat16(_data | that._data);
}

//! Returns the transpose of \c this.
//!
//! Returns the standard matrix transpose of a BMat8.
//! Uses a naive technique, by simply iterating through all entries
BMat16 transpose_naive() const noexcept;

//! Returns the transpose of \c this.
//!
//! Returns the standard matrix transpose of a BMat16.
//! Uses a naive technique, by simply iterating through all entries
BMat16 transpose_block() const noexcept;
//! Uses a naive technique, by simply iterating through all entries.
BMat16 transpose_naive() const noexcept;

//! Returns the transpose of \c this
//!
//! Returns the standard matrix transpose of a BMat8.
//! Returns the standard matrix transpose of a BMat16.
//! Uses the technique found in Knuth AoCP Vol. 4 Fasc. 1a, p. 15.
BMat16 transpose() const noexcept {
return to_block().transpose_block().to_line();
}
BMat16 transpose() const noexcept;

//! Returns the matrix product of \c this and the transpose of \p that
//!
//! This method returns the standard matrix product (over the
//! boolean semiring) of two BMat8 objects. This is faster than transposing
//! boolean semiring) of two BMat16 objects. This is faster than transposing
//! that and calling the product of \c this with it. Implementation uses
//! vector instructions.
BMat16 mult_transpose(BMat16 const &that) const noexcept;

BMat16 mult_bmat8(BMat16 const &that) const noexcept;
//! Returns the matrix product of \c this and \p that
//!
//! This method returns the standard matrix product (over the
//! boolean semiring) of two BMat16 objects.
//! It comes down to 8 products of 8x8 matrices,
//! which make up a 16x16 when we cut it into 4.
BMat16 mult_4bmat8(BMat16 const &that) const noexcept;

//! Returns the matrix product of \c this and \p that
//!
//! This method returns the standard matrix product (over the
//! boolean semiring) of two BMat8 objects. This is a fast implementation
//! boolean semiring) of two BMat16 objects. This is a fast implementation
//! using transposition and vector instructions.
BMat16 operator*(BMat16 const &that) const noexcept {
return mult_transpose(that.transpose());
Expand All @@ -204,19 +198,19 @@ class BMat16 {
//! Returns the matrix product of \c this and \p that
//!
//! This method returns the standard matrix product (over the
//! boolean semiring) of two BMat8 objects. It performs the most naive approach
//! by simply iterating through all entries using the access operator of BMat8
//! boolean semiring) of two BMat16 objects. It performs the most naive approach
//! by simply iterating through all entries using the access operator of BMat16
BMat16 mult_naive(BMat16 const& that) const noexcept;

//! Returns the matrix product of \c this and \p that
//!
//! This method returns the standard matrix product (over the
//! boolean semiring) of two BMat8 objects. It performs the most naive approach
//! boolean semiring) of two BMat16 objects. It performs the most naive approach
//! by simply iterating through all entries using array conversion.
BMat16 mult_naive_array(BMat16 const& that) const noexcept;

// //! Returns the number of non-zero rows of \c this
// size_t nr_rows() const noexcept;
//! Returns the number of non-zero rows of \c this
size_t nr_rows() const noexcept;

//! Returns a \c std::vector for rows of \c this
// Not noexcept because it constructs a vector
Expand All @@ -232,20 +226,20 @@ class BMat16 {
return BMat16(ones[dim >= 8 ? 8 : dim], 0, 0, ones[dim >= 8 ? dim - 8 : 0]);
}

//! Returns a random BMat8
//! Returns a random BMat16
//!
//! This method returns a BMat8 chosen at random.
//! This method returns a BMat16 chosen at random.
// Not noexcept because random things aren't
static BMat16 random();

//! Returns a random square BMat8 up to dimension \p dim.
//! Returns a random square BMat16 up to dimension \p dim.
//!
//! This method returns a BMat8 chosen at random, where only the
//! This method returns a BMat16 chosen at random, where only the
//! top-left \p dim x \p dim entries may be non-zero.
// Not noexcept because BMat8::random above is not
// Not noexcept because BMat16::random above is not
static BMat16 random(size_t dim);

// void swap(BMat16 &that) noexcept { std::swap(this->_data, that._data); }
void swap(BMat16 &that) noexcept { std::swap(this->_data, that._data); }

// ! Write \c this on \c os
// Not noexcept
Expand Down
81 changes: 44 additions & 37 deletions include/hpcombi/bmat16_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,17 @@ static_assert(std::is_trivial<BMat16>(), "BMat16 is not a trivial class!");
static constexpr xpu16 line{0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07, 0x800, 0x901, 0xa02, 0xb03, 0xc04, 0xd05, 0xe06, 0xf07};
static constexpr xpu16 block{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d};

inline xpu64 to_line(xpu64 vect) {
return simde_mm256_shuffle_epi8(vect, line);
}

inline xpu64 to_block(xpu64 vect) {
return simde_mm256_shuffle_epi8(vect, block);
}

inline BMat16::BMat16(uint64_t n0, uint64_t n1, uint64_t n2, uint64_t n3) noexcept {
xpu64 tmp{n0, n1, n2, n3};
_data = simde_mm256_shuffle_epi8(tmp, line);
_data = to_line(tmp);
}

inline BMat16::BMat16(std::vector<std::vector<bool>> const &mat) noexcept {
Expand All @@ -57,15 +65,21 @@ inline bool BMat16::operator==(BMat16 const &that) const noexcept {
}

bool BMat16::operator<(BMat16 const &that) const noexcept {
return _data[0] < that._data[0] || (_data[0] == that._data[0] && (
_data[1] < that._data[1] || (_data[1] == that._data[1] && (
_data[2] < that._data[2] || (_data[2] == that._data[2] && (
_data[3] < that._data[3]
))))));
return _data[0] < that._data[0] ||
(_data[0] == that._data[0] && (_data[1] < that._data[1] ||
(_data[1] == that._data[1] && (_data[2] < that._data[2] ||
(_data[2] == that._data[2] && (_data[3] < that._data[3]))))));
}

bool BMat16::operator>(BMat16 const &that) const noexcept {
return _data[0] > that._data[0] ||
(_data[0] == that._data[0] && (_data[1] > that._data[1] ||
(_data[1] == that._data[1] && (_data[2] > that._data[2] ||
(_data[2] == that._data[2] && (_data[3] > that._data[3]))))));
}

std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
xpu64 tmp = simde_mm256_shuffle_epi8(_data, block);
xpu64 tmp = to_block(_data);
uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
std::array<std::array<bool, 16>, 16> res;
for (int i = 0; i < 64; i++) {
Expand All @@ -77,14 +91,6 @@ std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
return res;
}

inline BMat16 BMat16::to_line() const {
return BMat16(simde_mm256_shuffle_epi8(_data, line));
}

inline BMat16 BMat16::to_block() const {
return BMat16(simde_mm256_shuffle_epi8(_data, block));
}

inline BMat16 BMat16::transpose_naive() const noexcept {
uint64_t a = 0, b = 0, c = 0, d = 0;
for (int i = 7; i >= 0; i--) {
Expand All @@ -98,15 +104,16 @@ inline BMat16 BMat16::transpose_naive() const noexcept {
return BMat16(a, b, c, d);
}

inline BMat16 BMat16::transpose_block() const noexcept {
xpu64 x = simde_mm256_set_epi64x(_data[3], _data[1], _data[2], _data[0]);
inline BMat16 BMat16::transpose() const noexcept {
xpu64 tmp = to_block(_data);
xpu64 x = simde_mm256_set_epi64x(tmp[3], tmp[1], tmp[2], tmp[0]);
xpu64 y = (x ^ (x >> 7)) & (xpu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
x = x ^ y ^ (y << 7);
y = (x ^ (x >> 14)) & (xpu64{0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC, 0xCCCC0000CCCC});
x = x ^ y ^ (y << 14);
y = (x ^ (x >> 28)) & (xpu64{0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0, 0xF0F0F0F0});
x = x ^ y ^ (y << 28);
return BMat16(x);
return BMat16(to_line(x));
}

static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100};
Expand All @@ -131,23 +138,16 @@ inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
return BMat16(data);
}

BMat16 BMat16::mult_bmat8(BMat16 const &that) const noexcept {
xpu64 tmp1 = simde_mm256_shuffle_epi8(_data, block),
tmp2 = simde_mm256_shuffle_epi8(that._data, block);
BMat8 t1_0(tmp1[0]),
t1_1(tmp1[1]),
t1_2(tmp1[2]),
t1_3(tmp1[3]),
t2_0 = BMat8(tmp2[0]).transpose(),
t2_1 = BMat8(tmp2[1]).transpose(),
t2_2 = BMat8(tmp2[2]).transpose(),
t2_3 = BMat8(tmp2[3]).transpose();
return BMat16(
(t1_0.mult_transpose(t2_0) | t1_1.mult_transpose(t2_2)).to_int(),
(t1_0.mult_transpose(t2_1) | t1_1.mult_transpose(t2_3)).to_int(),
(t1_2.mult_transpose(t2_0) | t1_3.mult_transpose(t2_2)).to_int(),
(t1_2.mult_transpose(t2_1) | t1_3.mult_transpose(t2_3)).to_int()
);
BMat16 BMat16::mult_4bmat8(BMat16 const &that) const noexcept {
BMat16 tmp = that.transpose();
xpu64 t1 = to_block(_data),
t2 = to_block(tmp._data);
BMat8 a1(t1[0]), b1(t1[1]), c1(t1[2]), d1(t1[3]),
a2(t2[0]), b2(t2[1]), c2(t2[2]), d2(t2[3]);
return BMat16((a1.mult_transpose(a2) | b1.mult_transpose(b2)).to_int(),
(a1.mult_transpose(c2) | b1.mult_transpose(d2)).to_int(),
(c1.mult_transpose(a2) | d1.mult_transpose(b2)).to_int(),
(c1.mult_transpose(c2) | d1.mult_transpose(d2)).to_int());
}

BMat16 BMat16::mult_naive(BMat16 const &that) const noexcept {
Expand Down Expand Up @@ -183,10 +183,17 @@ BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
return BMat16(a, b, c, d);
}

inline std::vector<uint16_t> BMat16::rows() const { // A changer
inline size_t BMat16::nr_rows() const noexcept{
xpu16 tmp = _data, zero = simde_mm256_setzero_si256();
xpu16 x = (tmp != zero);
return 0;
// return simde_mm256_popcnt_epi16(x); // To change
}

inline std::vector<uint16_t> BMat16::rows() const { // To change
std::vector<uint16_t> rows;
for (size_t i = 0; i < 16; ++i) {
uint16_t row = static_cast<uint16_t>(_data[i/4] << (8 * i) >> 56);
uint16_t row = static_cast<uint16_t>(_data[i/4] << (16 * (i%4)) >> 48);
rows.push_back(row);
}
return rows;
Expand Down
19 changes: 14 additions & 5 deletions tests/test_bmat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,10 +162,11 @@ TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][002]") {
}
}

TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][003]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][004]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_4bmat8, BMlist, "[BMat16][003]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][004]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][005]");

TEST_CASE("BMat16::random", "[BMat16][005]") {
TEST_CASE("BMat16::random", "[BMat16][006]") {
for (size_t d = 1; d < 8; ++d) {
BMat16 bm = BMat16::random(d);
for (size_t i = d + 1; i < 16; ++i) {
Expand All @@ -177,7 +178,7 @@ TEST_CASE("BMat16::random", "[BMat16][005]") {
}
}

TEST_CASE("BMat8::operator()", "[BMat8][006]") {
TEST_CASE("BMat16::operator()", "[BMat16][007]") {
std::vector<std::vector<bool>> mat = {
{0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0},
{0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0},
Expand All @@ -202,7 +203,7 @@ TEST_CASE("BMat8::operator()", "[BMat8][006]") {
}
}

TEST_CASE_METHOD(BMat16Fixture, "BMa16::operator<<", "[BMat16][007]") {
TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator<<", "[BMat16][008]") {
std::ostringstream oss;
oss << bm3;
CHECK(oss.str() == "0001010001011011\n"
Expand All @@ -227,4 +228,12 @@ TEST_CASE_METHOD(BMat16Fixture, "BMa16::operator<<", "[BMat16][007]") {
os << BMat8::random(); // Also does not do anything visible
}

TEST_CASE_METHOD(BMat16Fixture, "BMat16::nr_rows", "[BMat16][009]") {
CHECK(zero.nr_rows() == 0);
CHECK(one1.nr_rows() == 1);
CHECK(one2.nr_rows() == 2);
CHECK(bm.nr_rows() == 16);
CHECK(BMat16({{1, 0, 1}, {1, 1, 0}, {0, 0, 0}}).nr_rows() == 2);
}

} // namespace HPCombi

0 comments on commit a10dae8

Please sign in to comment.