Skip to content

Commit

Permalink
17/07
Browse files Browse the repository at this point in the history
  • Loading branch information
Victorin committed Jul 17, 2024
1 parent 10d1722 commit 2ecaad4
Show file tree
Hide file tree
Showing 3 changed files with 148 additions and 75 deletions.
26 changes: 19 additions & 7 deletions include/hpcombi/bmat16.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,29 @@
#include <vector> // for vector

#include "debug.hpp" // for HPCOMBI_ASSERT
#include "epu8.hpp" // for epu8
#include "perm16.hpp" // for Perm16
#include "bmat8.hpp"

#include "simde/x86/avx2.h"
// #include "simde/x86/avx512/popcnt.h"

namespace HPCombi {
//! 32-byte vector type holding 16 lanes of uint16_t
//! (compiler vector extension, \c vector_size(32)).
using xpu16 = uint16_t __attribute__((vector_size(32)));
//! 32-byte vector type holding 4 lanes of uint64_t
//! (compiler vector extension, \c vector_size(32)).
using xpu64 = uint64_t __attribute__((vector_size(32)));

//! Convert the storage layout of a xpu64 representing a 16x16 matrix
//! (as used in BMat16) from blocks to rows.
//!
//! In the row layout, each 64-bit unsigned integer represents 4 rows
//! (lines) of the matrix.
//!
//! \param vect the matrix data in block layout
//! \return the same matrix data in row layout
xpu64 to_line(xpu64 vect);

//! Convert the storage layout of a xpu64 representing a 16x16 matrix
//! (as used in BMat16) from rows to blocks.
//!
//! In the block layout, each 64-bit unsigned integer represents one of
//! the four 8x8 matrices that make up the 16x16 matrix when it is
//! quartered.
//!
//! \param vect the matrix data in row layout
//! \return the same matrix data in block layout
xpu64 to_block(xpu64 vect);



//! Class for fast boolean matrices of dimension up to 16 x 16
//!
//! The methods for these small matrices over the boolean semiring
Expand All @@ -60,8 +69,6 @@ xpu64 to_block(xpu64 vect);
//! BMat16 is a trivial class.
class BMat16 {
public:
xpu64 _data;

//! A default constructor.
//!
//! This constructor gives no guarantees on what the matrix will contain.
Expand All @@ -71,7 +78,7 @@ class BMat16 {
//!
//! This constructor initializes a matrix with a 256-bit register.
//! The rows are equal to the 16 chunks, of 16 bits each,
//! of the binary representation of the matrix.
//!
//! \param mat the 256-bit value stored verbatim as the matrix data
explicit BMat16(xpu64 mat) noexcept :
_data{mat} {}

Expand Down Expand Up @@ -242,9 +249,14 @@ class BMat16 {

//! Exchange the contents of \c this and \c that.
//!
//! \param that the matrix whose data is swapped with the data of \c this
void swap(BMat16 &that) noexcept { std::swap(this->_data, that._data); }

//! Write \c this on \c os.
//!
//! \param os the output stream to write to
// Not noexcept
std::ostream &write(std::ostream &os) const;


private:
//! The 256-bit storage of the matrix, held as four 64-bit lanes.
// NOTE(review): rows() extracts 16-bit rows from _data[i/4], which suggests
// the row layout (4 rows per 64-bit lane) is the stored one — confirm.
xpu64 _data;

};

} // namespace HPCombi
Expand Down
17 changes: 9 additions & 8 deletions include/hpcombi/bmat16_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,9 @@ inline std::array<std::array<bool, 16>, 16> BMat16::to_array() const noexcept {
uint64_t a = tmp[0], b = tmp[1], c = tmp[2], d = tmp[3];
std::array<std::array<bool, 16>, 16> res;
for (size_t i = 0; i < 64; ++i) {
res[i/8][i%8] = a & 1; a >>= 1;
res[i/8][8 + i%8] = b & 1; b >>= 1;
res[8 + i/8][i%8] = c & 1; c >>= 1;
res[i/8][i%8] = a & 1; a >>= 1;
res[i/8][8 + i%8] = b & 1; b >>= 1;
res[8 + i/8][i%8] = c & 1; c >>= 1;
res[8 + i/8][8 + i%8] = d & 1; d >>= 1;
}
return res;
Expand Down Expand Up @@ -129,7 +129,6 @@ inline BMat16 BMat16::transpose() const noexcept {
}

// Constant 16-lane tables; the first eight entries are repeated in the last
// eight, so both 128-bit halves carry the same pattern.
// NOTE(review): each 16-bit entry looks like a pair of byte indices
// (e.g. 0x302 = bytes 2 and 3), so these are presumably byte-shuffle masks:
// `rot` would rotate the 16-bit lanes of each half by one position, and
// `alt` would gather the even bytes followed by the odd bytes — confirm
// against their uses.
static constexpr xpu16 rot{0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100, 0x302, 0x504, 0x706, 0x908, 0xb0a, 0xd0c, 0xf0e, 0x100};
static constexpr xpu16 alt{0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d, 0x200, 0x604, 0xa08, 0xe0c, 0x301, 0x705, 0xb09, 0xf0d};

inline BMat16 BMat16::mult_transpose(BMat16 const &that) const noexcept {
xpu16 x = _data;
Expand Down Expand Up @@ -185,8 +184,8 @@ inline BMat16 BMat16::mult_naive_array(BMat16 const &that) const noexcept {
for (int j = 7; j >= 0; --j) {
a <<= 1; b <<= 1; c <<= 1; d <<= 1;
for (size_t k = 0; k < 16; ++k) {
a |= tab1[i][k] & tab2[k][j];
b |= tab1[i][k] & tab2[k][j + 8];
a |= tab1[i][k] & tab2[k][j];
b |= tab1[i][k] & tab2[k][j + 8];
c |= tab1[i + 8][k] & tab2[k][j];
d |= tab1[i + 8][k] & tab2[k][j + 8];
}
Expand All @@ -202,7 +201,7 @@ inline size_t BMat16::nr_rows() const noexcept{
++res;
return res;

//// Vectorized version that doesn't work due to the absence of popcnt in simde
//// Vectorized version which doesn't work due to the absence of popcnt in simde
// xpu16 tmp = _data, zero = simde_mm256_setzero_si256();
// xpu16 x = (tmp != zero);
// return simde_mm256_popcnt_epi16(x);
Expand All @@ -212,6 +211,8 @@ inline std::vector<uint16_t> BMat16::rows() const {
std::vector<uint16_t> rows;
for (size_t i = 0; i < 16; ++i) {
uint16_t row_rev = (_data[i/4] << (16 * (3 - i%4)) >> 48);

// The row needs to be reversed
uint16_t row = 0;
for (size_t j = 0; j < 16; ++j) {
row = (row << 1) | (row_rev & 1);
Expand Down Expand Up @@ -294,7 +295,7 @@ inline std::ostream &BMat16::write(std::ostream &os) const {

namespace std {

// Stream insertion for HPCombi::BMat16: forwards to BMat16::write and
// returns whatever stream reference it returns.
// Not noexcept because BMat16::write isn't
inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat16 const &bm) {
return bm.write(os);
}
Expand Down
180 changes: 120 additions & 60 deletions tests/test_bmat16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,6 @@
#include <catch2/catch_test_macros.hpp> // for operator""_catch_sr, operator==

#include "hpcombi/bmat16.hpp" // for BMat16, operator<<
#include "hpcombi/perm16.hpp" // for Perm16
#include "hpcombi/vect16.hpp" // for Vect16

namespace HPCombi {
namespace {
Expand Down Expand Up @@ -88,38 +86,38 @@ struct BMat16Fixture {
{1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
{0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1}}),
bm2({{1, 1}, {0, 1}}), bm2t({{1, 0}, {1, 1}}),
bm3({{0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1},
{0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1},
{1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0},
{0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1},
{1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0},
{1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0},
{0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1},
{0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0},
{1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1},
{1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0},
{1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1},
{1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1},
{0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0},
{0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1},
{0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0},
{0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1}}),
bm3t({{0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0},
{0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1},
{0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0},
{1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1},
{0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0},
{1, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1},
{0, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1},
{0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1},
{0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1},
{1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1},
{0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1},
{1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1},
{1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0},
{0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1},
{1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0},
{1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1}}),
bm3({{0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0},
{0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1},
{1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1},
{0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0},
{0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1},
{1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0},
{1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0},
{1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0},
{0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0},
{1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1},
{1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0},
{0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1},
{1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 0},
{0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1},
{0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0}}),
bm3t({{0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0},
{0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1},
{0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 0},
{1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0},
{1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0},
{0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1},
{0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1},
{0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1},
{0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0},
{1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0},
{1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1},
{1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0},
{0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0}}),
BMlist(
{zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t}) {}
};
Expand All @@ -128,7 +126,45 @@ struct BMat16Fixture {
//****************************************************************************//
//****************************************************************************//

TEST_CASE_METHOD(BMat16Fixture, "BMat16::transpose", "[BMat16][000]") {
// Checks BMat16::one(dim) for several values of dim against explicitly
// written matrices and against the fixture's `zero`.
TEST_CASE_METHOD(BMat16Fixture, "BMat16::one", "[BMat16][000]") {
// Dimension 0 is expected to give the all-zero matrix.
CHECK(BMat16::one(0) == zero);
// Dimension 2: ones on the first two diagonal entries only.
CHECK(BMat16::one(2) == BMat16({{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}));
// Dimension 10: ones on the first ten diagonal entries only.
CHECK(BMat16::one(10) == BMat16({{1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}));
// Dimension 16 must agree with the argument-less overload.
CHECK(BMat16::one(16) == BMat16::one());
}


TEST_CASE_METHOD(BMat16Fixture, "BMat16::transpose", "[BMat16][001]") {
CHECK(zero.transpose() == zero);
CHECK(bm2.transpose() == bm2t);
CHECK(bm3.transpose() == bm3t);
Expand All @@ -138,9 +174,9 @@ TEST_CASE_METHOD(BMat16Fixture, "BMat16::transpose", "[BMat16][000]") {
}
}

TEST_AGREES(BMat16Fixture, transpose, transpose_naive, BMlist, "[BMat16][001]");
TEST_AGREES(BMat16Fixture, transpose, transpose_naive, BMlist, "[BMat16][002]");

TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][002]") {
TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][003]") {
BMat16 tmp = bm * bm1;
CHECK(tmp == bmm1);
CHECK(tmp == bm * bm1);
Expand All @@ -162,11 +198,11 @@ TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][002]") {
}
}

TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_4bmat8, BMlist, "[BMat16][003]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][004]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][005]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_4bmat8, BMlist, "[BMat16][004]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][005]");
TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][006]");

TEST_CASE("BMat16::random", "[BMat16][006]") {
TEST_CASE("BMat16::random", "[BMat16][007]") {
for (size_t d = 1; d < 8; ++d) {
BMat16 bm = BMat16::random(d);
for (size_t i = d + 1; i < 16; ++i) {
Expand All @@ -178,7 +214,7 @@ TEST_CASE("BMat16::random", "[BMat16][006]") {
}
}

TEST_CASE("BMat16::operator()", "[BMat16][007]") {
TEST_CASE("BMat16::operator()", "[BMat16][008]") {
std::vector<std::vector<bool>> mat = {
{0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0},
{0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0},
Expand All @@ -203,32 +239,56 @@ TEST_CASE("BMat16::operator()", "[BMat16][007]") {
}
}

TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator<<", "[BMat16][008]") {
TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator<<", "[BMat16][009]") {
std::ostringstream oss;
oss << bm3;
CHECK(oss.str() == "0001010001011011\n"
"0111011000110001\n"
"1010001101100010\n"
"0011010100001011\n"
"1001001111001110\n"
"1010100111101100\n"
"0110111001100001\n"
"0000010110111100\n"
"1100110100000001\n"
"1101000100001100\n"
"1101111000101101\n"
"1010000001000011\n"
"0011101110111000\n"
"0001001010011001\n"
"0100100100011110\n"
"0101011111110101\n");
CHECK(oss.str() == "0001101000101110\n"
"0100101100001001\n"
"1010000101101111\n"
"0101001010100010\n"
"0010001000010001\n"
"1100101101100100\n"
"1011000000100100\n"
"1010001010010010\n"
"0100100100010010\n"
"1000101010001001\n"
"0000000010100001\n"
"1101110010100010\n"
"0100100000110101\n"
"1101001010101110\n"
"0100010100001001\n"
"0100000110100100\n");

std::stringbuf buff;
std::ostream os(&buff);
os << BMat8::random(); // Also does not do anything visible
}

TEST_CASE_METHOD(BMat16Fixture, "BMat16::nr_rows", "[BMat16][009]") {
// Checks BMat16::set(i, j, value) on single entries of the fixture matrix
// `bm`, then by filling and clearing every entry.
TEST_CASE_METHOD(BMat16Fixture, "BMat16::set", "[BMat16][010]") {
BMat16 bs;
// Setting entry (0, 0) to 1 is expected to change bm ...
bs = bm;
bs.set(0, 0, 1);
CHECK(bs != bm);
// ... while setting it to 0 is expected to leave bm unchanged.
bs = bm;
bs.set(0, 0, 0);
CHECK(bs == bm);
// The fixture's bm3 is expected to equal bm with entry (13, 6) set to 1.
bs = bm;
bs.set(13, 6, 1);
CHECK(bs != bm);
CHECK(bs == bm3);

// Setting every entry to true must give the all-ones matrix.
for (size_t i = 0; i < 16; ++i)
for (size_t j = 0; j < 16; ++j)
bs.set(i, j, true);
CHECK(bs == ones);

// Setting every entry to false must give the zero matrix.
for (size_t i = 0; i < 16; ++i)
for (size_t j = 0; j < 16; ++j)
bs.set(i, j, false);
CHECK(bs == zero);
}

TEST_CASE_METHOD(BMat16Fixture, "BMat16::nr_rows", "[BMat16][011]") {
CHECK(zero.nr_rows() == 0);
CHECK(one1.nr_rows() == 1);
CHECK(one2.nr_rows() == 2);
Expand Down

0 comments on commit 2ecaad4

Please sign in to comment.