diff --git a/benchmark/bench_bmat16.cpp b/benchmark/bench_bmat16.cpp index f60704c..9ceb42a 100644 --- a/benchmark/bench_bmat16.cpp +++ b/benchmark/bench_bmat16.cpp @@ -58,12 +58,14 @@ class Fix_BMat16 { pair_sample; // not const, transpose2 is in place }; -TEST_CASE_METHOD(Fix_BMat16, "Transpose", "[BMat8][001]") { +TEST_CASE_METHOD(Fix_BMat16, "Transpose", "[BMat16][000]") { BENCHMARK_MEM_FN(transpose, sample); } -TEST_CASE_METHOD(Fix_BMat16, "Multiplication", "[BMat8][005]") { +TEST_CASE_METHOD(Fix_BMat16, "Multiplication", "[BMat16][001]") { BENCHMARK_MEM_FN_PAIR(BMat16::operator*, pair_sample); + BENCHMARK_MEM_FN_PAIR(mult_naive, pair_sample); + BENCHMARK_MEM_FN_PAIR(mult_naive_array, pair_sample); } } // namespace HPCombi diff --git a/include/hpcombi/bmat16.hpp b/include/hpcombi/bmat16.hpp index 5d1b87c..8ddf1b8 100644 --- a/include/hpcombi/bmat16.hpp +++ b/include/hpcombi/bmat16.hpp @@ -184,14 +184,14 @@ class BMat16 { //! Returns the matrix product of \c this and \p that //! //! This method returns the standard matrix product (over the - //! boolean semiring) of two BMat8 objects. It performs the most naive approch - //! by simply iterating through all entries using the acces oeprator of BMat8 + //! boolean semiring) of two BMat8 objects. It performs the most naive approach + //! by simply iterating through all entries using the access operator of BMat8 BMat16 mult_naive(BMat16 const& that) const noexcept; //! Returns the matrix product of \c this and \p that //! //! This method returns the standard matrix product (over the - //! boolean semiring) of two BMat8 objects. It performs the most naive approch + //! boolean semiring) of two BMat8 objects. It performs the most naive approach //! by simply iterating through all entries using array conversion. BMat16 mult_naive_array(BMat16 const& that) const noexcept; diff --git a/include/hpcombi/bmat16_impl.hpp b/include/hpcombi/bmat16_impl.hpp index 01803ed..fa2e3c7 100644 --- a/include/hpcombi/bmat16_impl.hpp +++ b/include/hpcombi/bmat16_impl.hpp @@ -52,10 +52,11 @@ bool BMat16::operator()(size_t i, size_t j) const noexcept { inline bool BMat16::operator==(BMat16 const &that) const noexcept { xpu64 tmp = _data ^ that._data; - return ((tmp[0] == 0) and - (tmp[1] == 0) and - (tmp[2] == 0) and - (tmp[3] == 0)); + return simde_mm256_testz_si256(tmp, tmp); + // return ((tmp[0] == 0) and + // (tmp[1] == 0) and + // (tmp[2] == 0) and + // (tmp[3] == 0)); } std::array, 16> BMat16::to_array() const noexcept { @@ -191,26 +192,27 @@ static const constexpr std::array ROW_MASK16 = { xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xffff} }; -static const constexpr std::array COL_MASK16 = { // A changer !!!! - xpu16{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, +static const constexpr std::array COL_MASK16 = { xpu16{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, xpu16{2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2}, - xpu16{3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3}, xpu16{4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4}, - xpu16{5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, - xpu16{6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, - xpu16{7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7}, xpu16{8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}, - xpu16{9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9}, - xpu16{0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa, 0xa}, - xpu16{0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb, 0xb}, - xpu16{0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc, 0xc}, - xpu16{0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd, 0xd}, - xpu16{0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe, 0xe}, - xpu16{0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf, 0xf} + xpu16{0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10}, + xpu16{0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20}, + xpu16{0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40}, + xpu16{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, + xpu16{0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100, 0x100}, + xpu16{0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200, 0x200}, + xpu16{0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400, 0x400}, + xpu16{0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800, 0x800}, + xpu16{0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000}, + xpu16{0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000, 0x2000}, + xpu16{0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000, 0x4000}, + xpu16{0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000} }; inline BMat16 BMat16::random(size_t const dim) { + // TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask HPCOMBI_ASSERT(0 < dim && dim <= 16); BMat16 bm = BMat16::random(); for (size_t i = dim; i < 16; ++i) { @@ -231,4 +233,13 @@ inline std::ostream &BMat16::write(std::ostream &os) const { } -} \ No newline at end of file +} // namespace HPCombi + +namespace std { + +// Not noexcept because BMat8::write isn't +inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat16 const &bm) { + return bm.write(os); +} + +} // namespace std diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp index 8ae97e8..1437da3 100644 --- a/include/hpcombi/bmat8.hpp +++ b/include/hpcombi/bmat8.hpp @@ -206,14 +206,14 @@ class BMat8 { //! Returns the matrix product of \c this and \p that //! //! This method returns the standard matrix product (over the - //! boolean semiring) of two BMat8 objects. It performs the most naive approch - //! by simply iterating through all entries using the acces oeprator of BMat8 + //! boolean semiring) of two BMat8 objects. It performs the most naive approach + //! by simply iterating through all entries using the access operator of BMat8 BMat8 mult_naive(BMat8 const& that) const noexcept; //! Returns the matrix product of \c this and \p that //! //! This method returns the standard matrix product (over the - //! boolean semiring) of two BMat8 objects. It performs the most naive approch + //! boolean semiring) of two BMat8 objects. It performs the most naive approach //! by simply iterating through all entries using array conversion. BMat8 mult_naive_array(BMat8 const& that) const noexcept; diff --git a/include/hpcombi/bmat8_impl.hpp b/include/hpcombi/bmat8_impl.hpp index 6b08191..5bf05f7 100644 --- a/include/hpcombi/bmat8_impl.hpp +++ b/include/hpcombi/bmat8_impl.hpp @@ -149,6 +149,7 @@ inline BMat8 BMat8::random() { } inline BMat8 BMat8::random(size_t const dim) { + // TO DO : Instead of nulling all the cols/rows one by one, one could do that at once with the proper mask HPCOMBI_ASSERT(0 < dim && dim <= 8); BMat8 bm = BMat8::random(); for (size_t i = dim; i < 8; ++i) { diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index d5b1e0f..10678a2 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -147,7 +147,7 @@ inline void merge(epu8 &a, epu8 &b) noexcept; */ #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_permutation_of - @par Algorithm: uses string matching cpmestrm intrisics + @par Algorithm: uses string matching cpmestrm intrinsics */ inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept; #endif @@ -586,7 +586,7 @@ inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept; */ #ifdef SIMDE_X86_SSE4_2_NATIVE /** @copydoc common_is_permutation - @par Algorithm: uses string matching cpmestri intrisics + @par Algorithm: uses string matching cpmestri intrinsics */ inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept; #endif diff --git a/list_builtin.txt b/list_builtin.txt index 7bddfca..908fca3 100644 --- a/list_builtin.txt +++ b/list_builtin.txt @@ -1,4 +1,4 @@ -# List of intrisics used in HPCombi +# List of intrinsics used in HPCombi # Format: ;; # line starting with # are comments __builtin_ffs;0;0 diff --git a/tests/test_bmat16.cpp b/tests/test_bmat16.cpp index e69de29..4cd883e 100644 --- a/tests/test_bmat16.cpp +++ b/tests/test_bmat16.cpp @@ -0,0 +1,194 @@ +//****************************************************************************// +// Copyright (C) 2016-2024 Florent Hivert , // +// // +// This file is part of HP-Combi // +// // +// HP-Combi is free software: you can redistribute it and/or modify it // +// under the terms of the GNU General Public License as published by the // +// Free Software Foundation, either version 3 of the License, or // +// (at your option) any later version. // +// // +// HP-Combi is distributed in the hope that it will be useful, but WITHOUT // +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or // +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // +// for more details. // +// // +// You should have received a copy of the GNU General Public License along // +// with HP-Combi. If not, see . // +//****************************************************************************// + +#include // for size_t +#include // for uint64_t +#include // for char_traits, ostream, ostrin... +#include // for operator== +#include // for pair +#include // for vector, allocator + +#include "test_main.hpp" // for TEST_AGREES, TEST_AGREES2 +#include // for operator""_catch_sr, operator== + +#include "hpcombi/bmat16.hpp" // for BMat16, operator<< +#include "hpcombi/perm16.hpp" // for Perm16 +#include "hpcombi/vect16.hpp" // for Vect16 + +namespace HPCombi { +namespace { +struct BMat16Fixture { + const BMat16 zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t; + const std::vector BMlist; + BMat16Fixture() + : zero(0, 0, 0, 0), one1(0, 0, 0, 1), one2(0, 0, 0, 0x20001), + ones(0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff, 0xffffffffffffffff), + bm({{0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0}, + {0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1}, + {1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1}, + {0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0}, + {0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1}, + {1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0}, + {1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0}, + {1, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0}, + {0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0}, + {1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1}, + {1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0}, + {0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1}, + {1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0}, + {0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1}, + {0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0}}), + bm1({{0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0}, + {0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0}, + {0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0}, + {0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0}, + {1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1}, + {1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0}, + {1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0}, + {0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1}, + {0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1}, + {1, 0, 1, 0, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0}, + {0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1}, + {1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1}, + {0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0}, + {1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1}, + {0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1}}), + bmm1({{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1}, + {0, 1, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, + {1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, + {0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1}}), + bm2({{1, 1}, {0, 1}}), bm2t({{1, 0}, {1, 1}}), // A changer ! + bm3({{0, 0, 0, 1, 0, 0, 1, 1}, // A changer ! + {1, 1, 1, 1, 1, 1, 0, 1}, + {0, 1, 1, 1, 1, 1, 0, 1}, + {1, 1, 0, 1, 1, 1, 1, 1}, + {0, 0, 1, 0, 0, 1, 1, 1}, + {1, 1, 0, 0, 0, 0, 0, 1}, + {0, 1, 0, 0, 0, 0, 1, 1}, + {0, 1, 1, 1, 1, 0, 1, 0}}), + bm3t({{0, 1, 0, 1, 0, 1, 0, 0}, // A changer ! + {0, 1, 1, 1, 0, 1, 1, 1}, + {0, 1, 1, 0, 1, 0, 0, 1}, + {1, 1, 1, 1, 0, 0, 0, 1}, + {0, 1, 1, 1, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 0, 0}, + {1, 0, 0, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 1, 1, 1, 0}}), + BMlist( + {zero, one1, one2, ones, bm, bm1, bmm1, bm2, bm2t, bm3, bm3t}) {} +}; +} // namespace + +//****************************************************************************// +//****************************************************************************// + +TEST_CASE_METHOD(BMat16Fixture, "BMat16::transpose", "[BMat16][000]") { + CHECK(zero.transpose() == zero); + CHECK(bm2.transpose() == bm2t); + CHECK(bm3.transpose() == bm3t); + + for (auto m : BMlist) { + CHECK(m.transpose().transpose() == m); + } +} + +// TEST_AGREES(BMat16Fixture, transpose, transpose_naive, BMlist, "[BMat16][001]"); + +TEST_CASE_METHOD(BMat16Fixture, "BMat16::operator*", "[BMat16][002]") { + BMat16 tmp = bm * bm1; + CHECK(tmp == bmm1); + CHECK(tmp == bm * bm1); + + for (auto b : BMlist) { + CHECK(zero * b == zero); + CHECK(b * zero == zero); + CHECK(b * b.one() == b); + CHECK(b.one() * b == b); + CHECK((b * b) * (b * b) == b * b * b * b); + } + + for (auto b1 : BMlist) { + for (auto b2 : BMlist) { + for (auto b3 : BMlist) { + CHECK((b1 * b2) * b3 == b1 * (b2 * b3)); + } + } + } +} + +TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive, BMlist, "[BMat16][003]"); +TEST_AGREES2(BMat16Fixture, BMat16::operator*, mult_naive_array, BMlist, "[BMat16][004]"); + +TEST_CASE("BMat16::random", "[BMat16][005]") { + for (size_t d = 1; d < 8; ++d) { + BMat16 bm = BMat16::random(d); + for (size_t i = d + 1; i < 16; ++i) { + for (size_t j = 0; j < 16; ++j) { + CHECK(bm(i, j) == 0); + CHECK(bm(j, i) == 0); + } + } + } +} + +TEST_CASE("BMat8::operator()", "[BMat8][006]") { + std::vector> mat = { // A changer ! + {0, 0, 0, 1, 0, 0, 1}, {0, 1, 1, 1, 0, 1, 0}, {1, 1, 0, 1, 1, 1, 1}, + {0, 0, 1, 0, 0, 1, 1}, {1, 1, 0, 0, 0, 0, 0}, {0, 1, 0, 0, 0, 0, 1}, + {0, 1, 1, 1, 1, 0, 1}}; + BMat16 bm(mat); + for (size_t i = 0; i < 15; ++i) { + for (size_t j = 0; j < 15; ++j) { + CHECK(static_cast(bm(i, j)) == mat[i][j]); + } + } +} + +TEST_CASE_METHOD(BMat16Fixture, "BMa16::operator<<", "[BMat16][007]") { + std::ostringstream oss; + oss << bm3; + CHECK(oss.str() == "00010011\n" + "11111101\n" + "01111101\n" + "11011111\n" + "00100111\n" + "11000001\n" + "01000011\n" + "01111010\n"); + + std::stringbuf buff; + std::ostream os(&buff); + os << BMat8::random(); // Also does not do anything visible +} + +} // namespace HPCombi