diff --git a/CF_8cpp.html b/CF_8cpp.html index b0f0ed6..36964a6 100644 --- a/CF_8cpp.html +++ b/CF_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/CF.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/CF.cpp File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
diff --git a/RD_8cpp.html b/RD_8cpp.html index b767951..6fd33b5 100644 --- a/RD_8cpp.html +++ b/RD_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/RD.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/RD.cpp File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
@@ -268,7 +268,7 @@

Initial value:
{FF, FF, FF, FF, FF, FF, FF, FF,
8, 9, 10, 11, 12, 13, 14, 15}
-
const uint8_t FF
Definition RD.cpp:47
+
const uint8_t FF
Definition RD.cpp:51

diff --git a/README_8md.html b/README_8md.html index e09e020..9fa463c 100644 --- a/README_8md.html +++ b/README_8md.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/README.md File Reference +HPCombi: /Users/jdm/hpcombi/README.md File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
-
/Users/jdm/git/HPCombi/README.md File Reference
+
/Users/jdm/hpcombi/README.md File Reference
diff --git a/Renner_8cpp.html b/Renner_8cpp.html index a04b36c..7e70662 100644 --- a/Renner_8cpp.html +++ b/Renner_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/Renner.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/Renner.cpp File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
@@ -450,7 +450,7 @@

Initial value:
{FF, FF, FF, FF, FF, FF, FF, FF,
8, 9, 10, 11, 12, 13, 14, 15}
-
const uint8_t FF
Definition Renner.cpp:64
+
const uint8_t FF
Definition Renner.cpp:68

diff --git a/Trans_8cpp.html b/Trans_8cpp.html index cbc4d84..233b6af 100644 --- a/Trans_8cpp.html +++ b/Trans_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/Trans.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/Trans.cpp File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
diff --git a/annotated.html b/annotated.html index b24f33d..71486b2 100644 --- a/annotated.html +++ b/annotated.html @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
diff --git a/arch_8hpp.html b/arch_8hpp.html index 9ad05e0..9683fc4 100644 --- a/arch_8hpp.html +++ b/arch_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/arch.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/arch.hpp File Reference @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,7 +77,7 @@
diff --git a/arch_8hpp_source.html b/arch_8hpp_source.html index 2bf1e6b..a3557c0 100644 --- a/arch_8hpp_source.html +++ b/arch_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/arch.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/arch.hpp Source File @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,39 +77,44 @@
arch.hpp
-Go to the documentation of this file.
1
-
2// Copyright (C) 2023 Florent Hivert <Florent.Hivert@lisn.fr>, //
+Go to the documentation of this file.
1//****************************************************************************//
+
2// Copyright (C) 2023-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
-
4// Distributed under the terms of the GNU General Public License (GPL) //
+
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
-
6// This code is distributed in the hope that it will be useful, //
-
7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
-
8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
-
9// General Public License for more details. //
+
6// HP-Combi is free software: you can redistribute it and/or modify it //
+
7// under the terms of the GNU General Public License as published by the //
+
8// Free Software Foundation, either version 3 of the License, or //
+
9// (at your option) any later version. //
10// //
-
11// The full text of the GPL is available at: //
-
12// //
-
13// http://www.gnu.org/licenses/ //
-
15
-
16#ifndef HPCOMBI_ARCH_HPP_INCLUDED
-
17#define HPCOMBI_ARCH_HPP_INCLUDED
-
18
-
19#if defined(SIMDE_ARCH_AMD64) && !defined(SIMDE_ARCH_X86_SSE4_1)
-
20char const msg[] =
-
21 R("x86_64 architecture without required compiler flags for SSE-4.1 "
-
22 "instruction set. Did you forget to provide the flag -march="
-
23 "(native,avx,sse4.1) flag ?");
-
24#error(msg)
-
25#endif
-
26
-
27#endif // HPCOMBI_ARCH_HPP_INCLUDED
+
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
+
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
+
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
+
14// for more details. //
+
15// //
+
16// You should have received a copy of the GNU General Public License along //
+
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
+
18//****************************************************************************//
+
19
+
20#ifndef HPCOMBI_ARCH_HPP_
+
21#define HPCOMBI_ARCH_HPP_
+
22
+
23#if defined(SIMDE_ARCH_AMD64) && !defined(SIMDE_ARCH_X86_SSE4_1)
+
24char const msg[] =
+
25 R("x86_64 architecture without required compiler flags for SSE-4.1 "
+
26 "instruction set. Did you forget to provide the flag -march="
+
27 "(native,avx,sse4.1) flag ?");
+
28#error(msg)
+
29#endif
+
30
+
31#endif // HPCOMBI_ARCH_HPP_
diff --git a/bmat8_8hpp_source.html b/bmat8_8hpp_source.html index 61ea97b..af105fa 100644 --- a/bmat8_8hpp_source.html +++ b/bmat8_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/bmat8.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/bmat8.hpp Source File @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,252 +77,257 @@
bmat8.hpp
-Go to the documentation of this file.
1
-
2// Copyright (C) 2018 Finn Smith <fls3@st-andrews.ac.uk> //
-
3// Copyright (C) 2018 James Mitchell <jdm3@st-andrews.ac.uk> //
-
4// Copyright (C) 2018 Florent Hivert <Florent.Hivert@lri.fr>, //
+Go to the documentation of this file.
1//****************************************************************************//
+
2// Copyright (C) 2018-2024 Finn Smith <fls3@st-andrews.ac.uk> //
+
3// Copyright (C) 2018-2024 James Mitchell <jdm3@st-andrews.ac.uk> //
+
4// Copyright (C) 2018-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
5// //
-
6// Distributed under the terms of the GNU General Public License (GPL) //
+
6// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
7// //
-
8// This code is distributed in the hope that it will be useful, //
-
9// but WITHOUT ANY WARRANTY; without even the implied warranty of //
-
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
-
11// General Public License for more details. //
+
8// HP-Combi is free software: you can redistribute it and/or modify it //
+
9// under the terms of the GNU General Public License as published by the //
+
10// Free Software Foundation, either version 3 of the License, or //
+
11// (at your option) any later version. //
12// //
-
13// The full text of the GPL is available at: //
-
14// //
-
15// http://www.gnu.org/licenses/ //
-
17
-
18// This file contains a declaration of fast boolean matrices up to dimension 8.
-
19
-
20#ifndef HPCOMBI_BMAT8_HPP_INCLUDED
-
21#define HPCOMBI_BMAT8_HPP_INCLUDED
-
22
-
23#include <array> // for array
-
24#include <bitset> // for bitset
-
25#include <cstddef> // for size_t
-
26#include <cstdint> // for uint64_t, uint8_t
-
27#include <functional> // for hash, __scalar_hash
-
28#include <iostream> // for ostream
-
29#include <memory> // for hash
-
30#include <utility> // for pair, swap
-
31#include <vector> // for vector
-
32
-
33#include "debug.hpp" // for HPCOMBI_ASSERT
-
34#include "epu8.hpp" // for epu8
-
35#include "perm16.hpp" // for Perm16
+
13// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
+
14// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
+
15// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
+
16// for more details. //
+
17// //
+
18// You should have received a copy of the GNU General Public License along //
+
19// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
+
20//****************************************************************************//
+
21
+
22// This file contains a declaration of fast boolean matrices up to dimension 8.
+
23
+
24#ifndef HPCOMBI_BMAT8_HPP_
+
25#define HPCOMBI_BMAT8_HPP_
+
26
+
27#include <array> // for array
+
28#include <bitset> // for bitset
+
29#include <cstddef> // for size_t
+
30#include <cstdint> // for uint64_t, uint8_t
+
31#include <functional> // for hash, __scalar_hash
+
32#include <iostream> // for ostream
+
33#include <memory> // for hash
+
34#include <utility> // for pair, swap
+
35#include <vector> // for vector
36
-
37namespace HPCombi {
-
38
-
48class BMat8 {
-
49 public:
-
53 BMat8() noexcept = default;
-
54
-
59 explicit BMat8(uint64_t mat) noexcept : _data(mat) {}
-
60
-
65 // Not sure if this is noexcept or not
-
66 explicit BMat8(std::vector<std::vector<bool>> const &mat);
-
67
-
71 BMat8(BMat8 const &) noexcept = default;
-
72
-
76 BMat8(BMat8 &&) noexcept = default;
-
77
-
81 BMat8 &operator=(BMat8 const &) noexcept = default;
-
82
-
86 BMat8 &operator=(BMat8 &&) noexcept = default;
-
87
-
89 ~BMat8() = default;
-
90
-
94 bool operator==(BMat8 const &that) const noexcept {
-
95 return _data == that._data;
-
96 }
-
97
-
101 bool operator!=(BMat8 const &that) const noexcept {
-
102 return _data != that._data;
-
103 }
-
104
-
109 bool operator<(BMat8 const &that) const noexcept {
-
110 return _data < that._data;
-
111 }
-
112
-
117 bool operator>(BMat8 const &that) const noexcept {
-
118 return _data > that._data;
-
119 }
-
120
-
126 bool operator()(size_t i, size_t j) const noexcept;
-
127
-
133 void set(size_t i, size_t j, bool val) noexcept;
-
134
-
140 uint64_t to_int() const noexcept { return _data; }
-
141
-
146 BMat8 transpose() const noexcept;
-
147
-
152 BMat8 transpose_mask() const noexcept;
-
153
-
158 BMat8 transpose_maskd() const noexcept;
-
159
-
164 static void transpose2(BMat8 &, BMat8 &) noexcept;
-
165
-
172 BMat8 mult_transpose(BMat8 const &that) const noexcept;
-
173
-
179 BMat8 operator*(BMat8 const &that) const noexcept {
-
180 return mult_transpose(that.transpose());
-
181 }
-
182
-
189 BMat8 row_space_basis() const noexcept;
-
190
-
195 BMat8 col_space_basis() const noexcept {
- -
197 }
-
198
-
200 size_t nr_rows() const noexcept;
-
201
-
203 // Not noexcept because it constructs a vector
-
204 std::vector<uint8_t> rows() const;
+
37#include "debug.hpp" // for HPCOMBI_ASSERT
+
38#include "epu8.hpp" // for epu8
+
39#include "perm16.hpp" // for Perm16
+
40
+
41namespace HPCombi {
+
42
+
52class BMat8 {
+
53 public:
+
57 BMat8() noexcept = default;
+
58
+
63 explicit BMat8(uint64_t mat) noexcept : _data(mat) {}
+
64
+
69 // Not sure if this is noexcept or not
+
70 explicit BMat8(std::vector<std::vector<bool>> const &mat);
+
71
+
75 BMat8(BMat8 const &) noexcept = default;
+
76
+
80 BMat8(BMat8 &&) noexcept = default;
+
81
+
85 BMat8 &operator=(BMat8 const &) noexcept = default;
+
86
+
90 BMat8 &operator=(BMat8 &&) noexcept = default;
+
91
+
93 ~BMat8() = default;
+
94
+
98 bool operator==(BMat8 const &that) const noexcept {
+
99 return _data == that._data;
+
100 }
+
101
+
105 bool operator!=(BMat8 const &that) const noexcept {
+
106 return _data != that._data;
+
107 }
+
108
+
113 bool operator<(BMat8 const &that) const noexcept {
+
114 return _data < that._data;
+
115 }
+
116
+
121 bool operator>(BMat8 const &that) const noexcept {
+
122 return _data > that._data;
+
123 }
+
124
+
130 bool operator()(size_t i, size_t j) const noexcept;
+
131
+
137 void set(size_t i, size_t j, bool val) noexcept;
+
138
+
144 uint64_t to_int() const noexcept { return _data; }
+
145
+
150 BMat8 transpose() const noexcept;
+
151
+
156 BMat8 transpose_mask() const noexcept;
+
157
+
162 BMat8 transpose_maskd() const noexcept;
+
163
+
168 static void transpose2(BMat8 &, BMat8 &) noexcept;
+
169
+
176 BMat8 mult_transpose(BMat8 const &that) const noexcept;
+
177
+
183 BMat8 operator*(BMat8 const &that) const noexcept {
+
184 return mult_transpose(that.transpose());
+
185 }
+
186
+
193 BMat8 row_space_basis() const noexcept;
+
194
+
199 BMat8 col_space_basis() const noexcept {
+ +
201 }
+
202
+
204 size_t nr_rows() const noexcept;
205
-
209 // Not noexcept because row_space_bitset_ref isn't
-
210 uint64_t row_space_size_ref() const;
-
211
-
215 // Not noexcept because it creates a vector
-
216 std::bitset<256> row_space_bitset_ref() const;
-
217
-
221 void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept;
-
222
-
227 uint64_t row_space_size_bitset() const noexcept;
-
228
-
234 uint64_t row_space_size_incl() const noexcept;
-
235
-
240 uint64_t row_space_size_incl1() const noexcept;
-
241
-
245 uint64_t row_space_size() const noexcept { return row_space_size_incl(); }
-
246
-
250 bool row_space_included_ref(BMat8 other) const noexcept;
-
251
-
255 bool row_space_included_bitset(BMat8 other) const noexcept;
-
256
-
261 epu8 row_space_mask(epu8 vects) const noexcept;
-
262
-
266 bool row_space_included(BMat8 other) const noexcept;
-
267
-
271 // Not noexcept because std::make_pair is not
-
272 static std::pair<bool, bool> row_space_included2(BMat8 a1, BMat8 b1,
-
273 BMat8 a2, BMat8 b2);
-
274
-
279 BMat8 row_permuted(Perm16 p) const noexcept;
-
280
-
285 BMat8 col_permuted(Perm16 p) const noexcept;
-
286
-
291 static BMat8 row_permutation_matrix(Perm16 p) noexcept;
-
292
-
297 static BMat8 col_permutation_matrix(Perm16 p) noexcept;
-
298
- -
305
-
311 // Not noexcept because vectors are allocated
- -
313
-
317 static BMat8 one(size_t dim = 8) noexcept {
-
318 HPCOMBI_ASSERT(dim <= 8);
-
319 static std::array<uint64_t, 9> const ones = {
-
320 0x0000000000000000, 0x8000000000000000, 0x8040000000000000,
-
321 0x8040200000000000, 0x8040201000000000, 0x8040201008000000,
-
322 0x8040201008040000, 0x8040201008040200, 0x8040201008040201};
-
323 return BMat8(ones[dim]);
-
324 }
-
325
-
329 // Not noexcept because random things aren't
-
330 static BMat8 random();
-
331
-
336 // Not noexcept because BMat8::random above is not
-
337 static BMat8 random(size_t dim);
-
338
-
339 void swap(BMat8 &that) noexcept { std::swap(this->_data, that._data); }
-
340
-
342 // Not noexcept
-
343 std::ostream &write(std::ostream &os) const;
+
207 // Not noexcept because it constructs a vector
+
208 std::vector<uint8_t> rows() const;
+
209
+
213 // Not noexcept because row_space_bitset_ref isn't
+
214 uint64_t row_space_size_ref() const;
+
215
+
219 // Not noexcept because it creates a vector
+
220 std::bitset<256> row_space_bitset_ref() const;
+
221
+
225 void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept;
+
226
+
231 uint64_t row_space_size_bitset() const noexcept;
+
232
+
238 uint64_t row_space_size_incl() const noexcept;
+
239
+
244 uint64_t row_space_size_incl1() const noexcept;
+
245
+
249 uint64_t row_space_size() const noexcept { return row_space_size_incl(); }
+
250
+
254 bool row_space_included_ref(BMat8 other) const noexcept;
+
255
+
259 bool row_space_included_bitset(BMat8 other) const noexcept;
+
260
+
265 epu8 row_space_mask(epu8 vects) const noexcept;
+
266
+
270 bool row_space_included(BMat8 other) const noexcept;
+
271
+
275 // Not noexcept because std::make_pair is not
+
276 static std::pair<bool, bool> row_space_included2(BMat8 a1, BMat8 b1,
+
277 BMat8 a2, BMat8 b2);
+
278
+
283 BMat8 row_permuted(Perm16 p) const noexcept;
+
284
+
289 BMat8 col_permuted(Perm16 p) const noexcept;
+
290
+
295 static BMat8 row_permutation_matrix(Perm16 p) noexcept;
+
296
+
301 static BMat8 col_permutation_matrix(Perm16 p) noexcept;
+
302
+ +
309
+
315 // Not noexcept because vectors are allocated
+ +
317
+
321 static BMat8 one(size_t dim = 8) noexcept {
+
322 HPCOMBI_ASSERT(dim <= 8);
+
323 static std::array<uint64_t, 9> const ones = {
+
324 0x0000000000000000, 0x8000000000000000, 0x8040000000000000,
+
325 0x8040200000000000, 0x8040201000000000, 0x8040201008000000,
+
326 0x8040201008040000, 0x8040201008040200, 0x8040201008040201};
+
327 return BMat8(ones[dim]);
+
328 }
+
329
+
333 // Not noexcept because random things aren't
+
334 static BMat8 random();
+
335
+
340 // Not noexcept because BMat8::random above is not
+
341 static BMat8 random(size_t dim);
+
342
+
343 void swap(BMat8 &that) noexcept { std::swap(this->_data, that._data); }
344
-
345#ifdef HPCOMBI_HAVE_DENSEHASHMAP
-
346 // FIXME do this another way
-
347 BMat8 empty_key() const noexcept { return BMat8(0xFF7FBFDFEFF7FBFE); }
-
348#endif
-
349
-
350 private:
-
351 uint64_t _data;
-
352
-
353 epu8 row_space_basis_internal() const noexcept;
-
354};
-
355
-
356} // namespace HPCombi
-
357
-
358#include "bmat8_impl.hpp"
+
346 // Not noexcept
+
347 std::ostream &write(std::ostream &os) const;
+
348
+
349#ifdef HPCOMBI_HAVE_DENSEHASHMAP
+
350 // FIXME do this another way
+
351 BMat8 empty_key() const noexcept { return BMat8(0xFF7FBFDFEFF7FBFE); }
+
352#endif
+
353
+
354 private:
+
355 uint64_t _data;
+
356
+
357 epu8 row_space_basis_internal() const noexcept;
+
358};
359
-
360namespace std {
-
361template <> struct hash<HPCombi::BMat8> {
-
362 inline size_t operator()(HPCombi::BMat8 const &bm) const {
-
363 return hash<uint64_t>()(bm.to_int());
-
364 }
-
365};
-
366} // namespace std
-
367#endif // HPCOMBI_BMAT8_HPP_INCLUDED
-
Class for fast boolean matrices of dimension up to 8 x 8.
Definition bmat8.hpp:48
-
uint64_t row_space_size_incl() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:309
-
BMat8 row_space_basis() const noexcept
Returns a canonical basis of the row space of this.
Definition bmat8_impl.hpp:232
-
static std::pair< bool, bool > row_space_included2(BMat8 a1, BMat8 b1, BMat8 a2, BMat8 b2)
Returns inclusion of row spaces.
Definition bmat8_impl.hpp:354
-
static void transpose2(BMat8 &, BMat8 &) noexcept
Transpose two matrices at once.
Definition bmat8_impl.hpp:183
-
uint64_t row_space_size_bitset() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:284
-
static BMat8 row_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by rows.
Definition bmat8_impl.hpp:429
-
BMat8 mult_transpose(BMat8 const &that) const noexcept
Returns the matrix product of this and the transpose of that.
Definition bmat8_impl.hpp:203
+
360} // namespace HPCombi
+
361
+
362#include "bmat8_impl.hpp"
+
363
+
364namespace std {
+
365template <> struct hash<HPCombi::BMat8> {
+
366 inline size_t operator()(HPCombi::BMat8 const &bm) const {
+
367 return hash<uint64_t>()(bm.to_int());
+
368 }
+
369};
+
370} // namespace std
+
371#endif // HPCOMBI_BMAT8_HPP_
+
Class for fast boolean matrices of dimension up to 8 x 8.
Definition bmat8.hpp:52
+
uint64_t row_space_size_incl() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:316
+
BMat8 row_space_basis() const noexcept
Returns a canonical basis of the row space of this.
Definition bmat8_impl.hpp:238
+
static std::pair< bool, bool > row_space_included2(BMat8 a1, BMat8 b1, BMat8 a2, BMat8 b2)
Returns inclusion of row spaces.
Definition bmat8_impl.hpp:361
+
static void transpose2(BMat8 &, BMat8 &) noexcept
Transpose two matrices at once.
Definition bmat8_impl.hpp:189
+
uint64_t row_space_size_bitset() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:291
+
static BMat8 row_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by rows.
Definition bmat8_impl.hpp:436
+
BMat8 mult_transpose(BMat8 const &that) const noexcept
Returns the matrix product of this and the transpose of that.
Definition bmat8_impl.hpp:209
BMat8(BMat8 const &) noexcept=default
A constructor.
-
BMat8 transpose_maskd() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:169
+
BMat8 transpose_maskd() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:175
BMat8(BMat8 &&) noexcept=default
A constructor.
-
bool row_space_included_bitset(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:325
-
bool operator>(BMat8 const &that) const noexcept
Returns true if this is greater than that.
Definition bmat8.hpp:117
-
BMat8 transpose() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:146
-
static BMat8 col_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by columns.
Definition bmat8_impl.hpp:433
-
epu8 row_space_mask(epu8 vects) const noexcept
Returns a mask for which vectors of a 16 rows epu8 are in the row space of this.
Definition bmat8_impl.hpp:344
-
BMat8 row_permuted(Perm16 p) const noexcept
Returns the matrix whose rows have been permuted according to p.
Definition bmat8_impl.hpp:417
+
bool row_space_included_bitset(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:332
+
bool operator>(BMat8 const &that) const noexcept
Returns true if this is greater than that.
Definition bmat8.hpp:121
+
BMat8 transpose() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:152
+
static BMat8 col_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by columns.
Definition bmat8_impl.hpp:440
+
epu8 row_space_mask(epu8 vects) const noexcept
Returns a mask for which vectors of a 16 rows epu8 are in the row space of this.
Definition bmat8_impl.hpp:351
+
BMat8 row_permuted(Perm16 p) const noexcept
Returns the matrix whose rows have been permuted according to p.
Definition bmat8_impl.hpp:424
BMat8() noexcept=default
A default constructor.
-
BMat8 col_space_basis() const noexcept
Returns a canonical basis of the col space of this.
Definition bmat8.hpp:195
-
bool row_space_included(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:333
-
std::ostream & write(std::ostream &os) const
Write this on os.
Definition bmat8_impl.hpp:477
-
void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept
Returns the the row space of this as 256 bits.
Definition bmat8_impl.hpp:269
-
Perm16 right_perm_action_on_basis(BMat8) const noexcept
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:468
-
std::bitset< 256 > row_space_bitset_ref() const
Returns the the row space of this.
Definition bmat8_impl.hpp:368
-
bool operator!=(BMat8 const &that) const noexcept
Returns true if this does not equal that.
Definition bmat8.hpp:101
-
BMat8 col_permuted(Perm16 p) const noexcept
Returns the matrix whose columns have been permuted according to p.
Definition bmat8_impl.hpp:425
-
uint64_t to_int() const noexcept
Returns the integer representation of this.
Definition bmat8.hpp:140
-
uint64_t row_space_size_ref() const
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:396
-
uint64_t row_space_size() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8.hpp:245
-
BMat8 transpose_mask() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:157
-
void swap(BMat8 &that) noexcept
Definition bmat8.hpp:339
-
static BMat8 one(size_t dim=8) noexcept
Returns the identity BMat8.
Definition bmat8.hpp:317
-
std::vector< uint8_t > rows() const
Returns a std::vector for rows of this.
Definition bmat8_impl.hpp:400
-
size_t nr_rows() const noexcept
Returns the number of non-zero rows of this.
Definition bmat8_impl.hpp:409
-
void set(size_t i, size_t j, bool val) noexcept
Sets the (i, j)th position to val.
Definition bmat8_impl.hpp:104
-
static BMat8 random()
Returns a random BMat8.
Definition bmat8_impl.hpp:128
-
bool operator()(size_t i, size_t j) const noexcept
Returns the entry in the (i, j)th position.
Definition bmat8_impl.hpp:98
-
bool operator<(BMat8 const &that) const noexcept
Returns true if this is less than that.
Definition bmat8.hpp:109
-
uint64_t row_space_size_incl1() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:293
-
bool row_space_included_ref(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:390
-
Perm16 right_perm_action_on_basis_ref(BMat8) const
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:437
+
BMat8 col_space_basis() const noexcept
Returns a canonical basis of the col space of this.
Definition bmat8.hpp:199
+
bool row_space_included(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:340
+
std::ostream & write(std::ostream &os) const
Write this on os.
Definition bmat8_impl.hpp:485
+
void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept
Returns the the row space of this as 256 bits.
Definition bmat8_impl.hpp:276
+
Perm16 right_perm_action_on_basis(BMat8) const noexcept
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:475
+
std::bitset< 256 > row_space_bitset_ref() const
Returns the the row space of this.
Definition bmat8_impl.hpp:375
+
bool operator!=(BMat8 const &that) const noexcept
Returns true if this does not equal that.
Definition bmat8.hpp:105
+
BMat8 col_permuted(Perm16 p) const noexcept
Returns the matrix whose columns have been permuted according to p.
Definition bmat8_impl.hpp:432
+
uint64_t to_int() const noexcept
Returns the integer representation of this.
Definition bmat8.hpp:144
+
uint64_t row_space_size_ref() const
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:403
+
uint64_t row_space_size() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8.hpp:249
+
BMat8 transpose_mask() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:163
+
void swap(BMat8 &that) noexcept
Definition bmat8.hpp:343
+
static BMat8 one(size_t dim=8) noexcept
Returns the identity BMat8.
Definition bmat8.hpp:321
+
std::vector< uint8_t > rows() const
Returns a std::vector for rows of this.
Definition bmat8_impl.hpp:407
+
size_t nr_rows() const noexcept
Returns the number of non-zero rows of this.
Definition bmat8_impl.hpp:416
+
void set(size_t i, size_t j, bool val) noexcept
Sets the (i, j)th position to val.
Definition bmat8_impl.hpp:110
+
static BMat8 random()
Returns a random BMat8.
Definition bmat8_impl.hpp:134
+
bool operator()(size_t i, size_t j) const noexcept
Returns the entry in the (i, j)th position.
Definition bmat8_impl.hpp:104
+
bool operator<(BMat8 const &that) const noexcept
Returns true if this is less than that.
Definition bmat8.hpp:113
+
uint64_t row_space_size_incl1() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:300
+
bool row_space_included_ref(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:397
+
Perm16 right_perm_action_on_basis_ref(BMat8) const
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:444
-
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:23
+
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:28
-
const Transf16 a1
Definition image.cpp:48
-
const Transf16 a2
Definition image.cpp:49
-
Definition bmat8.hpp:37
-
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:41
-
Definition bmat8.hpp:360
+
const Transf16 a1
Definition image.cpp:52
+
const Transf16 a2
Definition image.cpp:53
+
Definition bmat8.hpp:41
+
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:45
+
Definition bmat8.hpp:364
-
Permutations of .
Definition perm16.hpp:204
-
size_t operator()(HPCombi::BMat8 const &bm) const
Definition bmat8.hpp:362
+
Permutations of .
Definition perm16.hpp:208
+
size_t operator()(HPCombi::BMat8 const &bm) const
Definition bmat8.hpp:366
diff --git a/bmat8__impl_8hpp_source.html b/bmat8__impl_8hpp_source.html index 4493ad2..8fac811 100644 --- a/bmat8__impl_8hpp_source.html +++ b/bmat8__impl_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/bmat8_impl.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/bmat8_impl.hpp Source File @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,568 +77,577 @@
bmat8_impl.hpp
-Go to the documentation of this file.
1
-
2// Copyright (C) 2018 Finn Smith <fls3@st-andrews.ac.uk> //
-
3// Copyright (C) 2018 James Mitchell <jdm3@st-andrews.ac.uk> //
-
4// Copyright (C) 2018 Florent Hivert <Florent.Hivert@lri.fr>, //
+Go to the documentation of this file.
1//****************************************************************************//
+
2// Copyright (C) 2018-2024 Finn Smith <fls3@st-andrews.ac.uk> //
+
3// Copyright (C) 2018-2024 James Mitchell <jdm3@st-andrews.ac.uk> //
+
4// Copyright (C) 2018-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
5// //
-
6// Distributed under the terms of the GNU General Public License (GPL) //
+
6// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
7// //
-
8// This code is distributed in the hope that it will be useful, //
-
9// but WITHOUT ANY WARRANTY; without even the implied warranty of //
-
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
-
11// General Public License for more details. //
+
8// HP-Combi is free software: you can redistribute it and/or modify it //
+
9// under the terms of the GNU General Public License as published by the //
+
10// Free Software Foundation, either version 3 of the License, or //
+
11// (at your option) any later version. //
12// //
-
13// The full text of the GPL is available at: //
-
14// //
-
15// http://www.gnu.org/licenses/ //
-
17
-
18// This file contains an implementation of fast boolean matrices up to
-
19// dimension 8 x 8.
-
20
-
21namespace HPCombi {
-
22static_assert(std::is_trivial<BMat8>(), "BMat8 is not a trivial class!");
-
23
-
24static const constexpr std::array<uint64_t, 8> ROW_MASK = {
-
25 {0xff00000000000000, 0xff000000000000, 0xff0000000000, 0xff00000000,
-
26 0xff000000, 0xff0000, 0xff00, 0xff}};
-
27
-
28static const constexpr std::array<uint64_t, 8> COL_MASK = {
-
29 0x8080808080808080, 0x4040404040404040, 0x2020202020202020,
-
30 0x1010101010101010, 0x808080808080808, 0x404040404040404,
-
31 0x202020202020202, 0x101010101010101};
-
32
-
33static const constexpr std::array<uint64_t, 64> BIT_MASK = {{0x8000000000000000,
-
34 0x4000000000000000,
-
35 0x2000000000000000,
-
36 0x1000000000000000,
-
37 0x800000000000000,
-
38 0x400000000000000,
-
39 0x200000000000000,
-
40 0x100000000000000,
-
41 0x80000000000000,
-
42 0x40000000000000,
-
43 0x20000000000000,
-
44 0x10000000000000,
-
45 0x8000000000000,
-
46 0x4000000000000,
-
47 0x2000000000000,
-
48 0x1000000000000,
-
49 0x800000000000,
-
50 0x400000000000,
-
51 0x200000000000,
-
52 0x100000000000,
-
53 0x80000000000,
-
54 0x40000000000,
-
55 0x20000000000,
-
56 0x10000000000,
-
57 0x8000000000,
-
58 0x4000000000,
-
59 0x2000000000,
-
60 0x1000000000,
-
61 0x800000000,
-
62 0x400000000,
-
63 0x200000000,
-
64 0x100000000,
-
65 0x80000000,
-
66 0x40000000,
-
67 0x20000000,
-
68 0x10000000,
-
69 0x8000000,
-
70 0x4000000,
-
71 0x2000000,
-
72 0x1000000,
-
73 0x800000,
-
74 0x400000,
-
75 0x200000,
-
76 0x100000,
-
77 0x80000,
-
78 0x40000,
-
79 0x20000,
-
80 0x10000,
-
81 0x8000,
-
82 0x4000,
-
83 0x2000,
-
84 0x1000,
-
85 0x800,
-
86 0x400,
-
87 0x200,
-
88 0x100,
-
89 0x80,
-
90 0x40,
-
91 0x20,
-
92 0x10,
-
93 0x8,
-
94 0x4,
-
95 0x2,
-
96 0x1}};
-
97
-
98inline bool BMat8::operator()(size_t i, size_t j) const noexcept {
-
99 HPCOMBI_ASSERT(i < 8);
-
100 HPCOMBI_ASSERT(j < 8);
-
101 return (_data << (8 * i + j)) >> 63;
-
102}
+
13// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
+
14// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
+
15// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
+
16// for more details. //
+
17// //
+
18// You should have received a copy of the GNU General Public License along //
+
19// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
+
20//****************************************************************************//
+
21
+
22// This file contains an implementation of fast boolean matrices up to
+
23// dimension 8 x 8.
+
24
+
25// NOLINT(build/header_guard)
+
26
+
27namespace HPCombi {
+
28static_assert(std::is_trivial<BMat8>(), "BMat8 is not a trivial class!");
+
29
+
30static const constexpr std::array<uint64_t, 8> ROW_MASK = {
+
31 {0xff00000000000000, 0xff000000000000, 0xff0000000000, 0xff00000000,
+
32 0xff000000, 0xff0000, 0xff00, 0xff}};
+
33
+
34static const constexpr std::array<uint64_t, 8> COL_MASK = {
+
35 0x8080808080808080, 0x4040404040404040, 0x2020202020202020,
+
36 0x1010101010101010, 0x808080808080808, 0x404040404040404,
+
37 0x202020202020202, 0x101010101010101};
+
38
+
39static const constexpr std::array<uint64_t, 64> BIT_MASK = {{0x8000000000000000,
+
40 0x4000000000000000,
+
41 0x2000000000000000,
+
42 0x1000000000000000,
+
43 0x800000000000000,
+
44 0x400000000000000,
+
45 0x200000000000000,
+
46 0x100000000000000,
+
47 0x80000000000000,
+
48 0x40000000000000,
+
49 0x20000000000000,
+
50 0x10000000000000,
+
51 0x8000000000000,
+
52 0x4000000000000,
+
53 0x2000000000000,
+
54 0x1000000000000,
+
55 0x800000000000,
+
56 0x400000000000,
+
57 0x200000000000,
+
58 0x100000000000,
+
59 0x80000000000,
+
60 0x40000000000,
+
61 0x20000000000,
+
62 0x10000000000,
+
63 0x8000000000,
+
64 0x4000000000,
+
65 0x2000000000,
+
66 0x1000000000,
+
67 0x800000000,
+
68 0x400000000,
+
69 0x200000000,
+
70 0x100000000,
+
71 0x80000000,
+
72 0x40000000,
+
73 0x20000000,
+
74 0x10000000,
+
75 0x8000000,
+
76 0x4000000,
+
77 0x2000000,
+
78 0x1000000,
+
79 0x800000,
+
80 0x400000,
+
81 0x200000,
+
82 0x100000,
+
83 0x80000,
+
84 0x40000,
+
85 0x20000,
+
86 0x10000,
+
87 0x8000,
+
88 0x4000,
+
89 0x2000,
+
90 0x1000,
+
91 0x800,
+
92 0x400,
+
93 0x200,
+
94 0x100,
+
95 0x80,
+
96 0x40,
+
97 0x20,
+
98 0x10,
+
99 0x8,
+
100 0x4,
+
101 0x2,
+
102 0x1}};
103
-
104inline void BMat8::set(size_t i, size_t j, bool val) noexcept {
+
104inline bool BMat8::operator()(size_t i, size_t j) const noexcept {
105 HPCOMBI_ASSERT(i < 8);
106 HPCOMBI_ASSERT(j < 8);
-
107 _data ^= (-val ^ _data) & BIT_MASK[8 * i + j];
+
107 return (_data << (8 * i + j)) >> 63;
108}
109
-
110inline BMat8::BMat8(std::vector<std::vector<bool>> const &mat) {
-
111 HPCOMBI_ASSERT(mat.size() <= 8);
-
112 HPCOMBI_ASSERT(0 < mat.size());
-
113 _data = 0;
-
114 uint64_t pow = 1;
-
115 pow = pow << 63;
-
116 for (auto const &row : mat) {
-
117 HPCOMBI_ASSERT(row.size() == mat.size());
-
118 for (auto entry : row) {
-
119 if (entry) {
-
120 _data ^= pow;
-
121 }
-
122 pow = pow >> 1;
-
123 }
-
124 pow = pow >> (8 - mat.size());
-
125 }
-
126}
-
127
- -
129 static std::random_device _rd;
-
130 static std::mt19937 _gen(_rd());
-
131 static std::uniform_int_distribution<uint64_t> _dist(0, 0xffffffffffffffff);
-
132
-
133 return BMat8(_dist(_gen));
-
134}
-
135
-
136inline BMat8 BMat8::random(size_t const dim) {
-
137 HPCOMBI_ASSERT(0 < dim && dim <= 8);
-
138 BMat8 bm = BMat8::random();
-
139 for (size_t i = dim; i < 8; ++i) {
-
140 bm._data &= ~ROW_MASK[i];
-
141 bm._data &= ~COL_MASK[i];
-
142 }
-
143 return bm;
-
144}
-
145
-
146inline BMat8 BMat8::transpose() const noexcept {
-
147 uint64_t x = _data;
-
148 uint64_t y = (x ^ (x >> 7)) & 0xAA00AA00AA00AA;
-
149 x = x ^ y ^ (y << 7);
-
150 y = (x ^ (x >> 14)) & 0xCCCC0000CCCC;
-
151 x = x ^ y ^ (y << 14);
-
152 y = (x ^ (x >> 28)) & 0xF0F0F0F0;
-
153 x = x ^ y ^ (y << 28);
-
154 return BMat8(x);
-
155}
-
156
-
157inline BMat8 BMat8::transpose_mask() const noexcept {
-
158 epu8 x = simde_mm_set_epi64x(_data, _data << 1);
-
159 uint64_t res = simde_mm_movemask_epi8(x);
-
160 x = x << Epu8(2);
-
161 res = res << 16 | simde_mm_movemask_epi8(x);
-
162 x = x << Epu8(2);
-
163 res = res << 16 | simde_mm_movemask_epi8(x);
-
164 x = x << Epu8(2);
-
165 res = res << 16 | simde_mm_movemask_epi8(x);
-
166 return BMat8(res);
-
167}
-
168
-
169inline BMat8 BMat8::transpose_maskd() const noexcept {
-
170 uint64_t res =
-
171 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data, _data << 1));
-
172 res = res << 16 |
-
173 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 2, _data << 3));
-
174 res = res << 16 |
-
175 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 4, _data << 5));
-
176 res = res << 16 |
-
177 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 6, _data << 7));
-
178 return BMat8(res);
-
179}
-
180
-
181using epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__));
-
182
-
183inline void BMat8::transpose2(BMat8 &a, BMat8 &b) noexcept {
-
184 epu64 x = simde_mm_set_epi64x(a._data, b._data);
-
185 epu64 y = (x ^ (x >> 7)) & (epu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
-
186 x = x ^ y ^ (y << 7);
-
187 y = (x ^ (x >> 14)) & (epu64{0xCCCC0000CCCC, 0xCCCC0000CCCC});
-
188 x = x ^ y ^ (y << 14);
-
189 y = (x ^ (x >> 28)) & (epu64{0xF0F0F0F0, 0xF0F0F0F0});
-
190 x = x ^ y ^ (y << 28);
-
191 a._data = simde_mm_extract_epi64(x, 1);
-
192 b._data = simde_mm_extract_epi64(x, 0);
-
193}
-
194
-
195static constexpr epu8 rotlow{7, 0, 1, 2, 3, 4, 5, 6};
-
196static constexpr epu8 rothigh{0, 1, 2, 3, 4, 5, 6, 7,
-
197 15, 8, 9, 10, 11, 12, 13, 14};
-
198static constexpr epu8 rotboth{7, 0, 1, 2, 3, 4, 5, 6,
-
199 15, 8, 9, 10, 11, 12, 13, 14};
-
200static constexpr epu8 rot2{6, 7, 0, 1, 2, 3, 4, 5,
-
201 14, 15, 8, 9, 10, 11, 12, 13};
-
202
-
203inline BMat8 BMat8::mult_transpose(BMat8 const &that) const noexcept {
-
204 epu8 x = simde_mm_set_epi64x(_data, _data);
-
205 epu8 y = simde_mm_shuffle_epi8(simde_mm_set_epi64x(that._data, that._data),
-
206 rothigh);
-
207 epu8 data{};
-
208 epu8 diag{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
-
209 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40};
-
210 for (int i = 0; i < 4; ++i) {
-
211 data |= ((x & y) != epu8{}) & diag;
-
212 y = simde_mm_shuffle_epi8(y, rot2);
-
213 diag = simde_mm_shuffle_epi8(diag, rot2);
-
214 }
-
215 return BMat8(simde_mm_extract_epi64(data, 0) |
-
216 simde_mm_extract_epi64(data, 1));
-
217}
-
218
-
219inline epu8 BMat8::row_space_basis_internal() const noexcept {
-
220 epu8 res = remove_dups(revsorted8(simde_mm_set_epi64x(0, _data)));
-
221 epu8 rescy = res;
-
222 // We now compute the union of all the included different rows
-
223 epu8 orincl{};
-
224 for (int i = 0; i < 7; i++) {
-
225 rescy = permuted(rescy, rotlow);
-
226 orincl |= ((rescy | res) == res) & rescy;
-
227 }
-
228 res = (res != orincl) & res;
-
229 return res;
-
230}
-
231
-
232inline BMat8 BMat8::row_space_basis() const noexcept {
-
233 return BMat8(
-
234 simde_mm_extract_epi64(sorted8(row_space_basis_internal()), 0));
-
235}
-
236
-
237#if defined(FF)
-
238#error FF is defined !
-
239#endif // FF
-
240#define FF 0xff
-
241
-
242constexpr std::array<epu8, 4> masks{
-
243 {// clang-format off
-
244 {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0},
-
245 {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1},
-
246 {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2},
-
247 {FF,FF,FF,FF,FF,FF,FF,FF, 3, 3, 3, 3, 3, 3, 3, 3}
-
248 }};
-
249#undef FF
-
250
-
251static const epu8 shiftres{1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80};
-
252
-
253namespace detail {
-
254
-
255inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept {
-
256 static const epu8 bound08 = simde_mm_slli_epi32(
-
257 static_cast<simde__m128i>(Epu8.id()), 3); // shift for *8
-
258 static const epu8 bound18 = bound08 + Epu8(0x80);
-
259 for (size_t slice8 = 0; slice8 < 16; slice8++) {
-
260 epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */
-
261 epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5);
-
262 set0 |= (bm5 == bound08) & shft;
-
263 set1 |= (bm5 == bound18) & shft;
-
264 block = simde_mm_shuffle_epi8(block, Epu8.right_cycle());
-
265 }
-
266}
-
267}
-
268
-
269inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept {
-
270 epu8 in = simde_mm_set_epi64x(0, _data);
-
271 epu8 block0{}, block1{};
-
272 for (epu8 m : masks) {
-
273 block0 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m));
-
274 block1 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m | Epu8(4)));
-
275 }
-
276 res0 = epu8{};
-
277 res1 = epu8{};
-
278 for (size_t r = 0; r < 16; r++) {
-
279 detail::row_space_update_bitset(block0 | block1, res0, res1);
-
280 block1 = simde_mm_shuffle_epi8(block1, Epu8.right_cycle());
-
281 }
-
282}
-
283
-
284inline uint64_t BMat8::row_space_size_bitset() const noexcept {
-
285 epu8 res0{}, res1{};
-
286 row_space_bitset(res0, res1);
-
287 return (__builtin_popcountll(simde_mm_extract_epi64(res0, 0)) +
-
288 __builtin_popcountll(simde_mm_extract_epi64(res1, 0)) +
-
289 __builtin_popcountll(simde_mm_extract_epi64(res0, 1)) +
-
290 __builtin_popcountll(simde_mm_extract_epi64(res1, 1)));
-
291}
-
292
-
293inline uint64_t BMat8::row_space_size_incl1() const noexcept {
-
294 epu8 in = simde_mm_set_epi64x(_data, _data);
-
295 epu8 block = Epu8.id();
-
296 uint64_t res = 0;
-
297 for (size_t r = 0; r < 16; r++) {
-
298 epu8 orincl{};
-
299 for (int i = 0; i < 8; i++) {
-
300 orincl |= ((in | block) == block) & in;
-
301 in = permuted(in, rotboth);
-
302 }
-
303 res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl));
-
304 block += Epu8(16);
-
305 }
-
306 return res;
-
307}
-
308
-
309inline uint64_t BMat8::row_space_size_incl() const noexcept {
-
310 epu8 in = simde_mm_set_epi64x(_data, _data);
-
311 epu8 block = Epu8.id();
-
312 uint64_t res = 0;
-
313 for (size_t r = 0; r < 16; r++) {
-
314 epu8 orincl = ((in | block) == block) & in;
-
315 for (int i = 0; i < 7; i++) { // Only rotating
-
316 in = permuted(in, rotboth);
-
317 orincl |= ((in | block) == block) & in;
-
318 }
-
319 res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl));
-
320 block += Epu8(16);
-
321 }
-
322 return res;
-
323}
-
324
-
325inline bool BMat8::row_space_included_bitset(BMat8 other) const noexcept {
-
326 epu8 this0, this1, other0, other1;
-
327 this->row_space_bitset(this0, this1);
-
328 other.row_space_bitset(other0, other1);
-
329 // Double inclusion of bitsets
-
330 return equal(this0 | other0, other0) && equal(this1 | other1, other1);
-
331}
-
332
-
333inline bool BMat8::row_space_included(BMat8 other) const noexcept {
-
334 epu8 in = simde_mm_set_epi64x(0, other._data);
-
335 epu8 block = simde_mm_set_epi64x(0, _data);
-
336 epu8 orincl = ((in | block) == block) & in;
-
337 for (int i = 0; i < 7; i++) { // Only rotating
-
338 in = permuted(in, rotlow);
-
339 orincl |= ((in | block) == block) & in;
-
340 }
-
341 return equal(block, orincl);
-
342}
-
343
-
344inline epu8 BMat8::row_space_mask(epu8 block) const noexcept {
-
345 epu8 in = simde_mm_set_epi64x(_data, _data);
-
346 epu8 orincl = ((in | block) == block) & in;
-
347 for (int i = 0; i < 7; i++) { // Only rotating
-
348 in = permuted(in, rotboth);
-
349 orincl |= ((in | block) == block) & in;
-
350 }
-
351 return block == orincl;
-
352}
-
353
-
354inline std::pair<bool, bool> BMat8::row_space_included2(BMat8 a0, BMat8 b0,
-
355 BMat8 a1, BMat8 b1) {
-
356 epu8 in = simde_mm_set_epi64x(b1._data, b0._data);
-
357 epu8 block = simde_mm_set_epi64x(a1._data, a0._data);
-
358 epu8 orincl = ((in | block) == block) & in;
-
359 for (int i = 0; i < 7; i++) { // Only rotating
-
360 in = permuted(in, rotboth);
-
361 orincl |= ((in | block) == block) & in;
-
362 }
-
363 epu8 res = (block == orincl);
-
364 return std::make_pair(simde_mm_extract_epi64(res, 0) == -1,
-
365 simde_mm_extract_epi64(res, 1) == -1);
-
366}
-
367
-
368inline std::bitset<256> BMat8::row_space_bitset_ref() const {
-
369 std::bitset<256> lookup;
-
370 std::vector<uint8_t> row_vec = row_space_basis().rows();
-
371 auto last = std::remove(row_vec.begin(), row_vec.end(), 0);
-
372 row_vec.erase(last, row_vec.end());
-
373 for (uint8_t x : row_vec) {
-
374 lookup.set(x);
-
375 }
-
376 lookup.set(0);
-
377 std::vector<uint8_t> row_space(row_vec.begin(), row_vec.end());
-
378 for (size_t i = 0; i < row_space.size(); ++i) {
-
379 for (uint8_t row : row_vec) {
-
380 uint8_t x = row_space[i] | row;
-
381 if (!lookup[x]) {
-
382 row_space.push_back(x);
-
383 lookup.set(x);
-
384 }
-
385 }
-
386 }
-
387 return lookup;
-
388}
-
389
-
390inline bool BMat8::row_space_included_ref(BMat8 other) const noexcept {
-
391 std::bitset<256> thisspace = row_space_bitset_ref();
-
392 std::bitset<256> otherspace = other.row_space_bitset_ref();
-
393 return (thisspace | otherspace) == otherspace;
-
394}
-
395
-
396inline uint64_t BMat8::row_space_size_ref() const {
-
397 return row_space_bitset_ref().count();
-
398}
-
399
-
400inline std::vector<uint8_t> BMat8::rows() const {
-
401 std::vector<uint8_t> rows;
-
402 for (size_t i = 0; i < 8; ++i) {
-
403 uint8_t row = static_cast<uint8_t>(_data << (8 * i) >> 56);
-
404 rows.push_back(row);
-
405 }
-
406 return rows;
-
407}
-
408
-
409inline size_t BMat8::nr_rows() const noexcept {
-
410 epu8 x = simde_mm_set_epi64x(_data, 0);
-
411 return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8{}));
-
412}
-
413
-
414static constexpr epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0,
-
415 8, 9, 10, 11, 12, 13, 14, 15};
-
416
-
417inline BMat8 BMat8::row_permuted(Perm16 p) const noexcept {
-
418 epu8 x = simde_mm_set_epi64x(0, _data);
-
419 x = permuted(x, rev8);
-
420 x = permuted(x, p);
-
421 x = permuted(x, rev8);
-
422 return BMat8(simde_mm_extract_epi64(x, 0));
-
423}
-
424
-
425inline BMat8 BMat8::col_permuted(Perm16 p) const noexcept {
-
426 return transpose().row_permuted(p).transpose();
-
427}
-
428
- -
430 return one().row_permuted(p);
-
431}
-
432
- -
434 return one().row_permuted(p).transpose();
-
435}
-
436
- - -
439 std::vector<uint8_t> rows = this->rows();
-
440 BMat8 product = *this * bm;
-
441 std::vector<uint8_t> prod_rows = product.rows();
-
442
-
443 HPCOMBI_ASSERT(product.row_space_basis() == bm);
-
444
-
445 std::vector<uint8_t> perm(8);
-
446 for (size_t i = 0; i < nr_rows(); ++i) {
-
447 uint8_t row = rows[i];
-
448 perm[i] =
-
449 std::distance(prod_rows.begin(),
-
450 std::find(prod_rows.begin(), prod_rows.end(), row));
-
451 }
-
452
-
453#ifndef __clang__
-
454#pragma GCC diagnostic push
-
455#pragma GCC diagnostic ignored "-Wstringop-overflow"
-
456#endif
-
457 std::iota(perm.begin() + nr_rows(), perm.end(), nr_rows());
-
458#ifndef __clang__
-
459#pragma GCC diagnostic pop
-
460#endif
-
461
- -
463 for (size_t i = 0; i < 8; i++)
-
464 res[i] = perm[i];
-
465 return res;
-
466}
-
467
-
468inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept {
-
469 epu8 x = permuted(simde_mm_set_epi64x(_data, 0), Epu8.rev());
-
470 epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0), Epu8.rev());
-
471 // Vector ternary operator is not supported by clang.
-
472 // return (x != (epu8 {})) ? permutation_of(y, x) : Epu8.id();
-
473 return simde_mm_blendv_epi8(Epu8.id(), permutation_of(y, x), x != epu8{});
-
474}
-
475
-
476// Not noexcept because std::ostream::operator<< isn't
-
477inline std::ostream &BMat8::write(std::ostream &os) const {
-
478 uint64_t x = _data;
-
479 uint64_t pow = 1;
-
480 pow = pow << 63;
-
481 for (size_t i = 0; i < 8; ++i) {
-
482 for (size_t j = 0; j < 8; ++j) {
-
483 if (pow & x) {
-
484 os << "1";
-
485 } else {
-
486 os << "0";
-
487 }
-
488 x = x << 1;
-
489 }
-
490 os << "\n";
-
491 }
-
492 return os;
-
493}
-
494
-
495} // namespace HPCombi
-
496
-
497namespace std {
-
498
-
499// Not noexcept because BMat8::write isn't
-
500inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat8 const &bm) {
-
501 return bm.write(os);
-
502}
-
503
-
504} // namespace std
-
#define FF
Definition bmat8_impl.hpp:240
-
Class for fast boolean matrices of dimension up to 8 x 8.
Definition bmat8.hpp:48
-
uint64_t row_space_size_incl() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:309
-
BMat8 row_space_basis() const noexcept
Returns a canonical basis of the row space of this.
Definition bmat8_impl.hpp:232
-
static std::pair< bool, bool > row_space_included2(BMat8 a1, BMat8 b1, BMat8 a2, BMat8 b2)
Returns inclusion of row spaces.
Definition bmat8_impl.hpp:354
-
static void transpose2(BMat8 &, BMat8 &) noexcept
Transpose two matrices at once.
Definition bmat8_impl.hpp:183
-
uint64_t row_space_size_bitset() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:284
-
static BMat8 row_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by rows.
Definition bmat8_impl.hpp:429
-
BMat8 mult_transpose(BMat8 const &that) const noexcept
Returns the matrix product of this and the transpose of that.
Definition bmat8_impl.hpp:203
-
BMat8 transpose_maskd() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:169
-
bool row_space_included_bitset(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:325
-
BMat8 transpose() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:146
-
static BMat8 col_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by columns.
Definition bmat8_impl.hpp:433
-
epu8 row_space_mask(epu8 vects) const noexcept
Returns a mask for which vectors of a 16 rows epu8 are in the row space of this.
Definition bmat8_impl.hpp:344
-
BMat8 row_permuted(Perm16 p) const noexcept
Returns the matrix whose rows have been permuted according to p.
Definition bmat8_impl.hpp:417
+
110inline void BMat8::set(size_t i, size_t j, bool val) noexcept {
+
111 HPCOMBI_ASSERT(i < 8);
+
112 HPCOMBI_ASSERT(j < 8);
+
113 _data ^= (-val ^ _data) & BIT_MASK[8 * i + j];
+
114}
+
115
+
116inline BMat8::BMat8(std::vector<std::vector<bool>> const &mat) {
+
117 HPCOMBI_ASSERT(mat.size() <= 8);
+
118 HPCOMBI_ASSERT(0 < mat.size());
+
119 _data = 0;
+
120 uint64_t pow = 1;
+
121 pow = pow << 63;
+
122 for (auto const &row : mat) {
+
123 HPCOMBI_ASSERT(row.size() == mat.size());
+
124 for (auto entry : row) {
+
125 if (entry) {
+
126 _data ^= pow;
+
127 }
+
128 pow = pow >> 1;
+
129 }
+
130 pow = pow >> (8 - mat.size());
+
131 }
+
132}
+
133
+ +
135 static std::random_device _rd;
+
136 static std::mt19937 _gen(_rd());
+
137 static std::uniform_int_distribution<uint64_t> _dist(0, 0xffffffffffffffff);
+
138
+
139 return BMat8(_dist(_gen));
+
140}
+
141
+
142inline BMat8 BMat8::random(size_t const dim) {
+
143 HPCOMBI_ASSERT(0 < dim && dim <= 8);
+
144 BMat8 bm = BMat8::random();
+
145 for (size_t i = dim; i < 8; ++i) {
+
146 bm._data &= ~ROW_MASK[i];
+
147 bm._data &= ~COL_MASK[i];
+
148 }
+
149 return bm;
+
150}
+
151
+
152inline BMat8 BMat8::transpose() const noexcept {
+
153 uint64_t x = _data;
+
154 uint64_t y = (x ^ (x >> 7)) & 0xAA00AA00AA00AA;
+
155 x = x ^ y ^ (y << 7);
+
156 y = (x ^ (x >> 14)) & 0xCCCC0000CCCC;
+
157 x = x ^ y ^ (y << 14);
+
158 y = (x ^ (x >> 28)) & 0xF0F0F0F0;
+
159 x = x ^ y ^ (y << 28);
+
160 return BMat8(x);
+
161}
+
162
+
163inline BMat8 BMat8::transpose_mask() const noexcept {
+
164 epu8 x = simde_mm_set_epi64x(_data, _data << 1);
+
165 uint64_t res = simde_mm_movemask_epi8(x);
+
166 x = x << Epu8(2);
+
167 res = res << 16 | simde_mm_movemask_epi8(x);
+
168 x = x << Epu8(2);
+
169 res = res << 16 | simde_mm_movemask_epi8(x);
+
170 x = x << Epu8(2);
+
171 res = res << 16 | simde_mm_movemask_epi8(x);
+
172 return BMat8(res);
+
173}
+
174
+
175inline BMat8 BMat8::transpose_maskd() const noexcept {
+
176 uint64_t res =
+
177 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data, _data << 1));
+
178 res = res << 16 |
+
179 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 2, _data << 3));
+
180 res = res << 16 |
+
181 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 4, _data << 5));
+
182 res = res << 16 |
+
183 simde_mm_movemask_epi8(simde_mm_set_epi64x(_data << 6, _data << 7));
+
184 return BMat8(res);
+
185}
+
186
+
187using epu64 = uint64_t __attribute__((__vector_size__(16), __may_alias__));
+
188
+
189inline void BMat8::transpose2(BMat8 &a, BMat8 &b) noexcept {
+
190 epu64 x = simde_mm_set_epi64x(a._data, b._data);
+
191 epu64 y = (x ^ (x >> 7)) & (epu64{0xAA00AA00AA00AA, 0xAA00AA00AA00AA});
+
192 x = x ^ y ^ (y << 7);
+
193 y = (x ^ (x >> 14)) & (epu64{0xCCCC0000CCCC, 0xCCCC0000CCCC});
+
194 x = x ^ y ^ (y << 14);
+
195 y = (x ^ (x >> 28)) & (epu64{0xF0F0F0F0, 0xF0F0F0F0});
+
196 x = x ^ y ^ (y << 28);
+
197 a._data = simde_mm_extract_epi64(x, 1);
+
198 b._data = simde_mm_extract_epi64(x, 0);
+
199}
+
200
+
201static constexpr epu8 rotlow{7, 0, 1, 2, 3, 4, 5, 6};
+
202static constexpr epu8 rothigh{0, 1, 2, 3, 4, 5, 6, 7,
+
203 15, 8, 9, 10, 11, 12, 13, 14};
+
204static constexpr epu8 rotboth{7, 0, 1, 2, 3, 4, 5, 6,
+
205 15, 8, 9, 10, 11, 12, 13, 14};
+
206static constexpr epu8 rot2{6, 7, 0, 1, 2, 3, 4, 5,
+
207 14, 15, 8, 9, 10, 11, 12, 13};
+
208
+
209inline BMat8 BMat8::mult_transpose(BMat8 const &that) const noexcept {
+
210 epu8 x = simde_mm_set_epi64x(_data, _data);
+
211 epu8 y = simde_mm_shuffle_epi8(simde_mm_set_epi64x(that._data, that._data),
+
212 rothigh);
+
213 epu8 data{};
+
214 epu8 diag{0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
+
215 0x80, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40};
+
216 for (int i = 0; i < 4; ++i) {
+
217 data |= ((x & y) != epu8{}) & diag;
+
218 y = simde_mm_shuffle_epi8(y, rot2);
+
219 diag = simde_mm_shuffle_epi8(diag, rot2);
+
220 }
+
221 return BMat8(simde_mm_extract_epi64(data, 0) |
+
222 simde_mm_extract_epi64(data, 1));
+
223}
+
224
+
225inline epu8 BMat8::row_space_basis_internal() const noexcept {
+
226 epu8 res = remove_dups(revsorted8(simde_mm_set_epi64x(0, _data)));
+
227 epu8 rescy = res;
+
228 // We now compute the union of all the included different rows
+
229 epu8 orincl{};
+
230 for (int i = 0; i < 7; i++) {
+
231 rescy = permuted(rescy, rotlow);
+
232 orincl |= ((rescy | res) == res) & rescy;
+
233 }
+
234 res = (res != orincl) & res;
+
235 return res;
+
236}
+
237
+
238inline BMat8 BMat8::row_space_basis() const noexcept {
+
239 return BMat8(
+
240 simde_mm_extract_epi64(sorted8(row_space_basis_internal()), 0));
+
241}
+
242
+
243#if defined(FF)
+
244#error FF is defined !
+
245#endif // FF
+
246#define FF 0xff
+
247
+
248constexpr std::array<epu8, 4> masks{{
+
249 // clang-format off
+
250 {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0}, // NOLINT()
+
251 {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1}, // NOLINT()
+
252 {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2}, // NOLINT()
+
253 {FF,FF,FF,FF,FF,FF,FF,FF, 3, 3, 3, 3, 3, 3, 3, 3} // NOLINT()
+
254 }};
+
255#undef FF
+
256
+
257static const epu8 shiftres{1, 2, 4, 8, 0x10, 0x20, 0x40, 0x80};
+
258
+
259namespace detail {
+
260
+
261inline void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1)
+
262noexcept {
+
263 static const epu8 bound08 = simde_mm_slli_epi32(
+
264 static_cast<simde__m128i>(Epu8.id()), 3); // shift for *8
+
265 static const epu8 bound18 = bound08 + Epu8(0x80);
+
266 for (size_t slice8 = 0; slice8 < 16; slice8++) {
+
267 epu8 bm5 = Epu8(0xf8) & block; /* 11111000 */
+
268 epu8 shft = simde_mm_shuffle_epi8(shiftres, block - bm5);
+
269 set0 |= (bm5 == bound08) & shft;
+
270 set1 |= (bm5 == bound18) & shft;
+
271 block = simde_mm_shuffle_epi8(block, Epu8.right_cycle());
+
272 }
+
273}
+
274} // namespace detail
+
275
+
276inline void BMat8::row_space_bitset(epu8 &res0, epu8 &res1) const noexcept {
+
277 epu8 in = simde_mm_set_epi64x(0, _data);
+
278 epu8 block0{}, block1{};
+
279 for (epu8 m : masks) {
+
280 block0 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m));
+
281 block1 |= static_cast<epu8>(simde_mm_shuffle_epi8(in, m | Epu8(4)));
+
282 }
+
283 res0 = epu8{};
+
284 res1 = epu8{};
+
285 for (size_t r = 0; r < 16; r++) {
+
286 detail::row_space_update_bitset(block0 | block1, res0, res1);
+
287 block1 = simde_mm_shuffle_epi8(block1, Epu8.right_cycle());
+
288 }
+
289}
+
290
+
291inline uint64_t BMat8::row_space_size_bitset() const noexcept {
+
292 epu8 res0{}, res1{};
+
293 row_space_bitset(res0, res1);
+
294 return (__builtin_popcountll(simde_mm_extract_epi64(res0, 0)) +
+
295 __builtin_popcountll(simde_mm_extract_epi64(res1, 0)) +
+
296 __builtin_popcountll(simde_mm_extract_epi64(res0, 1)) +
+
297 __builtin_popcountll(simde_mm_extract_epi64(res1, 1)));
+
298}
+
299
+
300inline uint64_t BMat8::row_space_size_incl1() const noexcept {
+
301 epu8 in = simde_mm_set_epi64x(_data, _data);
+
302 epu8 block = Epu8.id();
+
303 uint64_t res = 0;
+
304 for (size_t r = 0; r < 16; r++) {
+
305 epu8 orincl{};
+
306 for (int i = 0; i < 8; i++) {
+
307 orincl |= ((in | block) == block) & in;
+
308 in = permuted(in, rotboth);
+
309 }
+
310 res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl));
+
311 block += Epu8(16);
+
312 }
+
313 return res;
+
314}
+
315
+
316inline uint64_t BMat8::row_space_size_incl() const noexcept {
+
317 epu8 in = simde_mm_set_epi64x(_data, _data);
+
318 epu8 block = Epu8.id();
+
319 uint64_t res = 0;
+
320 for (size_t r = 0; r < 16; r++) {
+
321 epu8 orincl = ((in | block) == block) & in;
+
322 for (int i = 0; i < 7; i++) { // Only rotating
+
323 in = permuted(in, rotboth);
+
324 orincl |= ((in | block) == block) & in;
+
325 }
+
326 res += __builtin_popcountll(simde_mm_movemask_epi8(block == orincl));
+
327 block += Epu8(16);
+
328 }
+
329 return res;
+
330}
+
331
+
332inline bool BMat8::row_space_included_bitset(BMat8 other) const noexcept {
+
333 epu8 this0, this1, other0, other1;
+
334 this->row_space_bitset(this0, this1);
+
335 other.row_space_bitset(other0, other1);
+
336 // Double inclusion of bitsets
+
337 return equal(this0 | other0, other0) && equal(this1 | other1, other1);
+
338}
+
339
+
340inline bool BMat8::row_space_included(BMat8 other) const noexcept {
+
341 epu8 in = simde_mm_set_epi64x(0, other._data);
+
342 epu8 block = simde_mm_set_epi64x(0, _data);
+
343 epu8 orincl = ((in | block) == block) & in;
+
344 for (int i = 0; i < 7; i++) { // Only rotating
+
345 in = permuted(in, rotlow);
+
346 orincl |= ((in | block) == block) & in;
+
347 }
+
348 return equal(block, orincl);
+
349}
+
350
+
351inline epu8 BMat8::row_space_mask(epu8 block) const noexcept {
+
352 epu8 in = simde_mm_set_epi64x(_data, _data);
+
353 epu8 orincl = ((in | block) == block) & in;
+
354 for (int i = 0; i < 7; i++) { // Only rotating
+
355 in = permuted(in, rotboth);
+
356 orincl |= ((in | block) == block) & in;
+
357 }
+
358 return block == orincl;
+
359}
+
360
+
361inline std::pair<bool, bool> BMat8::row_space_included2(BMat8 a0, BMat8 b0,
+
362 BMat8 a1, BMat8 b1) {
+
363 epu8 in = simde_mm_set_epi64x(b1._data, b0._data);
+
364 epu8 block = simde_mm_set_epi64x(a1._data, a0._data);
+
365 epu8 orincl = ((in | block) == block) & in;
+
366 for (int i = 0; i < 7; i++) { // Only rotating
+
367 in = permuted(in, rotboth);
+
368 orincl |= ((in | block) == block) & in;
+
369 }
+
370 epu8 res = (block == orincl);
+
371 return std::make_pair(simde_mm_extract_epi64(res, 0) == -1,
+
372 simde_mm_extract_epi64(res, 1) == -1);
+
373}
+
374
+
375inline std::bitset<256> BMat8::row_space_bitset_ref() const {
+
376 std::bitset<256> lookup;
+
377 std::vector<uint8_t> row_vec = row_space_basis().rows();
+
378 auto last = std::remove(row_vec.begin(), row_vec.end(), 0);
+
379 row_vec.erase(last, row_vec.end());
+
380 for (uint8_t x : row_vec) {
+
381 lookup.set(x);
+
382 }
+
383 lookup.set(0);
+
384 std::vector<uint8_t> row_space(row_vec.begin(), row_vec.end());
+
385 for (size_t i = 0; i < row_space.size(); ++i) {
+
386 for (uint8_t row : row_vec) {
+
387 uint8_t x = row_space[i] | row;
+
388 if (!lookup[x]) {
+
389 row_space.push_back(x);
+
390 lookup.set(x);
+
391 }
+
392 }
+
393 }
+
394 return lookup;
+
395}
+
396
+
397inline bool BMat8::row_space_included_ref(BMat8 other) const noexcept {
+
398 std::bitset<256> thisspace = row_space_bitset_ref();
+
399 std::bitset<256> otherspace = other.row_space_bitset_ref();
+
400 return (thisspace | otherspace) == otherspace;
+
401}
+
402
+
403inline uint64_t BMat8::row_space_size_ref() const {
+
404 return row_space_bitset_ref().count();
+
405}
+
406
+
407inline std::vector<uint8_t> BMat8::rows() const {
+
408 std::vector<uint8_t> rows;
+
409 for (size_t i = 0; i < 8; ++i) {
+
410 uint8_t row = static_cast<uint8_t>(_data << (8 * i) >> 56);
+
411 rows.push_back(row);
+
412 }
+
413 return rows;
+
414}
+
415
+
416inline size_t BMat8::nr_rows() const noexcept {
+
417 epu8 x = simde_mm_set_epi64x(_data, 0);
+
418 return __builtin_popcountll(simde_mm_movemask_epi8(x != epu8{}));
+
419}
+
420
+
421static constexpr epu8 rev8{7, 6, 5, 4, 3, 2, 1, 0,
+
422 8, 9, 10, 11, 12, 13, 14, 15};
+
423
+
424inline BMat8 BMat8::row_permuted(Perm16 p) const noexcept {
+
425 epu8 x = simde_mm_set_epi64x(0, _data);
+
426 x = permuted(x, rev8);
+
427 x = permuted(x, p);
+
428 x = permuted(x, rev8);
+
429 return BMat8(simde_mm_extract_epi64(x, 0));
+
430}
+
431
+
432inline BMat8 BMat8::col_permuted(Perm16 p) const noexcept {
+
433 return transpose().row_permuted(p).transpose();
+
434}
+
435
+ +
437 return one().row_permuted(p);
+
438}
+
439
+ +
441 return one().row_permuted(p).transpose();
+
442}
+
443
+ + +
446 std::vector<uint8_t> rows = this->rows();
+
447 BMat8 product = *this * bm;
+
448 std::vector<uint8_t> prod_rows = product.rows();
+
449
+
450 HPCOMBI_ASSERT(product.row_space_basis() == bm);
+
451
+
452 std::vector<uint8_t> perm(8);
+
453 for (size_t i = 0; i < nr_rows(); ++i) {
+
454 uint8_t row = rows[i];
+
455 perm[i] =
+
456 std::distance(prod_rows.begin(),
+
457 std::find(prod_rows.begin(), prod_rows.end(), row));
+
458 }
+
459
+
460#ifndef __clang__
+
461#pragma GCC diagnostic push
+
462#pragma GCC diagnostic ignored "-Wstringop-overflow"
+
463#endif
+
464 std::iota(perm.begin() + nr_rows(), perm.end(), nr_rows());
+
465#ifndef __clang__
+
466#pragma GCC diagnostic pop
+
467#endif
+
468
+ +
470 for (size_t i = 0; i < 8; i++)
+
471 res[i] = perm[i];
+
472 return res;
+
473}
+
474
+
475inline Perm16 BMat8::right_perm_action_on_basis(BMat8 other) const noexcept {
+
476 epu8 x = permuted(simde_mm_set_epi64x(_data, 0), Epu8.rev());
+
477 epu8 y = permuted(simde_mm_set_epi64x((*this * other)._data, 0),
+
478 Epu8.rev());
+
479 // Vector ternary operator is not supported by clang.
+
480 // return (x != (epu8 {})) ? permutation_of(y, x) : Epu8.id();
+
481 return simde_mm_blendv_epi8(Epu8.id(), permutation_of(y, x), x != epu8{});
+
482}
+
483
+
484// Not noexcept because std::ostream::operator<< isn't
+
485inline std::ostream &BMat8::write(std::ostream &os) const {
+
486 uint64_t x = _data;
+
487 uint64_t pow = 1;
+
488 pow = pow << 63;
+
489 for (size_t i = 0; i < 8; ++i) {
+
490 for (size_t j = 0; j < 8; ++j) {
+
491 if (pow & x) {
+
492 os << "1";
+
493 } else {
+
494 os << "0";
+
495 }
+
496 x = x << 1;
+
497 }
+
498 os << "\n";
+
499 }
+
500 return os;
+
501}
+
502
+
503} // namespace HPCombi
+
504
+
505namespace std {
+
506
+
507// Not noexcept because BMat8::write isn't
+
508inline std::ostream &operator<<(std::ostream &os, HPCombi::BMat8 const &bm) {
+
509 return bm.write(os);
+
510}
+
511
+
512} // namespace std
+
#define FF
Definition bmat8_impl.hpp:246
+
Class for fast boolean matrices of dimension up to 8 x 8.
Definition bmat8.hpp:52
+
uint64_t row_space_size_incl() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:316
+
BMat8 row_space_basis() const noexcept
Returns a canonical basis of the row space of this.
Definition bmat8_impl.hpp:238
+
static std::pair< bool, bool > row_space_included2(BMat8 a1, BMat8 b1, BMat8 a2, BMat8 b2)
Returns inclusion of row spaces.
Definition bmat8_impl.hpp:361
+
static void transpose2(BMat8 &, BMat8 &) noexcept
Transpose two matrices at once.
Definition bmat8_impl.hpp:189
+
uint64_t row_space_size_bitset() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:291
+
static BMat8 row_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by rows.
Definition bmat8_impl.hpp:436
+
BMat8 mult_transpose(BMat8 const &that) const noexcept
Returns the matrix product of this and the transpose of that.
Definition bmat8_impl.hpp:209
+
BMat8 transpose_maskd() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:175
+
bool row_space_included_bitset(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:332
+
BMat8 transpose() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:152
+
static BMat8 col_permutation_matrix(Perm16 p) noexcept
Returns the matrix associated to the permutation p by columns.
Definition bmat8_impl.hpp:440
+
epu8 row_space_mask(epu8 vects) const noexcept
Returns a mask for which vectors of a 16 rows epu8 are in the row space of this.
Definition bmat8_impl.hpp:351
+
BMat8 row_permuted(Perm16 p) const noexcept
Returns the matrix whose rows have been permuted according to p.
Definition bmat8_impl.hpp:424
BMat8() noexcept=default
A default constructor.
-
bool row_space_included(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:333
-
std::ostream & write(std::ostream &os) const
Write this on os.
Definition bmat8_impl.hpp:477
-
void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept
Returns the the row space of this as 256 bits.
Definition bmat8_impl.hpp:269
-
Perm16 right_perm_action_on_basis(BMat8) const noexcept
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:468
-
std::bitset< 256 > row_space_bitset_ref() const
Returns the the row space of this.
Definition bmat8_impl.hpp:368
-
BMat8 col_permuted(Perm16 p) const noexcept
Returns the matrix whose columns have been permuted according to p.
Definition bmat8_impl.hpp:425
-
uint64_t row_space_size_ref() const
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:396
-
BMat8 transpose_mask() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:157
-
std::vector< uint8_t > rows() const
Returns a std::vector for rows of this.
Definition bmat8_impl.hpp:400
-
size_t nr_rows() const noexcept
Returns the number of non-zero rows of this.
Definition bmat8_impl.hpp:409
-
void set(size_t i, size_t j, bool val) noexcept
Sets the (i, j)th position to val.
Definition bmat8_impl.hpp:104
-
static BMat8 random()
Returns a random BMat8.
Definition bmat8_impl.hpp:128
-
bool operator()(size_t i, size_t j) const noexcept
Returns the entry in the (i, j)th position.
Definition bmat8_impl.hpp:98
-
uint64_t row_space_size_incl1() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:293
-
bool row_space_included_ref(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:390
-
Perm16 right_perm_action_on_basis_ref(BMat8) const
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:437
-
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:23
-
const Transf16 a1
Definition image.cpp:48
-
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:62
-
void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept
Definition bmat8_impl.hpp:255
-
Definition bmat8.hpp:37
-
epu8 permuted(epu8 a, epu8 b) noexcept
Permuting a HPCombi::epu8.
Definition epu8.hpp:70
-
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:252
-
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half reverse sorted.
Definition epu8_impl.hpp:205
-
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of one other.
Definition epu8_impl.hpp:295
-
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:61
-
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half sorted.
Definition epu8_impl.hpp:199
-
uint64_t __attribute__((__vector_size__(16), __may_alias__)) epu64
Definition bmat8_impl.hpp:181
-
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:50
-
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:41
-
constexpr std::array< epu8, 4 > masks
Definition bmat8_impl.hpp:242
-
const T pow(const T x)
A generic compile time exponentiation function.
Definition power.hpp:79
-
Definition bmat8.hpp:360
-
std::ostream & operator<<(std::ostream &os, HPCombi::BMat8 const &bm)
Definition bmat8_impl.hpp:500
-
Permutations of .
Definition perm16.hpp:204
-
static constexpr Perm16 one()
The identity partial permutation.
Definition perm16.hpp:219
+
bool row_space_included(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:340
+
std::ostream & write(std::ostream &os) const
Write this on os.
Definition bmat8_impl.hpp:485
+
void row_space_bitset(epu8 &res1, epu8 &res2) const noexcept
Returns the the row space of this as 256 bits.
Definition bmat8_impl.hpp:276
+
Perm16 right_perm_action_on_basis(BMat8) const noexcept
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:475
+
std::bitset< 256 > row_space_bitset_ref() const
Returns the the row space of this.
Definition bmat8_impl.hpp:375
+
BMat8 col_permuted(Perm16 p) const noexcept
Returns the matrix whose columns have been permuted according to p.
Definition bmat8_impl.hpp:432
+
uint64_t row_space_size_ref() const
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:403
+
BMat8 transpose_mask() const noexcept
Returns the transpose of this.
Definition bmat8_impl.hpp:163
+
std::vector< uint8_t > rows() const
Returns a std::vector for rows of this.
Definition bmat8_impl.hpp:407
+
size_t nr_rows() const noexcept
Returns the number of non-zero rows of this.
Definition bmat8_impl.hpp:416
+
void set(size_t i, size_t j, bool val) noexcept
Sets the (i, j)th position to val.
Definition bmat8_impl.hpp:110
+
static BMat8 random()
Returns a random BMat8.
Definition bmat8_impl.hpp:134
+
bool operator()(size_t i, size_t j) const noexcept
Returns the entry in the (i, j)th position.
Definition bmat8_impl.hpp:104
+
uint64_t row_space_size_incl1() const noexcept
Returns the cardinality of the row space of this.
Definition bmat8_impl.hpp:300
+
bool row_space_included_ref(BMat8 other) const noexcept
Returns whether the row space of this is included in other's.
Definition bmat8_impl.hpp:397
+
Perm16 right_perm_action_on_basis_ref(BMat8) const
Give the permutation whose right multiplication change *this to other.
Definition bmat8_impl.hpp:444
+
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:28
+
const Transf16 a1
Definition image.cpp:52
+
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:66
+
void row_space_update_bitset(epu8 block, epu8 &set0, epu8 &set1) noexcept
Definition bmat8_impl.hpp:261
+
Definition bmat8.hpp:41
+
epu8 permuted(epu8 a, epu8 b) noexcept
Permuting a HPCombi::epu8.
Definition epu8.hpp:72
+
epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
Remove duplicates in a sorted HPCombi::epu8.
Definition epu8_impl.hpp:260
+
epu8 revsorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half reverse sorted.
Definition epu8_impl.hpp:212
+
epu8 permutation_of(epu8 a, epu8 b) noexcept
Find if a vector is a permutation of one other.
Definition epu8_impl.hpp:303
+
bool equal(epu8 a, epu8 b) noexcept
Equality of HPCombi::epu8.
Definition epu8.hpp:63
+
epu8 sorted8(epu8 a) noexcept
Return a HPCombi::epu8 with the two half sorted.
Definition epu8_impl.hpp:206
+
uint64_t __attribute__((__vector_size__(16), __may_alias__)) epu64
Definition bmat8_impl.hpp:187
+
constexpr TPUBuild< epu8 > Epu8
Factory object acting as a class constructor for type HPCombi::epu8.
Definition epu8.hpp:53
+
uint8_t __attribute__((vector_size(16))) epu8
SIMD vector of 16 unsigned bytes.
Definition epu8.hpp:45
+
constexpr std::array< epu8, 4 > masks
Definition bmat8_impl.hpp:248
+
const T pow(const T x)
A generic compile time exponentiation function.
Definition power.hpp:83
+
Definition bmat8.hpp:364
+
std::ostream & operator<<(std::ostream &os, HPCombi::BMat8 const &bm)
Definition bmat8_impl.hpp:508
+
Permutations of .
Definition perm16.hpp:208
+
static constexpr Perm16 one()
The identity partial permutation.
Definition perm16.hpp:223
diff --git a/builder_8hpp_source.html b/builder_8hpp_source.html index 266a69d..14d3e79 100644 --- a/builder_8hpp_source.html +++ b/builder_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/builder.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/builder.hpp Source File @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -77,161 +77,167 @@
builder.hpp
-Go to the documentation of this file.
1
-
2// Copyright (C) 2023 Florent Hivert <Florent.Hivert@lri.fr>, //
+Go to the documentation of this file.
1//****************************************************************************//
+
2// Copyright (C) 2023-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
3// //
-
4// Distributed under the terms of the GNU General Public License (GPL) //
+
4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
5// //
-
6// This code is distributed in the hope that it will be useful, //
-
7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
-
8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
-
9// General Public License for more details. //
+
6// HP-Combi is free software: you can redistribute it and/or modify it //
+
7// under the terms of the GNU General Public License as published by the //
+
8// Free Software Foundation, either version 3 of the License, or //
+
9// (at your option) any later version. //
10// //
-
11// The full text of the GPL is available at: //
-
12// //
-
13// http://www.gnu.org/licenses/ //
-
15
-
16#ifndef HPCOMBI_BUILDER_HPP_INCLUDED
-
17#define HPCOMBI_BUILDER_HPP_INCLUDED
-
18
-
19#include <array> // for array
-
20#include <cstddef> // for size_t
-
21#include <initializer_list> // for initializer_list
-
22#include <type_traits> // for remove_reference_t
-
23#include <utility> // for make_index_sequence, ind...
-
24
-
25#include "vect_generic.hpp" // for VectGeneric
-
26
-
27namespace HPCombi {
+
11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
+
12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
+
13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
+
14// for more details. //
+
15// //
+
16// You should have received a copy of the GNU General Public License along //
+
17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
+
18//****************************************************************************//
+
19
+
20#ifndef HPCOMBI_BUILDER_HPP_
+
21#define HPCOMBI_BUILDER_HPP_
+
22
+
23#include <array> // for array
+
24#include <cstddef> // for size_t
+
25#include <initializer_list> // for initializer_list
+
26#include <type_traits> // for remove_reference_t
+
27#include <utility> // for make_index_sequence, ind...
28
-
39template <class TPU> struct TPUBuild {
-
40
-
42 using type_elem = typename std::remove_reference_t<decltype((TPU{})[0])>;
-
43
-
45 static constexpr size_t size_elem = sizeof(type_elem);
+
29#include "vect_generic.hpp" // for VectGeneric
+
30
+
31namespace HPCombi {
+
32
+
43template <class TPU> struct TPUBuild {
+
45 using type_elem = typename std::remove_reference_t<decltype((TPU{})[0])>;
46
-
48 static constexpr size_t size = sizeof(TPU) / size_elem;
+
48 static constexpr size_t size_elem = sizeof(type_elem);
49
-
51 using array = std::array<type_elem, size>;
+
51 static constexpr size_t size = sizeof(TPU) / size_elem;
52
-
53 template <class Fun, decltype(size)... Is>
-
54 static constexpr TPU make_helper(Fun f, std::index_sequence<Is...>) {
-
55 static_assert(std::is_invocable_v<Fun, type_elem>);
-
56 return TPU{f(Is)...};
-
57 }
-
58
-
60 inline constexpr TPU operator()(std::initializer_list<type_elem> il,
-
61 type_elem def) const {
-
62 HPCOMBI_ASSERT(il.size() <= size);
-
63 array res;
-
64 std::copy(il.begin(), il.end(), res.begin());
-
65 std::fill(res.begin() + il.size(), res.end(), def);
-
66 return reinterpret_cast<const TPU &>(res);
-
67 }
-
68
-
70 template <class Fun> inline constexpr TPU operator()(Fun f) const {
-
71 static_assert(std::is_invocable_v<Fun, type_elem>);
-
72 return make_helper(f, std::make_index_sequence<size>{});
-
73 }
-
74
-
76 inline constexpr TPU operator()(type_elem c) const {
-
77 return operator()([c](auto) { return c; });
-
78 }
-
80 inline constexpr TPU operator()(int c) const {
-
81 return operator()(type_elem(c));
-
82 }
-
84 inline constexpr TPU operator()(size_t c) const {
-
85 return operator()(type_elem(c));
-
86 }
-
87
-
89 // Passing the argument by reference used to trigger a segfault in gcc
-
90 // Since vector types doesn't belongs to the standard, I didn't manage
-
91 // to know if I'm using undefined behavior here.
-
92 inline constexpr TPU operator()(array a) const {
-
93 return reinterpret_cast<const TPU &>(a);
-
94 }
-
95
-
97 constexpr TPU id() const { return operator()([](type_elem i) { return i; }); }
-
99 constexpr TPU rev() const {
-
100 return (*this)([](type_elem i) { return size - 1 - i; });
-
101 }
-
103 constexpr TPU left_cycle() const {
-
104 return (*this)([](type_elem i) { return (i + size - 1) % size; });
-
105 }
-
107 constexpr TPU right_cycle() const {
-
108 return (*this)([](type_elem i) { return (i + 1) % size; });
-
109 }
-
111 constexpr TPU left_dup() const {
-
112 return (*this)([](type_elem i) { return i == 15 ? 15 : i + 1; });
-
113 }
-
115 constexpr TPU right_dup() const {
-
116 return (*this)([](type_elem i) { return i == 0 ? 0 : i - 1; });
-
117 }
-
119 constexpr TPU popcount() const {
-
120 return (*this)([](type_elem i) {
-
121 return (((i & 0x01) != 0 ? 1 : 0) + ((i & 0x02) != 0 ? 1 : 0) +
-
122 ((i & 0x04) != 0 ? 1 : 0) + ((i & 0x08) != 0 ? 1 : 0) +
-
123 ((i & 0x10) != 0 ? 1 : 0) + ((i & 0x20) != 0 ? 1 : 0) +
-
124 ((i & 0x40) != 0 ? 1 : 0) + ((i & 0x80) != 0 ? 1 : 0));
-
125 });
-
126 }
-
127};
-
128
-
133template <class TPU>
-
134inline typename TPUBuild<TPU>::array &as_array(TPU &v) noexcept {
-
135 return reinterpret_cast<typename TPUBuild<TPU>::array &>(v);
-
136}
-
141template <class TPU>
-
142inline const typename TPUBuild<TPU>::array &as_array(const TPU &v) noexcept {
-
143 return reinterpret_cast<const typename TPUBuild<TPU>::array &>(v);
-
144}
-
145
-
150template <class TPU>
- -
152 return reinterpret_cast<VectGeneric<TPUBuild<TPU>::size> &>(as_array(v));
-
153}
-
154
-
159template <class TPU>
- -
161 return reinterpret_cast<const VectGeneric<TPUBuild<TPU>::size> &>(
-
162 as_array(v));
-
163}
-
164
-
165} // namespace HPCombi
-
166
-
167#endif // HPCOMBI_BUILDER_HPP_INCLUDED
-
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:23
-
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:62
-
Definition bmat8.hpp:37
-
VectGeneric< TPUBuild< TPU >::size > & as_VectGeneric(TPU &v)
Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.
Definition builder.hpp:151
-
TPUBuild< TPU >::array & as_array(TPU &v) noexcept
Cast a TPU to a c++ std::array.
Definition builder.hpp:134
-
Class for factory object associated to a SIMD packed unsigned integers.
Definition builder.hpp:39
-
constexpr TPU popcount() const
Popcount TPU: the ith entry contains the number of bits set in i.
Definition builder.hpp:119
-
static constexpr size_t size_elem
Size of the elements.
Definition builder.hpp:45
-
constexpr TPU left_dup() const
Left shift TPU, duplicating the rightmost entry.
Definition builder.hpp:111
-
constexpr TPU operator()(int c) const
explicit overloading for int constants
Definition builder.hpp:80
-
constexpr TPU right_dup() const
Right shift TPU, duplicating the leftmost entry.
Definition builder.hpp:115
-
constexpr TPU operator()(size_t c) const
explicit overloading for size_t constants
Definition builder.hpp:84
-
constexpr TPU rev() const
Return the reversed element of type TPU.
Definition builder.hpp:99
-
constexpr TPU operator()(std::initializer_list< type_elem > il, type_elem def) const
Construct a TPU from an std::initializer_list and a default value.
Definition builder.hpp:60
-
constexpr TPU operator()(array a) const
explicit overloading for array
Definition builder.hpp:92
-
constexpr TPU operator()(Fun f) const
Construct a TPU from a function giving the values at .
Definition builder.hpp:70
-
constexpr TPU right_cycle() const
Right cycle TPU permutation.
Definition builder.hpp:107
-
constexpr TPU left_cycle() const
Left cycle TPU permutation.
Definition builder.hpp:103
-
constexpr TPU operator()(type_elem c) const
Construct a constant TPU.
Definition builder.hpp:76
-
static constexpr TPU make_helper(Fun f, std::index_sequence< Is... >)
Definition builder.hpp:54
-
constexpr TPU id() const
Return the identity element of type TPU.
Definition builder.hpp:97
-
typename std::remove_reference_t< decltype((TPU{})[0])> type_elem
Type of the elements.
Definition builder.hpp:42
-
static constexpr size_t size
Number of elements.
Definition builder.hpp:48
-
std::array< type_elem, size > array
Array equivalent type.
Definition builder.hpp:51
-
A generic class for combinatorial integer vectors.
Definition vect_generic.hpp:44
+
54 using array = std::array<type_elem, size>;
+
55
+
56 template <class Fun, decltype(size)... Is>
+
57 static constexpr TPU make_helper(Fun f, std::index_sequence<Is...>) {
+
58 static_assert(std::is_invocable_v<Fun, type_elem>);
+
59 return TPU{f(Is)...};
+
60 }
+
61
+
63 inline constexpr TPU operator()(std::initializer_list<type_elem> il,
+
64 type_elem def) const {
+
65 HPCOMBI_ASSERT(il.size() <= size);
+
66 array res;
+
67 std::copy(il.begin(), il.end(), res.begin());
+
68 std::fill(res.begin() + il.size(), res.end(), def);
+
69 return reinterpret_cast<const TPU &>(res);
+
70 }
+
71
+
73 template <class Fun> inline constexpr TPU operator()(Fun f) const {
+
74 static_assert(std::is_invocable_v<Fun, type_elem>);
+
75 return make_helper(f, std::make_index_sequence<size>{});
+
76 }
+
77
+
79 inline constexpr TPU operator()(type_elem c) const {
+
80 return operator()([c](auto) { return c; });
+
81 }
+
83 inline constexpr TPU operator()(int c) const {
+
84 return operator()(type_elem(c));
+
85 }
+
87 inline constexpr TPU operator()(size_t c) const {
+
88 return operator()(type_elem(c));
+
89 }
+
90
+
92 // Passing the argument by reference used to trigger a segfault in gcc
+
93 // Since vector types doesn't belongs to the standard, I didn't manage
+
94 // to know if I'm using undefined behavior here.
+
95 inline constexpr TPU operator()(array a) const {
+
96 return reinterpret_cast<const TPU &>(a);
+
97 }
+
98
+
100 constexpr TPU id() const {
+
101 return operator()([](type_elem i) { return i; });
+
102 }
+
104 constexpr TPU rev() const {
+
105 return (*this)([](type_elem i) { return size - 1 - i; });
+
106 }
+
108 constexpr TPU left_cycle() const {
+
109 return (*this)([](type_elem i) { return (i + size - 1) % size; });
+
110 }
+
112 constexpr TPU right_cycle() const {
+
113 return (*this)([](type_elem i) { return (i + 1) % size; });
+
114 }
+
116 constexpr TPU left_dup() const {
+
117 return (*this)([](type_elem i) { return i == 15 ? 15 : i + 1; });
+
118 }
+
120 constexpr TPU right_dup() const {
+
121 return (*this)([](type_elem i) { return i == 0 ? 0 : i - 1; });
+
122 }
+
124 constexpr TPU popcount() const {
+
125 return (*this)([](type_elem i) {
+
126 return (((i & 0x01) != 0 ? 1 : 0) + ((i & 0x02) != 0 ? 1 : 0) +
+
127 ((i & 0x04) != 0 ? 1 : 0) + ((i & 0x08) != 0 ? 1 : 0) +
+
128 ((i & 0x10) != 0 ? 1 : 0) + ((i & 0x20) != 0 ? 1 : 0) +
+
129 ((i & 0x40) != 0 ? 1 : 0) + ((i & 0x80) != 0 ? 1 : 0));
+
130 });
+
131 }
+
132};
+
133
+
138template <class TPU>
+
139inline typename TPUBuild<TPU>::array &as_array(TPU &v) noexcept {
+
140 return reinterpret_cast<typename TPUBuild<TPU>::array &>(v);
+
141}
+
146template <class TPU>
+
147inline const typename TPUBuild<TPU>::array &as_array(const TPU &v) noexcept {
+
148 return reinterpret_cast<const typename TPUBuild<TPU>::array &>(v);
+
149}
+
150
+
155template <class TPU>
+ +
157 return reinterpret_cast<VectGeneric<TPUBuild<TPU>::size> &>(as_array(v));
+
158}
+
159
+
164template <class TPU>
+ +
166 return reinterpret_cast<const VectGeneric<TPUBuild<TPU>::size> &>(
+
167 as_array(v));
+
168}
+
169
+
170} // namespace HPCombi
+
171
+
172#endif // HPCOMBI_BUILDER_HPP_
+
#define HPCOMBI_ASSERT(x)
Definition debug.hpp:28
+
std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
Definition image.cpp:66
+
Definition bmat8.hpp:41
+
VectGeneric< TPUBuild< TPU >::size > & as_VectGeneric(TPU &v)
Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.
Definition builder.hpp:156
+
TPUBuild< TPU >::array & as_array(TPU &v) noexcept
Cast a TPU to a c++ std::array.
Definition builder.hpp:139
+
Class for factory object associated to a SIMD packed unsigned integers.
Definition builder.hpp:43
+
constexpr TPU popcount() const
Popcount TPU: the ith entry contains the number of bits set in i.
Definition builder.hpp:124
+
static constexpr size_t size_elem
Size of the elements.
Definition builder.hpp:48
+
constexpr TPU left_dup() const
Left shift TPU, duplicating the rightmost entry.
Definition builder.hpp:116
+
constexpr TPU operator()(int c) const
explicit overloading for int constants
Definition builder.hpp:83
+
constexpr TPU right_dup() const
Right shift TPU, duplicating the leftmost entry.
Definition builder.hpp:120
+
constexpr TPU operator()(size_t c) const
explicit overloading for size_t constants
Definition builder.hpp:87
+
constexpr TPU rev() const
Return the reversed element of type TPU.
Definition builder.hpp:104
+
constexpr TPU operator()(std::initializer_list< type_elem > il, type_elem def) const
Construct a TPU from an std::initializer_list and a default value.
Definition builder.hpp:63
+
constexpr TPU operator()(array a) const
explicit overloading for array
Definition builder.hpp:95
+
constexpr TPU operator()(Fun f) const
Construct a TPU from a function giving the values at .
Definition builder.hpp:73
+
constexpr TPU right_cycle() const
Right cycle TPU permutation.
Definition builder.hpp:112
+
constexpr TPU left_cycle() const
Left cycle TPU permutation.
Definition builder.hpp:108
+
constexpr TPU operator()(type_elem c) const
Construct a constant TPU.
Definition builder.hpp:79
+
static constexpr TPU make_helper(Fun f, std::index_sequence< Is... >)
Definition builder.hpp:57
+
constexpr TPU id() const
Return the identity element of type TPU.
Definition builder.hpp:100
+
typename std::remove_reference_t< decltype((TPU{})[0])> type_elem
Type of the elements.
Definition builder.hpp:45
+
static constexpr size_t size
Number of elements.
Definition builder.hpp:51
+
std::array< type_elem, size > array
Array equivalent type.
Definition builder.hpp:54
+
A generic class for combinatorial integer vectors.
Definition vect_generic.hpp:48
diff --git a/classEqEpu8-members.html b/classEqEpu8-members.html index d4a8bfa..547cd05 100644 --- a/classEqEpu8-members.html +++ b/classEqEpu8-members.html @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
diff --git a/classEqEpu8.html b/classEqEpu8.html index 051a9e2..25b13cb 100644 --- a/classEqEpu8.html +++ b/classEqEpu8.html @@ -30,7 +30,7 @@
HPCombi
-
High Performance Combinatorics in C++ using vector instructions v0.0.6
+
High Performance Combinatorics in C++ using vector instructions v1.0.0
@@ -183,7 +183,7 @@

CF.cpp +
  • /Users/jdm/hpcombi/examples/CF.cpp
  • diff --git a/classHPCombi_1_1BMat8-members.html b/classHPCombi_1_1BMat8-members.html index c45a0cd..33e2fda 100644 --- a/classHPCombi_1_1BMat8-members.html +++ b/classHPCombi_1_1BMat8-members.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/classHPCombi_1_1BMat8.html b/classHPCombi_1_1BMat8.html index d00ed7b..ee0c242 100644 --- a/classHPCombi_1_1BMat8.html +++ b/classHPCombi_1_1BMat8.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -1706,8 +1706,8 @@

    bmat8.hpp -
  • /Users/jdm/git/HPCombi/include/hpcombi/bmat8_impl.hpp
  • +
  • /Users/jdm/hpcombi/include/hpcombi/bmat8.hpp
  • +
  • /Users/jdm/hpcombi/include/hpcombi/bmat8_impl.hpp
  • diff --git a/classcommon__eval16.html b/classcommon__eval16.html index d8c0dc7..d223b07 100644 --- a/classcommon__eval16.html +++ b/classcommon__eval16.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -98,7 +98,7 @@ Returns { 1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1}
    Warning
    The entries larger than 15 are ignored

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__first__diff.html b/classcommon__first__diff.html index 68c7e71..82ce97f 100644 --- a/classcommon__first__diff.html +++ b/classcommon__first__diff.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -100,7 +100,7 @@ then first_diff(a, b) returns 3, first_diff(a, b, 3) returns 16, first_diff(a, b, 4) returns 3, first_diff(a, b, 7) returns 3.
    Warning
    bound is assumed to be smaller than or equal to 16

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__horiz__max.html b/classcommon__horiz__max.html index 07fe5da..b3847ed 100644 --- a/classcommon__horiz__max.html +++ b/classcommon__horiz__max.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    Example:
    horiz_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2, 0,12, 0, 0, 0});
    Returns 12

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__horiz__min.html b/classcommon__horiz__min.html index 2ecdfaf..7e290a9 100644 --- a/classcommon__horiz__min.html +++ b/classcommon__horiz__min.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    Example:
    horiz_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 1, 3, 2, 2,12, 3, 4, 4});
    Returns 1

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__horiz__sum.html b/classcommon__horiz__sum.html index 6e28a33..25bd5b9 100644 --- a/classcommon__horiz__sum.html +++ b/classcommon__horiz__sum.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@ Returns 110
    Warning
    The result is supposed to fit in a uint8_t

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__inverse.html b/classcommon__inverse.html index 66e9498..6c31edf 100644 --- a/classcommon__inverse.html +++ b/classcommon__inverse.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@
    x.inverse()
    Returns
    {0,4,2,1,3,5,6,7,8,9,10,11,12,13,14,15} 

    The documentation for this class was generated from the following file: diff --git a/classcommon__inverse__pperm.html b/classcommon__inverse__pperm.html index 6ff68d7..d3edd5b 100644 --- a/classcommon__inverse__pperm.html +++ b/classcommon__inverse__pperm.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -93,7 +93,7 @@ Returns
    {0,0xFF,2,1,3,5,6,0xFF,8,9,0xFF,10,12,0xFF,0xFF,0xFF}
     * 

    The documentation for this class was generated from the following file: diff --git a/classcommon__is__permutation.html b/classcommon__is__permutation.html index 596459f..5944f67 100644 --- a/classcommon__is__permutation.html +++ b/classcommon__is__permutation.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -93,7 +93,7 @@

    If *this is a permutation of \(0\dots n-1\) for \(n<16\), it should be completed to a permutation of \(0\dots 15\) by adding fixed points. That is, the values \(i\geq n\) should be mapped to themselves.

    Example:
    The permutation \(\begin{matrix}0 1 2 3 4 5\\ 2 0 5 3 1 4 \end{matrix}\) is encoded by the array {2,0,5,3,1,4,6,7,8,9,10,11,12,13,14,15}

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__last__diff.html b/classcommon__last__diff.html index 91b08e3..54256f4 100644 --- a/classcommon__last__diff.html +++ b/classcommon__last__diff.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -100,7 +100,7 @@ then last_diff(a, b) returns 11, last_diff(a, b, 3) returns 16, last_diff(a, b, 4) returns 3, last_diff(a, b, 7) returns 3.
    Warning
    bound is assumed to be smaller than or equal to 16

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__left__weak__leq.html b/classcommon__left__weak__leq.html index b808b48..4d28cda 100644 --- a/classcommon__left__weak__leq.html +++ b/classcommon__left__weak__leq.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    x.left_weak_leq(y)
    Returns
    true 

    The documentation for this class was generated from the following file: diff --git a/classcommon__lehmer.html b/classcommon__lehmer.html index 2b2c4ce..570ceb3 100644 --- a/classcommon__lehmer.html +++ b/classcommon__lehmer.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@
    x.lehmer()
    Returns
    {0,2,1,1,0,0,0,0,0,0,0,0,0,0,0,0} 

    The documentation for this class was generated from the following file: diff --git a/classcommon__length.html b/classcommon__length.html index 4591637..cfb545e 100644 --- a/classcommon__length.html +++ b/classcommon__length.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@
    x.length()
    Returns
    4 

    The documentation for this class was generated from the following file: diff --git a/classcommon__merge.html b/classcommon__merge.html index 45d94b3..ba55140 100644 --- a/classcommon__merge.html +++ b/classcommon__merge.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -94,7 +94,7 @@
    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__nb__cycles.html b/classcommon__nb__cycles.html index e3b2963..f5b9e9b 100644 --- a/classcommon__nb__cycles.html +++ b/classcommon__nb__cycles.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@
    x.nb_cycles()
    Returns
    10 

    The documentation for this class was generated from the following file: diff --git a/classcommon__nb__descent.html b/classcommon__nb__descent.html index 2c7701b..96e1f14 100644 --- a/classcommon__nb__descent.html +++ b/classcommon__nb__descent.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -92,7 +92,7 @@
    x.length()
    Returns
    2 

    The documentation for this class was generated from the following file: diff --git a/classcommon__partial__max.html b/classcommon__partial__max.html index ea639d4..267333d 100644 --- a/classcommon__partial__max.html +++ b/classcommon__partial__max.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    Example:
    partial_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    Returns { 5, 5, 5, 5, 5, 6,12,12,12,12,12,12,12,13,14,15}

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__partial__min.html b/classcommon__partial__min.html index 2500b92..38f47f6 100644 --- a/classcommon__partial__min.html +++ b/classcommon__partial__min.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    Example:
    partial_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    Returns { 5, 5, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__partial__sums.html b/classcommon__partial__sums.html index f6b7075..b63dde5 100644 --- a/classcommon__partial__sums.html +++ b/classcommon__partial__sums.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -91,7 +91,7 @@
    Example:
    partial_sums(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    Returns { 5,10,12,17,18,24,36,40,40,43,45,56,68,81,95,110}

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classcommon__permutation__of.html b/classcommon__permutation__of.html index 387d821..17b52f0 100644 --- a/classcommon__permutation__of.html +++ b/classcommon__permutation__of.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -95,7 +95,7 @@
    Returns
    a HPCombi::epu8. For each \(0 \leq i < 16\), res[i] is the position in a of b[i] if b[i] appears exactly once in a, or undefined if not.

    The documentation for this class was generated from the following file:
      -
    • /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp
    • +
    • /Users/jdm/hpcombi/include/hpcombi/epu8.hpp
    diff --git a/classes.html b/classes.html index fe0eb1a..d4de4ce 100644 --- a/classes.html +++ b/classes.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/classlibsemigroups_1_1Timer-members.html b/classlibsemigroups_1_1Timer-members.html index e1e37b3..3effb55 100644 --- a/classlibsemigroups_1_1Timer-members.html +++ b/classlibsemigroups_1_1Timer-members.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/classlibsemigroups_1_1Timer.html b/classlibsemigroups_1_1Timer.html index 02ef638..549086a 100644 --- a/classlibsemigroups_1_1Timer.html +++ b/classlibsemigroups_1_1Timer.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -275,7 +275,7 @@

    timer.h +
  • /Users/jdm/hpcombi/examples/timer.h
  • diff --git a/debug_8hpp.html b/debug_8hpp.html index 8806659..84b6416 100644 --- a/debug_8hpp.html +++ b/debug_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/debug.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/debug.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/debug_8hpp_source.html b/debug_8hpp_source.html index 38b5b0a..e08c826 100644 --- a/debug_8hpp_source.html +++ b/debug_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/debug.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/debug.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,38 +77,44 @@
    debug.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2023 James D. Mitchell <jdm3@st-andrews.ac.uk> //
    -
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    -
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    -
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16#ifndef HPCOMBI_DEBUG_HPP_
    -
    17#define HPCOMBI_DEBUG_HPP_
    -
    18
    -
    19#ifdef HPCOMBI_DEBUG
    -
    20#include <cassert>
    -
    21#define HPCOMBI_ASSERT(x) assert(x)
    -
    22#else
    -
    23#define HPCOMBI_ASSERT(x)
    -
    24#endif
    -
    25
    -
    26#endif // HPCOMBI_DEBUG_HPP_
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2023-2024 James D. Mitchell <jdm3@st-andrews.ac.uk> //
    +
    3// Copyright (C) 2023-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    +
    4// //
    +
    5// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    +
    6// //
    +
    7// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    8// under the terms of the GNU General Public License as published by the //
    +
    9// Free Software Foundation, either version 3 of the License, or //
    +
    10// (at your option) any later version. //
    +
    11// //
    +
    12// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    13// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    14// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    15// for more details. //
    +
    16// //
    +
    17// You should have received a copy of the GNU General Public License along //
    +
    18// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    19//****************************************************************************//
    +
    20
    +
    21#ifndef HPCOMBI_DEBUG_HPP_
    +
    22#define HPCOMBI_DEBUG_HPP_
    +
    23
    +
    24#ifdef HPCOMBI_DEBUG
    +
    25#include <cassert>
    +
    26#define HPCOMBI_ASSERT(x) assert(x)
    +
    27#else
    +
    28#define HPCOMBI_ASSERT(x)
    +
    29#endif
    +
    30
    +
    31#endif // HPCOMBI_DEBUG_HPP_
    @@ -94,8 +94,8 @@ #include <cstdint>
    #include <ostream>
    #include <string>
    -#include "debug.hpp"
    #include "builder.hpp"
    +#include "debug.hpp"
    #include "vect_generic.hpp"
    #include "simde/x86/sse4.1.h"
    #include "simde/x86/sse4.2.h"
    diff --git a/epu8_8hpp_source.html b/epu8_8hpp_source.html index 6636745..e818807 100644 --- a/epu8_8hpp_source.html +++ b/epu8_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/epu8.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/epu8.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,282 +77,285 @@
    epu8.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016-2023 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16#ifndef HPCOMBI_EPU8_HPP_INCLUDED
    -
    17#define HPCOMBI_EPU8_HPP_INCLUDED
    -
    18
    -
    19#include <array> // for array
    -
    20#include <cstddef> // for size_t
    -
    21#include <cstdint> // for uint8_t, uint64_t, int8_t
    -
    22#include <ostream> // for ostream
    -
    23#include <string> // for string
    -
    24
    -
    25#include "debug.hpp" // for HPCOMBI_ASSERT
    -
    26#include "builder.hpp" // for TPUBuild
    -
    27#include "vect_generic.hpp" // for VectGeneric
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20#ifndef HPCOMBI_EPU8_HPP_
    +
    21#define HPCOMBI_EPU8_HPP_
    +
    22
    +
    23#include <array> // for array
    +
    24#include <cstddef> // for size_t
    +
    25#include <cstdint> // for uint8_t, uint64_t, int8_t
    +
    26#include <ostream> // for ostream
    +
    27#include <string> // for string
    28
    -
    29#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
    -
    30#include "simde/x86/sse4.2.h" // for ???
    -
    31
    -
    32namespace HPCombi {
    -
    33
    -
    35inline constexpr uint8_t
    -
    36operator"" _u8(unsigned long long arg) noexcept { // NOLINT
    -
    37 return static_cast<uint8_t>(arg);
    -
    38}
    -
    39
    -
    41using epu8 = uint8_t __attribute__((vector_size(16)));
    -
    42
    -
    43static_assert(alignof(epu8) == 16,
    -
    44 "epu8 type is not properly aligned by the compiler !");
    -
    45
    +
    29#include "builder.hpp" // for TPUBuild
    +
    30#include "debug.hpp" // for HPCOMBI_ASSERT
    +
    31#include "vect_generic.hpp" // for VectGeneric
    +
    32
    +
    33#include "simde/x86/sse4.1.h" // for simde_mm_max_epu8, simde...
    +
    34#include "simde/x86/sse4.2.h" // for ???
    +
    35
    +
    36namespace HPCombi {
    +
    37
    +
    39inline constexpr uint8_t
    +
    40operator"" _u8(unsigned long long arg) noexcept { // NOLINT
    +
    41 return static_cast<uint8_t>(arg);
    +
    42}
    +
    43
    +
    45using epu8 = uint8_t __attribute__((vector_size(16)));
    46
    -
    50constexpr TPUBuild<epu8> Epu8 {};
    -
    51
    -
    52
    -
    54inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
    -
    56inline bool is_all_one(epu8 a) noexcept {
    -
    57 return simde_mm_testc_si128(a, Epu8(0xFF));
    -
    58}
    -
    59
    -
    61inline bool equal(epu8 a, epu8 b) noexcept {
    -
    62 return is_all_zero(simde_mm_xor_si128(a, b));
    -
    63}
    -
    65inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
    -
    66
    -
    68inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
    -
    70inline epu8 permuted(epu8 a, epu8 b) noexcept {
    -
    71 return simde_mm_shuffle_epi8(a, b);
    -
    72}
    -
    76inline epu8 shifted_right(epu8 a) noexcept {
    -
    77 return simde_mm_bslli_si128(a, 1);
    -
    78}
    -
    82inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
    -
    84inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }
    -
    85
    -
    87inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
    -
    89inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); }
    -
    90
    -
    92inline bool is_sorted(epu8 a) noexcept;
    -
    98inline epu8 sorted(epu8 a) noexcept;
    -
    103inline epu8 sorted8(epu8 a) noexcept;
    -
    109inline epu8 revsorted(epu8 a) noexcept;
    -
    114inline epu8 revsorted8(epu8 a) noexcept;
    -
    115
    -
    120inline epu8 sort_perm(epu8 &a) noexcept;
    -
    125inline epu8 sort8_perm(epu8 &a) noexcept;
    -
    126
    -
    136inline void merge(epu8 &a, epu8 &b) noexcept;
    -
    137
    -
    146#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    150inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept;
    -
    151#endif
    -
    155inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept;
    -
    159inline epu8 permutation_of(epu8 a, epu8 b) noexcept;
    -
    160
    -
    162constexpr uint64_t prime = 0x9e3779b97f4a7bb9;
    -
    163
    -
    171inline epu8 random_epu8(uint16_t bnd);
    -
    172
    -
    180inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept;
    -
    181
    -
    197inline uint8_t horiz_sum_ref(epu8) noexcept;
    -
    203inline uint8_t horiz_sum_gen(epu8) noexcept;
    -
    208inline uint8_t horiz_sum4(epu8) noexcept;
    -
    213inline uint8_t horiz_sum3(epu8) noexcept;
    -
    215inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); }
    -
    216
    -
    231inline epu8 partial_sums_ref(epu8) noexcept;
    -
    237inline epu8 partial_sums_gen(epu8) noexcept;
    -
    242inline epu8 partial_sums_round(epu8) noexcept;
    -
    244inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); }
    -
    245
    -
    260inline uint8_t horiz_max_ref(epu8) noexcept;
    -
    266inline uint8_t horiz_max_gen(epu8) noexcept;
    -
    271inline uint8_t horiz_max4(epu8) noexcept;
    -
    276inline uint8_t horiz_max3(epu8) noexcept;
    -
    278inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); }
    -
    279
    -
    294inline epu8 partial_max_ref(epu8) noexcept;
    -
    300inline epu8 partial_max_gen(epu8) noexcept;
    -
    305inline epu8 partial_max_round(epu8) noexcept;
    -
    307inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); }
    -
    308
    -
    323inline uint8_t horiz_min_ref(epu8) noexcept;
    -
    329inline uint8_t horiz_min_gen(epu8) noexcept;
    -
    334inline uint8_t horiz_min4(epu8) noexcept;
    -
    339inline uint8_t horiz_min3(epu8) noexcept;
    -
    341inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); }
    -
    342
    -
    357inline epu8 partial_min_ref(epu8) noexcept;
    -
    363inline epu8 partial_min_gen(epu8) noexcept;
    -
    368inline epu8 partial_min_round(epu8) noexcept;
    -
    370inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); }
    -
    371
    -
    389inline epu8 eval16_ref(epu8 v) noexcept;
    -
    394inline epu8 eval16_arr(epu8 v) noexcept;
    -
    399inline epu8 eval16_cycle(epu8 v) noexcept;
    -
    404inline epu8 eval16_popcount(epu8 v) noexcept;
    -
    406inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); }
    -
    407
    -
    430inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    431#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    436inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    437#endif
    -
    442inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    444inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
    -
    445 return first_diff_mask(a, b, bound);
    -
    446}
    -
    447
    -
    470inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    471#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    476inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    477#endif
    -
    482inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
    -
    484inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
    -
    485 return last_diff_mask(a, b, bound);
    -
    486}
    -
    487
    -
    489inline bool less(epu8 a, epu8 b) noexcept;
    -
    495inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept;
    -
    496
    -
    500inline uint64_t first_zero(epu8 v, int bnd) noexcept;
    -
    504inline uint64_t last_zero(epu8 v, int bnd) noexcept;
    -
    508inline uint64_t first_non_zero(epu8 v, int bnd) noexcept;
    -
    512inline uint64_t last_non_zero(epu8 v, int bnd) noexcept;
    -
    513
    -
    516inline epu8 popcount16(epu8 v) noexcept;
    -
    517
    -
    533inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept;
    -
    534
    -
    550inline bool is_transformation(epu8 v, const size_t k = 16) noexcept;
    -
    551
    -
    568inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept;
    -
    569
    -
    585#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    589inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept;
    -
    590#endif
    -
    594inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept;
    -
    598inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept;
    -
    602inline bool is_permutation(epu8 v, const size_t k = 16) noexcept;
    -
    603
    -
    604} // namespace HPCombi
    +
    47static_assert(alignof(epu8) == 16,
    +
    48 "epu8 type is not properly aligned by the compiler !");
    +
    49
    +
    53constexpr TPUBuild<epu8> Epu8{};
    +
    54
    +
    56inline bool is_all_zero(epu8 a) noexcept { return simde_mm_testz_si128(a, a); }
    +
    58inline bool is_all_one(epu8 a) noexcept {
    +
    59 return simde_mm_testc_si128(a, Epu8(0xFF));
    +
    60}
    +
    61
    +
    63inline bool equal(epu8 a, epu8 b) noexcept {
    +
    64 return is_all_zero(simde_mm_xor_si128(a, b));
    +
    65}
    +
    67inline bool not_equal(epu8 a, epu8 b) noexcept { return !equal(a, b); }
    +
    68
    +
    70inline epu8 permuted_ref(epu8 a, epu8 b) noexcept;
    +
    72inline epu8 permuted(epu8 a, epu8 b) noexcept {
    +
    73 return simde_mm_shuffle_epi8(a, b);
    +
    74}
    +
    78inline epu8 shifted_right(epu8 a) noexcept {
    +
    79 return simde_mm_bslli_si128(a, 1);
    +
    80}
    +
    84inline epu8 shifted_left(epu8 a) noexcept { return simde_mm_bsrli_si128(a, 1); }
    +
    86inline epu8 reverted(epu8 a) noexcept { return permuted(a, Epu8.rev()); }
    +
    87
    +
    89inline epu8 min(epu8 a, epu8 b) noexcept { return simde_mm_min_epu8(a, b); }
    +
    91inline epu8 max(epu8 a, epu8 b) noexcept { return simde_mm_max_epu8(a, b); }
    +
    92
    +
    94inline bool is_sorted(epu8 a) noexcept;
    +
    100inline epu8 sorted(epu8 a) noexcept;
    +
    105inline epu8 sorted8(epu8 a) noexcept;
    +
    111inline epu8 revsorted(epu8 a) noexcept;
    +
    116inline epu8 revsorted8(epu8 a) noexcept;
    +
    117
    +
    122inline epu8 sort_perm(epu8 &a) noexcept;
    +
    127inline epu8 sort8_perm(epu8 &a) noexcept;
    +
    128
    +
    138inline void merge(epu8 &a, epu8 &b) noexcept;
    +
    139
    +
    148#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    152inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept;
    +
    153#endif
    +
    157inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept;
    +
    161inline epu8 permutation_of(epu8 a, epu8 b) noexcept;
    +
    162
    +
    164constexpr uint64_t prime = 0x9e3779b97f4a7bb9;
    +
    165
    +
    173inline epu8 random_epu8(uint16_t bnd);
    +
    174
    +
    182inline epu8 remove_dups(epu8 a, uint8_t repl = 0) noexcept;
    +
    183
    +
    199inline uint8_t horiz_sum_ref(epu8) noexcept;
    +
    205inline uint8_t horiz_sum_gen(epu8) noexcept;
    +
    210inline uint8_t horiz_sum4(epu8) noexcept;
    +
    215inline uint8_t horiz_sum3(epu8) noexcept;
    +
    217inline uint8_t horiz_sum(epu8 v) noexcept { return horiz_sum3(v); }
    +
    218
    +
    233inline epu8 partial_sums_ref(epu8) noexcept;
    +
    239inline epu8 partial_sums_gen(epu8) noexcept;
    +
    244inline epu8 partial_sums_round(epu8) noexcept;
    +
    246inline epu8 partial_sums(epu8 v) noexcept { return partial_sums_round(v); }
    +
    247
    +
    262inline uint8_t horiz_max_ref(epu8) noexcept;
    +
    268inline uint8_t horiz_max_gen(epu8) noexcept;
    +
    273inline uint8_t horiz_max4(epu8) noexcept;
    +
    278inline uint8_t horiz_max3(epu8) noexcept;
    +
    280inline uint8_t horiz_max(epu8 v) noexcept { return horiz_max4(v); }
    +
    281
    +
    296inline epu8 partial_max_ref(epu8) noexcept;
    +
    302inline epu8 partial_max_gen(epu8) noexcept;
    +
    307inline epu8 partial_max_round(epu8) noexcept;
    +
    309inline epu8 partial_max(epu8 v) noexcept { return partial_max_round(v); }
    +
    310
    +
    325inline uint8_t horiz_min_ref(epu8) noexcept;
    +
    331inline uint8_t horiz_min_gen(epu8) noexcept;
    +
    336inline uint8_t horiz_min4(epu8) noexcept;
    +
    341inline uint8_t horiz_min3(epu8) noexcept;
    +
    343inline uint8_t horiz_min(epu8 v) noexcept { return horiz_min4(v); }
    +
    344
    +
    359inline epu8 partial_min_ref(epu8) noexcept;
    +
    365inline epu8 partial_min_gen(epu8) noexcept;
    +
    370inline epu8 partial_min_round(epu8) noexcept;
    +
    372inline epu8 partial_min(epu8 v) noexcept { return partial_min_round(v); }
    +
    373
    +
    391inline epu8 eval16_ref(epu8 v) noexcept;
    +
    396inline epu8 eval16_arr(epu8 v) noexcept;
    +
    401inline epu8 eval16_cycle(epu8 v) noexcept;
    +
    406inline epu8 eval16_popcount(epu8 v) noexcept;
    +
    408inline epu8 eval16(epu8 v) noexcept { return eval16_cycle(v); }
    +
    409
    +
    432inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    433#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    438inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    439#endif
    +
    444inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    446inline uint64_t first_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
    +
    447 return first_diff_mask(a, b, bound);
    +
    448}
    +
    449
    +
    472inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    473#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    478inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    479#endif
    +
    484inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound = 16) noexcept;
    +
    486inline uint64_t last_diff(epu8 a, epu8 b, size_t bound = 16) noexcept {
    +
    487 return last_diff_mask(a, b, bound);
    +
    488}
    +
    489
    +
    491inline bool less(epu8 a, epu8 b) noexcept;
    +
    497inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept;
    +
    498
    +
    502inline uint64_t first_zero(epu8 v, int bnd) noexcept;
    +
    506inline uint64_t last_zero(epu8 v, int bnd) noexcept;
    +
    510inline uint64_t first_non_zero(epu8 v, int bnd) noexcept;
    +
    514inline uint64_t last_non_zero(epu8 v, int bnd) noexcept;
    +
    515
    +
    518inline epu8 popcount16(epu8 v) noexcept;
    +
    519
    +
    535inline bool is_partial_transformation(epu8 v, const size_t k = 16) noexcept;
    +
    536
    +
    552inline bool is_transformation(epu8 v, const size_t k = 16) noexcept;
    +
    553
    +
    570inline bool is_partial_permutation(epu8 v, const size_t k = 16) noexcept;
    +
    571
    +
    587#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    591inline bool is_permutation_cpmestri(epu8 v, const size_t k = 16) noexcept;
    +
    592#endif
    +
    596inline bool is_permutation_sort(epu8 v, const size_t k = 16) noexcept;
    +
    600inline bool is_permutation_eval(epu8 v, const size_t k = 16) noexcept;
    +
    604inline bool is_permutation(epu8 v, const size_t k = 16) noexcept;
    605
    -
    606namespace std {
    +
    606} // namespace HPCombi
    607
    -
    608inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a);
    +
    608namespace std {
    609
    -
    610inline std::string to_string(HPCombi::epu8 const &a);
    +
    610inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a);
    611
    -
    618} // namespace std
    -
    619
    -
    620#include "epu8_impl.hpp"
    +
    612inline std::string to_string(HPCombi::epu8 const &a);
    +
    613
    +
    620} // namespace std
    621
    -
    622#endif // HPCOMBI_EPU8_HPP_INCLUDED
    +
    622#include "epu8_impl.hpp"
    +
    623
    +
    624#endif // HPCOMBI_EPU8_HPP_
    -
    std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
    Definition image.cpp:31
    -
    Definition bmat8.hpp:37
    -
    uint8_t horiz_min4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:410
    -
    epu8 max(epu8 a, epu8 b) noexcept
    Vector max between two HPCombi::epu8 0.
    Definition epu8.hpp:89
    -
    uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:85
    -
    uint64_t first_non_zero(epu8 v, int bnd) noexcept
    return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
    Definition epu8_impl.hpp:119
    -
    uint8_t horiz_min_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:401
    -
    epu8 eval16_arr(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:444
    -
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:70
    -
    epu8 sort8_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:212
    -
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:76
    -
    uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:70
    -
    uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:81
    -
    epu8 partial_sums_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:349
    -
    epu8 partial_sums(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:244
    -
    uint8_t horiz_min(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:341
    -
    epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
    Remove duplicates in a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:252
    -
    epu8 revsorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half reverse sorted.
    Definition epu8_impl.hpp:205
    -
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:295
    -
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:522
    -
    int8_t less_partial(epu8 a, epu8 b, int k) noexcept
    Partial lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:106
    -
    uint8_t horiz_max4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:375
    -
    uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:98
    -
    uint8_t horiz_max3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:376
    -
    bool is_all_one(epu8 a) noexcept
    Test whether all the entries of a HPCombi::epu8 are one.
    Definition epu8.hpp:56
    -
    constexpr uint64_t prime
    A prime number good for hashing.
    Definition epu8.hpp:162
    -
    uint8_t horiz_min_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:407
    -
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:491
    -
    bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:513
    -
    uint64_t last_zero(epu8 v, int bnd) noexcept
    return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
    Definition epu8_impl.hpp:116
    -
    void merge(epu8 &a, epu8 &b) noexcept
    Merge two sorted epu8.
    Definition epu8_impl.hpp:233
    -
    uint8_t horiz_sum4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:340
    -
    epu8 popcount16(epu8 v) noexcept
    a vector popcount function
    Definition epu8_impl.hpp:472
    -
    epu8 partial_sums_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:360
    -
    uint8_t horiz_sum3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:341
    -
    uint8_t horiz_sum_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:331
    -
    epu8 partial_sums_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:356
    -
    bool equal(epu8 a, epu8 b) noexcept
    Equality of HPCombi::epu8.
    Definition epu8.hpp:61
    -
    epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:286
    -
    epu8 min(epu8 a, epu8 b) noexcept
    Vector min between two HPCombi::epu8 0.
    Definition epu8.hpp:87
    -
    epu8 partial_max_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:384
    -
    epu8 sorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half sorted.
    Definition epu8_impl.hpp:199
    -
    epu8 partial_max_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:395
    -
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:406
    -
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:215
    -
    uint8_t horiz_max(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:278
    -
    epu8 reverted(epu8 a) noexcept
    Reverting a HPCombi::epu8.
    Definition epu8.hpp:84
    -
    epu8 eval16_cycle(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:455
    -
    epu8 eval16_ref(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:436
    -
    epu8 partial_max_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:391
    -
    bool less(epu8 a, epu8 b) noexcept
    Lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:102
    -
    uint8_t horiz_max_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:366
    -
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:196
    -
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:50
    -
    bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:517
    -
    uint64_t first_zero(epu8 v, int bnd) noexcept
    return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
    Definition epu8_impl.hpp:113
    -
    bool is_all_zero(epu8 a) noexcept
    Test whether all the entries of a HPCombi::epu8 are zero.
    Definition epu8.hpp:54
    -
    epu8 random_epu8(uint16_t bnd)
    A random HPCombi::epu8.
    Definition epu8_impl.hpp:240
    -
    epu8 partial_min(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:370
    -
    epu8 revsorted(epu8 a) noexcept
    Return a reverse sorted HPCombi::epu8.
    Definition epu8_impl.hpp:202
    -
    epu8 partial_min_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:430
    -
    bool is_sorted(epu8 a) noexcept
    Testing if a HPCombi::epu8 is sorted.
    Definition epu8_impl.hpp:193
    -
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:41
    -
    uint64_t last_non_zero(epu8 v, int bnd) noexcept
    return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
    Definition epu8_impl.hpp:122
    -
    uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8.hpp:484
    -
    epu8 eval16_popcount(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:463
    -
    epu8 sort_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:209
    -
    epu8 partial_min_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:426
    -
    epu8 partial_min_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:419
    -
    uint8_t horiz_max_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:372
    -
    epu8 shifted_left(epu8 a) noexcept
    Right shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:82
    -
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:485
    -
    uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8.hpp:444
    -
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:477
    -
    epu8 permuted_ref(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8_impl.hpp:53
    -
    epu8 partial_max(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:307
    -
    uint8_t horiz_sum_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:337
    -
    uint8_t horiz_min3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:411
    -
    bool not_equal(epu8 a, epu8 b) noexcept
    Non equality of HPCombi::epu8.
    Definition epu8.hpp:65
    -
    Definition bmat8.hpp:360
    -
    Class for factory object associated to a SIMD packed unsigned integers.
    Definition builder.hpp:39
    +
    std::ostream & operator<<(std::ostream &out, const std::vector< T > &v)
    Definition image.cpp:35
    +
    Definition bmat8.hpp:41
    +
    uint8_t horiz_min4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:418
    +
    epu8 max(epu8 a, epu8 b) noexcept
    Vector max between two HPCombi::epu8 0.
    Definition epu8.hpp:91
    +
    uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:92
    +
    uint64_t first_non_zero(epu8 v, int bnd) noexcept
    return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
    Definition epu8_impl.hpp:126
    +
    uint8_t horiz_min_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:409
    +
    epu8 eval16_arr(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:452
    +
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:72
    +
    epu8 sort8_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:219
    +
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:78
    +
    uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:77
    +
    uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:88
    +
    epu8 partial_sums_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:357
    +
    epu8 partial_sums(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:246
    +
    uint8_t horiz_min(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:343
    +
    epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
    Remove duplicates in a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:260
    +
    epu8 revsorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half reverse sorted.
    Definition epu8_impl.hpp:212
    +
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:303
    +
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:530
    +
    int8_t less_partial(epu8 a, epu8 b, int k) noexcept
    Partial lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:113
    +
    uint8_t horiz_max4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:383
    +
    uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:105
    +
    uint8_t horiz_max3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:384
    +
    bool is_all_one(epu8 a) noexcept
    Test whether all the entries of a HPCombi::epu8 are one.
    Definition epu8.hpp:58
    +
    constexpr uint64_t prime
    A prime number good for hashing.
    Definition epu8.hpp:164
    +
    uint8_t horiz_min_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:415
    +
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:499
    +
    bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:521
    +
    uint64_t last_zero(epu8 v, int bnd) noexcept
    return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
    Definition epu8_impl.hpp:123
    +
    void merge(epu8 &a, epu8 &b) noexcept
    Merge two sorted epu8.
    Definition epu8_impl.hpp:240
    +
    uint8_t horiz_sum4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:348
    +
    epu8 popcount16(epu8 v) noexcept
    a vector popcount function
    Definition epu8_impl.hpp:480
    +
    epu8 partial_sums_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:368
    +
    uint8_t horiz_sum3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:349
    +
    uint8_t horiz_sum_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:339
    +
    epu8 partial_sums_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:364
    +
    bool equal(epu8 a, epu8 b) noexcept
    Equality of HPCombi::epu8.
    Definition epu8.hpp:63
    +
    epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:294
    +
    epu8 min(epu8 a, epu8 b) noexcept
    Vector min between two HPCombi::epu8 0.
    Definition epu8.hpp:89
    +
    epu8 partial_max_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:392
    +
    epu8 sorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half sorted.
    Definition epu8_impl.hpp:206
    +
    epu8 partial_max_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:403
    +
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:408
    +
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:217
    +
    uint8_t horiz_max(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:280
    +
    epu8 reverted(epu8 a) noexcept
    Reverting a HPCombi::epu8.
    Definition epu8.hpp:86
    +
    epu8 eval16_cycle(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:463
    +
    epu8 eval16_ref(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:444
    +
    epu8 partial_max_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:399
    +
    bool less(epu8 a, epu8 b) noexcept
    Lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:109
    +
    uint8_t horiz_max_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:374
    +
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:203
    +
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:53
    +
    bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:525
    +
    uint64_t first_zero(epu8 v, int bnd) noexcept
    return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
    Definition epu8_impl.hpp:120
    +
    bool is_all_zero(epu8 a) noexcept
    Test whether all the entries of a HPCombi::epu8 are zero.
    Definition epu8.hpp:56
    +
    epu8 random_epu8(uint16_t bnd)
    A random HPCombi::epu8.
    Definition epu8_impl.hpp:248
    +
    epu8 partial_min(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:372
    +
    epu8 revsorted(epu8 a) noexcept
    Return a reverse sorted HPCombi::epu8.
    Definition epu8_impl.hpp:209
    +
    epu8 partial_min_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:438
    +
    bool is_sorted(epu8 a) noexcept
    Testing if a HPCombi::epu8 is sorted.
    Definition epu8_impl.hpp:200
    +
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:45
    +
    uint64_t last_non_zero(epu8 v, int bnd) noexcept
    return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
    Definition epu8_impl.hpp:129
    +
    uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8.hpp:486
    +
    epu8 eval16_popcount(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:471
    +
    epu8 sort_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:216
    +
    epu8 partial_min_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:434
    +
    epu8 partial_min_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:427
    +
    uint8_t horiz_max_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:380
    +
    epu8 shifted_left(epu8 a) noexcept
    Right shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:84
    +
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:493
    +
    uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8.hpp:446
    +
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:485
    +
    epu8 permuted_ref(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8_impl.hpp:59
    +
    epu8 partial_max(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:309
    +
    uint8_t horiz_sum_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:345
    +
    uint8_t horiz_min3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:419
    +
    bool not_equal(epu8 a, epu8 b) noexcept
    Non equality of HPCombi::epu8.
    Definition epu8.hpp:67
    +
    Definition bmat8.hpp:364
    +
    Class for factory object associated to a SIMD packed unsigned integers.
    Definition builder.hpp:43
    diff --git a/epu8__impl_8hpp.html b/epu8__impl_8hpp.html index e997a46..e8c3994 100644 --- a/epu8__impl_8hpp.html +++ b/epu8__impl_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/epu8_impl.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/epu8_impl.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/epu8__impl_8hpp_source.html b/epu8__impl_8hpp_source.html index db01a1e..fa0c100 100644 --- a/epu8__impl_8hpp_source.html +++ b/epu8__impl_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/epu8_impl.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/epu8_impl.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,671 +77,681 @@
    epu8_impl.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16// This is the implementation part of epu8.hpp this should be seen as
    -
    17// implementation details and should not be included directly.
    -
    18
    -
    19#include <initializer_list>
    -
    20#include <iostream>
    -
    21#include <random>
    -
    22#include <sstream>
    -
    23
    -
    24#include "vect_generic.hpp"
    -
    25
    -
    26#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    27// Comparison mode for _mm_cmpestri
    -
    28#define FIRST_DIFF \
    -
    29 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \
    -
    30 SIMDE_SIDD_NEGATIVE_POLARITY)
    -
    31#define LAST_DIFF \
    -
    32 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \
    -
    33 SIMDE_SIDD_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT)
    -
    34#define FIRST_ZERO (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY)
    -
    35#define LAST_ZERO \
    -
    36 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \
    -
    37 SIMDE_SIDD_MOST_SIGNIFICANT)
    -
    38#define FIRST_NON_ZERO \
    -
    39 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \
    -
    40 SIMDE_SIDD_MASKED_NEGATIVE_POLARITY)
    -
    41#define LAST_NON_ZERO \
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20// NOLINT(build/header_guard)
    +
    21
    +
    22// This is the implementation part of epu8.hpp this should be seen as
    +
    23// implementation details and should not be included directly.
    +
    24
    +
    25#include <initializer_list>
    +
    26#include <iostream>
    +
    27#include <random>
    +
    28#include <sstream>
    +
    29
    +
    30#include "vect_generic.hpp"
    +
    31
    +
    32#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    33// Comparison mode for _mm_cmpestri
    +
    34#define FIRST_DIFF \
    +
    35 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \
    +
    36 SIMDE_SIDD_NEGATIVE_POLARITY)
    +
    37#define LAST_DIFF \
    +
    38 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_EACH | \
    +
    39 SIMDE_SIDD_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT)
    +
    40#define FIRST_ZERO (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY)
    +
    41#define LAST_ZERO \
    42 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \
    -
    43 SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT)
    -
    44#endif
    -
    45
    -
    46namespace HPCombi {
    -
    47
    +
    43 SIMDE_SIDD_MOST_SIGNIFICANT)
    +
    44#define FIRST_NON_ZERO \
    +
    45 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \
    +
    46 SIMDE_SIDD_MASKED_NEGATIVE_POLARITY)
    +
    47#define LAST_NON_ZERO \
    +
    48 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | \
    +
    49 SIMDE_SIDD_MASKED_NEGATIVE_POLARITY | SIMDE_SIDD_MOST_SIGNIFICANT)
    +
    50#endif
    51
    -
    53inline epu8 permuted_ref(epu8 a, epu8 b) noexcept {
    -
    54 epu8 res;
    -
    55 for (uint64_t i = 0; i < 16; i++)
    -
    56 res[i] = a[b[i] & 0xF];
    -
    57 return res;
    -
    58}
    -
    59
    -
    60// Msk is supposed to be a boolean mask (i.e. each entry is either 0 or 255)
    -
    61inline uint64_t first_mask(epu8 msk, size_t bound) {
    -
    62 uint64_t res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound)));
    -
    63 return res == 0 ? 16 : (__builtin_ffsll(res) - 1);
    +
    52namespace HPCombi {
    +
    53
    +
    55// Implementation part for inline functions
    +
    57
    +
    59inline epu8 permuted_ref(epu8 a, epu8 b) noexcept {
    +
    60 epu8 res;
    +
    61 for (uint64_t i = 0; i < 16; i++)
    +
    62 res[i] = a[b[i] & 0xF];
    +
    63 return res;
    64}
    -
    65inline uint64_t last_mask(epu8 msk, size_t bound) {
    -
    66 auto res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound)));
    -
    67 return res == 0 ? 16 : (63 - __builtin_clzll(res));
    -
    68}
    -
    69
    -
    70inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
    -
    71 for (size_t i = 0; i < bound; i++)
    -
    72 if (a[i] != b[i])
    -
    73 return i;
    -
    74 return 16;
    +
    65
    +
    66// Msk is supposed to be a boolean mask (i.e. each entry is either 0 or
    +
    67// 255)
    +
    68inline uint64_t first_mask(epu8 msk, size_t bound) {
    +
    69 uint64_t res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound)));
    +
    70 return res == 0 ? 16 : (__builtin_ffsll(res) - 1);
    +
    71}
    +
    72inline uint64_t last_mask(epu8 msk, size_t bound) {
    +
    73 auto res = simde_mm_movemask_epi8(msk & (Epu8.id() < Epu8(bound)));
    +
    74 return res == 0 ? 16 : (63 - __builtin_clzll(res));
    75}
    -
    76#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    77inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
    -
    78 return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF));
    -
    79}
    -
    80#endif
    -
    81inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
    -
    82 return first_mask(a != b, bound);
    -
    83}
    -
    84
    -
    85inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
    -
    86 while (bound != 0) {
    -
    87 --bound;
    -
    88 if (a[bound] != b[bound])
    -
    89 return bound;
    -
    90 }
    -
    91 return 16;
    -
    92}
    -
    93#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    94inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
    -
    95 return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF));
    -
    96}
    -
    97#endif
    -
    98inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
    -
    99 return last_mask(a != b, bound);
    -
    100}
    -
    101
    -
    102inline bool less(epu8 a, epu8 b) noexcept {
    -
    103 uint64_t diff = first_diff(a, b);
    -
    104 return (diff < 16) && (a[diff] < b[diff]);
    -
    105}
    -
    106inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept {
    -
    107 uint64_t diff = first_diff(a, b, k);
    -
    108 return (diff == 16)
    -
    109 ? 0
    -
    110 : static_cast<int8_t>(a[diff]) - static_cast<int8_t>(b[diff]);
    -
    111}
    -
    112
    -
    113inline uint64_t first_zero(epu8 v, int bnd) noexcept {
    -
    114 return first_mask(v == epu8{}, bnd);
    -
    115}
    -
    116inline uint64_t last_zero(epu8 v, int bnd) noexcept {
    -
    117 return last_mask(v == epu8{}, bnd);
    +
    76
    +
    77inline uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
    +
    78 for (size_t i = 0; i < bound; i++)
    +
    79 if (a[i] != b[i])
    +
    80 return i;
    +
    81 return 16;
    +
    82}
    +
    83#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    84inline uint64_t first_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
    +
    85 return unsigned(_mm_cmpestri(a, bound, b, bound, FIRST_DIFF));
    +
    86}
    +
    87#endif
    +
    88inline uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
    +
    89 return first_mask(a != b, bound);
    +
    90}
    +
    91
    +
    92inline uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound) noexcept {
    +
    93 while (bound != 0) {
    +
    94 --bound;
    +
    95 if (a[bound] != b[bound])
    +
    96 return bound;
    +
    97 }
    +
    98 return 16;
    +
    99}
    +
    100#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    101inline uint64_t last_diff_cmpstr(epu8 a, epu8 b, size_t bound) noexcept {
    +
    102 return unsigned(_mm_cmpestri(a, bound, b, bound, LAST_DIFF));
    +
    103}
    +
    104#endif
    +
    105inline uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound) noexcept {
    +
    106 return last_mask(a != b, bound);
    +
    107}
    +
    108
    +
    109inline bool less(epu8 a, epu8 b) noexcept {
    +
    110 uint64_t diff = first_diff(a, b);
    +
    111 return (diff < 16) && (a[diff] < b[diff]);
    +
    112}
    +
    113inline int8_t less_partial(epu8 a, epu8 b, int k) noexcept {
    +
    114 uint64_t diff = first_diff(a, b, k);
    +
    115 return (diff == 16)
    +
    116 ? 0
    +
    117 : static_cast<int8_t>(a[diff]) - static_cast<int8_t>(b[diff]);
    118}
    -
    119inline uint64_t first_non_zero(epu8 v, int bnd) noexcept {
    -
    120 return first_mask(v != epu8{}, bnd);
    -
    121}
    -
    122inline uint64_t last_non_zero(epu8 v, int bnd) noexcept {
    -
    123 return last_mask(v != epu8{}, bnd);
    -
    124}
    -
    125
    -
    127template <bool Increasing = true, size_t sz>
    -
    128inline epu8 network_sort(epu8 res, std::array<epu8, sz> rounds) {
    -
    129 for (auto round : rounds) {
    -
    130 // This conditional should be optimized out by the compiler
    -
    131 epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round;
    -
    132 epu8 b = permuted(res, round);
    -
    133 // res = mask ? min(res,b) : max(res,b); is not accepted by clang
    -
    134 res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask);
    -
    135 }
    -
    136 return res;
    -
    137}
    -
    138
    -
    140template <bool Increasing = true, size_t sz>
    -
    141inline epu8 network_sort_perm(epu8 &v, std::array<epu8, sz> rounds) {
    -
    142 epu8 res = Epu8.id();
    -
    143 for (auto round : rounds) {
    -
    144 // This conditional should be optimized out by the compiler
    -
    145 epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round;
    -
    146 epu8 b = permuted(v, round);
    -
    147 epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask);
    -
    148 v = simde_mm_blendv_epi8(v, b, cmp);
    -
    149 res = simde_mm_blendv_epi8(res, permuted(res, round), cmp);
    -
    150 }
    -
    151 return res;
    -
    152}
    -
    153
    -
    160constexpr std::array<epu8, 9> sorting_rounds
    -
    161 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    162 {{epu8{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    -
    163 epu8{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    -
    164 epu8{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    -
    165 epu8{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7},
    -
    166 epu8{0, 2, 1, 12, 8, 10, 9, 11, 4, 6, 5, 7, 3, 14, 13, 15},
    -
    167 epu8{0, 4, 8, 10, 1, 9, 12, 13, 2, 5, 3, 14, 6, 7, 11, 15},
    -
    168 epu8{0, 1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 14, 15},
    -
    169 epu8{0, 1, 2, 6, 4, 8, 3, 10, 5, 12, 7, 11, 9, 13, 14, 15},
    -
    170 epu8{0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 13, 14, 15}}};
    -
    171
    -
    180constexpr std::array<epu8, 6> sorting_rounds8
    -
    181 // clang-format off
    -
    182 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    183{{
    -
    184 epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    -
    185 epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    -
    186 epu8 { 0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15},
    -
    187 epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    -
    188 epu8 { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
    -
    189 epu8 { 0, 2, 1, 4, 3, 6, 5, 7, 8, 10, 9, 12, 11, 14, 13, 15}
    -
    190}};
    -
    191// clang-format on
    -
    192
    -
    193inline bool is_sorted(epu8 a) noexcept {
    -
    194 return simde_mm_movemask_epi8(shifted_right(a) > a) == 0;
    -
    195}
    -
    196inline epu8 sorted(epu8 a) noexcept {
    -
    197 return network_sort<true>(a, sorting_rounds);
    -
    198}
    -
    199inline epu8 sorted8(epu8 a) noexcept {
    -
    200 return network_sort<true>(a, sorting_rounds8);
    -
    201}
    -
    202inline epu8 revsorted(epu8 a) noexcept {
    -
    203 return network_sort<false>(a, sorting_rounds);
    -
    204}
    -
    205inline epu8 revsorted8(epu8 a) noexcept {
    -
    206 return network_sort<false>(a, sorting_rounds8);
    -
    207}
    -
    208
    -
    209inline epu8 sort_perm(epu8 &a) noexcept {
    -
    210 return network_sort_perm<true>(a, sorting_rounds);
    +
    119
    +
    120inline uint64_t first_zero(epu8 v, int bnd) noexcept {
    +
    121 return first_mask(v == epu8{}, bnd);
    +
    122}
    +
    123inline uint64_t last_zero(epu8 v, int bnd) noexcept {
    +
    124 return last_mask(v == epu8{}, bnd);
    +
    125}
    +
    126inline uint64_t first_non_zero(epu8 v, int bnd) noexcept {
    +
    127 return first_mask(v != epu8{}, bnd);
    +
    128}
    +
    129inline uint64_t last_non_zero(epu8 v, int bnd) noexcept {
    +
    130 return last_mask(v != epu8{}, bnd);
    +
    131}
    +
    132
    +
    134template <bool Increasing = true, size_t sz>
    +
    135inline epu8 network_sort(epu8 res, std::array<epu8, sz> rounds) {
    +
    136 for (auto round : rounds) {
    +
    137 // This conditional should be optimized out by the compiler
    +
    138 epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round;
    +
    139 epu8 b = permuted(res, round);
    +
    140 // res = mask ? min(res,b) : max(res,b); is not accepted by clang
    +
    141 res = simde_mm_blendv_epi8(min(res, b), max(res, b), mask);
    +
    142 }
    +
    143 return res;
    +
    144}
    +
    145
    +
    147template <bool Increasing = true, size_t sz>
    +
    148inline epu8 network_sort_perm(epu8 &v, std::array<epu8, sz> rounds) {
    +
    149 epu8 res = Epu8.id();
    +
    150 for (auto round : rounds) {
    +
    151 // This conditional should be optimized out by the compiler
    +
    152 epu8 mask = Increasing ? round < Epu8.id() : Epu8.id() < round;
    +
    153 epu8 b = permuted(v, round);
    +
    154 epu8 cmp = simde_mm_blendv_epi8(b < v, v < b, mask);
    +
    155 v = simde_mm_blendv_epi8(v, b, cmp);
    +
    156 res = simde_mm_blendv_epi8(res, permuted(res, round), cmp);
    +
    157 }
    +
    158 return res;
    +
    159}
    +
    160
    +
    167constexpr std::array<epu8, 9> sorting_rounds
    +
    168 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    169 {{epu8{1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    +
    170 epu8{2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    +
    171 epu8{4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    +
    172 epu8{8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7},
    +
    173 epu8{0, 2, 1, 12, 8, 10, 9, 11, 4, 6, 5, 7, 3, 14, 13, 15},
    +
    174 epu8{0, 4, 8, 10, 1, 9, 12, 13, 2, 5, 3, 14, 6, 7, 11, 15},
    +
    175 epu8{0, 1, 4, 5, 2, 3, 8, 9, 6, 7, 12, 13, 10, 11, 14, 15},
    +
    176 epu8{0, 1, 2, 6, 4, 8, 3, 10, 5, 12, 7, 11, 9, 13, 14, 15},
    +
    177 epu8{0, 1, 2, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 13, 14, 15}}};
    +
    178
    +
    187constexpr std::array<epu8, 6> sorting_rounds8
    +
    188 // clang-format off
    +
    189 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    190{{
    +
    191 epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    +
    192 epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    +
    193 epu8 { 0, 2, 1, 3, 4, 6, 5, 7, 8, 10, 9, 11, 12, 14, 13, 15},
    +
    194 epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    +
    195 epu8 { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15},
    +
    196 epu8 { 0, 2, 1, 4, 3, 6, 5, 7, 8, 10, 9, 12, 11, 14, 13, 15}
    +
    197}};
    +
    198// clang-format on
    +
    199
    +
    200inline bool is_sorted(epu8 a) noexcept {
    +
    201 return simde_mm_movemask_epi8(shifted_right(a) > a) == 0;
    +
    202}
    +
    203inline epu8 sorted(epu8 a) noexcept {
    +
    204 return network_sort<true>(a, sorting_rounds);
    +
    205}
    +
    206inline epu8 sorted8(epu8 a) noexcept {
    +
    207 return network_sort<true>(a, sorting_rounds8);
    +
    208}
    +
    209inline epu8 revsorted(epu8 a) noexcept {
    +
    210 return network_sort<false>(a, sorting_rounds);
    211}
    -
    212inline epu8 sort8_perm(epu8 &a) noexcept {
    -
    213 return network_sort_perm<true>(a, sorting_rounds8);
    +
    212inline epu8 revsorted8(epu8 a) noexcept {
    +
    213 return network_sort<false>(a, sorting_rounds8);
    214}
    215
    -
    216constexpr std::array<epu8, 6> merge_rounds
    -
    217 // clang-format off
    -
    218 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    219{{
    -
    220 epu8 { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7},
    -
    221 epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    -
    222 epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    -
    223 epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    -
    224}};
    -
    225// clang-format on
    -
    226inline void merge_rev(epu8 &a, epu8 &b) noexcept {
    -
    227 epu8 mn = min(a, b);
    -
    228 b = max(a, b);
    -
    229 a = mn;
    -
    230 a = network_sort<true>(a, merge_rounds);
    -
    231 b = network_sort<true>(b, merge_rounds);
    -
    232}
    -
    233inline void merge(epu8 &a, epu8 &b) noexcept {
    -
    234 a = permuted(a, Epu8.rev());
    -
    235 merge_rev(a, b);
    -
    236}
    -
    237// TODO : AVX2 version.
    -
    238// TODO : compute merge_rounds on the fly instead of loading those from memory
    -
    239
    -
    240inline epu8 random_epu8(uint16_t bnd) {
    -
    241 epu8 res;
    -
    242
    -
    243 static std::random_device rd;
    -
    244 static std::default_random_engine e1(rd());
    -
    245 std::uniform_int_distribution<int> uniform_dist(0, bnd - 1);
    -
    246
    -
    247 for (size_t i = 0; i < 16; i++)
    -
    248 res[i] = uniform_dist(e1);
    -
    249 return res;
    -
    250}
    -
    251
    -
    252inline epu8 remove_dups(epu8 v, uint8_t repl) noexcept {
    -
    253 // Vector ternary operator is not supported by clang.
    -
    254 // return (v != shifted_right(v) ? v : Epu8(repl);
    -
    255 return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v));
    -
    256}
    -
    257
    -
    258// Gather at the front numbers with (3-i)-th bit not set.
    -
    259constexpr std::array<epu8, 3> inverting_rounds{{
    -
    260 // clang-format off
    -
    261 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    262 epu8 { 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15},
    -
    263 epu8 { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
    -
    264 epu8 { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}
    -
    265 // clang-format on
    -
    266}};
    -
    267
    -
    268#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    269#define FIND_IN_VECT \
    -
    270 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \
    -
    271 SIMDE_SIDD_NEGATIVE_POLARITY)
    -
    272#define FIND_IN_VECT_COMPL \
    -
    273 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK)
    -
    274
    -
    275inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept {
    -
    276 epu8 res = -static_cast<epu8>(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT));
    -
    277 for (epu8 round : inverting_rounds) {
    -
    278 a = permuted(a, round);
    -
    279 res <<= 1;
    -
    280 res -= static_cast<epu8>(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT));
    -
    281 }
    -
    282 return res;
    -
    283}
    -
    284#endif
    -
    285
    -
    286inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept {
    -
    287 auto ar = as_array(a);
    -
    288 epu8 res{};
    -
    289 for (size_t i = 0; i < 16; i++) {
    -
    290 res[i] =
    -
    291 std::distance(ar.begin(), std::find(ar.begin(), ar.end(), b[i]));
    -
    292 }
    -
    293 return res;
    -
    294}
    -
    295inline epu8 permutation_of(epu8 a, epu8 b) noexcept {
    -
    296#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    297 return permutation_of_cmpestrm(a, b);
    -
    298#else
    -
    299 return permutation_of_ref(a, b);
    -
    300#endif
    -
    301}
    -
    302
    -
    303#if defined(FF)
    -
    304#error FF is defined !
    -
    305#endif /* FF */
    -
    306#define FF 0xff
    -
    307
    -
    309constexpr std::array<epu8, 4> summing_rounds{{
    -
    310 // clang-format off
    -
    311 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    312 epu8 { FF, 0, FF, 2, FF, 4, FF, 6, FF, 8, FF, 10, FF, 12, FF, 14},
    -
    313 epu8 { FF, FF, 1, 1, FF, FF, 5, 5, FF, FF, 9, 9, FF, FF, 13, 13},
    -
    314 epu8 { FF, FF, FF, FF, 3, 3, 3, 3, FF, FF, FF, FF, 11, 11, 11, 11},
    -
    315 epu8 { FF, FF, FF, FF, FF, FF, FF, FF, 7, 7, 7, 7, 7, 7, 7, 7}
    -
    316 // clang-format on
    -
    317}};
    -
    318
    -
    319constexpr std::array<epu8, 4> mining_rounds{{
    -
    320 // clang-format off
    -
    321 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    -
    322 epu8 { 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14},
    -
    323 epu8 { 0, 1, 1, 1, 4, 5, 5, 5, 8, 9, 9, 9, 12, 13, 13, 13},
    -
    324 epu8 { 0, 1, 2, 3, 3, 3, 3, 3, 8, 9, 10, 11, 11, 11, 11, 11},
    -
    325 epu8 { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7}
    -
    326 // clang-format on
    -
    327}};
    -
    328
    -
    329#undef FF
    -
    330
    -
    331inline uint8_t horiz_sum_ref(epu8 v) noexcept {
    -
    332 uint8_t res = 0;
    -
    333 for (size_t i = 0; i < 16; i++)
    -
    334 res += v[i];
    -
    335 return res;
    -
    336}
    -
    337inline uint8_t horiz_sum_gen(epu8 v) noexcept {
    -
    338 return as_VectGeneric(v).horiz_sum();
    -
    339}
    -
    340inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; }
    -
    341inline uint8_t horiz_sum3(epu8 v) noexcept {
    -
    342 auto sr = summing_rounds;
    -
    343 v += permuted(v, sr[0]);
    -
    344 v += permuted(v, sr[1]);
    -
    345 v += permuted(v, sr[2]);
    -
    346 return v[7] + v[15];
    +
    216inline epu8 sort_perm(epu8 &a) noexcept {
    +
    217 return network_sort_perm<true>(a, sorting_rounds);
    +
    218}
    +
    219inline epu8 sort8_perm(epu8 &a) noexcept {
    +
    220 return network_sort_perm<true>(a, sorting_rounds8);
    +
    221}
    +
    222
    +
    223constexpr std::array<epu8, 6> merge_rounds
    +
    224 // clang-format off
    +
    225 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    226{{
    +
    227 epu8 { 8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7},
    +
    228 epu8 { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11},
    +
    229 epu8 { 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13},
    +
    230 epu8 { 1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14},
    +
    231}};
    +
    232// clang-format on
    +
    233inline void merge_rev(epu8 &a, epu8 &b) noexcept {
    +
    234 epu8 mn = min(a, b);
    +
    235 b = max(a, b);
    +
    236 a = mn;
    +
    237 a = network_sort<true>(a, merge_rounds);
    +
    238 b = network_sort<true>(b, merge_rounds);
    +
    239}
    +
    240inline void merge(epu8 &a, epu8 &b) noexcept {
    +
    241 a = permuted(a, Epu8.rev());
    +
    242 merge_rev(a, b);
    +
    243}
    +
    244// TODO : AVX2 version.
    +
    245// TODO : compute merge_rounds on the fly instead of loading those from
    +
    246// memory
    +
    247
    +
    248inline epu8 random_epu8(uint16_t bnd) {
    +
    249 epu8 res;
    +
    250
    +
    251 static std::random_device rd;
    +
    252 static std::default_random_engine e1(rd());
    +
    253 std::uniform_int_distribution<int> uniform_dist(0, bnd - 1);
    +
    254
    +
    255 for (size_t i = 0; i < 16; i++)
    +
    256 res[i] = uniform_dist(e1);
    +
    257 return res;
    +
    258}
    +
    259
    +
    260inline epu8 remove_dups(epu8 v, uint8_t repl) noexcept {
    +
    261 // Vector ternary operator is not supported by clang.
    +
    262 // return (v != shifted_right(v) ? v : Epu8(repl);
    +
    263 return simde_mm_blendv_epi8(Epu8(repl), v, v != shifted_right(v));
    +
    264}
    +
    265
    +
    266// Gather at the front numbers with (3-i)-th bit not set.
    +
    267constexpr std::array<epu8, 3> inverting_rounds{{
    +
    268 // clang-format off
    +
    269 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    270 epu8 { 0, 1, 2, 3, 8, 9, 10, 11, 4, 5, 6, 7, 12, 13, 14, 15},
    +
    271 epu8 { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15},
    +
    272 epu8 { 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15}
    +
    273 // clang-format on
    +
    274}};
    +
    275
    +
    276#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    277#define FIND_IN_VECT \
    +
    278 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK | \
    +
    279 SIMDE_SIDD_NEGATIVE_POLARITY)
    +
    280#define FIND_IN_VECT_COMPL \
    +
    281 (SIMDE_SIDD_UBYTE_OPS | SIMDE_SIDD_CMP_EQUAL_ANY | SIMDE_SIDD_UNIT_MASK)
    +
    282
    +
    283inline epu8 permutation_of_cmpestrm(epu8 a, epu8 b) noexcept {
    +
    284 epu8 res = -static_cast<epu8>(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT));
    +
    285 for (epu8 round : inverting_rounds) {
    +
    286 a = permuted(a, round);
    +
    287 res <<= 1;
    +
    288 res -= static_cast<epu8>(_mm_cmpestrm(a, 8, b, 16, FIND_IN_VECT));
    +
    289 }
    +
    290 return res;
    +
    291}
    +
    292#endif
    +
    293
    +
    294inline epu8 permutation_of_ref(epu8 a, epu8 b) noexcept {
    +
    295 auto ar = as_array(a);
    +
    296 epu8 res{};
    +
    297 for (size_t i = 0; i < 16; i++) {
    +
    298 res[i] =
    +
    299 std::distance(ar.begin(), std::find(ar.begin(), ar.end(), b[i]));
    +
    300 }
    +
    301 return res;
    +
    302}
    +
    303inline epu8 permutation_of(epu8 a, epu8 b) noexcept {
    +
    304#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    305 return permutation_of_cmpestrm(a, b);
    +
    306#else
    +
    307 return permutation_of_ref(a, b);
    +
    308#endif
    +
    309}
    +
    310
    +
    311#if defined(FF)
    +
    312#error FF is defined !
    +
    313#endif /* FF */
    +
    314#define FF 0xff
    +
    315
    +
    317constexpr std::array<epu8, 4> summing_rounds{{
    +
    318 // clang-format off
    +
    319 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    320 epu8 { FF, 0, FF, 2, FF, 4, FF, 6, FF, 8, FF, 10, FF, 12, FF, 14},
    +
    321 epu8 { FF, FF, 1, 1, FF, FF, 5, 5, FF, FF, 9, 9, FF, FF, 13, 13},
    +
    322 epu8 { FF, FF, FF, FF, 3, 3, 3, 3, FF, FF, FF, FF, 11, 11, 11, 11},
    +
    323 epu8 { FF, FF, FF, FF, FF, FF, FF, FF, 7, 7, 7, 7, 7, 7, 7, 7}
    +
    324 // clang-format on
    +
    325}};
    +
    326
    +
    327constexpr std::array<epu8, 4> mining_rounds{{
    +
    328 // clang-format off
    +
    329 // 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
    +
    330 epu8 { 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14},
    +
    331 epu8 { 0, 1, 1, 1, 4, 5, 5, 5, 8, 9, 9, 9, 12, 13, 13, 13},
    +
    332 epu8 { 0, 1, 2, 3, 3, 3, 3, 3, 8, 9, 10, 11, 11, 11, 11, 11},
    +
    333 epu8 { 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7}
    +
    334 // clang-format on
    +
    335}};
    +
    336
    +
    337#undef FF
    +
    338
    +
    339inline uint8_t horiz_sum_ref(epu8 v) noexcept {
    +
    340 uint8_t res = 0;
    +
    341 for (size_t i = 0; i < 16; i++)
    +
    342 res += v[i];
    +
    343 return res;
    +
    344}
    +
    345inline uint8_t horiz_sum_gen(epu8 v) noexcept {
    +
    346 return as_VectGeneric(v).horiz_sum();
    347}
    -
    348
    -
    349inline epu8 partial_sums_ref(epu8 v) noexcept {
    -
    350 epu8 res{};
    -
    351 res[0] = v[0];
    -
    352 for (size_t i = 1; i < 16; i++)
    -
    353 res[i] = res[i - 1] + v[i];
    -
    354 return res;
    +
    348inline uint8_t horiz_sum4(epu8 v) noexcept { return partial_sums_round(v)[15]; }
    +
    349inline uint8_t horiz_sum3(epu8 v) noexcept {
    +
    350 auto sr = summing_rounds;
    +
    351 v += permuted(v, sr[0]);
    +
    352 v += permuted(v, sr[1]);
    +
    353 v += permuted(v, sr[2]);
    +
    354 return v[7] + v[15];
    355}
    -
    356inline epu8 partial_sums_gen(epu8 v) noexcept {
    -
    357 as_VectGeneric(v).partial_sums_inplace();
    -
    358 return v;
    -
    359}
    -
    360inline epu8 partial_sums_round(epu8 v) noexcept {
    -
    361 for (epu8 round : summing_rounds)
    -
    362 v += permuted(v, round);
    -
    363 return v;
    -
    364}
    -
    365
    -
    366inline uint8_t horiz_max_ref(epu8 v) noexcept {
    -
    367 uint8_t res = 0;
    -
    368 for (size_t i = 0; i < 16; i++)
    -
    369 res = std::max(res, v[i]);
    -
    370 return res;
    -
    371}
    -
    372inline uint8_t horiz_max_gen(epu8 v) noexcept {
    -
    373 return as_VectGeneric(v).horiz_max();
    -
    374}
    -
    375inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; }
    -
    376inline uint8_t horiz_max3(epu8 v) noexcept {
    -
    377 auto sr = summing_rounds;
    -
    378 v = max(v, permuted(v, sr[0]));
    -
    379 v = max(v, permuted(v, sr[1]));
    -
    380 v = max(v, permuted(v, sr[2]));
    -
    381 return std::max(v[7], v[15]);
    +
    356
    +
    357inline epu8 partial_sums_ref(epu8 v) noexcept {
    +
    358 epu8 res{};
    +
    359 res[0] = v[0];
    +
    360 for (size_t i = 1; i < 16; i++)
    +
    361 res[i] = res[i - 1] + v[i];
    +
    362 return res;
    +
    363}
    +
    364inline epu8 partial_sums_gen(epu8 v) noexcept {
    +
    365 as_VectGeneric(v).partial_sums_inplace();
    +
    366 return v;
    +
    367}
    +
    368inline epu8 partial_sums_round(epu8 v) noexcept {
    +
    369 for (epu8 round : summing_rounds)
    +
    370 v += permuted(v, round);
    +
    371 return v;
    +
    372}
    +
    373
    +
    374inline uint8_t horiz_max_ref(epu8 v) noexcept {
    +
    375 uint8_t res = 0;
    +
    376 for (size_t i = 0; i < 16; i++)
    +
    377 res = std::max(res, v[i]);
    +
    378 return res;
    +
    379}
    +
    380inline uint8_t horiz_max_gen(epu8 v) noexcept {
    +
    381 return as_VectGeneric(v).horiz_max();
    382}
    -
    383
    -
    384inline epu8 partial_max_ref(epu8 v) noexcept {
    -
    385 epu8 res;
    -
    386 res[0] = v[0];
    -
    387 for (size_t i = 1; i < 16; i++)
    -
    388 res[i] = std::max(res[i - 1], v[i]);
    -
    389 return res;
    +
    383inline uint8_t horiz_max4(epu8 v) noexcept { return partial_max_round(v)[15]; }
    +
    384inline uint8_t horiz_max3(epu8 v) noexcept {
    +
    385 auto sr = summing_rounds;
    +
    386 v = max(v, permuted(v, sr[0]));
    +
    387 v = max(v, permuted(v, sr[1]));
    +
    388 v = max(v, permuted(v, sr[2]));
    +
    389 return std::max(v[7], v[15]);
    390}
    -
    391inline epu8 partial_max_gen(epu8 v) noexcept {
    -
    392 as_VectGeneric(v).partial_max_inplace();
    -
    393 return v;
    -
    394}
    -
    395inline epu8 partial_max_round(epu8 v) noexcept {
    -
    396 for (epu8 round : summing_rounds)
    -
    397 v = max(v, permuted(v, round));
    -
    398 return v;
    -
    399}
    -
    400
    -
    401inline uint8_t horiz_min_ref(epu8 v) noexcept {
    -
    402 uint8_t res = 255;
    -
    403 for (size_t i = 0; i < 16; i++)
    -
    404 res = std::min(res, v[i]);
    -
    405 return res;
    -
    406}
    -
    407inline uint8_t horiz_min_gen(epu8 v) noexcept {
    -
    408 return as_VectGeneric(v).horiz_min();
    -
    409}
    -
    410inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; }
    -
    411inline uint8_t horiz_min3(epu8 v) noexcept {
    -
    412 auto sr = mining_rounds;
    -
    413 v = min(v, permuted(v, sr[0]));
    -
    414 v = min(v, permuted(v, sr[1]));
    -
    415 v = min(v, permuted(v, sr[2]));
    -
    416 return std::min(v[7], v[15]);
    +
    391
    +
    392inline epu8 partial_max_ref(epu8 v) noexcept {
    +
    393 epu8 res;
    +
    394 res[0] = v[0];
    +
    395 for (size_t i = 1; i < 16; i++)
    +
    396 res[i] = std::max(res[i - 1], v[i]);
    +
    397 return res;
    +
    398}
    +
    399inline epu8 partial_max_gen(epu8 v) noexcept {
    +
    400 as_VectGeneric(v).partial_max_inplace();
    +
    401 return v;
    +
    402}
    +
    403inline epu8 partial_max_round(epu8 v) noexcept {
    +
    404 for (epu8 round : summing_rounds)
    +
    405 v = max(v, permuted(v, round));
    +
    406 return v;
    +
    407}
    +
    408
    +
    409inline uint8_t horiz_min_ref(epu8 v) noexcept {
    +
    410 uint8_t res = 255;
    +
    411 for (size_t i = 0; i < 16; i++)
    +
    412 res = std::min(res, v[i]);
    +
    413 return res;
    +
    414}
    +
    415inline uint8_t horiz_min_gen(epu8 v) noexcept {
    +
    416 return as_VectGeneric(v).horiz_min();
    417}
    -
    418
    -
    419inline epu8 partial_min_ref(epu8 v) noexcept {
    -
    420 epu8 res;
    -
    421 res[0] = v[0];
    -
    422 for (size_t i = 1; i < 16; i++)
    -
    423 res[i] = std::min(res[i - 1], v[i]);
    -
    424 return res;
    +
    418inline uint8_t horiz_min4(epu8 v) noexcept { return partial_min_round(v)[15]; }
    +
    419inline uint8_t horiz_min3(epu8 v) noexcept {
    +
    420 auto sr = mining_rounds;
    +
    421 v = min(v, permuted(v, sr[0]));
    +
    422 v = min(v, permuted(v, sr[1]));
    +
    423 v = min(v, permuted(v, sr[2]));
    +
    424 return std::min(v[7], v[15]);
    425}
    -
    426inline epu8 partial_min_gen(epu8 v) noexcept {
    -
    427 as_VectGeneric(v).partial_min_inplace();
    -
    428 return v;
    -
    429}
    -
    430inline epu8 partial_min_round(epu8 v) noexcept {
    -
    431 for (epu8 round : mining_rounds)
    -
    432 v = min(v, permuted(v, round));
    -
    433 return v;
    -
    434}
    -
    435
    -
    436inline epu8 eval16_ref(epu8 v) noexcept {
    -
    437 epu8 res{};
    -
    438 for (size_t i = 0; i < 16; i++)
    -
    439 if (v[i] < 16)
    -
    440 res[v[i]]++;
    -
    441 return res;
    +
    426
    +
    427inline epu8 partial_min_ref(epu8 v) noexcept {
    +
    428 epu8 res;
    +
    429 res[0] = v[0];
    +
    430 for (size_t i = 1; i < 16; i++)
    +
    431 res[i] = std::min(res[i - 1], v[i]);
    +
    432 return res;
    +
    433}
    +
    434inline epu8 partial_min_gen(epu8 v) noexcept {
    +
    435 as_VectGeneric(v).partial_min_inplace();
    +
    436 return v;
    +
    437}
    +
    438inline epu8 partial_min_round(epu8 v) noexcept {
    +
    439 for (epu8 round : mining_rounds)
    +
    440 v = min(v, permuted(v, round));
    +
    441 return v;
    442}
    443
    -
    444inline epu8 eval16_arr(epu8 v8) noexcept {
    -
    445 decltype(Epu8)::array res{};
    -
    446 auto v = as_array(v8);
    -
    447 for (size_t i = 0; i < 16; i++)
    -
    448 if (v[i] < 16)
    -
    449 res[v[i]]++;
    -
    450 return Epu8(res);
    -
    451}
    -
    452inline epu8 eval16_gen(epu8 v) noexcept {
    -
    453 return Epu8(as_VectGeneric(v).eval().v);
    -
    454}
    -
    455inline epu8 eval16_cycle(epu8 v) noexcept {
    -
    456 epu8 res = -(Epu8.id() == v);
    -
    457 for (int i = 1; i < 16; i++) {
    -
    458 v = permuted(v, Epu8.left_cycle());
    -
    459 res -= (Epu8.id() == v);
    -
    460 }
    -
    461 return res;
    +
    444inline epu8 eval16_ref(epu8 v) noexcept {
    +
    445 epu8 res{};
    +
    446 for (size_t i = 0; i < 16; i++)
    +
    447 if (v[i] < 16)
    +
    448 res[v[i]]++;
    +
    449 return res;
    +
    450}
    +
    451
    +
    452inline epu8 eval16_arr(epu8 v8) noexcept {
    +
    453 decltype(Epu8)::array res{};
    +
    454 auto v = as_array(v8);
    +
    455 for (size_t i = 0; i < 16; i++)
    +
    456 if (v[i] < 16)
    +
    457 res[v[i]]++;
    +
    458 return Epu8(res);
    +
    459}
    +
    460inline epu8 eval16_gen(epu8 v) noexcept {
    +
    461 return Epu8(as_VectGeneric(v).eval().v);
    462}
    -
    463inline epu8 eval16_popcount(epu8 v) noexcept {
    -
    464 epu8 res{};
    -
    465 for (size_t i = 0; i < 16; i++) {
    -
    466 res[i] =
    -
    467 __builtin_popcountl(simde_mm_movemask_epi8(v == Epu8(uint8_t(i))));
    +
    463inline epu8 eval16_cycle(epu8 v) noexcept {
    +
    464 epu8 res = -(Epu8.id() == v);
    +
    465 for (int i = 1; i < 16; i++) {
    +
    466 v = permuted(v, Epu8.left_cycle());
    +
    467 res -= (Epu8.id() == v);
    468 }
    469 return res;
    470}
    -
    471
    -
    472inline epu8 popcount16(epu8 v) noexcept {
    -
    473 return (permuted(Epu8.popcount(), v & Epu8(0x0f)) +
    -
    474 permuted(Epu8.popcount(), v >> 4));
    -
    475}
    -
    476
    -
    477inline bool is_partial_transformation(epu8 v, const size_t k) noexcept {
    -
    478 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    479 // (forall x in v, x + 1 <= 16) and
    -
    480 // (v = Perm16::one() or last diff index < 16)
    -
    481 return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) &&
    -
    482 (diff == 16 || diff < k);
    +
    471inline epu8 eval16_popcount(epu8 v) noexcept {
    +
    472 epu8 res{};
    +
    473 for (size_t i = 0; i < 16; i++) {
    +
    474 res[i] =
    +
    475 __builtin_popcountl(simde_mm_movemask_epi8(v == Epu8(uint8_t(i))));
    +
    476 }
    +
    477 return res;
    +
    478}
    +
    479
    +
    480inline epu8 popcount16(epu8 v) noexcept {
    +
    481 return (permuted(Epu8.popcount(), v & Epu8(0x0f)) +
    +
    482 permuted(Epu8.popcount(), v >> 4));
    483}
    484
    -
    485inline bool is_transformation(epu8 v, const size_t k) noexcept {
    +
    485inline bool is_partial_transformation(epu8 v, const size_t k) noexcept {
    486 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    487 return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) &&
    -
    488 (diff == 16 || diff < k);
    -
    489}
    -
    490
    -
    491inline bool is_partial_permutation(epu8 v, const size_t k) noexcept {
    -
    492 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    493 // (forall x in v, x <= 15) and
    -
    494 // (forall x < 15, multiplicity x v <= 1
    -
    495 // (v = Perm16::one() or last diff index < 16)
    -
    496 return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) &&
    -
    497 (simde_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) &&
    -
    498 (diff == 16 || diff < k);
    -
    499}
    -
    500
    -
    501#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    502inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept {
    -
    503 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    504 // (forall x in v, x in Perm16::one()) and
    -
    505 // (forall x in Perm16::one(), x in v) and
    -
    506 // (v = Perm16::one() or last diff index < 16)
    -
    507 return _mm_cmpestri(Epu8.id(), 16, v, 16, FIRST_NON_ZERO) == 16 &&
    -
    508 _mm_cmpestri(v, 16, Epu8.id(), 16, FIRST_NON_ZERO) == 16 &&
    -
    509 (diff == 16 || diff < k);
    -
    510}
    -
    511#endif
    -
    512
    -
    513inline bool is_permutation_sort(epu8 v, const size_t k) noexcept {
    -
    514 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    515 return equal(sorted(v), Epu8.id()) && (diff == 16 || diff < k);
    -
    516}
    -
    517inline bool is_permutation_eval(epu8 v, const size_t k) noexcept {
    -
    518 uint64_t diff = last_diff(v, Epu8.id(), 16);
    -
    519 return equal(eval16(v), Epu8({}, 1)) && (diff == 16 || diff < k);
    -
    520}
    -
    521
    -
    522inline bool is_permutation(epu8 v, const size_t k) noexcept {
    -
    523#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    524 return is_permutation_cmpestri(v, k);
    -
    525#else
    -
    526 return is_permutation_sort(v, k);
    -
    527#endif
    +
    487 // (forall x in v, x + 1 <= 16) and
    +
    488 // (v = Perm16::one() or last diff index < 16)
    +
    489 return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) &&
    +
    490 (diff == 16 || diff < k);
    +
    491}
    +
    492
    +
    493inline bool is_transformation(epu8 v, const size_t k) noexcept {
    +
    494 uint64_t diff = last_diff(v, Epu8.id(), 16);
    +
    495 return (simde_mm_movemask_epi8(v < Epu8(0x10)) == 0xffff) &&
    +
    496 (diff == 16 || diff < k);
    +
    497}
    +
    498
    +
    499inline bool is_partial_permutation(epu8 v, const size_t k) noexcept {
    +
    500 uint64_t diff = last_diff(v, Epu8.id(), 16);
    +
    501 // (forall x in v, x <= 15) and
    +
    502 // (forall x < 15, multiplicity x v <= 1
    +
    503 // (v = Perm16::one() or last diff index < 16)
    +
    504 return (simde_mm_movemask_epi8(v + Epu8(1) <= Epu8(0x10)) == 0xffff) &&
    +
    505 (simde_mm_movemask_epi8(eval16(v) <= Epu8(1)) == 0xffff) &&
    +
    506 (diff == 16 || diff < k);
    +
    507}
    +
    508
    +
    509#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    510inline bool is_permutation_cmpestri(epu8 v, const size_t k) noexcept {
    +
    511 uint64_t diff = last_diff(v, Epu8.id(), 16);
    +
    512 // (forall x in v, x in Perm16::one()) and
    +
    513 // (forall x in Perm16::one(), x in v) and
    +
    514 // (v = Perm16::one() or last diff index < 16)
    +
    515 return _mm_cmpestri(Epu8.id(), 16, v, 16, FIRST_NON_ZERO) == 16 &&
    +
    516 _mm_cmpestri(v, 16, Epu8.id(), 16, FIRST_NON_ZERO) == 16 &&
    +
    517 (diff == 16 || diff < k);
    +
    518}
    +
    519#endif
    +
    520
    +
    521inline bool is_permutation_sort(epu8 v, const size_t k) noexcept {
    +
    522 uint64_t diff = last_diff(v, Epu8.id(), 16);
    +
    523 return equal(sorted(v), Epu8.id()) && (diff == 16 || diff < k);
    +
    524}
    +
    525inline bool is_permutation_eval(epu8 v, const size_t k) noexcept {
    +
    526 uint64_t diff = last_diff(v, Epu8.id(), 16);
    +
    527 return equal(eval16(v), Epu8({}, 1)) && (diff == 16 || diff < k);
    528}
    529
    -
    530} // namespace HPCombi
    -
    531
    -
    532namespace std {
    -
    533
    -
    534inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a) {
    -
    535 stream << "{" << std::setw(2) << unsigned(a[0]);
    -
    536 for (unsigned i = 1; i < 16; ++i)
    -
    537 stream << "," << std::setw(2) << unsigned(a[i]);
    -
    538 stream << "}";
    -
    539 return stream;
    -
    540}
    +
    530inline bool is_permutation(epu8 v, const size_t k) noexcept {
    +
    531#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    532 return is_permutation_cmpestri(v, k);
    +
    533#else
    +
    534 return is_permutation_sort(v, k);
    +
    535#endif
    +
    536}
    +
    537
    +
    538} // namespace HPCombi
    +
    539
    +
    540namespace std {
    541
    -
    542inline std::string to_string(HPCombi::epu8 const &a) {
    -
    543 std::ostringstream ss;
    -
    544 ss << a;
    -
    545 return ss.str();
    -
    546}
    -
    547
    -
    548template <> struct equal_to<HPCombi::epu8> {
    -
    549 bool operator()(const HPCombi::epu8 &lhs,
    -
    550 const HPCombi::epu8 &rhs) const noexcept {
    -
    551 return HPCombi::equal(lhs, rhs);
    -
    552 }
    -
    553};
    -
    554
    -
    555template <> struct not_equal_to<HPCombi::epu8> {
    -
    556 bool operator()(const HPCombi::epu8 &lhs,
    -
    557 const HPCombi::epu8 &rhs) const noexcept {
    -
    558 return HPCombi::not_equal(lhs, rhs);
    -
    559 }
    -
    560};
    -
    561
    -
    562template <> struct hash<HPCombi::epu8> {
    -
    563 inline size_t operator()(HPCombi::epu8 a) const noexcept {
    -
    564 unsigned __int128 v0 = simde_mm_extract_epi64(a, 0);
    -
    565 unsigned __int128 v1 = simde_mm_extract_epi64(a, 1);
    -
    566 return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64;
    -
    567
    -
    568 /* The following is extremely slow on Renner benchmark
    -
    569 uint64_t v0 = simde_mm_extract_epi64(ar.v, 0);
    -
    570 uint64_t v1 = simde_mm_extract_epi64(ar.v, 1);
    -
    571 size_t seed = v0 + 0x9e3779b9;
    -
    572 seed ^= v1 + 0x9e3779b9 + (seed<<6) + (seed>>2);
    -
    573 return seed;
    -
    574 */
    -
    575 }
    -
    576};
    -
    577
    -
    578template <> struct less<HPCombi::epu8> {
    -
    579 // WARNING: due to endianness this is not lexicographic comparison,
    -
    580 // but we don't care when using in std::set.
    -
    581 // 10% faster than calling the lexicographic comparison operator !
    -
    582 inline size_t operator()(const HPCombi::epu8 &v1,
    -
    583 const HPCombi::epu8 &v2) const noexcept {
    -
    584 simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2);
    -
    585 return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0];
    -
    586 }
    -
    587};
    -
    588
    -
    589} // namespace std
    -
    #define FF
    Definition bmat8_impl.hpp:240
    -
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:62
    -
    Definition bmat8.hpp:37
    -
    uint8_t horiz_min4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:410
    -
    epu8 max(epu8 a, epu8 b) noexcept
    Vector max between two HPCombi::epu8 0.
    Definition epu8.hpp:89
    -
    uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:85
    -
    uint64_t first_non_zero(epu8 v, int bnd) noexcept
    return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
    Definition epu8_impl.hpp:119
    -
    uint8_t horiz_min_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:401
    -
    epu8 eval16_arr(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:444
    -
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:70
    -
    epu8 sort8_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:212
    -
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:76
    -
    uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:70
    -
    uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:81
    -
    epu8 partial_sums_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:349
    -
    epu8 eval16_gen(epu8 v) noexcept
    Definition epu8_impl.hpp:452
    -
    epu8 network_sort(epu8 res, std::array< epu8, sz > rounds)
    Apply a sorting network.
    Definition epu8_impl.hpp:128
    -
    epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
    Remove duplicates in a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:252
    -
    epu8 revsorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half reverse sorted.
    Definition epu8_impl.hpp:205
    -
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:295
    -
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:522
    -
    void merge_rev(epu8 &a, epu8 &b) noexcept
    Definition epu8_impl.hpp:226
    -
    constexpr std::array< epu8, 4 > summing_rounds
    Permutation Round for partial and horizontal sums.
    Definition epu8_impl.hpp:309
    -
    int8_t less_partial(epu8 a, epu8 b, int k) noexcept
    Partial lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:106
    -
    uint8_t horiz_max4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:375
    -
    uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:98
    -
    uint8_t horiz_max3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:376
    -
    constexpr uint64_t prime
    A prime number good for hashing.
    Definition epu8.hpp:162
    -
    uint8_t horiz_min_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:407
    -
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:491
    -
    bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:513
    -
    uint64_t last_zero(epu8 v, int bnd) noexcept
    return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
    Definition epu8_impl.hpp:116
    -
    void merge(epu8 &a, epu8 &b) noexcept
    Merge two sorted epu8.
    Definition epu8_impl.hpp:233
    -
    uint8_t horiz_sum4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:340
    -
    epu8 popcount16(epu8 v) noexcept
    a vector popcount function
    Definition epu8_impl.hpp:472
    -
    epu8 partial_sums_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:360
    -
    uint8_t horiz_sum3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:341
    -
    uint8_t horiz_sum_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:331
    -
    epu8 partial_sums_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:356
    -
    bool equal(epu8 a, epu8 b) noexcept
    Equality of HPCombi::epu8.
    Definition epu8.hpp:61
    -
    epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:286
    -
    epu8 min(epu8 a, epu8 b) noexcept
    Vector min between two HPCombi::epu8 0.
    Definition epu8.hpp:87
    -
    uint64_t first_mask(epu8 msk, size_t bound)
    Definition epu8_impl.hpp:61
    -
    epu8 partial_max_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:384
    -
    epu8 sorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half sorted.
    Definition epu8_impl.hpp:199
    -
    epu8 partial_max_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:395
    -
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:406
    -
    epu8 network_sort_perm(epu8 &v, std::array< epu8, sz > rounds)
    Apply a sorting network in place and return the permutation.
    Definition epu8_impl.hpp:141
    -
    constexpr std::array< epu8, 6 > sorting_rounds8
    A duplicated 8-way sorting network.
    Definition epu8_impl.hpp:183
    -
    epu8 eval16_cycle(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:455
    -
    constexpr std::array< epu8, 9 > sorting_rounds
    A 16-way sorting network.
    Definition epu8_impl.hpp:162
    -
    epu8 eval16_ref(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:436
    -
    epu8 partial_max_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:391
    -
    bool less(epu8 a, epu8 b) noexcept
    Lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:102
    -
    uint8_t horiz_max_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:366
    -
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:196
    -
    uint64_t last_mask(epu8 msk, size_t bound)
    Definition epu8_impl.hpp:65
    -
    constexpr std::array< epu8, 4 > mining_rounds
    Definition epu8_impl.hpp:319
    -
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:50
    -
    bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:517
    -
    VectGeneric< TPUBuild< TPU >::size > & as_VectGeneric(TPU &v)
    Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.
    Definition builder.hpp:151
    -
    uint64_t first_zero(epu8 v, int bnd) noexcept
    return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
    Definition epu8_impl.hpp:113
    -
    epu8 random_epu8(uint16_t bnd)
    A random HPCombi::epu8.
    Definition epu8_impl.hpp:240
    -
    epu8 revsorted(epu8 a) noexcept
    Return a reverse sorted HPCombi::epu8.
    Definition epu8_impl.hpp:202
    -
    epu8 partial_min_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:430
    -
    bool is_sorted(epu8 a) noexcept
    Testing if a HPCombi::epu8 is sorted.
    Definition epu8_impl.hpp:193
    -
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:41
    -
    constexpr std::array< epu8, 3 > inverting_rounds
    Definition epu8_impl.hpp:259
    -
    uint64_t last_non_zero(epu8 v, int bnd) noexcept
    return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
    Definition epu8_impl.hpp:122
    -
    uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8.hpp:484
    -
    epu8 eval16_popcount(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:463
    -
    epu8 sort_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:209
    -
    epu8 partial_min_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:426
    -
    epu8 partial_min_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:419
    -
    uint8_t horiz_max_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:372
    -
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:485
    -
    uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8.hpp:444
    -
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:477
    -
    epu8 permuted_ref(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8_impl.hpp:53
    -
    constexpr std::array< epu8, 6 > merge_rounds
    Definition epu8_impl.hpp:219
    -
    TPUBuild< TPU >::array & as_array(TPU &v) noexcept
    Cast a TPU to a c++ std::array.
    Definition builder.hpp:134
    -
    uint8_t horiz_sum_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:337
    -
    uint8_t horiz_min3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:411
    -
    bool not_equal(epu8 a, epu8 b) noexcept
    Non equality of HPCombi::epu8.
    Definition epu8.hpp:65
    -
    Definition bmat8.hpp:360
    -
    std::ostream & operator<<(std::ostream &os, HPCombi::BMat8 const &bm)
    Definition bmat8_impl.hpp:500
    -
    std::string to_string(HPCombi::epu8 const &a)
    Definition epu8_impl.hpp:542
    -
    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept
    Definition epu8_impl.hpp:549
    -
    size_t operator()(HPCombi::epu8 a) const noexcept
    Definition epu8_impl.hpp:563
    -
    size_t operator()(const HPCombi::epu8 &v1, const HPCombi::epu8 &v2) const noexcept
    Definition epu8_impl.hpp:582
    -
    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept
    Definition epu8_impl.hpp:556
    +
    542inline std::ostream &operator<<(std::ostream &stream, HPCombi::epu8 const &a) {
    +
    543 stream << "{" << std::setw(2) << unsigned(a[0]);
    +
    544 for (unsigned i = 1; i < 16; ++i)
    +
    545 stream << "," << std::setw(2) << unsigned(a[i]);
    +
    546 stream << "}";
    +
    547 return stream;
    +
    548}
    +
    549
    +
    550inline std::string to_string(HPCombi::epu8 const &a) {
    +
    551 std::ostringstream ss;
    +
    552 ss << a;
    +
    553 return ss.str();
    +
    554}
    +
    555
    +
    556template <> struct equal_to<HPCombi::epu8> {
    +
    557 bool operator()(const HPCombi::epu8 &lhs,
    +
    558 const HPCombi::epu8 &rhs) const noexcept {
    +
    559 return HPCombi::equal(lhs, rhs);
    +
    560 }
    +
    561};
    +
    562
    +
    563template <> struct not_equal_to<HPCombi::epu8> {
    +
    564 bool operator()(const HPCombi::epu8 &lhs,
    +
    565 const HPCombi::epu8 &rhs) const noexcept {
    +
    566 return HPCombi::not_equal(lhs, rhs);
    +
    567 }
    +
    568};
    +
    569
    +
    570template <> struct hash<HPCombi::epu8> {
    +
    571 inline size_t operator()(HPCombi::epu8 a) const noexcept {
    +
    572 unsigned __int128 v0 = simde_mm_extract_epi64(a, 0);
    +
    573 unsigned __int128 v1 = simde_mm_extract_epi64(a, 1);
    +
    574 return ((v1 * HPCombi::prime + v0) * HPCombi::prime) >> 64;
    +
    575
    +
    576 /* The following is extremely slow on Renner benchmark
    +
    577 uint64_t v0 = simde_mm_extract_epi64(ar.v, 0);
    +
    578 uint64_t v1 = simde_mm_extract_epi64(ar.v, 1);
    +
    579 size_t seed = v0 + 0x9e3779b9;
    +
    580 seed ^= v1 + 0x9e3779b9 + (seed<<6) + (seed>>2);
    +
    581 return seed;
    +
    582 */
    +
    583 }
    +
    584};
    +
    585
    +
    586template <> struct less<HPCombi::epu8> {
    +
    587 // WARNING: due to endianness this is not lexicographic comparison,
    +
    588 // but we don't care when using in std::set.
    +
    589 // 10% faster than calling the lexicographic comparison operator !
    +
    590 inline size_t operator()(const HPCombi::epu8 &v1,
    +
    591 const HPCombi::epu8 &v2) const noexcept {
    +
    592 simde__m128 v1v = simde__m128(v1), v2v = simde__m128(v2);
    +
    593 return v1v[0] == v2v[0] ? v1v[1] < v2v[1] : v1v[0] < v2v[0];
    +
    594 }
    +
    595};
    +
    596
    +
    597} // namespace std
    +
    #define FF
    Definition bmat8_impl.hpp:246
    +
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:66
    +
    Definition bmat8.hpp:41
    +
    uint8_t horiz_min4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:418
    +
    epu8 max(epu8 a, epu8 b) noexcept
    Vector max between two HPCombi::epu8 0.
    Definition epu8.hpp:91
    +
    uint64_t last_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:92
    +
    uint64_t first_non_zero(epu8 v, int bnd) noexcept
    return the index of the first non zero entry or 16 if there are none Only index smaller than bound ar...
    Definition epu8_impl.hpp:126
    +
    uint8_t horiz_min_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:409
    +
    epu8 eval16_arr(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:452
    +
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:72
    +
    epu8 sort8_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:219
    +
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:78
    +
    uint64_t first_diff_ref(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:77
    +
    uint64_t first_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:88
    +
    epu8 partial_sums_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:357
    +
    epu8 eval16_gen(epu8 v) noexcept
    Definition epu8_impl.hpp:460
    +
    epu8 network_sort(epu8 res, std::array< epu8, sz > rounds)
    Apply a sorting network.
    Definition epu8_impl.hpp:135
    +
    epu8 remove_dups(epu8 a, uint8_t repl=0) noexcept
    Remove duplicates in a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:260
    +
    epu8 revsorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half reverse sorted.
    Definition epu8_impl.hpp:212
    +
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:303
    +
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:530
    +
    void merge_rev(epu8 &a, epu8 &b) noexcept
    Definition epu8_impl.hpp:233
    +
    constexpr std::array< epu8, 4 > summing_rounds
    Permutation Round for partial and horizontal sums.
    Definition epu8_impl.hpp:317
    +
    int8_t less_partial(epu8 a, epu8 b, int k) noexcept
    Partial lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:113
    +
    uint8_t horiz_max4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:383
    +
    uint64_t last_diff_mask(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8_impl.hpp:105
    +
    uint8_t horiz_max3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:384
    +
    constexpr uint64_t prime
    A prime number good for hashing.
    Definition epu8.hpp:164
    +
    uint8_t horiz_min_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:415
    +
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:499
    +
    bool is_permutation_sort(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:521
    +
    uint64_t last_zero(epu8 v, int bnd) noexcept
    return the index of the last zero entry or 16 if there are none Only index smaller than bound are tak...
    Definition epu8_impl.hpp:123
    +
    void merge(epu8 &a, epu8 &b) noexcept
    Merge two sorted epu8.
    Definition epu8_impl.hpp:240
    +
    uint8_t horiz_sum4(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:348
    +
    epu8 popcount16(epu8 v) noexcept
    a vector popcount function
    Definition epu8_impl.hpp:480
    +
    epu8 partial_sums_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:368
    +
    uint8_t horiz_sum3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:349
    +
    uint8_t horiz_sum_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:339
    +
    epu8 partial_sums_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:364
    +
    bool equal(epu8 a, epu8 b) noexcept
    Equality of HPCombi::epu8.
    Definition epu8.hpp:63
    +
    epu8 permutation_of_ref(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:294
    +
    epu8 min(epu8 a, epu8 b) noexcept
    Vector min between two HPCombi::epu8 0.
    Definition epu8.hpp:89
    +
    uint64_t first_mask(epu8 msk, size_t bound)
    Definition epu8_impl.hpp:68
    +
    epu8 partial_max_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:392
    +
    epu8 sorted8(epu8 a) noexcept
    Return a HPCombi::epu8 with the two half sorted.
    Definition epu8_impl.hpp:206
    +
    epu8 partial_max_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:403
    +
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:408
    +
    epu8 network_sort_perm(epu8 &v, std::array< epu8, sz > rounds)
    Apply a sorting network in place and return the permutation.
    Definition epu8_impl.hpp:148
    +
    constexpr std::array< epu8, 6 > sorting_rounds8
    A duplicated 8-way sorting network.
    Definition epu8_impl.hpp:190
    +
    epu8 eval16_cycle(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:463
    +
    constexpr std::array< epu8, 9 > sorting_rounds
    A 16-way sorting network.
    Definition epu8_impl.hpp:169
    +
    epu8 eval16_ref(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:444
    +
    epu8 partial_max_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:399
    +
    bool less(epu8 a, epu8 b) noexcept
    Lexicographic comparison between two HPCombi::epu8.
    Definition epu8_impl.hpp:109
    +
    uint8_t horiz_max_ref(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:374
    +
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:203
    +
    uint64_t last_mask(epu8 msk, size_t bound)
    Definition epu8_impl.hpp:72
    +
    constexpr std::array< epu8, 4 > mining_rounds
    Definition epu8_impl.hpp:327
    +
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:53
    +
    bool is_permutation_eval(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:525
    +
    VectGeneric< TPUBuild< TPU >::size > & as_VectGeneric(TPU &v)
    Cast a HPCombi::epu8 to a c++ HPCombi::VectGeneric.
    Definition builder.hpp:156
    +
    uint64_t first_zero(epu8 v, int bnd) noexcept
    return the index of the first zero entry or 16 if there are none Only index smaller than bound are ta...
    Definition epu8_impl.hpp:120
    +
    epu8 random_epu8(uint16_t bnd)
    A random HPCombi::epu8.
    Definition epu8_impl.hpp:248
    +
    epu8 revsorted(epu8 a) noexcept
    Return a reverse sorted HPCombi::epu8.
    Definition epu8_impl.hpp:209
    +
    epu8 partial_min_round(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:438
    +
    bool is_sorted(epu8 a) noexcept
    Testing if a HPCombi::epu8 is sorted.
    Definition epu8_impl.hpp:200
    +
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:45
    +
    constexpr std::array< epu8, 3 > inverting_rounds
    Definition epu8_impl.hpp:267
    +
    uint64_t last_non_zero(epu8 v, int bnd) noexcept
    return the index of the last non zero entry or 16 if there are none Only index smaller than bound are...
    Definition epu8_impl.hpp:129
    +
    uint64_t last_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The last difference between two HPCombi::epu8.
    Definition epu8.hpp:486
    +
    epu8 eval16_popcount(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8_impl.hpp:471
    +
    epu8 sort_perm(epu8 &a) noexcept
    Sort this and return the sorting permutation.
    Definition epu8_impl.hpp:216
    +
    epu8 partial_min_gen(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:434
    +
    epu8 partial_min_ref(epu8) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:427
    +
    uint8_t horiz_max_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:380
    +
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:493
    +
    uint64_t first_diff(epu8 a, epu8 b, size_t bound=16) noexcept
    The first difference between two HPCombi::epu8.
    Definition epu8.hpp:446
    +
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:485
    +
    epu8 permuted_ref(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8_impl.hpp:59
    +
    constexpr std::array< epu8, 6 > merge_rounds
    Definition epu8_impl.hpp:226
    +
    TPUBuild< TPU >::array & as_array(TPU &v) noexcept
    Cast a TPU to a c++ std::array.
    Definition builder.hpp:139
    +
    uint8_t horiz_sum_gen(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:345
    +
    uint8_t horiz_min3(epu8) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8_impl.hpp:419
    +
    bool not_equal(epu8 a, epu8 b) noexcept
    Non equality of HPCombi::epu8.
    Definition epu8.hpp:67
    +
    Definition bmat8.hpp:364
    +
    std::ostream & operator<<(std::ostream &os, HPCombi::BMat8 const &bm)
    Definition bmat8_impl.hpp:508
    +
    std::string to_string(HPCombi::epu8 const &a)
    Definition epu8_impl.hpp:550
    +
    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept
    Definition epu8_impl.hpp:557
    +
    size_t operator()(HPCombi::epu8 a) const noexcept
    Definition epu8_impl.hpp:571
    +
    size_t operator()(const HPCombi::epu8 &v1, const HPCombi::epu8 &v2) const noexcept
    Definition epu8_impl.hpp:590
    +
    bool operator()(const HPCombi::epu8 &lhs, const HPCombi::epu8 &rhs) const noexcept
    Definition epu8_impl.hpp:564
    diff --git a/examples.html b/examples.html index ec5941f..891857a 100644 --- a/examples.html +++ b/examples.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/files.html b/files.html index a6113c3..b54a8d6 100644 --- a/files.html +++ b/files.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -81,33 +81,34 @@
    Here is a list of all files with brief descriptions:
    diff --git a/functions.html b/functions.html index 48d8759..57e1926 100644 --- a/functions.html +++ b/functions.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_b.html b/functions_b.html index eadc8c1..550ea6c 100644 --- a/functions_b.html +++ b/functions_b.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_c.html b/functions_c.html index 2e06755..913f152 100644 --- a/functions_c.html +++ b/functions_c.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_d.html b/functions_d.html index a126205..303ef3d 100644 --- a/functions_d.html +++ b/functions_d.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_e.html b/functions_e.html index 3b06dd6..9aac92a 100644 --- a/functions_e.html +++ b/functions_e.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_f.html b/functions_f.html index ae04816..59adb3e 100644 --- a/functions_f.html +++ b/functions_f.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func.html b/functions_func.html index ec1a8e7..ed9be1c 100644 --- a/functions_func.html +++ b/functions_func.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_b.html b/functions_func_b.html index a182050..7aa9dd9 100644 --- a/functions_func_b.html +++ b/functions_func_b.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_c.html b/functions_func_c.html index eaf8b9e..a092efa 100644 --- a/functions_func_c.html +++ b/functions_func_c.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_d.html b/functions_func_d.html index 9f878d4..51002f4 100644 --- a/functions_func_d.html +++ b/functions_func_d.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_e.html b/functions_func_e.html index 7bfc0b6..b2972e6 100644 --- a/functions_func_e.html +++ b/functions_func_e.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_f.html b/functions_func_f.html index 554f153..4a5dd05 100644 --- a/functions_func_f.html +++ b/functions_func_f.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_h.html b/functions_func_h.html index f265178..b0b8a85 100644 --- a/functions_func_h.html +++ b/functions_func_h.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_i.html b/functions_func_i.html index 3df4a83..a387397 100644 --- a/functions_func_i.html +++ b/functions_func_i.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_l.html b/functions_func_l.html index 245cf19..1945684 100644 --- a/functions_func_l.html +++ b/functions_func_l.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_m.html b/functions_func_m.html index 9d1f426..872f63a 100644 --- a/functions_func_m.html +++ b/functions_func_m.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_n.html b/functions_func_n.html index cdd2c10..26685a6 100644 --- a/functions_func_n.html +++ b/functions_func_n.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_o.html b/functions_func_o.html index 2b7fe6e..9fc0f44 100644 --- a/functions_func_o.html +++ b/functions_func_o.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_p.html b/functions_func_p.html index c36ab0d..7371ba4 100644 --- a/functions_func_p.html +++ b/functions_func_p.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_r.html b/functions_func_r.html index f342c06..92cee3e 100644 --- a/functions_func_r.html +++ b/functions_func_r.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_s.html b/functions_func_s.html index 24c9601..994e2a8 100644 --- a/functions_func_s.html +++ b/functions_func_s.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_t.html b/functions_func_t.html index a2272bb..e9f4b00 100644 --- a/functions_func_t.html +++ b/functions_func_t.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_u.html b/functions_func_u.html index 52173f8..6d5edb8 100644 --- a/functions_func_u.html +++ b/functions_func_u.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_v.html b/functions_func_v.html index feb414a..26a6940 100644 --- a/functions_func_v.html +++ b/functions_func_v.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_w.html b/functions_func_w.html index 303117d..a3b121e 100644 --- a/functions_func_w.html +++ b/functions_func_w.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_func_~.html b/functions_func_~.html index 8349293..d4ba401 100644 --- a/functions_func_~.html +++ b/functions_func_~.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_h.html b/functions_h.html index 2dcc494..ece1c2a 100644 --- a/functions_h.html +++ b/functions_h.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_i.html b/functions_i.html index 2d68ef6..b69a57a 100644 --- a/functions_i.html +++ b/functions_i.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_l.html b/functions_l.html index 87cab48..bc5276a 100644 --- a/functions_l.html +++ b/functions_l.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_m.html b/functions_m.html index 4bcc10f..8c29c8d 100644 --- a/functions_m.html +++ b/functions_m.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_n.html b/functions_n.html index 3d85a34..7df62bc 100644 --- a/functions_n.html +++ b/functions_n.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_o.html b/functions_o.html index e0f37d9..0a4bbb9 100644 --- a/functions_o.html +++ b/functions_o.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_p.html b/functions_p.html index 86996ed..7da2d13 100644 --- a/functions_p.html +++ b/functions_p.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_r.html b/functions_r.html index 3cc8562..7a2acee 100644 --- a/functions_r.html +++ b/functions_r.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_rela.html b/functions_rela.html index ce18b76..d345ac4 100644 --- a/functions_rela.html +++ b/functions_rela.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_s.html b/functions_s.html index 44cde2f..eb75c0c 100644 --- a/functions_s.html +++ b/functions_s.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_t.html b/functions_t.html index 92cb8de..e1b43fb 100644 --- a/functions_t.html +++ b/functions_t.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_type.html b/functions_type.html index fba3751..ef31f9b 100644 --- a/functions_type.html +++ b/functions_type.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_u.html b/functions_u.html index dccd24b..0592ae3 100644 --- a/functions_u.html +++ b/functions_u.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_v.html b/functions_v.html index e64ec35..bda5dd4 100644 --- a/functions_v.html +++ b/functions_v.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_vars.html b/functions_vars.html index 9318f44..641a914 100644 --- a/functions_vars.html +++ b/functions_vars.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_w.html b/functions_w.html index b4dcf10..3e478b0 100644 --- a/functions_w.html +++ b/functions_w.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/functions_~.html b/functions_~.html index 696c6a5..9b6e5e7 100644 --- a/functions_~.html +++ b/functions_~.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/globals.html b/globals.html index b3c15d2..8537adb 100644 --- a/globals.html +++ b/globals.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/globals_defs.html b/globals_defs.html index 0032c00..0107ca1 100644 --- a/globals_defs.html +++ b/globals_defs.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/globals_func.html b/globals_func.html index ebaf8ad..ecde83a 100644 --- a/globals_func.html +++ b/globals_func.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/globals_vars.html b/globals_vars.html index 0e63f45..e5d5e06 100644 --- a/globals_vars.html +++ b/globals_vars.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/hierarchy.html b/hierarchy.html index 59be069..329356b 100644 --- a/hierarchy.html +++ b/hierarchy.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/hpcombi_8hpp.html b/hpcombi_8hpp.html index 4fd897e..42fdef5 100644 --- a/hpcombi_8hpp.html +++ b/hpcombi_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/hpcombi.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/hpcombi.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/hpcombi_8hpp_source.html b/hpcombi_8hpp_source.html index a7c501a..025e0d3 100644 --- a/hpcombi_8hpp_source.html +++ b/hpcombi_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/hpcombi.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/hpcombi.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,40 +77,45 @@
    hpcombi.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16#ifndef HPCOMBI_HPCOMBI_HPP_INCLUDED
    -
    17#define HPCOMBI_HPCOMBI_HPP_INCLUDED
    -
    18
    -
    19#include "bmat8.hpp"
    -
    20#include "debug.hpp"
    -
    21#include "epu8.hpp"
    -
    22#include "perm16.hpp"
    -
    23#include "perm_generic.hpp"
    -
    24#include "power.hpp"
    -
    25#include "vect16.hpp"
    -
    26#include "vect_generic.hpp"
    -
    27
    -
    28#endif // HPCOMBI_HPCOMBI_HPP_INCLUDED
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20#ifndef HPCOMBI_HPCOMBI_HPP_
    +
    21#define HPCOMBI_HPCOMBI_HPP_
    +
    22
    +
    23#include "bmat8.hpp"
    +
    24#include "debug.hpp"
    +
    25#include "epu8.hpp"
    +
    26#include "perm16.hpp"
    +
    27#include "perm_generic.hpp"
    +
    28#include "power.hpp"
    +
    29#include "vect16.hpp"
    +
    30#include "vect_generic.hpp"
    +
    31
    +
    32#endif // HPCOMBI_HPCOMBI_HPP_
    diff --git a/image_8cpp.html b/image_8cpp.html index ae2cfb4..b0fe8f8 100644 --- a/image_8cpp.html +++ b/image_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/image.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/image.cpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/index.html b/index.html index 9e98d5e..e5e8008 100644 --- a/index.html +++ b/index.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -80,9 +80,9 @@
    -

    High Performance Combinatorics in C++ using vector instructions v0.0.8

    +

    High Performance Combinatorics in C++ using vector instructions v1.0.1

    HPCombi is a C++17 header-only library using the SSE and AVX instruction sets, and some equivalents, for very fast manipulation of combinatorial objects such as transformations, permutations, and boolean matrices of small size. The goal of this project is to implement various new algorithms and benchmark them on various compilers and architectures.

    -

    HPCombi was initially designed using the SSE and AVX instruction sets, and did not work on machines without these instructions (such as ARM). From v1.0.0 HPCombi supports processors with other instruction sets also, via simd-everywhere. It might be the case that the greatest performance gains are achieved on processors supporting the SSE and AVX instruction sets, but the HPCombi benchmarks indicate that there are also still signficant gains on other processors too.

    +

    HPCombi was initially designed using the SSE and AVX instruction sets, and did not work on machines without these instructions (such as ARM). From v1.0.1 HPCombi supports processors with other instruction sets also, via SIMD Everywhere. It might be the case that the greatest performance gains are achieved on processors supporting the SSE and AVX instruction sets, but the HPCombi benchmarks indicate that there are also still significant gains on other processors too.

    Authors

    diff --git a/namespaceHPCombi.html b/namespaceHPCombi.html index 79f3fa1..cf87e6d 100644 --- a/namespaceHPCombi.html +++ b/namespaceHPCombi.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -651,8 +651,8 @@

    Returns
    the evaluation, that is the HPCombi::epu8 r such that r[i] is the number of occurrences of i in the input v
    Example:
    eval16(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    -
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:406
    -
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:41
    +
    epu8 eval16(epu8 v) noexcept
    Evaluation of a HPCombi::epu8.
    Definition epu8.hpp:408
    +
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:45
    Returns { 1, 1, 2, 1, 1, 3, 1, 0, 0, 0, 0, 1, 2, 1, 1, 1}
    Warning
    The entries larger than 15 are ignored
    @@ -1149,7 +1149,7 @@

    HPCombi::epu8.

    Returns
    the horizontal max of the input
    Example:
    horiz_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2, 0,12, 0, 0, 0});
    -
    uint8_t horiz_max(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:278
    +
    uint8_t horiz_max(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:280
    Returns 12

    @@ -1309,7 +1309,7 @@

    HPCombi::epu8.

    Returns
    the horizontal min of the input
    Example:
    horiz_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 1, 3, 2, 2,12, 3, 4, 4});
    -
    uint8_t horiz_min(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:341
    +
    uint8_t horiz_min(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:343
    Returns 1

    @@ -1469,7 +1469,7 @@

    HPCombi::epu8.

    Returns
    the horizontal sum of the input
    Example:
    horiz_sum(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    -
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:215
    +
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:217
    Returns 110
    Warning
    The result is supposed to fit in a uint8_t
    @@ -2678,7 +2678,7 @@

    HPCombi::epu8.

    Returns
    the partials max of the input
    Example:
    partial_max(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    -
    epu8 partial_max(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:307
    +
    epu8 partial_max(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:309
    Returns { 5, 5, 5, 5, 5, 6,12,12,12,12,12,12,12,13,14,15}
    @@ -2806,7 +2806,7 @@

    HPCombi::epu8.

    Returns
    the partials min of the input
    Example:
    partial_min(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    -
    epu8 partial_min(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:370
    +
    epu8 partial_min(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:372
    Returns { 5, 5, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0}
    @@ -2934,7 +2934,7 @@

    HPCombi::epu8.

    Returns
    the partial sums of the input
    Example:
    partial_sums(epu8 { 5, 5, 2, 5, 1, 6,12, 4, 0, 3, 2,11,12,13,14,15});
    -
    epu8 partial_sums(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:244
    +
    epu8 partial_sums(epu8 v) noexcept
    Horizontal partial sum of a HPCombi::epu8.
    Definition epu8.hpp:246
    Returns { 5,10,12,17,18,24,36,40,40,43,45,56,68,81,95,110}
    @@ -3200,8 +3200,6 @@

    Permuting a HPCombi::epu8.

    -

    Implementation part for inline functions //////////////////////////////////.

    -

    Permuting a HPCombi::epu8

    @@ -3736,14 +3734,14 @@

    -Initial value:
    {
    -
    {
    -
    {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0},
    -
    {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1},
    -
    {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2},
    -
    {FF,FF,FF,FF,FF,FF,FF,FF, 3, 3, 3, 3, 3, 3, 3, 3}
    +Initial value:
    {{
    +
    +
    {FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0,FF, 0},
    +
    {FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1,FF,FF, 1, 1},
    +
    {FF,FF,FF,FF, 2, 2, 2, 2,FF,FF,FF,FF, 2, 2, 2, 2},
    +
    {FF,FF,FF,FF,FF,FF,FF,FF, 3, 3, 3, 3, 3, 3, 3, 3}
    }}
    -
    #define FF
    Definition bmat8_impl.hpp:240
    +
    #define FF
    Definition bmat8_impl.hpp:246
    @@ -3894,8 +3892,7 @@

    }}

    A duplicated 8-way sorting network.

    -

    Batcher odd-Even mergesort sorting network used by the sorted function

    -

    odd–even mergesort"

    +

    Batcher odd-Even mergesort sorting network used by the sorted function

    @@ -3927,7 +3924,7 @@

    epu8 { FF, FF, FF, FF, FF, FF, FF, FF, 7, 7, 7, 7, 7, 7, 7, 7}
    }}
    -
    #define FF
    Definition epu8_impl.hpp:306
    +
    #define FF
    Definition epu8_impl.hpp:314

    Permutation Round for partial and horizontal sums.

    diff --git a/namespaceHPCombi_1_1detail.html b/namespaceHPCombi_1_1detail.html index 17df36a..ae4623d 100644 --- a/namespaceHPCombi_1_1detail.html +++ b/namespaceHPCombi_1_1detail.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespaceHPCombi_1_1power__helper.html b/namespaceHPCombi_1_1power__helper.html index 6b7ccc2..c866ac2 100644 --- a/namespaceHPCombi_1_1power__helper.html +++ b/namespaceHPCombi_1_1power__helper.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacelibsemigroups.html b/namespacelibsemigroups.html index fb1fbdb..4257962 100644 --- a/namespacelibsemigroups.html +++ b/namespacelibsemigroups.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacemembers.html b/namespacemembers.html index 6ea9423..6a12e69 100644 --- a/namespacemembers.html +++ b/namespacemembers.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacemembers_func.html b/namespacemembers_func.html index f1537af..d8c1181 100644 --- a/namespacemembers_func.html +++ b/namespacemembers_func.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacemembers_type.html b/namespacemembers_type.html index 91e189b..80084c4 100644 --- a/namespacemembers_type.html +++ b/namespacemembers_type.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacemembers_vars.html b/namespacemembers_vars.html index 0335cf4..aff5b8a 100644 --- a/namespacemembers_vars.html +++ b/namespacemembers_vars.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespaces.html b/namespaces.html index 9f84c02..4f1ba53 100644 --- a/namespaces.html +++ b/namespaces.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/namespacestd.html b/namespacestd.html index 781c90c..b541e7b 100644 --- a/namespacestd.html +++ b/namespacestd.html @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    diff --git a/pattern_8cpp.html b/pattern_8cpp.html index a45f8ad..e9531fa 100644 --- a/pattern_8cpp.html +++ b/pattern_8cpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/examples/pattern.cpp File Reference +HPCombi: /Users/jdm/hpcombi/examples/pattern.cpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/perm16_8hpp.html b/perm16_8hpp.html index 91401dc..3355edb 100644 --- a/perm16_8hpp.html +++ b/perm16_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm16.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm16.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/perm16_8hpp_source.html b/perm16_8hpp_source.html index 09a97d5..2663d27 100644 --- a/perm16_8hpp_source.html +++ b/perm16_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm16.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm16.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,351 +77,356 @@
    perm16.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16#ifndef HPCOMBI_PERM16_HPP_INCLUDED
    -
    17#define HPCOMBI_PERM16_HPP_INCLUDED
    -
    18
    -
    19#include <cstddef> // for size_t
    -
    20#include <cstdint> // for uint8_t, uint64_t, uint32_t
    -
    21#include <initializer_list> // for initializer_list
    -
    22#include <memory> // for hash
    -
    23#include <type_traits> // for is_trivial
    -
    24#include <vector> // for vector
    -
    25
    -
    26#include "epu8.hpp" // for epu8, permuted, etc
    -
    27#include "power.hpp" // for pow
    -
    28#include "vect16.hpp" // for hash, is_partial_permutation
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20#ifndef HPCOMBI_PERM16_HPP_
    +
    21#define HPCOMBI_PERM16_HPP_
    +
    22
    +
    23#include <cstddef> // for size_t
    +
    24#include <cstdint> // for uint8_t, uint64_t, uint32_t
    +
    25#include <initializer_list> // for initializer_list
    +
    26#include <memory> // for hash
    +
    27#include <type_traits> // for is_trivial
    +
    28#include <vector> // for vector
    29
    -
    30#include "simde/x86/sse4.1.h"
    -
    31#include "simde/x86/sse4.2.h"
    -
    32
    -
    33namespace HPCombi {
    -
    34
    -
    35// Forward declaration
    -
    36struct Perm16;
    -
    37struct PTransf16;
    -
    38struct Transf16;
    -
    39
    -
    43struct alignas(16) PTransf16 : public Vect16 {
    -
    44 static constexpr size_t size() { return 16; }
    -
    45
    - -
    47 using array = typename decltype(Epu8)::array;
    -
    48
    -
    49 PTransf16() = default;
    -
    50
    -
    51 constexpr PTransf16(const vect v) : Vect16(v) {}
    -
    52 constexpr PTransf16(const epu8 x) : Vect16(x) {}
    -
    53 PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    -
    54 size_t = 0 /* unused */);
    -
    55 PTransf16(std::initializer_list<uint8_t> il);
    -
    56
    -
    58 bool validate(size_t k = 16) const {
    - -
    60 }
    -
    61
    -
    63 static constexpr PTransf16 one() { return Epu8.id(); }
    -
    65 PTransf16 operator*(const PTransf16 &p) const {
    -
    66 return HPCombi::permuted(v, p.v) | (p.v == Epu8(0xFF));
    -
    67 }
    -
    68
    -
    70 epu8 image_mask_cmpestrm(bool complement = false) const;
    -
    72 epu8 image_mask_ref(bool complement = false) const;
    -
    73 epu8 image_mask(bool complement = false) const {
    -
    74#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    75 return image_mask_cmpestrm(complement);
    -
    76#else
    -
    77 return image_mask_ref(complement);
    -
    78#endif
    -
    79 }
    -
    81 uint32_t image_bitset(bool complement = false) const;
    -
    83 epu8 domain_mask(bool complement = false) const;
    -
    85 uint32_t domain_bitset(bool complement = false) const;
    -
    86
    -
    88 PTransf16 right_one() const;
    -
    90 PTransf16 left_one() const;
    -
    91
    -
    93 uint32_t rank_ref() const;
    -
    95 uint32_t rank() const;
    -
    97 uint32_t rank_cmpestrm() const;
    -
    98
    -
    100 epu8 fix_points_mask(bool complement = false) const;
    -
    102 uint32_t fix_points_bitset(bool complement = false) const;
    -
    104 uint8_t smallest_fix_point() const;
    -
    106 uint8_t smallest_moved_point() const;
    -
    108 uint8_t largest_fix_point() const;
    -
    110 uint8_t largest_moved_point() const;
    -
    112 uint8_t nb_fix_points() const;
    -
    113};
    -
    114
    -
    118struct Transf16 : public PTransf16 {
    -
    119 Transf16() = default;
    -
    120 constexpr Transf16(const Transf16 &v) = default;
    -
    121 /* implicit */ constexpr Transf16(const vect v) : PTransf16(v) {} // NOLINT
    -
    122 /* implicit */ constexpr Transf16(const epu8 x) : PTransf16(x) {} // NOLINT
    -
    123 Transf16(std::initializer_list<uint8_t> il) : PTransf16(il) {}
    -
    124 Transf16 &operator=(const Transf16 &) = default;
    -
    125
    -
    127 bool validate(size_t k = 16) const {
    -
    128 return HPCombi::is_transformation(v, k);
    -
    129 }
    -
    130
    -
    132 static constexpr Transf16 one() { return Epu8.id(); }
    -
    134 Transf16 operator*(const Transf16 &p) const {
    -
    135 return HPCombi::permuted(v, p.v);
    -
    136 }
    -
    137
    -
    139 explicit Transf16(uint64_t compressed);
    -
    141 explicit operator uint64_t() const;
    -
    142};
    -
    143
    -
    145struct PPerm16 : public PTransf16 {
    -
    146 PPerm16() = default;
    -
    147 constexpr PPerm16(const PPerm16 &v) = default;
    -
    148 /* implicit */ constexpr PPerm16(const vect v) : PTransf16(v) {} // NOLINT
    -
    149 /* implicit */ constexpr PPerm16(const epu8 x) : PTransf16(x) {} // NOLINT
    -
    150 PPerm16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    -
    151 size_t = 0 /* unused */)
    -
    152 : PTransf16(dom, rng) {}
    -
    153 PPerm16(std::initializer_list<uint8_t> il) : PTransf16(il) {}
    -
    154 PPerm16 &operator=(const PPerm16 &) = default;
    -
    155
    -
    157 bool validate(size_t k = 16) const {
    - -
    159 }
    -
    160
    -
    162 static constexpr PPerm16 one() { return Epu8.id(); }
    -
    164 PPerm16 operator*(const PPerm16 &p) const {
    -
    165 return this->PTransf16::operator*(p);
    -
    166 }
    -
    167
    -
    187 PPerm16 inverse_ref() const;
    -
    188#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    194 PPerm16 inverse_find() const;
    -
    195#endif
    -
    196
    - - -
    199};
    +
    30#include "epu8.hpp" // for epu8, permuted, etc
    +
    31#include "power.hpp" // for pow
    +
    32#include "vect16.hpp" // for hash, is_partial_permutation
    +
    33
    +
    34#include "simde/x86/sse4.1.h"
    +
    35#include "simde/x86/sse4.2.h"
    +
    36
    +
    37namespace HPCombi {
    +
    38
    +
    39// Forward declaration
    +
    40struct Perm16;
    +
    41struct PTransf16;
    +
    42struct Transf16;
    +
    43
    +
    47struct alignas(16) PTransf16 : public Vect16 {
    +
    48 static constexpr size_t size() { return 16; }
    +
    49
    + +
    51 using array = typename decltype(Epu8)::array;
    +
    52
    +
    53 PTransf16() = default;
    +
    54
    +
    55 constexpr PTransf16(const vect v) : Vect16(v) {}
    +
    56 constexpr PTransf16(const epu8 x) : Vect16(x) {}
    +
    57 PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    +
    58 size_t = 0 /* unused */);
    +
    59 PTransf16(std::initializer_list<uint8_t> il);
    +
    60
    +
    62 bool validate(size_t k = 16) const {
    + +
    64 }
    +
    65
    +
    67 static constexpr PTransf16 one() { return Epu8.id(); }
    +
    69 PTransf16 operator*(const PTransf16 &p) const {
    +
    70 return HPCombi::permuted(v, p.v) | (p.v == Epu8(0xFF));
    +
    71 }
    +
    72
    +
    74 epu8 image_mask_cmpestrm(bool complement = false) const;
    +
    76 epu8 image_mask_ref(bool complement = false) const;
    +
    77 epu8 image_mask(bool complement = false) const {
    +
    78#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    79 return image_mask_cmpestrm(complement);
    +
    80#else
    +
    81 return image_mask_ref(complement);
    +
    82#endif
    +
    83 }
    +
    85 uint32_t image_bitset(bool complement = false) const;
    +
    87 epu8 domain_mask(bool complement = false) const;
    +
    89 uint32_t domain_bitset(bool complement = false) const;
    +
    90
    +
    92 PTransf16 right_one() const;
    +
    94 PTransf16 left_one() const;
    +
    95
    +
    97 uint32_t rank_ref() const;
    +
    99 uint32_t rank() const;
    +
    101 uint32_t rank_cmpestrm() const;
    +
    102
    +
    104 epu8 fix_points_mask(bool complement = false) const;
    +
    106 uint32_t fix_points_bitset(bool complement = false) const;
    +
    108 uint8_t smallest_fix_point() const;
    +
    110 uint8_t smallest_moved_point() const;
    +
    112 uint8_t largest_fix_point() const;
    +
    114 uint8_t largest_moved_point() const;
    +
    116 uint8_t nb_fix_points() const;
    +
    117};
    +
    118
    +
    122struct Transf16 : public PTransf16 {
    +
    123 Transf16() = default;
    +
    124 constexpr Transf16(const Transf16 &v) = default;
    +
    125 /* implicit */ constexpr Transf16(const vect v) : PTransf16(v) {} // NOLINT
    +
    126 /* implicit */ constexpr Transf16(const epu8 x) : PTransf16(x) {} // NOLINT
    +
    127 Transf16(std::initializer_list<uint8_t> il) : PTransf16(il) {}
    +
    128 Transf16 &operator=(const Transf16 &) = default;
    +
    129
    +
    131 bool validate(size_t k = 16) const {
    +
    132 return HPCombi::is_transformation(v, k);
    +
    133 }
    +
    134
    +
    136 static constexpr Transf16 one() { return Epu8.id(); }
    +
    138 Transf16 operator*(const Transf16 &p) const {
    +
    139 return HPCombi::permuted(v, p.v);
    +
    140 }
    +
    141
    +
    143 explicit Transf16(uint64_t compressed);
    +
    145 explicit operator uint64_t() const;
    +
    146};
    +
    147
    +
    149struct PPerm16 : public PTransf16 {
    +
    150 PPerm16() = default;
    +
    151 constexpr PPerm16(const PPerm16 &v) = default;
    +
    152 /* implicit */ constexpr PPerm16(const vect v) : PTransf16(v) {} // NOLINT
    +
    153 /* implicit */ constexpr PPerm16(const epu8 x) : PTransf16(x) {} // NOLINT
    +
    154 PPerm16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    +
    155 size_t = 0 /* unused */)
    +
    156 : PTransf16(dom, rng) {}
    +
    157 PPerm16(std::initializer_list<uint8_t> il) : PTransf16(il) {}
    +
    158 PPerm16 &operator=(const PPerm16 &) = default;
    +
    159
    +
    161 bool validate(size_t k = 16) const {
    + +
    163 }
    +
    164
    +
    166 static constexpr PPerm16 one() { return Epu8.id(); }
    +
    168 PPerm16 operator*(const PPerm16 &p) const {
    +
    169 return this->PTransf16::operator*(p);
    +
    170 }
    +
    171
    +
    191 PPerm16 inverse_ref() const;
    +
    192#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    198 PPerm16 inverse_find() const;
    +
    199#endif
    200
    -
    204struct Perm16 : public Transf16 /* public PPerm : diamond problem */ {
    -
    205 Perm16() = default;
    -
    206 constexpr Perm16(const Perm16 &) = default;
    -
    207 /* implicit */ constexpr Perm16(const vect v) : Transf16(v) {} // NOLINT
    -
    208 /* implicit */ constexpr Perm16(const epu8 x) : Transf16(x) {} // NOLINT
    -
    209 Perm16 &operator=(const Perm16 &) = default;
    -
    210 Perm16(std::initializer_list<uint8_t> il) : Transf16(il) {}
    -
    211
    -
    213 bool validate(size_t k = 16) const { return HPCombi::is_permutation(v, k); }
    -
    214
    -
    215 // It's not possible to have a static constexpr member of same type as class
    -
    216 // being defined (see https://stackoverflow.com/questions/11928089/)
    -
    217 // therefore we chose to have functions.
    -
    219 static constexpr Perm16 one() { return Epu8.id(); }
    -
    221 Perm16 operator*(const Perm16 &p) const {
    -
    222 return HPCombi::permuted(v, p.v);
    -
    223 }
    -
    224
    -
    226 explicit Perm16(uint64_t compressed) : Transf16(compressed) {}
    -
    227
    -
    244 Perm16 inverse_ref() const;
    -
    249 Perm16 inverse_arr() const;
    -
    256 Perm16 inverse_sort() const;
    -
    262 Perm16 inverse_find() const { return permutation_of(v, one()); }
    -
    269 Perm16 inverse_pow() const;
    -
    275 Perm16 inverse_cycl() const;
    -
    279 Perm16 inverse() const { return inverse_cycl(); }
    -
    280
    -
    282 static Perm16 elementary_transposition(uint64_t i);
    -
    284 static Perm16 random(uint64_t n = 16);
    -
    288 static Perm16 unrankSJT(int n, int r);
    -
    289
    -
    306 epu8 lehmer_ref() const;
    -
    311 epu8 lehmer_arr() const;
    -
    316 epu8 lehmer() const;
    -
    317
    -
    333 uint8_t length_ref() const;
    -
    339 uint8_t length_arr() const;
    -
    344 uint8_t length() const;
    -
    345
    -
    361 uint8_t nb_descents_ref() const;
    -
    366 uint8_t nb_descents() const;
    -
    367
    -
    382 epu8 cycles_partition() const;
    -
    383
    -
    399 uint8_t nb_cycles_ref() const;
    -
    404 uint8_t nb_cycles_unroll() const;
    -
    408 uint8_t nb_cycles() const { return nb_cycles_unroll(); }
    -
    409
    -
    423 bool left_weak_leq_ref(Perm16 other) const;
    -
    428 bool left_weak_leq_length(Perm16 other) const;
    -
    433 bool left_weak_leq(Perm16 other) const;
    -
    434};
    -
    435
    + + +
    203};
    +
    204
    +
    208struct Perm16 : public Transf16 /* public PPerm : diamond problem */ {
    +
    209 Perm16() = default;
    +
    210 constexpr Perm16(const Perm16 &) = default;
    +
    211 /* implicit */ constexpr Perm16(const vect v) : Transf16(v) {} // NOLINT
    +
    212 /* implicit */ constexpr Perm16(const epu8 x) : Transf16(x) {} // NOLINT
    +
    213 Perm16 &operator=(const Perm16 &) = default;
    +
    214 Perm16(std::initializer_list<uint8_t> il) : Transf16(il) {}
    +
    215
    +
    217 bool validate(size_t k = 16) const { return HPCombi::is_permutation(v, k); }
    +
    218
    +
    219 // It's not possible to have a static constexpr member of same type as class
    +
    220 // being defined (see https://stackoverflow.com/questions/11928089/)
    +
    221 // therefore we chose to have functions.
    +
    223 static constexpr Perm16 one() { return Epu8.id(); }
    +
    225 Perm16 operator*(const Perm16 &p) const {
    +
    226 return HPCombi::permuted(v, p.v);
    +
    227 }
    +
    228
    +
    230 explicit Perm16(uint64_t compressed) : Transf16(compressed) {}
    +
    231
    +
    248 Perm16 inverse_ref() const;
    +
    253 Perm16 inverse_arr() const;
    +
    260 Perm16 inverse_sort() const;
    +
    266 Perm16 inverse_find() const { return permutation_of(v, one()); }
    +
    273 Perm16 inverse_pow() const;
    +
    279 Perm16 inverse_cycl() const;
    +
    283 Perm16 inverse() const { return inverse_cycl(); }
    +
    284
    +
    286 static Perm16 elementary_transposition(uint64_t i);
    +
    288 static Perm16 random(uint64_t n = 16);
    +
    292 static Perm16 unrankSJT(int n, int r);
    +
    293
    +
    310 epu8 lehmer_ref() const;
    +
    315 epu8 lehmer_arr() const;
    +
    320 epu8 lehmer() const;
    +
    321
    +
    337 uint8_t length_ref() const;
    +
    343 uint8_t length_arr() const;
    +
    348 uint8_t length() const;
    +
    349
    +
    365 uint8_t nb_descents_ref() const;
    +
    370 uint8_t nb_descents() const;
    +
    371
    +
    386 epu8 cycles_partition() const;
    +
    387
    +
    403 uint8_t nb_cycles_ref() const;
    +
    408 uint8_t nb_cycles_unroll() const;
    +
    412 uint8_t nb_cycles() const { return nb_cycles_unroll(); }
    +
    413
    +
    427 bool left_weak_leq_ref(Perm16 other) const;
    +
    432 bool left_weak_leq_length(Perm16 other) const;
    +
    437 bool left_weak_leq(Perm16 other) const;
    +
    438};
    439
    -
    440static_assert(sizeof(epu8) == sizeof(Perm16),
    -
    441 "epu8 and Perm16 have a different memory layout !");
    -
    442static_assert(std::is_trivial<epu8>(), "epu8 is not a trivial class !");
    -
    443static_assert(std::is_trivial<Perm16>(), "Perm16 is not a trivial class !");
    -
    444
    -
    445} // namespace HPCombi
    -
    446
    -
    447#include "perm16_impl.hpp"
    +
    443
    +
    444static_assert(sizeof(epu8) == sizeof(Perm16),
    +
    445 "epu8 and Perm16 have a different memory layout !");
    +
    446static_assert(std::is_trivial<epu8>(), "epu8 is not a trivial class !");
    +
    447static_assert(std::is_trivial<Perm16>(), "Perm16 is not a trivial class !");
    448
    -
    449namespace std {
    +
    449} // namespace HPCombi
    450
    -
    451template <> struct hash<HPCombi::PTransf16> {
    -
    453 size_t operator()(const HPCombi::PTransf16 &ar) const {
    -
    454 return std::hash<HPCombi::epu8>{}(ar.v);
    -
    455 }
    -
    456};
    -
    457
    -
    458template <> struct hash<HPCombi::Transf16> {
    -
    460 size_t operator()(const HPCombi::Transf16 &ar) const {
    -
    461 return uint64_t(ar);
    -
    462 }
    -
    463};
    -
    464
    -
    465template <> struct hash<HPCombi::PPerm16> {
    -
    467 size_t operator()(const HPCombi::PPerm16 &ar) const {
    -
    468 return std::hash<HPCombi::epu8>{}(ar.v);
    -
    469 }
    -
    470};
    -
    471
    -
    472template <> struct hash<HPCombi::Perm16> {
    -
    474 size_t operator()(const HPCombi::Perm16 &ar) const { return uint64_t(ar); }
    -
    475};
    -
    476
    -
    477} // namespace std
    -
    478
    -
    479#endif // HPCOMBI_PERM16_HPP_INCLUDED
    +
    451#include "perm16_impl.hpp"
    +
    452
    +
    453namespace std {
    +
    454
    +
    455template <> struct hash<HPCombi::PTransf16> {
    +
    457 size_t operator()(const HPCombi::PTransf16 &ar) const {
    +
    458 return std::hash<HPCombi::epu8>{}(ar.v);
    +
    459 }
    +
    460};
    +
    461
    +
    462template <> struct hash<HPCombi::Transf16> {
    +
    464 size_t operator()(const HPCombi::Transf16 &ar) const {
    +
    465 return uint64_t(ar);
    +
    466 }
    +
    467};
    +
    468
    +
    469template <> struct hash<HPCombi::PPerm16> {
    +
    471 size_t operator()(const HPCombi::PPerm16 &ar) const {
    +
    472 return std::hash<HPCombi::epu8>{}(ar.v);
    +
    473 }
    +
    474};
    +
    475
    +
    476template <> struct hash<HPCombi::Perm16> {
    +
    478 size_t operator()(const HPCombi::Perm16 &ar) const { return uint64_t(ar); }
    +
    479};
    +
    480
    +
    481} // namespace std
    +
    482
    +
    483#endif // HPCOMBI_PERM16_HPP_
    -
    Perm16 Perm16
    Definition perm16_impl.hpp:234
    -
    Definition bmat8.hpp:37
    -
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:70
    -
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:295
    -
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:522
    -
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:491
    -
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:50
    -
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:41
    -
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:485
    -
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:477
    -
    Definition bmat8.hpp:360
    +
    Perm16 Perm16
    Definition perm16_impl.hpp:240
    +
    Definition bmat8.hpp:41
    +
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:72
    +
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:303
    +
    bool is_permutation(epu8 v, const size_t k=16) noexcept
    Definition epu8_impl.hpp:530
    +
    bool is_partial_permutation(epu8 v, const size_t k=16) noexcept
    Test for partial permutations.
    Definition epu8_impl.hpp:499
    +
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:53
    +
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:45
    +
    bool is_transformation(epu8 v, const size_t k=16) noexcept
    Test for transformation.
    Definition epu8_impl.hpp:493
    +
    bool is_partial_transformation(epu8 v, const size_t k=16) noexcept
    Test for partial transformation.
    Definition epu8_impl.hpp:485
    +
    Definition bmat8.hpp:364
    Generic compile time power.
    -
    Partial permutation of .
    Definition perm16.hpp:145
    -
    PPerm16 right_one() const
    Definition perm16.hpp:197
    -
    PPerm16 inverse_ref() const
    The inverse of a partial permutation.
    Definition perm16_impl.hpp:140
    -
    PPerm16 operator*(const PPerm16 &p) const
    The product of two partial perrmutations.
    Definition perm16.hpp:164
    +
    Partial permutation of .
    Definition perm16.hpp:149
    +
    PPerm16 right_one() const
    Definition perm16.hpp:201
    +
    PPerm16 inverse_ref() const
    The inverse of a partial permutation.
    Definition perm16_impl.hpp:146
    +
    PPerm16 operator*(const PPerm16 &p) const
    The product of two partial perrmutations.
    Definition perm16.hpp:168
    PPerm16()=default
    constexpr PPerm16(const PPerm16 &v)=default
    PPerm16 & operator=(const PPerm16 &)=default
    -
    constexpr PPerm16(const vect v)
    Definition perm16.hpp:148
    -
    PPerm16(std::vector< uint8_t > dom, std::vector< uint8_t > rng, size_t=0)
    Definition perm16.hpp:150
    -
    static constexpr PPerm16 one()
    The identity partial permutations.
    Definition perm16.hpp:162
    -
    PPerm16 left_one() const
    Definition perm16.hpp:198
    -
    PPerm16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:153
    -
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:157
    -
    constexpr PPerm16(const epu8 x)
    Definition perm16.hpp:149
    -
    Partial transformation of .
    Definition perm16.hpp:43
    -
    uint8_t nb_fix_points() const
    Returns the number of fix points of *this.
    Definition perm16_impl.hpp:115
    -
    constexpr PTransf16(const epu8 x)
    Definition perm16.hpp:52
    -
    constexpr PTransf16(const vect v)
    Definition perm16.hpp:51
    -
    uint32_t fix_points_bitset(bool complement=false) const
    Returns a bit mask for the fix point of *this.
    Definition perm16_impl.hpp:93
    -
    static constexpr size_t size()
    Definition perm16.hpp:44
    -
    PTransf16 operator*(const PTransf16 &p) const
    The product of two partial transformations.
    Definition perm16.hpp:65
    -
    static constexpr PTransf16 one()
    The identity partial transformation.
    Definition perm16.hpp:63
    -
    uint8_t largest_moved_point() const
    Returns the largest non fix point of *this.
    Definition perm16_impl.hpp:110
    -
    uint32_t domain_bitset(bool complement=false) const
    Returns a bit mask for the domain of *this.
    Definition perm16_impl.hpp:42
    -
    PTransf16 left_one() const
    Returns the partial left identity for *this.
    Definition perm16_impl.hpp:66
    -
    typename decltype(Epu8)::array array
    Definition perm16.hpp:47
    -
    uint32_t rank_ref() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:69
    -
    PTransf16 right_one() const
    Returns the partial right identity for *this.
    Definition perm16_impl.hpp:45
    -
    uint32_t image_bitset(bool complement=false) const
    Returns a bit mask for the image of *this.
    Definition perm16_impl.hpp:63
    -
    epu8 fix_points_mask(bool complement=false) const
    Returns a mask for the fix point of *this.
    Definition perm16_impl.hpp:90
    -
    uint8_t smallest_fix_point() const
    Returns the smallest fix point of *this.
    Definition perm16_impl.hpp:97
    +
    constexpr PPerm16(const vect v)
    Definition perm16.hpp:152
    +
    PPerm16(std::vector< uint8_t > dom, std::vector< uint8_t > rng, size_t=0)
    Definition perm16.hpp:154
    +
    static constexpr PPerm16 one()
    The identity partial permutations.
    Definition perm16.hpp:166
    +
    PPerm16 left_one() const
    Definition perm16.hpp:202
    +
    PPerm16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:157
    +
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:161
    +
    constexpr PPerm16(const epu8 x)
    Definition perm16.hpp:153
    +
    Partial transformation of .
    Definition perm16.hpp:47
    +
    uint8_t nb_fix_points() const
    Returns the number of fix points of *this.
    Definition perm16_impl.hpp:121
    +
    constexpr PTransf16(const epu8 x)
    Definition perm16.hpp:56
    +
    constexpr PTransf16(const vect v)
    Definition perm16.hpp:55
    +
    uint32_t fix_points_bitset(bool complement=false) const
    Returns a bit mask for the fix point of *this.
    Definition perm16_impl.hpp:99
    +
    static constexpr size_t size()
    Definition perm16.hpp:48
    +
    PTransf16 operator*(const PTransf16 &p) const
    The product of two partial transformations.
    Definition perm16.hpp:69
    +
    static constexpr PTransf16 one()
    The identity partial transformation.
    Definition perm16.hpp:67
    +
    uint8_t largest_moved_point() const
    Returns the largest non fix point of *this.
    Definition perm16_impl.hpp:116
    +
    uint32_t domain_bitset(bool complement=false) const
    Returns a bit mask for the domain of *this.
    Definition perm16_impl.hpp:48
    +
    PTransf16 left_one() const
    Returns the partial left identity for *this.
    Definition perm16_impl.hpp:72
    +
    typename decltype(Epu8)::array array
    Definition perm16.hpp:51
    +
    uint32_t rank_ref() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:75
    +
    PTransf16 right_one() const
    Returns the partial right identity for *this.
    Definition perm16_impl.hpp:51
    +
    uint32_t image_bitset(bool complement=false) const
    Returns a bit mask for the image of *this.
    Definition perm16_impl.hpp:69
    +
    epu8 fix_points_mask(bool complement=false) const
    Returns a mask for the fix point of *this.
    Definition perm16_impl.hpp:96
    +
    uint8_t smallest_fix_point() const
    Returns the smallest fix point of *this.
    Definition perm16_impl.hpp:103
    -
    uint32_t rank_cmpestrm() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:78
    -
    epu8 domain_mask(bool complement=false) const
    Returns a mask for the domain of *this.
    Definition perm16_impl.hpp:39
    -
    uint32_t rank() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:82
    -
    epu8 image_mask_ref(bool complement=false) const
    Returns a mask for the image of *this.
    Definition perm16_impl.hpp:55
    -
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:58
    -
    uint8_t largest_fix_point() const
    Returns the largest fix point of *this.
    Definition perm16_impl.hpp:105
    -
    epu8 image_mask(bool complement=false) const
    Definition perm16.hpp:73
    +
    uint32_t rank_cmpestrm() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:84
    +
    epu8 domain_mask(bool complement=false) const
    Returns a mask for the domain of *this.
    Definition perm16_impl.hpp:45
    +
    uint32_t rank() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:88
    +
    epu8 image_mask_ref(bool complement=false) const
    Returns a mask for the image of *this.
    Definition perm16_impl.hpp:61
    +
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:62
    +
    uint8_t largest_fix_point() const
    Returns the largest fix point of *this.
    Definition perm16_impl.hpp:111
    +
    epu8 image_mask(bool complement=false) const
    Definition perm16.hpp:77
    epu8 image_mask_cmpestrm(bool complement=false) const
    Returns a mask for the image of *this.
    -
    uint8_t smallest_moved_point() const
    Returns the smallest non fix point of *this.
    Definition perm16_impl.hpp:101
    -
    Permutations of .
    Definition perm16.hpp:204
    -
    Perm16 inverse_cycl() const
    The inverse permutation.
    Definition perm16_impl.hpp:243
    -
    Perm16 inverse() const
    The inverse permutation.
    Definition perm16.hpp:279
    -
    epu8 lehmer() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:284
    -
    uint8_t length_ref() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:293
    -
    epu8 cycles_partition() const
    The set partition of the cycles of a permutation.
    Definition perm16_impl.hpp:338
    -
    bool left_weak_leq_ref(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:356
    -
    uint8_t nb_descents_ref() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:314
    -
    Perm16(uint64_t compressed)
    Construct a permutations from its 64 bits compressed.
    Definition perm16.hpp:226
    -
    Perm16 inverse_sort() const
    The inverse permutation.
    Definition perm16_impl.hpp:219
    -
    Perm16 operator*(const Perm16 &p) const
    The product of two permutations.
    Definition perm16.hpp:221
    -
    Perm16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:210
    -
    constexpr Perm16(const epu8 x)
    Definition perm16.hpp:208
    -
    Perm16 inverse_find() const
    The inverse permutation.
    Definition perm16.hpp:262
    -
    static constexpr Perm16 one()
    The identity partial permutation.
    Definition perm16.hpp:219
    -
    epu8 lehmer_ref() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:265
    -
    bool left_weak_leq_length(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:379
    -
    uint8_t length() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:312
    +
    uint8_t smallest_moved_point() const
    Returns the smallest non fix point of *this.
    Definition perm16_impl.hpp:107
    +
    Permutations of .
    Definition perm16.hpp:208
    +
    Perm16 inverse_cycl() const
    The inverse permutation.
    Definition perm16_impl.hpp:249
    +
    Perm16 inverse() const
    The inverse permutation.
    Definition perm16.hpp:283
    +
    epu8 lehmer() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:290
    +
    uint8_t length_ref() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:299
    +
    epu8 cycles_partition() const
    The set partition of the cycles of a permutation.
    Definition perm16_impl.hpp:344
    +
    bool left_weak_leq_ref(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:362
    +
    uint8_t nb_descents_ref() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:320
    +
    Perm16(uint64_t compressed)
    Construct a permutations from its 64 bits compressed.
    Definition perm16.hpp:230
    +
    Perm16 inverse_sort() const
    The inverse permutation.
    Definition perm16_impl.hpp:225
    +
    Perm16 operator*(const Perm16 &p) const
    The product of two permutations.
    Definition perm16.hpp:225
    +
    Perm16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:214
    +
    constexpr Perm16(const epu8 x)
    Definition perm16.hpp:212
    +
    Perm16 inverse_find() const
    The inverse permutation.
    Definition perm16.hpp:266
    +
    static constexpr Perm16 one()
    The identity partial permutation.
    Definition perm16.hpp:223
    +
    epu8 lehmer_ref() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:271
    +
    bool left_weak_leq_length(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:385
    +
    uint8_t length() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:318
    constexpr Perm16(const Perm16 &)=default
    -
    constexpr Perm16(const vect v)
    Definition perm16.hpp:207
    -
    Perm16 inverse_ref() const
    The inverse permutation.
    Definition perm16_impl.hpp:203
    -
    uint8_t nb_descents() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:321
    -
    uint8_t nb_cycles() const
    The number of cycles of a permutation.
    Definition perm16.hpp:408
    -
    uint8_t nb_cycles_ref() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:325
    -
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:213
    -
    static Perm16 elementary_transposition(uint64_t i)
    The elementary transposition exchanging and .
    Definition perm16_impl.hpp:195
    -
    epu8 lehmer_arr() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:274
    -
    static Perm16 unrankSJT(int n, int r)
    The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
    Definition perm16_impl.hpp:167
    -
    bool left_weak_leq(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:366
    -
    uint8_t nb_cycles_unroll() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:351
    +
    constexpr Perm16(const vect v)
    Definition perm16.hpp:211
    +
    Perm16 inverse_ref() const
    The inverse permutation.
    Definition perm16_impl.hpp:209
    +
    uint8_t nb_descents() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:327
    +
    uint8_t nb_cycles() const
    The number of cycles of a permutation.
    Definition perm16.hpp:412
    +
    uint8_t nb_cycles_ref() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:331
    +
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:217
    +
    static Perm16 elementary_transposition(uint64_t i)
    The elementary transposition exchanging and .
    Definition perm16_impl.hpp:201
    +
    epu8 lehmer_arr() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:280
    +
    static Perm16 unrankSJT(int n, int r)
    The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
    Definition perm16_impl.hpp:173
    +
    bool left_weak_leq(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:372
    +
    uint8_t nb_cycles_unroll() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:357
    Perm16 & operator=(const Perm16 &)=default
    Perm16()=default
    -
    Perm16 inverse_pow() const
    The inverse permutation.
    Definition perm16_impl.hpp:261
    -
    uint8_t length_arr() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:302
    -
    Perm16 inverse_arr() const
    The inverse permutation.
    Definition perm16_impl.hpp:210
    -
    static Perm16 random(uint64_t n=16)
    A random permutation of size .
    Definition perm16_impl.hpp:155
    -
    Full transformation of .
    Definition perm16.hpp:118
    -
    Transf16 operator*(const Transf16 &p) const
    The product of two transformations.
    Definition perm16.hpp:134
    -
    constexpr Transf16(const vect v)
    Definition perm16.hpp:121
    -
    Transf16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:123
    -
    constexpr Transf16(const epu8 x)
    Definition perm16.hpp:122
    +
    Perm16 inverse_pow() const
    The inverse permutation.
    Definition perm16_impl.hpp:267
    +
    uint8_t length_arr() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:308
    +
    Perm16 inverse_arr() const
    The inverse permutation.
    Definition perm16_impl.hpp:216
    +
    static Perm16 random(uint64_t n=16)
    A random permutation of size .
    Definition perm16_impl.hpp:161
    +
    Full transformation of .
    Definition perm16.hpp:122
    +
    Transf16 operator*(const Transf16 &p) const
    The product of two transformations.
    Definition perm16.hpp:138
    +
    constexpr Transf16(const vect v)
    Definition perm16.hpp:125
    +
    Transf16(std::initializer_list< uint8_t > il)
    Definition perm16.hpp:127
    +
    constexpr Transf16(const epu8 x)
    Definition perm16.hpp:126
    Transf16 & operator=(const Transf16 &)=default
    Transf16()=default
    -
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:127
    -
    static constexpr Transf16 one()
    The identity transformation.
    Definition perm16.hpp:132
    +
    bool validate(size_t k=16) const
    Return whether *this is a well constructed object.
    Definition perm16.hpp:131
    +
    static constexpr Transf16 one()
    The identity transformation.
    Definition perm16.hpp:136
    constexpr Transf16(const Transf16 &v)=default
    -
    Definition vect16.hpp:30
    -
    epu8 v
    Definition vect16.hpp:33
    -
    size_t operator()(const HPCombi::PPerm16 &ar) const
    A hash operator for HPCombi::PPerm16.
    Definition perm16.hpp:467
    -
    size_t operator()(const HPCombi::PTransf16 &ar) const
    A hash operator for HPCombi::PTransf16.
    Definition perm16.hpp:453
    -
    size_t operator()(const HPCombi::Perm16 &ar) const
    A hash operator for HPCombi::Perm16.
    Definition perm16.hpp:474
    -
    size_t operator()(const HPCombi::Transf16 &ar) const
    A hash operator for HPCombi::Transf16.
    Definition perm16.hpp:460
    +
    Definition vect16.hpp:34
    +
    epu8 v
    Definition vect16.hpp:37
    +
    size_t operator()(const HPCombi::PPerm16 &ar) const
    A hash operator for HPCombi::PPerm16.
    Definition perm16.hpp:471
    +
    size_t operator()(const HPCombi::PTransf16 &ar) const
    A hash operator for HPCombi::PTransf16.
    Definition perm16.hpp:457
    +
    size_t operator()(const HPCombi::Perm16 &ar) const
    A hash operator for HPCombi::Perm16.
    Definition perm16.hpp:478
    +
    size_t operator()(const HPCombi::Transf16 &ar) const
    A hash operator for HPCombi::Transf16.
    Definition perm16.hpp:464
    diff --git a/perm16__impl_8hpp.html b/perm16__impl_8hpp.html index 4d73b7e..3e148e0 100644 --- a/perm16__impl_8hpp.html +++ b/perm16__impl_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm16_impl.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm16_impl.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/perm16__impl_8hpp_source.html b/perm16__impl_8hpp_source.html index 186bec1..96af356 100644 --- a/perm16__impl_8hpp_source.html +++ b/perm16__impl_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm16_impl.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm16_impl.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,463 +77,470 @@
    perm16_impl.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16namespace HPCombi {
    -
    17
    -
    19// Implementation part for inline functions //////////////////////////////////
    -
    21
    -
    22inline PTransf16::PTransf16(std::initializer_list<uint8_t> il)
    -
    23 : Vect16(Epu8.id()) {
    -
    24 HPCOMBI_ASSERT(il.size() <= 16);
    -
    25 std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin());
    -
    26}
    -
    27
    -
    28inline PTransf16::PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    -
    29 size_t /*unused */)
    -
    30 : Vect16(Epu8(0xFF)) {
    -
    31 HPCOMBI_ASSERT(dom.size() == rng.size());
    -
    32 HPCOMBI_ASSERT(dom.size() <= 16);
    -
    33 for (size_t i = 0; i < dom.size(); ++i) {
    -
    34 HPCOMBI_ASSERT(dom[i] < 16);
    -
    35 v[dom[i]] = rng[i];
    -
    36 }
    -
    37}
    -
    38
    -
    39inline epu8 PTransf16::domain_mask(bool complement) const {
    -
    40 return complement ? v == Epu8(0xFF) : v != Epu8(0xFF);
    -
    41}
    -
    42inline uint32_t PTransf16::domain_bitset(bool complement) const {
    -
    43 return simde_mm_movemask_epi8(domain_mask(complement));
    -
    44}
    - -
    46 return domain_mask(true) | Epu8.id();
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20// NOLINT(build/header_guard)
    +
    21
    +
    22namespace HPCombi {
    +
    23
    +
    25// Implementation part for inline functions //////////////////////////////////
    +
    27
    +
    28inline PTransf16::PTransf16(std::initializer_list<uint8_t> il)
    +
    29 : Vect16(Epu8.id()) {
    +
    30 HPCOMBI_ASSERT(il.size() <= 16);
    +
    31 std::copy(il.begin(), il.end(), HPCombi::as_array(v).begin());
    +
    32}
    +
    33
    +
    34inline PTransf16::PTransf16(std::vector<uint8_t> dom, std::vector<uint8_t> rng,
    +
    35 size_t /*unused */)
    +
    36 : Vect16(Epu8(0xFF)) {
    +
    37 HPCOMBI_ASSERT(dom.size() == rng.size());
    +
    38 HPCOMBI_ASSERT(dom.size() <= 16);
    +
    39 for (size_t i = 0; i < dom.size(); ++i) {
    +
    40 HPCOMBI_ASSERT(dom[i] < 16);
    +
    41 v[dom[i]] = rng[i];
    +
    42 }
    +
    43}
    +
    44
    +
    45inline epu8 PTransf16::domain_mask(bool complement) const {
    +
    46 return complement ? v == Epu8(0xFF) : v != Epu8(0xFF);
    47}
    -
    48
    -
    49#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    50inline epu8 PTransf16::image_mask_cmpestrm(bool complement) const {
    -
    51 return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT)
    -
    52 : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL);
    +
    48inline uint32_t PTransf16::domain_bitset(bool complement) const {
    +
    49 return simde_mm_movemask_epi8(domain_mask(complement));
    +
    50}
    + +
    52 return domain_mask(true) | Epu8.id();
    53}
    -
    54#endif
    -
    55inline epu8 PTransf16::image_mask_ref(bool complement) const {
    -
    56 epu8 res{};
    -
    57 for (auto x : *this)
    -
    58 if (x != 0xFF)
    -
    59 res[x] = 0xFF;
    -
    60 return complement ? static_cast<epu8>(!res) : res;
    -
    61}
    -
    62
    -
    63inline uint32_t PTransf16::image_bitset(bool complement) const {
    -
    64 return simde_mm_movemask_epi8(image_mask(complement));
    -
    65}
    - -
    67 return image_mask(true) | Epu8.id();
    -
    68}
    -
    69inline uint32_t PTransf16::rank_ref() const {
    -
    70 decltype(Epu8)::array tmp{};
    -
    71 static_assert(decltype(Epu8)::size == 16, "Wrong size of EPU8 array");
    -
    72 for (auto x : *this)
    -
    73 if (x != 0xFF)
    -
    74 tmp[x] = 1;
    -
    75 return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0));
    -
    76}
    -
    77
    -
    78inline uint32_t PTransf16::rank_cmpestrm() const {
    -
    79 return __builtin_popcountl(image_bitset());
    -
    80}
    -
    81
    -
    82inline uint32_t PTransf16::rank() const {
    -
    83#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    84 return rank_cmpestrm();
    -
    85#else
    -
    86 return rank_ref();
    -
    87#endif
    -
    88}
    -
    89
    -
    90inline epu8 PTransf16::fix_points_mask(bool complement) const {
    -
    91 return complement ? v != one().v : v == one().v;
    -
    92}
    -
    93inline uint32_t PTransf16::fix_points_bitset(bool complement) const {
    -
    94 return simde_mm_movemask_epi8(fix_points_mask(complement));
    -
    95}
    -
    96
    -
    97inline uint8_t PTransf16::smallest_fix_point() const {
    -
    98 return __builtin_ffs(fix_points_bitset(false)) - 1;
    -
    99}
    -
    101inline uint8_t PTransf16::smallest_moved_point() const {
    -
    102 return __builtin_ffs(fix_points_bitset(true)) - 1;
    -
    103}
    -
    105inline uint8_t PTransf16::largest_fix_point() const {
    -
    106 uint32_t res = fix_points_bitset(false);
    -
    107 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
    -
    108}
    -
    110inline uint8_t PTransf16::largest_moved_point() const {
    -
    111 uint32_t res = fix_points_bitset(true);
    -
    112 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
    -
    113}
    -
    115inline uint8_t PTransf16::nb_fix_points() const {
    -
    116 return __builtin_popcountl(fix_points_bitset());
    -
    117}
    -
    118
    -
    119inline static constexpr uint8_t hilo_exchng_fun(uint8_t i) {
    -
    120 return i < 8 ? i + 8 : i - 8;
    -
    121}
    -
    122static constexpr epu8 hilo_exchng = Epu8(hilo_exchng_fun);
    -
    123inline static constexpr uint8_t hilo_mask_fun(uint8_t i) {
    -
    124 return i < 8 ? 0x0 : 0xFF;
    -
    125}
    -
    126static constexpr epu8 hilo_mask = Epu8(hilo_mask_fun);
    -
    127
    -
    128inline Transf16::Transf16(uint64_t compressed) {
    -
    129 epu8 res = simde_mm_set_epi64x(compressed, compressed);
    -
    130 v = simde_mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask);
    +
    54
    +
    55#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    56inline epu8 PTransf16::image_mask_cmpestrm(bool complement) const {
    +
    57 return complement ? _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT)
    +
    58 : _mm_cmpestrm(v, 16, one().v, 16, FIND_IN_VECT_COMPL);
    +
    59}
    +
    60#endif
    +
    61inline epu8 PTransf16::image_mask_ref(bool complement) const {
    +
    62 epu8 res{};
    +
    63 for (auto x : *this)
    +
    64 if (x != 0xFF)
    +
    65 res[x] = 0xFF;
    +
    66 return complement ? static_cast<epu8>(!res) : res;
    +
    67}
    +
    68
    +
    69inline uint32_t PTransf16::image_bitset(bool complement) const {
    +
    70 return simde_mm_movemask_epi8(image_mask(complement));
    +
    71}
    + +
    73 return image_mask(true) | Epu8.id();
    +
    74}
    +
    75inline uint32_t PTransf16::rank_ref() const {
    +
    76 decltype(Epu8)::array tmp{};
    +
    77 static_assert(decltype(Epu8)::size == 16, "Wrong size of EPU8 array");
    +
    78 for (auto x : *this)
    +
    79 if (x != 0xFF)
    +
    80 tmp[x] = 1;
    +
    81 return std::accumulate(tmp.begin(), tmp.end(), uint8_t(0));
    +
    82}
    +
    83
    +
    84inline uint32_t PTransf16::rank_cmpestrm() const {
    +
    85 return __builtin_popcountl(image_bitset());
    +
    86}
    +
    87
    +
    88inline uint32_t PTransf16::rank() const {
    +
    89#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    90 return rank_cmpestrm();
    +
    91#else
    +
    92 return rank_ref();
    +
    93#endif
    +
    94}
    +
    95
    +
    96inline epu8 PTransf16::fix_points_mask(bool complement) const {
    +
    97 return complement ? v != one().v : v == one().v;
    +
    98}
    +
    99inline uint32_t PTransf16::fix_points_bitset(bool complement) const {
    +
    100 return simde_mm_movemask_epi8(fix_points_mask(complement));
    +
    101}
    +
    102
    +
    103inline uint8_t PTransf16::smallest_fix_point() const {
    +
    104 return __builtin_ffs(fix_points_bitset(false)) - 1;
    +
    105}
    +
    107inline uint8_t PTransf16::smallest_moved_point() const {
    +
    108 return __builtin_ffs(fix_points_bitset(true)) - 1;
    +
    109}
    +
    111inline uint8_t PTransf16::largest_fix_point() const {
    +
    112 uint32_t res = fix_points_bitset(false);
    +
    113 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
    +
    114}
    +
    116inline uint8_t PTransf16::largest_moved_point() const {
    +
    117 uint32_t res = fix_points_bitset(true);
    +
    118 return res == 0 ? 0xFF : 31 - __builtin_clz(res);
    +
    119}
    +
    121inline uint8_t PTransf16::nb_fix_points() const {
    +
    122 return __builtin_popcountl(fix_points_bitset());
    +
    123}
    +
    124
    +
    125inline static constexpr uint8_t hilo_exchng_fun(uint8_t i) {
    +
    126 return i < 8 ? i + 8 : i - 8;
    +
    127}
    +
    128static constexpr epu8 hilo_exchng = Epu8(hilo_exchng_fun);
    +
    129inline static constexpr uint8_t hilo_mask_fun(uint8_t i) {
    +
    130 return i < 8 ? 0x0 : 0xFF;
    131}
    -
    132
    -
    133inline Transf16::operator uint64_t() const {
    -
    134 epu8 res =
    -
    135 static_cast<epu8>(simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4));
    -
    136 res = HPCombi::permuted(res, hilo_exchng) + v;
    -
    137 return simde_mm_extract_epi64(res, 0);
    -
    138}
    -
    139
    - -
    141 epu8 res = Epu8(0xFF);
    -
    142 for (size_t i = 0; i < 16; ++i)
    -
    143 if (v[i] < 16)
    -
    144 res[v[i]] = i;
    -
    145 return res;
    -
    146}
    -
    147
    -
    148#ifdef SIMDE_X86_SSE4_2_NATIVE
    -
    149inline PPerm16 PPerm16::inverse_find() const {
    -
    150 epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT);
    -
    151 return permutation_of(v, one()) | mask;
    +
    132static constexpr epu8 hilo_mask = Epu8(hilo_mask_fun);
    +
    133
    +
    134inline Transf16::Transf16(uint64_t compressed) {
    +
    135 epu8 res = simde_mm_set_epi64x(compressed, compressed);
    +
    136 v = simde_mm_blendv_epi8(res & Epu8(0x0F), res >> 4, hilo_mask);
    +
    137}
    +
    138
    +
    139inline Transf16::operator uint64_t() const {
    +
    140 epu8 res =
    +
    141 static_cast<epu8>(simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4));
    +
    142 res = HPCombi::permuted(res, hilo_exchng) + v;
    +
    143 return simde_mm_extract_epi64(res, 0);
    +
    144}
    +
    145
    + +
    147 epu8 res = Epu8(0xFF);
    +
    148 for (size_t i = 0; i < 16; ++i)
    +
    149 if (v[i] < 16)
    +
    150 res[v[i]] = i;
    +
    151 return res;
    152}
    -
    153#endif
    -
    154
    -
    155inline Perm16 Perm16::random(uint64_t n) {
    -
    156 static std::random_device rd;
    -
    157 static std::mt19937 g(rd());
    -
    158
    -
    159 Perm16 res = one();
    -
    160 auto ar = res.as_array();
    -
    161
    -
    162 std::shuffle(ar.begin(), ar.begin() + n, g);
    -
    163 return res;
    -
    164}
    -
    165
    -
    166// From Ruskey : Combinatorial Generation page 138
    -
    167inline Perm16 Perm16::unrankSJT(int n, int r) {
    -
    168 int j;
    -
    169 std::array<int, 16> dir;
    -
    170 epu8 res{};
    -
    171 for (j = 0; j < n; ++j)
    -
    172 res[j] = 0xFF;
    -
    173 for (j = n - 1; j >= 0; --j) {
    -
    174 int k, rem, c;
    -
    175 rem = r % (j + 1);
    -
    176 r = r / (j + 1);
    -
    177 if ((r & 1) != 0) {
    -
    178 k = -1;
    -
    179 dir[j] = +1;
    -
    180 } else {
    -
    181 k = n;
    -
    182 dir[j] = -1;
    -
    183 }
    -
    184 c = -1;
    -
    185 do {
    -
    186 k = k + dir[j];
    -
    187 if (res[k] == 0xFF)
    -
    188 ++c;
    -
    189 } while (c < rem);
    -
    190 res[k] = j;
    -
    191 }
    -
    192 return res;
    -
    193}
    -
    194
    - -
    196 HPCOMBI_ASSERT(i < 16);
    -
    197 epu8 res = one();
    -
    198 res[i] = i + 1;
    -
    199 res[i + 1] = i;
    -
    200 return res;
    -
    201}
    -
    202
    - -
    204 epu8 res;
    -
    205 for (size_t i = 0; i < 16; ++i)
    -
    206 res[v[i]] = i;
    -
    207 return res;
    -
    208}
    -
    209
    - -
    211 epu8 res;
    -
    212 auto &arres = HPCombi::as_array(res);
    -
    213 auto self = as_array();
    -
    214 for (size_t i = 0; i < 16; ++i)
    -
    215 arres[self[i]] = i;
    -
    216 return res;
    -
    217}
    -
    218
    - -
    220 // G++-7 compile this shift by 3 additions.
    -
    221 // epu8 res = (v << 4) + one().v;
    -
    222 // I call directly the shift intrinsic
    -
    223 epu8 res = static_cast<epu8>(
    -
    224 simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4)) +
    -
    225 one().v;
    -
    226 res = sorted(res) & Epu8(0x0F);
    -
    227 return res;
    -
    228}
    -
    229
    -
    230// We declare PERM16 as a correct Monoid
    -
    231namespace power_helper {
    -
    232
    -
    233// TODO required?
    - +
    153
    +
    154#ifdef SIMDE_X86_SSE4_2_NATIVE
    +
    155inline PPerm16 PPerm16::inverse_find() const {
    +
    156 epu8 mask = _mm_cmpestrm(v, 16, one(), 16, FIND_IN_VECT);
    +
    157 return permutation_of(v, one()) | mask;
    +
    158}
    +
    159#endif
    +
    160
    +
    161inline Perm16 Perm16::random(uint64_t n) {
    +
    162 static std::random_device rd;
    +
    163 static std::mt19937 g(rd());
    +
    164
    +
    165 Perm16 res = one();
    +
    166 auto ar = res.as_array();
    +
    167
    +
    168 std::shuffle(ar.begin(), ar.begin() + n, g);
    +
    169 return res;
    +
    170}
    +
    171
    +
    172// From Ruskey : Combinatorial Generation page 138
    +
    173inline Perm16 Perm16::unrankSJT(int n, int r) {
    +
    174 int j;
    +
    175 std::array<int, 16> dir;
    +
    176 epu8 res{};
    +
    177 for (j = 0; j < n; ++j)
    +
    178 res[j] = 0xFF;
    +
    179 for (j = n - 1; j >= 0; --j) {
    +
    180 int k, rem, c;
    +
    181 rem = r % (j + 1);
    +
    182 r = r / (j + 1);
    +
    183 if ((r & 1) != 0) {
    +
    184 k = -1;
    +
    185 dir[j] = +1;
    +
    186 } else {
    +
    187 k = n;
    +
    188 dir[j] = -1;
    +
    189 }
    +
    190 c = -1;
    +
    191 do {
    +
    192 k = k + dir[j];
    +
    193 if (res[k] == 0xFF)
    +
    194 ++c;
    +
    195 } while (c < rem);
    +
    196 res[k] = j;
    +
    197 }
    +
    198 return res;
    +
    199}
    +
    200
    + +
    202 HPCOMBI_ASSERT(i < 16);
    +
    203 epu8 res = one();
    +
    204 res[i] = i + 1;
    +
    205 res[i + 1] = i;
    +
    206 return res;
    +
    207}
    +
    208
    + +
    210 epu8 res;
    +
    211 for (size_t i = 0; i < 16; ++i)
    +
    212 res[v[i]] = i;
    +
    213 return res;
    +
    214}
    +
    215
    + +
    217 epu8 res;
    +
    218 auto &arres = HPCombi::as_array(res);
    +
    219 auto self = as_array();
    +
    220 for (size_t i = 0; i < 16; ++i)
    +
    221 arres[self[i]] = i;
    +
    222 return res;
    +
    223}
    +
    224
    + +
    226 // G++-7 compile this shift by 3 additions.
    +
    227 // epu8 res = (v << 4) + one().v;
    +
    228 // I call directly the shift intrinsic
    +
    229 epu8 res = static_cast<epu8>(
    +
    230 simde_mm_slli_epi32(static_cast<simde__m128i>(v), 4)) +
    +
    231 one().v;
    +
    232 res = sorted(res) & Epu8(0x0F);
    +
    233 return res;
    +
    234}
    235
    -
    236template <> struct Monoid<Perm16> {
    -
    237 static const Perm16 one() { return Perm16::one(); }
    -
    238 static Perm16 prod(Perm16 a, Perm16 b) { return a * b; }
    -
    239};
    -
    240
    -
    241} // namespace power_helper
    -
    242
    - -
    244 Perm16 res = one();
    -
    245 Perm16 newpow = pow<8>(*this);
    -
    246 for (int i = 9; i <= 16; i++) {
    -
    247 Perm16 oldpow = newpow;
    -
    248 newpow = oldpow * *this;
    -
    249 res.v = simde_mm_blendv_epi8(res, oldpow, newpow.v == one().v);
    -
    250 }
    -
    251 return res;
    -
    252}
    -
    253
    -
    254static constexpr uint32_t lcm_range(uint8_t n) {
    -
    255 uint32_t res = 1;
    -
    256 for (uint8_t i = 1; i <= n; ++i)
    -
    257 res = std::lcm(res, i);
    -
    258 return res;
    -
    259}
    -
    260
    - -
    262 return pow<lcm_range(16) - 1>(*this);
    -
    263}
    -
    264
    -
    265inline epu8 Perm16::lehmer_ref() const {
    -
    266 epu8 res{};
    -
    267 for (size_t i = 0; i < 16; i++)
    -
    268 for (size_t j = i + 1; j < 16; j++)
    -
    269 if (v[i] > v[j])
    -
    270 res[i]++;
    -
    271 return res;
    -
    272}
    -
    273
    -
    274inline epu8 Perm16::lehmer_arr() const {
    -
    275 decltype(Epu8)::array res{};
    -
    276 decltype(Epu8)::array ar = as_array();
    -
    277 for (size_t i = 0; i < 16; i++)
    -
    278 for (size_t j = i + 1; j < 16; j++)
    -
    279 if (ar[i] > ar[j])
    -
    280 res[i]++;
    -
    281 return Epu8(res);
    -
    282}
    -
    283
    -
    284inline epu8 Perm16::lehmer() const {
    -
    285 epu8 vsh = v, res = -one().v;
    -
    286 for (int i = 1; i < 16; i++) {
    -
    287 vsh = shifted_left(vsh);
    -
    288 res -= (v >= vsh);
    -
    289 }
    -
    290 return res;
    -
    291}
    -
    292
    -
    293inline uint8_t Perm16::length_ref() const {
    -
    294 uint8_t res = 0;
    -
    295 for (size_t i = 0; i < 16; i++)
    -
    296 for (size_t j = i + 1; j < 16; j++)
    -
    297 if (v[i] > v[j])
    -
    298 res++;
    -
    299 return res;
    -
    300}
    -
    301
    -
    302inline uint8_t Perm16::length_arr() const {
    -
    303 uint8_t res = 0;
    -
    304 decltype(Epu8)::array ar = as_array();
    -
    305 for (size_t i = 0; i < 16; i++)
    -
    306 for (size_t j = i + 1; j < 16; j++)
    -
    307 if (ar[i] > ar[j])
    -
    308 res++;
    -
    309 return res;
    -
    310}
    -
    311
    -
    312inline uint8_t Perm16::length() const { return horiz_sum(lehmer()); }
    -
    313
    -
    314inline uint8_t Perm16::nb_descents_ref() const {
    -
    315 uint8_t res = 0;
    -
    316 for (size_t i = 0; i < 16 - 1; i++)
    -
    317 if (v[i] > v[i + 1])
    -
    318 res++;
    -
    319 return res;
    -
    320}
    -
    321inline uint8_t Perm16::nb_descents() const {
    -
    322 return __builtin_popcountl(simde_mm_movemask_epi8(v < shifted_right(v)));
    -
    323}
    -
    324
    -
    325inline uint8_t Perm16::nb_cycles_ref() const {
    -
    326 std::array<bool, 16> b{};
    -
    327 uint8_t c = 0;
    -
    328 for (size_t i = 0; i < 16; i++) {
    -
    329 if (!b[i]) {
    -
    330 for (size_t j = i; !b[j]; j = v[j])
    -
    331 b[j] = true;
    -
    332 c++;
    -
    333 }
    -
    334 }
    -
    335 return c;
    -
    336}
    -
    337
    - -
    339 epu8 x0, x1 = one();
    -
    340 Perm16 p = *this;
    -
    341 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
    -
    342 p = p * p;
    -
    343 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
    -
    344 p = p * p;
    -
    345 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
    -
    346 p = p * p;
    -
    347 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
    -
    348 return x1;
    -
    349}
    -
    350
    -
    351inline uint8_t Perm16::nb_cycles_unroll() const {
    -
    352 epu8 res = (Epu8.id() == cycles_partition());
    -
    353 return __builtin_popcountl(simde_mm_movemask_epi8(res));
    -
    354}
    -
    355
    -
    356inline bool Perm16::left_weak_leq_ref(Perm16 other) const {
    -
    357 for (size_t i = 0; i < 16; i++) {
    -
    358 for (size_t j = i + 1; j < 16; j++) {
    -
    359 if ((v[i] > v[j]) && (other[i] < other[j]))
    -
    360 return false;
    -
    361 }
    -
    362 }
    -
    363 return true;
    -
    364}
    -
    365
    -
    366inline bool Perm16::left_weak_leq(Perm16 other) const {
    -
    367 epu8 srot = v, orot = other;
    -
    368 for (size_t i = 0; i < 15; i++) {
    -
    369 srot = shifted_right(srot);
    -
    370 orot = shifted_right(orot);
    -
    371 uint64_t sinv = simde_mm_movemask_epi8(v < srot);
    -
    372 uint64_t oinv = simde_mm_movemask_epi8(other.v < orot);
    -
    373 if ((sinv & oinv) != sinv)
    -
    374 return false;
    -
    375 }
    -
    376 return true;
    -
    377}
    -
    378
    -
    379inline bool Perm16::left_weak_leq_length(Perm16 other) const {
    -
    380 Perm16 prod = *this * other.inverse();
    -
    381 return other.length() == length() + prod.length();
    -
    382}
    -
    383
    -
    384} // namespace HPCombi
    -
    const PTransf16 id
    Definition RD.cpp:33
    -
    #define HPCOMBI_ASSERT(x)
    Definition debug.hpp:23
    -
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:62
    -
    Perm16 Perm16
    Definition perm16_impl.hpp:234
    -
    Definition bmat8.hpp:37
    -
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:70
    -
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:76
    -
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:295
    -
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:215
    -
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:196
    -
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:50
    -
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:41
    -
    epu8 shifted_left(epu8 a) noexcept
    Right shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:82
    -
    TPUBuild< TPU >::array & as_array(TPU &v) noexcept
    Cast a TPU to a c++ std::array.
    Definition builder.hpp:134
    -
    const T pow(const T x)
    A generic compile time exponentiation function.
    Definition power.hpp:79
    -
    Partial permutation of .
    Definition perm16.hpp:145
    -
    PPerm16 inverse_ref() const
    The inverse of a partial permutation.
    Definition perm16_impl.hpp:140
    -
    static constexpr PPerm16 one()
    The identity partial permutations.
    Definition perm16.hpp:162
    -
    Partial transformation of .
    Definition perm16.hpp:43
    -
    uint8_t nb_fix_points() const
    Returns the number of fix points of *this.
    Definition perm16_impl.hpp:115
    -
    uint32_t fix_points_bitset(bool complement=false) const
    Returns a bit mask for the fix point of *this.
    Definition perm16_impl.hpp:93
    -
    static constexpr size_t size()
    Definition perm16.hpp:44
    -
    static constexpr PTransf16 one()
    The identity partial transformation.
    Definition perm16.hpp:63
    -
    uint8_t largest_moved_point() const
    Returns the largest non fix point of *this.
    Definition perm16_impl.hpp:110
    -
    uint32_t domain_bitset(bool complement=false) const
    Returns a bit mask for the domain of *this.
    Definition perm16_impl.hpp:42
    -
    PTransf16 left_one() const
    Returns the partial left identity for *this.
    Definition perm16_impl.hpp:66
    -
    typename decltype(Epu8)::array array
    Definition perm16.hpp:47
    -
    uint32_t rank_ref() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:69
    -
    PTransf16 right_one() const
    Returns the partial right identity for *this.
    Definition perm16_impl.hpp:45
    -
    uint32_t image_bitset(bool complement=false) const
    Returns a bit mask for the image of *this.
    Definition perm16_impl.hpp:63
    -
    epu8 fix_points_mask(bool complement=false) const
    Returns a mask for the fix point of *this.
    Definition perm16_impl.hpp:90
    -
    uint8_t smallest_fix_point() const
    Returns the smallest fix point of *this.
    Definition perm16_impl.hpp:97
    +
    236// We declare PERM16 as a correct Monoid
    +
    237namespace power_helper {
    +
    238
    +
    239// TODO required?
    + +
    241
    +
    242template <> struct Monoid<Perm16> {
    +
    243 static const Perm16 one() { return Perm16::one(); }
    +
    244 static Perm16 prod(Perm16 a, Perm16 b) { return a * b; }
    +
    245};
    +
    246
    +
    247} // namespace power_helper
    +
    248
    + +
    250 Perm16 res = one();
    +
    251 Perm16 newpow = pow<8>(*this);
    +
    252 for (int i = 9; i <= 16; i++) {
    +
    253 Perm16 oldpow = newpow;
    +
    254 newpow = oldpow * *this;
    +
    255 res.v = simde_mm_blendv_epi8(res, oldpow, newpow.v == one().v);
    +
    256 }
    +
    257 return res;
    +
    258}
    +
    259
    +
    260static constexpr uint32_t lcm_range(uint8_t n) {
    +
    261 uint32_t res = 1;
    +
    262 for (uint8_t i = 1; i <= n; ++i)
    +
    263 res = std::lcm(res, i);
    +
    264 return res;
    +
    265}
    +
    266
    + +
    268 return pow<lcm_range(16) - 1>(*this);
    +
    269}
    +
    270
    +
    271inline epu8 Perm16::lehmer_ref() const {
    +
    272 epu8 res{};
    +
    273 for (size_t i = 0; i < 16; i++)
    +
    274 for (size_t j = i + 1; j < 16; j++)
    +
    275 if (v[i] > v[j])
    +
    276 res[i]++;
    +
    277 return res;
    +
    278}
    +
    279
    +
    280inline epu8 Perm16::lehmer_arr() const {
    +
    281 decltype(Epu8)::array res{};
    +
    282 decltype(Epu8)::array ar = as_array();
    +
    283 for (size_t i = 0; i < 16; i++)
    +
    284 for (size_t j = i + 1; j < 16; j++)
    +
    285 if (ar[i] > ar[j])
    +
    286 res[i]++;
    +
    287 return Epu8(res);
    +
    288}
    +
    289
    +
    290inline epu8 Perm16::lehmer() const {
    +
    291 epu8 vsh = v, res = -one().v;
    +
    292 for (int i = 1; i < 16; i++) {
    +
    293 vsh = shifted_left(vsh);
    +
    294 res -= (v >= vsh);
    +
    295 }
    +
    296 return res;
    +
    297}
    +
    298
    +
    299inline uint8_t Perm16::length_ref() const {
    +
    300 uint8_t res = 0;
    +
    301 for (size_t i = 0; i < 16; i++)
    +
    302 for (size_t j = i + 1; j < 16; j++)
    +
    303 if (v[i] > v[j])
    +
    304 res++;
    +
    305 return res;
    +
    306}
    +
    307
    +
    308inline uint8_t Perm16::length_arr() const {
    +
    309 uint8_t res = 0;
    +
    310 decltype(Epu8)::array ar = as_array();
    +
    311 for (size_t i = 0; i < 16; i++)
    +
    312 for (size_t j = i + 1; j < 16; j++)
    +
    313 if (ar[i] > ar[j])
    +
    314 res++;
    +
    315 return res;
    +
    316}
    +
    317
    +
    318inline uint8_t Perm16::length() const { return horiz_sum(lehmer()); }
    +
    319
    +
    320inline uint8_t Perm16::nb_descents_ref() const {
    +
    321 uint8_t res = 0;
    +
    322 for (size_t i = 0; i < 16 - 1; i++)
    +
    323 if (v[i] > v[i + 1])
    +
    324 res++;
    +
    325 return res;
    +
    326}
    +
    327inline uint8_t Perm16::nb_descents() const {
    +
    328 return __builtin_popcountl(simde_mm_movemask_epi8(v < shifted_right(v)));
    +
    329}
    +
    330
    +
    331inline uint8_t Perm16::nb_cycles_ref() const {
    +
    332 std::array<bool, 16> b{};
    +
    333 uint8_t c = 0;
    +
    334 for (size_t i = 0; i < 16; i++) {
    +
    335 if (!b[i]) {
    +
    336 for (size_t j = i; !b[j]; j = v[j])
    +
    337 b[j] = true;
    +
    338 c++;
    +
    339 }
    +
    340 }
    +
    341 return c;
    +
    342}
    +
    343
    + +
    345 epu8 x0, x1 = one();
    +
    346 Perm16 p = *this;
    +
    347 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
    +
    348 p = p * p;
    +
    349 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
    +
    350 p = p * p;
    +
    351 x0 = simde_mm_min_epi8(x1, HPCombi::permuted(x1, p));
    +
    352 p = p * p;
    +
    353 x1 = simde_mm_min_epi8(x0, HPCombi::permuted(x0, p));
    +
    354 return x1;
    +
    355}
    +
    356
    +
    357inline uint8_t Perm16::nb_cycles_unroll() const {
    +
    358 epu8 res = (Epu8.id() == cycles_partition());
    +
    359 return __builtin_popcountl(simde_mm_movemask_epi8(res));
    +
    360}
    +
    361
    +
    362inline bool Perm16::left_weak_leq_ref(Perm16 other) const {
    +
    363 for (size_t i = 0; i < 16; i++) {
    +
    364 for (size_t j = i + 1; j < 16; j++) {
    +
    365 if ((v[i] > v[j]) && (other[i] < other[j]))
    +
    366 return false;
    +
    367 }
    +
    368 }
    +
    369 return true;
    +
    370}
    +
    371
    +
    372inline bool Perm16::left_weak_leq(Perm16 other) const {
    +
    373 epu8 srot = v, orot = other;
    +
    374 for (size_t i = 0; i < 15; i++) {
    +
    375 srot = shifted_right(srot);
    +
    376 orot = shifted_right(orot);
    +
    377 uint64_t sinv = simde_mm_movemask_epi8(v < srot);
    +
    378 uint64_t oinv = simde_mm_movemask_epi8(other.v < orot);
    +
    379 if ((sinv & oinv) != sinv)
    +
    380 return false;
    +
    381 }
    +
    382 return true;
    +
    383}
    +
    384
    +
    385inline bool Perm16::left_weak_leq_length(Perm16 other) const {
    +
    386 Perm16 prod = *this * other.inverse();
    +
    387 return other.length() == length() + prod.length();
    +
    388}
    +
    389
    +
    390} // namespace HPCombi
    +
    const PTransf16 id
    Definition RD.cpp:37
    +
    #define HPCOMBI_ASSERT(x)
    Definition debug.hpp:28
    +
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:66
    +
    Perm16 Perm16
    Definition perm16_impl.hpp:240
    +
    Definition bmat8.hpp:41
    +
    epu8 permuted(epu8 a, epu8 b) noexcept
    Permuting a HPCombi::epu8.
    Definition epu8.hpp:72
    +
    epu8 shifted_right(epu8 a) noexcept
    Left shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:78
    +
    epu8 permutation_of(epu8 a, epu8 b) noexcept
    Find if a vector is a permutation of one other.
    Definition epu8_impl.hpp:303
    +
    uint8_t horiz_sum(epu8 v) noexcept
    Horizontal sum of a HPCombi::epu8.
    Definition epu8.hpp:217
    +
    epu8 sorted(epu8 a) noexcept
    Return a sorted HPCombi::epu8.
    Definition epu8_impl.hpp:203
    +
    constexpr TPUBuild< epu8 > Epu8
    Factory object acting as a class constructor for type HPCombi::epu8.
    Definition epu8.hpp:53
    +
    uint8_t __attribute__((vector_size(16))) epu8
    SIMD vector of 16 unsigned bytes.
    Definition epu8.hpp:45
    +
    epu8 shifted_left(epu8 a) noexcept
    Right shifted of a HPCombi::epu8 inserting a 0.
    Definition epu8.hpp:84
    +
    TPUBuild< TPU >::array & as_array(TPU &v) noexcept
    Cast a TPU to a c++ std::array.
    Definition builder.hpp:139
    +
    const T pow(const T x)
    A generic compile time exponentiation function.
    Definition power.hpp:83
    +
    Partial permutation of .
    Definition perm16.hpp:149
    +
    PPerm16 inverse_ref() const
    The inverse of a partial permutation.
    Definition perm16_impl.hpp:146
    +
    static constexpr PPerm16 one()
    The identity partial permutations.
    Definition perm16.hpp:166
    +
    Partial transformation of .
    Definition perm16.hpp:47
    +
    uint8_t nb_fix_points() const
    Returns the number of fix points of *this.
    Definition perm16_impl.hpp:121
    +
    uint32_t fix_points_bitset(bool complement=false) const
    Returns a bit mask for the fix point of *this.
    Definition perm16_impl.hpp:99
    +
    static constexpr size_t size()
    Definition perm16.hpp:48
    +
    static constexpr PTransf16 one()
    The identity partial transformation.
    Definition perm16.hpp:67
    +
    uint8_t largest_moved_point() const
    Returns the largest non fix point of *this.
    Definition perm16_impl.hpp:116
    +
    uint32_t domain_bitset(bool complement=false) const
    Returns a bit mask for the domain of *this.
    Definition perm16_impl.hpp:48
    +
    PTransf16 left_one() const
    Returns the partial left identity for *this.
    Definition perm16_impl.hpp:72
    +
    typename decltype(Epu8)::array array
    Definition perm16.hpp:51
    +
    uint32_t rank_ref() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:75
    +
    PTransf16 right_one() const
    Returns the partial right identity for *this.
    Definition perm16_impl.hpp:51
    +
    uint32_t image_bitset(bool complement=false) const
    Returns a bit mask for the image of *this.
    Definition perm16_impl.hpp:69
    +
    epu8 fix_points_mask(bool complement=false) const
    Returns a mask for the fix point of *this.
    Definition perm16_impl.hpp:96
    +
    uint8_t smallest_fix_point() const
    Returns the smallest fix point of *this.
    Definition perm16_impl.hpp:103
    -
    uint32_t rank_cmpestrm() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:78
    -
    epu8 domain_mask(bool complement=false) const
    Returns a mask for the domain of *this.
    Definition perm16_impl.hpp:39
    -
    uint32_t rank() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:82
    -
    epu8 image_mask_ref(bool complement=false) const
    Returns a mask for the image of *this.
    Definition perm16_impl.hpp:55
    -
    uint8_t largest_fix_point() const
    Returns the largest fix point of *this.
    Definition perm16_impl.hpp:105
    -
    epu8 image_mask(bool complement=false) const
    Definition perm16.hpp:73
    +
    uint32_t rank_cmpestrm() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:84
    +
    epu8 domain_mask(bool complement=false) const
    Returns a mask for the domain of *this.
    Definition perm16_impl.hpp:45
    +
    uint32_t rank() const
    Returns the size of the image of *this.
    Definition perm16_impl.hpp:88
    +
    epu8 image_mask_ref(bool complement=false) const
    Returns a mask for the image of *this.
    Definition perm16_impl.hpp:61
    +
    uint8_t largest_fix_point() const
    Returns the largest fix point of *this.
    Definition perm16_impl.hpp:111
    +
    epu8 image_mask(bool complement=false) const
    Definition perm16.hpp:77
    epu8 image_mask_cmpestrm(bool complement=false) const
    Returns a mask for the image of *this.
    -
    uint8_t smallest_moved_point() const
    Returns the smallest non fix point of *this.
    Definition perm16_impl.hpp:101
    -
    Permutations of .
    Definition perm16.hpp:204
    -
    Perm16 inverse_cycl() const
    The inverse permutation.
    Definition perm16_impl.hpp:243
    -
    Perm16 inverse() const
    The inverse permutation.
    Definition perm16.hpp:279
    -
    epu8 lehmer() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:284
    -
    uint8_t length_ref() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:293
    -
    epu8 cycles_partition() const
    The set partition of the cycles of a permutation.
    Definition perm16_impl.hpp:338
    -
    bool left_weak_leq_ref(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:356
    -
    uint8_t nb_descents_ref() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:314
    -
    Perm16 inverse_sort() const
    The inverse permutation.
    Definition perm16_impl.hpp:219
    -
    static constexpr Perm16 one()
    The identity partial permutation.
    Definition perm16.hpp:219
    -
    epu8 lehmer_ref() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:265
    -
    bool left_weak_leq_length(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:379
    -
    uint8_t length() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:312
    -
    Perm16 inverse_ref() const
    The inverse permutation.
    Definition perm16_impl.hpp:203
    -
    uint8_t nb_descents() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:321
    -
    uint8_t nb_cycles_ref() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:325
    -
    static Perm16 elementary_transposition(uint64_t i)
    The elementary transposition exchanging and .
    Definition perm16_impl.hpp:195
    -
    epu8 lehmer_arr() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:274
    -
    static Perm16 unrankSJT(int n, int r)
    The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
    Definition perm16_impl.hpp:167
    -
    bool left_weak_leq(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:366
    -
    uint8_t nb_cycles_unroll() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:351
    -
    Perm16 inverse_pow() const
    The inverse permutation.
    Definition perm16_impl.hpp:261
    -
    uint8_t length_arr() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:302
    -
    Perm16 inverse_arr() const
    The inverse permutation.
    Definition perm16_impl.hpp:210
    -
    static Perm16 random(uint64_t n=16)
    A random permutation of size .
    Definition perm16_impl.hpp:155
    +
    uint8_t smallest_moved_point() const
    Returns the smallest non fix point of *this.
    Definition perm16_impl.hpp:107
    +
    Permutations of .
    Definition perm16.hpp:208
    +
    Perm16 inverse_cycl() const
    The inverse permutation.
    Definition perm16_impl.hpp:249
    +
    Perm16 inverse() const
    The inverse permutation.
    Definition perm16.hpp:283
    +
    epu8 lehmer() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:290
    +
    uint8_t length_ref() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:299
    +
    epu8 cycles_partition() const
    The set partition of the cycles of a permutation.
    Definition perm16_impl.hpp:344
    +
    bool left_weak_leq_ref(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:362
    +
    uint8_t nb_descents_ref() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:320
    +
    Perm16 inverse_sort() const
    The inverse permutation.
    Definition perm16_impl.hpp:225
    +
    static constexpr Perm16 one()
    The identity partial permutation.
    Definition perm16.hpp:223
    +
    epu8 lehmer_ref() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:271
    +
    bool left_weak_leq_length(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:385
    +
    uint8_t length() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:318
    +
    Perm16 inverse_ref() const
    The inverse permutation.
    Definition perm16_impl.hpp:209
    +
    uint8_t nb_descents() const
    The number of descent of a permutation.
    Definition perm16_impl.hpp:327
    +
    uint8_t nb_cycles_ref() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:331
    +
    static Perm16 elementary_transposition(uint64_t i)
    The elementary transposition exchanging and .
    Definition perm16_impl.hpp:201
    +
    epu8 lehmer_arr() const
    The Lehmer code of a permutation.
    Definition perm16_impl.hpp:280
    +
    static Perm16 unrankSJT(int n, int r)
    The r -th permutation of size n for the Steinhaus–Johnson–Trotter order.
    Definition perm16_impl.hpp:173
    +
    bool left_weak_leq(Perm16 other) const
    Compare two permutations for the left weak order.
    Definition perm16_impl.hpp:372
    +
    uint8_t nb_cycles_unroll() const
    The number of cycles of a permutation.
    Definition perm16_impl.hpp:357
    +
    Perm16 inverse_pow() const
    The inverse permutation.
    Definition perm16_impl.hpp:267
    +
    uint8_t length_arr() const
    The Coxeter length (ie: number of inversion) of a permutation.
    Definition perm16_impl.hpp:308
    +
    Perm16 inverse_arr() const
    The inverse permutation.
    Definition perm16_impl.hpp:216
    +
    static Perm16 random(uint64_t n=16)
    A random permutation of size .
    Definition perm16_impl.hpp:161
    Transf16()=default
    -
    Definition vect16.hpp:30
    -
    array & as_array()
    Definition vect16.hpp:41
    -
    epu8 v
    Definition vect16.hpp:33
    -
    static const Perm16 one()
    Definition perm16_impl.hpp:237
    -
    static Perm16 prod(Perm16 a, Perm16 b)
    Definition perm16_impl.hpp:238
    -
    Algebraic monoid structure used by default for type T by the pow function and prod function.
    Definition power.hpp:99
    +
    Definition vect16.hpp:34
    +
    array & as_array()
    Definition vect16.hpp:45
    +
    epu8 v
    Definition vect16.hpp:37
    +
    static const Perm16 one()
    Definition perm16_impl.hpp:243
    +
    static Perm16 prod(Perm16 a, Perm16 b)
    Definition perm16_impl.hpp:244
    +
    Algebraic monoid structure used by default for type T by the pow function and prod function.
    Definition power.hpp:103
    diff --git a/perm__generic_8hpp_source.html b/perm__generic_8hpp_source.html index 4bd7da3..c4cf96f 100644 --- a/perm__generic_8hpp_source.html +++ b/perm__generic_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm_generic.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm_generic.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,107 +77,112 @@
    perm_generic.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16#ifndef HPCOMBI_PERM_GENERIC_HPP
    -
    17#define HPCOMBI_PERM_GENERIC_HPP
    -
    18
    -
    19#include <algorithm> // for shuffle
    -
    20#include <array> // for array
    -
    21#include <cstddef> // for size_t
    -
    22#include <cstdint> // for uint64_t, uint8_t
    -
    23#include <functional> // for hash
    -
    24#include <initializer_list> // for initializer_list
    -
    25#include <memory> // for hash
    -
    26#include <random> // for mt19937
    -
    27#include <type_traits> // for is_trivial
    -
    28
    -
    29#include "debug.hpp" // for HPCOMBI_ASSERT
    -
    30#include "vect_generic.hpp" // for VectGeneric
    -
    31
    -
    32namespace HPCombi {
    -
    33
    -
    34template <size_t Size, typename Expo = uint8_t>
    -
    35struct PermGeneric : public VectGeneric<Size, Expo> {
    - +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20#ifndef HPCOMBI_PERM_GENERIC_HPP_
    +
    21#define HPCOMBI_PERM_GENERIC_HPP_
    +
    22
    +
    23#include <algorithm> // for shuffle
    +
    24#include <array> // for array
    +
    25#include <cstddef> // for size_t
    +
    26#include <cstdint> // for uint64_t, uint8_t
    +
    27#include <functional> // for hash
    +
    28#include <initializer_list> // for initializer_list
    +
    29#include <memory> // for hash
    +
    30#include <random> // for mt19937
    +
    31#include <type_traits> // for is_trivial
    +
    32
    +
    33#include "debug.hpp" // for HPCOMBI_ASSERT
    +
    34#include "vect_generic.hpp" // for VectGeneric
    +
    35
    +
    36namespace HPCombi {
    37
    -
    38 static constexpr size_t size() { return Size; }
    -
    39
    -
    40 PermGeneric() = default;
    -
    41 PermGeneric(const vect v) : vect(v) {} // NOLINT
    -
    42 // Not marked explicit because we want to be able to pass non-initializer
    -
    43 // lists here
    -
    44 PermGeneric(std::initializer_list<Expo> il); // NOLINT
    -
    45
    - -
    47 return this->permuted(p);
    -
    48 }
    -
    49 static PermGeneric one() { return PermGeneric({}); }
    -
    50 static PermGeneric elementary_transposition(uint64_t i);
    -
    51
    -
    52 PermGeneric inverse() const;
    -
    53 static PermGeneric random();
    -
    54
    -
    55 vect lehmer() const;
    -
    56 uint64_t length() const;
    -
    57 uint64_t nb_descents() const;
    -
    58 uint64_t nb_cycles() const;
    -
    59
    -
    60 bool left_weak_leq(PermGeneric other) const;
    -
    61};
    -
    62
    -
    64// Memory layout concepts check //////////////////////////////////////////////
    -
    66
    -
    67static_assert(sizeof(VectGeneric<12>) == sizeof(PermGeneric<12>),
    -
    68 "VectGeneric and PermGeneric have a different memory layout !");
    -
    69static_assert(std::is_trivial<PermGeneric<12>>(),
    -
    70 "PermGeneric is not trivial !");
    -
    71
    -
    72} // namespace HPCombi
    -
    73
    -
    74#include "perm_generic_impl.hpp"
    +
    38template <size_t Size, typename Expo = uint8_t>
    +
    39struct PermGeneric : public VectGeneric<Size, Expo> {
    + +
    41
    +
    42 static constexpr size_t size() { return Size; }
    +
    43
    +
    44 PermGeneric() = default;
    +
    45 PermGeneric(const vect v) : vect(v) {} // NOLINT
    +
    46 // Not marked explicit because we want to be able to pass non-initializer
    +
    47 // lists here
    +
    48 PermGeneric(std::initializer_list<Expo> il); // NOLINT
    +
    49
    + +
    51 return this->permuted(p);
    +
    52 }
    +
    53 static PermGeneric one() { return PermGeneric({}); }
    +
    54 static PermGeneric elementary_transposition(uint64_t i);
    +
    55
    +
    56 PermGeneric inverse() const;
    +
    57 static PermGeneric random();
    +
    58
    +
    59 vect lehmer() const;
    +
    60 uint64_t length() const;
    +
    61 uint64_t nb_descents() const;
    +
    62 uint64_t nb_cycles() const;
    +
    63
    +
    64 bool left_weak_leq(PermGeneric other) const;
    +
    65};
    +
    66
    +
    68// Memory layout concepts check //////////////////////////////////////////////
    +
    70
    +
    71static_assert(sizeof(VectGeneric<12>) == sizeof(PermGeneric<12>),
    +
    72 "VectGeneric and PermGeneric have a different memory layout !");
    +
    73static_assert(std::is_trivial<PermGeneric<12>>(),
    +
    74 "PermGeneric is not trivial !");
    75
    -
    76#endif // HPCOMBI_PERM_GENERIC_HPP
    +
    76} // namespace HPCombi
    +
    77
    +
    78#include "perm_generic_impl.hpp"
    +
    79
    +
    80#endif // HPCOMBI_PERM_GENERIC_HPP_
    -
    Definition bmat8.hpp:37
    +
    Definition bmat8.hpp:41
    -
    Definition perm_generic.hpp:35
    -
    uint64_t length() const
    Definition perm_generic_impl.hpp:65
    -
    PermGeneric(const vect v)
    Definition perm_generic.hpp:41
    -
    bool left_weak_leq(PermGeneric other) const
    Definition perm_generic_impl.hpp:98
    -
    VectGeneric< Size, Expo > vect
    Definition perm_generic.hpp:36
    +
    Definition perm_generic.hpp:39
    +
    uint64_t length() const
    Definition perm_generic_impl.hpp:71
    +
    PermGeneric(const vect v)
    Definition perm_generic.hpp:45
    +
    bool left_weak_leq(PermGeneric other) const
    Definition perm_generic_impl.hpp:104
    +
    VectGeneric< Size, Expo > vect
    Definition perm_generic.hpp:40
    -
    PermGeneric operator*(const PermGeneric &p) const
    Definition perm_generic.hpp:46
    -
    static PermGeneric elementary_transposition(uint64_t i)
    Definition perm_generic_impl.hpp:28
    -
    static PermGeneric random()
    Definition perm_generic_impl.hpp:45
    -
    static constexpr size_t size()
    Definition perm_generic.hpp:38
    -
    PermGeneric inverse() const
    Definition perm_generic_impl.hpp:37
    -
    uint64_t nb_cycles() const
    Definition perm_generic_impl.hpp:84
    -
    vect lehmer() const
    Definition perm_generic_impl.hpp:55
    -
    static PermGeneric one()
    Definition perm_generic.hpp:49
    -
    uint64_t nb_descents() const
    Definition perm_generic_impl.hpp:75
    -
    A generic class for combinatorial integer vectors.
    Definition vect_generic.hpp:44
    -
    array v
    Definition vect_generic.hpp:47
    -
    VectGeneric permuted(const VectGeneric &u) const
    Definition vect_generic.hpp:102
    +
    PermGeneric operator*(const PermGeneric &p) const
    Definition perm_generic.hpp:50
    +
    static PermGeneric elementary_transposition(uint64_t i)
    Definition perm_generic_impl.hpp:34
    +
    static PermGeneric random()
    Definition perm_generic_impl.hpp:51
    +
    static constexpr size_t size()
    Definition perm_generic.hpp:42
    +
    PermGeneric inverse() const
    Definition perm_generic_impl.hpp:43
    +
    uint64_t nb_cycles() const
    Definition perm_generic_impl.hpp:90
    +
    vect lehmer() const
    Definition perm_generic_impl.hpp:61
    +
    static PermGeneric one()
    Definition perm_generic.hpp:53
    +
    uint64_t nb_descents() const
    Definition perm_generic_impl.hpp:81
    +
    A generic class for combinatorial integer vectors.
    Definition vect_generic.hpp:48
    +
    array v
    Definition vect_generic.hpp:51
    +
    VectGeneric permuted(const VectGeneric &u) const
    Definition vect_generic.hpp:106
    diff --git a/perm__generic__impl_8hpp.html b/perm__generic__impl_8hpp.html index f259428..3cdff00 100644 --- a/perm__generic__impl_8hpp.html +++ b/perm__generic__impl_8hpp.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm_generic_impl.hpp File Reference +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm_generic_impl.hpp File Reference @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,7 +77,7 @@
    diff --git a/perm__generic__impl_8hpp_source.html b/perm__generic__impl_8hpp_source.html index 4186ad3..0392995 100644 --- a/perm__generic__impl_8hpp_source.html +++ b/perm__generic__impl_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/perm_generic_impl.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/perm_generic_impl.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,147 +77,154 @@
    perm_generic_impl.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    16namespace HPCombi {
    -
    17
    -
    18template <size_t Size, typename Expo>
    -
    19PermGeneric<Size, Expo>::PermGeneric(std::initializer_list<Expo> il) {
    -
    20 HPCOMBI_ASSERT(il.size() <= Size);
    -
    21 std::copy(il.begin(), il.end(), this->v.begin());
    -
    22 for (Expo i = il.size(); i < Size; i++)
    -
    23 this->v[i] = i;
    -
    24}
    -
    25
    -
    26template <size_t Size, typename Expo>
    - - -
    29 HPCOMBI_ASSERT(i < Size);
    -
    30 PermGeneric res{{}};
    -
    31 res[i] = i + 1;
    -
    32 res[i + 1] = i;
    -
    33 return res;
    -
    34}
    -
    35
    -
    36template <size_t Size, typename Expo>
    - - -
    39 for (uint64_t i = 0; i < Size; i++)
    -
    40 res[this->v[i]] = i;
    -
    41 return res;
    -
    42}
    -
    43
    -
    44template <size_t Size, typename Expo>
    - -
    46 static std::random_device rd;
    -
    47 static std::mt19937 g(rd());
    -
    48
    -
    49 PermGeneric res{{}};
    -
    50 std::shuffle(res.v.begin(), res.v.end(), g);
    -
    51 return res;
    -
    52}
    -
    53
    -
    54template <size_t Size, typename Expo>
    - -
    56 vect res{};
    -
    57 for (size_t i = 0; i < Size; i++)
    -
    58 for (size_t j = i + 1; j < Size; j++)
    -
    59 if (this->v[i] > this->v[j])
    -
    60 res[i]++;
    -
    61 return res;
    -
    62}
    -
    63
    -
    64template <size_t Size, typename Expo>
    - -
    66 uint64_t res = 0;
    -
    67 for (size_t i = 0; i < Size; i++)
    -
    68 for (size_t j = i + 1; j < Size; j++)
    -
    69 if (this->v[i] > this->v[j])
    -
    70 res++;
    -
    71 return res;
    -
    72}
    -
    73
    -
    74template <size_t Size, typename Expo>
    - -
    76 uint64_t res = 0;
    -
    77 for (size_t i = 0; i < Size - 1; i++)
    -
    78 if (this->v[i] > this->v[i + 1])
    -
    79 res++;
    -
    80 return res;
    -
    81}
    -
    82
    -
    83template <size_t Size, typename Expo>
    - -
    85 std::array<bool, Size> b{};
    -
    86 uint64_t c = 0;
    -
    87 for (size_t i = 0; i < Size; i++) {
    -
    88 if (!b[i]) {
    -
    89 for (size_t j = i; !b[j]; j = this->v[j])
    -
    90 b[j] = true;
    -
    91 c++;
    -
    92 }
    -
    93 }
    -
    94 return c;
    -
    95}
    -
    96
    -
    97template <size_t Size, typename Expo>
    - -
    99 for (size_t i = 0; i < Size; i++) {
    -
    100 for (size_t j = i + 1; j < Size; j++) {
    -
    101 if ((this->v[i] > this->v[j]) && (other[i] < other[j]))
    -
    102 return false;
    -
    103 }
    -
    104 }
    -
    105 return true;
    -
    106}
    -
    107
    -
    108}; // namespace HPCombi
    -
    109
    -
    110namespace std {
    -
    111
    -
    112template <size_t Size, typename Expo>
    -
    113struct hash<HPCombi::PermGeneric<Size, Expo>> {
    - -
    115 return hash<HPCombi::VectGeneric<Size, Expo>>()(ar);
    -
    116 }
    -
    117};
    -
    118
    -
    119} // namespace std
    -
    #define HPCOMBI_ASSERT(x)
    Definition debug.hpp:23
    -
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:62
    -
    Definition bmat8.hpp:37
    -
    Definition bmat8.hpp:360
    -
    Definition perm_generic.hpp:35
    -
    uint64_t length() const
    Definition perm_generic_impl.hpp:65
    -
    bool left_weak_leq(PermGeneric other) const
    Definition perm_generic_impl.hpp:98
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    20// NOLINT(build/header_guard)
    +
    21
    +
    22namespace HPCombi {
    +
    23
    +
    24template <size_t Size, typename Expo>
    +
    25PermGeneric<Size, Expo>::PermGeneric(std::initializer_list<Expo> il) {
    +
    26 HPCOMBI_ASSERT(il.size() <= Size);
    +
    27 std::copy(il.begin(), il.end(), this->v.begin());
    +
    28 for (Expo i = il.size(); i < Size; i++)
    +
    29 this->v[i] = i;
    +
    30}
    +
    31
    +
    32template <size_t Size, typename Expo>
    + + +
    35 HPCOMBI_ASSERT(i < Size);
    +
    36 PermGeneric res{{}};
    +
    37 res[i] = i + 1;
    +
    38 res[i + 1] = i;
    +
    39 return res;
    +
    40}
    +
    41
    +
    42template <size_t Size, typename Expo>
    + + +
    45 for (uint64_t i = 0; i < Size; i++)
    +
    46 res[this->v[i]] = i;
    +
    47 return res;
    +
    48}
    +
    49
    +
    50template <size_t Size, typename Expo>
    + +
    52 static std::random_device rd;
    +
    53 static std::mt19937 g(rd());
    +
    54
    +
    55 PermGeneric res{{}};
    +
    56 std::shuffle(res.v.begin(), res.v.end(), g);
    +
    57 return res;
    +
    58}
    +
    59
    +
    60template <size_t Size, typename Expo>
    + +
    62 vect res{};
    +
    63 for (size_t i = 0; i < Size; i++)
    +
    64 for (size_t j = i + 1; j < Size; j++)
    +
    65 if (this->v[i] > this->v[j])
    +
    66 res[i]++;
    +
    67 return res;
    +
    68}
    +
    69
    +
    70template <size_t Size, typename Expo>
    + +
    72 uint64_t res = 0;
    +
    73 for (size_t i = 0; i < Size; i++)
    +
    74 for (size_t j = i + 1; j < Size; j++)
    +
    75 if (this->v[i] > this->v[j])
    +
    76 res++;
    +
    77 return res;
    +
    78}
    +
    79
    +
    80template <size_t Size, typename Expo>
    + +
    82 uint64_t res = 0;
    +
    83 for (size_t i = 0; i < Size - 1; i++)
    +
    84 if (this->v[i] > this->v[i + 1])
    +
    85 res++;
    +
    86 return res;
    +
    87}
    +
    88
    +
    89template <size_t Size, typename Expo>
    + +
    91 std::array<bool, Size> b{};
    +
    92 uint64_t c = 0;
    +
    93 for (size_t i = 0; i < Size; i++) {
    +
    94 if (!b[i]) {
    +
    95 for (size_t j = i; !b[j]; j = this->v[j])
    +
    96 b[j] = true;
    +
    97 c++;
    +
    98 }
    +
    99 }
    +
    100 return c;
    +
    101}
    +
    102
    +
    103template <size_t Size, typename Expo>
    + +
    105 for (size_t i = 0; i < Size; i++) {
    +
    106 for (size_t j = i + 1; j < Size; j++) {
    +
    107 if ((this->v[i] > this->v[j]) && (other[i] < other[j]))
    +
    108 return false;
    +
    109 }
    +
    110 }
    +
    111 return true;
    +
    112}
    +
    113
    +
    114}; // namespace HPCombi
    +
    115
    +
    116namespace std {
    +
    117
    +
    118template <size_t Size, typename Expo>
    +
    119struct hash<HPCombi::PermGeneric<Size, Expo>> {
    + +
    121 return hash<HPCombi::VectGeneric<Size, Expo>>()(ar);
    +
    122 }
    +
    123};
    +
    124
    +
    125} // namespace std
    +
    #define HPCOMBI_ASSERT(x)
    Definition debug.hpp:28
    +
    std::array< std::tuple< uint16_t, uint16_t, std::array< uint16_t, gens.size()> >, 65536 > res
    Definition image.cpp:66
    +
    Definition bmat8.hpp:41
    +
    Definition bmat8.hpp:364
    +
    Definition perm_generic.hpp:39
    +
    uint64_t length() const
    Definition perm_generic_impl.hpp:71
    +
    bool left_weak_leq(PermGeneric other) const
    Definition perm_generic_impl.hpp:104
    -
    static PermGeneric elementary_transposition(uint64_t i)
    Definition perm_generic_impl.hpp:28
    -
    static PermGeneric random()
    Definition perm_generic_impl.hpp:45
    -
    PermGeneric inverse() const
    Definition perm_generic_impl.hpp:37
    -
    uint64_t nb_cycles() const
    Definition perm_generic_impl.hpp:84
    -
    vect lehmer() const
    Definition perm_generic_impl.hpp:55
    -
    uint64_t nb_descents() const
    Definition perm_generic_impl.hpp:75
    -
    A generic class for combinatorial integer vectors.
    Definition vect_generic.hpp:44
    -
    size_t operator()(const HPCombi::PermGeneric< Size, Expo > &ar) const
    Definition perm_generic_impl.hpp:114
    +
    static PermGeneric elementary_transposition(uint64_t i)
    Definition perm_generic_impl.hpp:34
    +
    static PermGeneric random()
    Definition perm_generic_impl.hpp:51
    +
    PermGeneric inverse() const
    Definition perm_generic_impl.hpp:43
    +
    uint64_t nb_cycles() const
    Definition perm_generic_impl.hpp:90
    +
    vect lehmer() const
    Definition perm_generic_impl.hpp:61
    +
    uint64_t nb_descents() const
    Definition perm_generic_impl.hpp:81
    +
    A generic class for combinatorial integer vectors.
    Definition vect_generic.hpp:48
    +
    size_t operator()(const HPCombi::PermGeneric< Size, Expo > &ar) const
    Definition perm_generic_impl.hpp:120
    diff --git a/power_8hpp_source.html b/power_8hpp_source.html index cd3afb5..446d461 100644 --- a/power_8hpp_source.html +++ b/power_8hpp_source.html @@ -5,7 +5,7 @@ -HPCombi: /Users/jdm/git/HPCombi/include/hpcombi/power.hpp Source File +HPCombi: /Users/jdm/hpcombi/include/hpcombi/power.hpp Source File @@ -30,7 +30,7 @@
    HPCombi
    -
    High Performance Combinatorics in C++ using vector instructions v0.0.6
    +
    High Performance Combinatorics in C++ using vector instructions v1.0.0
    @@ -77,71 +77,76 @@
    power.hpp
    -Go to the documentation of this file.
    1
    -
    2// Copyright (C) 2016 Florent Hivert <Florent.Hivert@lri.fr>, //
    +Go to the documentation of this file.
    1//****************************************************************************//
    +
    2// Copyright (C) 2016-2024 Florent Hivert <Florent.Hivert@lisn.fr>, //
    3// //
    -
    4// Distributed under the terms of the GNU General Public License (GPL) //
    +
    4// This file is part of HP-Combi <https://github.com/libsemigroups/HPCombi> //
    5// //
    -
    6// This code is distributed in the hope that it will be useful, //
    -
    7// but WITHOUT ANY WARRANTY; without even the implied warranty of //
    -
    8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU //
    -
    9// General Public License for more details. //
    +
    6// HP-Combi is free software: you can redistribute it and/or modify it //
    +
    7// under the terms of the GNU General Public License as published by the //
    +
    8// Free Software Foundation, either version 3 of the License, or //
    +
    9// (at your option) any later version. //
    10// //
    -
    11// The full text of the GPL is available at: //
    -
    12// //
    -
    13// http://www.gnu.org/licenses/ //
    -
    15
    -
    35#ifndef HPCOMBI_POWER_HPP_INCLUDED
    -
    36#define HPCOMBI_POWER_HPP_INCLUDED
    -
    37
    -
    38namespace HPCombi {
    -
    39
    -
    40namespace power_helper {
    +
    11// HP-Combi is distributed in the hope that it will be useful, but WITHOUT //
    +
    12// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or //
    +
    13// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License //
    +
    14// for more details. //
    +
    15// //
    +
    16// You should have received a copy of the GNU General Public License along //
    +
    17// with HP-Combi. If not, see <https://www.gnu.org/licenses/>. //
    +
    18//****************************************************************************//
    +
    19
    +
    39#ifndef HPCOMBI_POWER_HPP_
    +
    40#define HPCOMBI_POWER_HPP_
    41
    -
    42// Forward declaration
    -
    43template <typename T> struct Monoid;
    -
    44
    -
    45} // namespace power_helper
    -
    46
    -
    57template <typename T, typename M = power_helper::Monoid<T>>
    -
    58const T square(const T x) {
    -
    59 return M::prod(x, x);
    -
    60}
    -
    61
    -
    78template <unsigned exp, typename T, typename M = power_helper::Monoid<T>>
    -
    79const T pow(const T x) {
    -
    80 return (exp == 0) ? M::one()
    -
    81 : (exp % 2 == 0)
    -
    82 ? square<T, M>(pow<unsigned(exp / 2), T, M>(x))
    -
    83 : M::prod(x, square<T, M>(pow<unsigned(exp / 2), T, M>(x)));
    -
    84}
    -
    85
    -
    86namespace power_helper {
    -
    87
    -
    99template <typename T> struct Monoid {
    -
    101 static const T one() { return 1; }
    -
    102
    -
    108 static const T prod(T a, T b) { return a * b; }
    -
    109};
    -
    110
    -
    111} // namespace power_helper
    -
    112
    -
    113} // namespace HPCombi
    +
    42namespace HPCombi {
    +
    43
    +
    44namespace power_helper {
    +
    45
    +
    46// Forward declaration
    +
    47template <typename T> struct Monoid;
    +
    48
    +
    49} // namespace power_helper
    +
    50
    +
    61template <typename T, typename M = power_helper::Monoid<T>>
    +
    62const T square(const T x) {
    +
    63 return M::prod(x, x);
    +
    64}
    +
    65
    +
    82template <unsigned exp, typename T, typename M = power_helper::Monoid<T>>
    +
    83const T pow(const T x) {
    +
    84 return (exp == 0) ? M::one()
    +
    85 : (exp % 2 == 0)
    +
    86 ? square<T, M>(pow<unsigned(exp / 2), T, M>(x))
    +
    87 : M::prod(x, square<T, M>(pow<unsigned(exp / 2), T, M>(x)));
    +
    88}
    +
    89
    +
    90namespace power_helper {
    +
    91
    +
    103template <typename T> struct Monoid {
    +
    105 static const T one() { return 1; }
    +
    106
    +
    112 static const T prod(T a, T b) { return a * b; }
    +
    113};
    114
    -
    115#endif // HPCOMBI_POWER_HPP_INCLUDED
    -
    Definition bmat8.hpp:37
    -
    const T square(const T x)
    A generic compile time squaring function.
    Definition power.hpp:58
    -
    const T pow(const T x)
    A generic compile time exponentiation function.
    Definition power.hpp:79
    -
    Algebraic monoid structure used by default for type T by the pow function and prod function.
    Definition power.hpp:99
    -
    static const T prod(T a, T b)
    the product of two elements of type T
    Definition power.hpp:108
    -
    static const T one()
    The one of type T.
    Definition power.hpp:101
    +
    115} // namespace power_helper
    +
    116
    +
    117} // namespace HPCombi
    +
    118
    +
    119#endif // HPCOMBI_POWER_HPP_
    +
    Definition bmat8.hpp:41
    +
    const T square(const T x)
    A generic compile time squaring function.
    Definition power.hpp:62
    +
    const T pow(const T x)
    A generic compile time exponentiation function.
    Definition power.hpp:83
    +
    Algebraic monoid structure used by default for type T by the pow function and prod function.
    Definition power.hpp:103
    +
    static const T prod(T a, T b)
    the product of two elements of type T
    Definition power.hpp:112
    +
    static const T one()
    The one of type T.
    Definition power.hpp:105