From 763562af8f539bcd473b39617f9945f15808429c Mon Sep 17 00:00:00 2001
From: Jean-Baptiste Rouquier <jrouquie@example.com>
Date: Mon, 16 Dec 2024 12:35:23 +0100
Subject: [PATCH] doc: add doc for all classes

---
 include/hpcombi/bmat8.hpp        | 20 ++++++++++---------
 include/hpcombi/builder.hpp      |  5 ++++-
 include/hpcombi/epu8.hpp         | 14 +++++++++++--
 include/hpcombi/perm16.hpp       | 34 ++++++++++++++++++--------------
 include/hpcombi/perm_generic.hpp |  7 +++++++
 include/hpcombi/power.hpp        | 26 +++++++++++++++---------
 include/hpcombi/vect16.hpp       |  1 +
 include/hpcombi/vect_generic.hpp |  7 ++++++-
 8 files changed, 77 insertions(+), 37 deletions(-)
diff --git a/include/hpcombi/bmat8.hpp b/include/hpcombi/bmat8.hpp
index 15b1230..ef68187 100644
--- a/include/hpcombi/bmat8.hpp
+++ b/include/hpcombi/bmat8.hpp
@@ -41,15 +41,17 @@
 
 namespace HPCombi {
 
-//! Class for fast boolean matrices of dimension up to 8 x 8
-//!
-//! The methods for these small matrices over the boolean semiring
-//! are more optimised than the generic methods for boolean matrices.
-//! Note that all BMat8 are represented internally as an 8 x 8 matrix;
-//! any entries not defined by the user are taken to be 0. This does
-//! not affect the results of any calculations.
-//!
-//! BMat8 is a trivial class.
+/** Boolean matrices of dimension up to 8×8, stored as a single uint64;
+isomorphic to binary relations with methods for composition.
+
+The methods for these small matrices over the boolean semiring
+are more optimised than the generic methods for boolean matrices.
+Note that all BMat8 are represented internally as an 8×8 matrix;
+any entries not defined by the user are taken to be 0. This does
+not affect the results of any calculation.
+
+BMat8 is a trivial class.
+*/
 class BMat8 {
  public:
     //! A default constructor.
diff --git a/include/hpcombi/builder.hpp b/include/hpcombi/builder.hpp
index aa48d6e..20d553c 100644
--- a/include/hpcombi/builder.hpp
+++ b/include/hpcombi/builder.hpp
@@ -33,8 +33,11 @@
 
 namespace HPCombi {
 
-/** Class for factory object associated to a SIMD packed unsigned integers.
+/** Given a transformation from 0..15 → 0..15,
+ * build at compile-time the array representing the transformation.
+ *
  * @details
+ * Class for factory objects associated to SIMD packed unsigned integers.
  * The main purpose of this class is to be able to construct in a \c constexpr
  * way various instances of the \c TPU SIMD vector type. The behavior of
  * an instance of \c TPUBuild<TPU> is designed to mimic the behavior of \c TPU
diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp
index 0fd2598..36587c8 100644
--- a/include/hpcombi/epu8.hpp
+++ b/include/hpcombi/epu8.hpp
@@ -18,7 +18,10 @@
 //****************************************************************************//
 
 /** @file
-@brief declaration of HPCombi::epu8 */
+@brief declaration of HPCombi::epu8.
+
+Contains renaming of some low level functions,
+eg simde_mm_testz_si128(a,a) → is_all_zero(a) */
 
 #ifndef HPCOMBI_EPU8_HPP_
 #define HPCOMBI_EPU8_HPP_
@@ -44,7 +47,14 @@ operator"" _u8(unsigned long long arg) noexcept {  // NOLINT
     return static_cast<uint8_t>(arg);
 }
 
-/// SIMD vector of 16 unsigned bytes
+/**
+epu8 stands for *Extended Packed Unsigned, grouped by 8 bits*;
+this is the low level type chosen by Intel for their API to intrinsics,
+ie a SIMD vector of 16 unsigned bytes (16×8 = 128bits).
+Functions using this type use semantically equivalent types,
+eg a __m128i which is a vector of 2×64 bits.
+A flag tells the compiler to silently consider those types equivalent.
+ */
 using epu8 = uint8_t __attribute__((vector_size(16)));
 
 static_assert(alignof(epu8) == 16,
diff --git a/include/hpcombi/perm16.hpp b/include/hpcombi/perm16.hpp
index b0ff471..0b17751 100644
--- a/include/hpcombi/perm16.hpp
+++ b/include/hpcombi/perm16.hpp
@@ -18,7 +18,12 @@
 //****************************************************************************//
 
 /** @file
-@brief declaration of HPCombi::PTransf16, HPCombi::Transf16, HPCombi::PPerm16 and HPCombi::Perm16. */
+@brief declaration of
+\ref HPCombi::PTransf16 "PTransf16",
+\ref HPCombi::Transf16  "Transf16",
+\ref HPCombi::PPerm16   "PPerm16" and
+\ref HPCombi::Perm16    "Perm16"
+*/
 
 #ifndef HPCOMBI_PERM16_HPP_
 #define HPCOMBI_PERM16_HPP_
@@ -44,9 +49,9 @@ struct Perm16;
 struct PTransf16;
 struct Transf16;
 
-/** Partial transformation of @f$\{0\dots 15\}@f$
- *
- */
+/** Partial transformation of @f$\{0\dots 15\}@f$; see HPCombi::Transf16;
+partial means it might not be defined everywhere.
+Undefined images are encoded as 0xFF. */
 struct alignas(16) PTransf16 : public Vect16 {
     static constexpr size_t size() { return 16; }
 
@@ -119,13 +124,10 @@ struct alignas(16) PTransf16 : public Vect16 {
     uint8_t nb_fix_points() const;
 };
 
-/** Full transformation of @f$\{0\dots 15\}@f$.
- *
- * A transformation is a mapping of a set of n elements *into* itself.
- * I.e. as opposed to a permutation, it is not necessarily injective.
- * Here n=16.
- *
- */
+/** Full transformation of @f$\{0\dots 15\}@f$:
+a transformation is a mapping of a set of n elements *into* itself;
+ie as opposed to a permutation, it is not necessarily injective.
+Here n is hard-coded to 16. */
 struct Transf16 : public PTransf16 {
     Transf16() = default;
     constexpr Transf16(const Transf16 &v) = default;
@@ -152,7 +154,9 @@ struct Transf16 : public PTransf16 {
     explicit operator uint64_t() const;
 };
 
-//! Partial permutation of @f$\{0, \dots, 15\}@f$
+/** Partial permutation of @f$\{0\dots 15\}@f$; see HPCombi::Perm16;
+partial means it might not be defined everywhere (but where it's defined, it's injective).
+Undefined images are encoded as 0xFF. */
 struct PPerm16 : public PTransf16 {
     PPerm16() = default;
     constexpr PPerm16(const PPerm16 &v) = default;
@@ -209,9 +213,9 @@ struct PPerm16 : public PTransf16 {
     PPerm16 left_one() const { return PTransf16::left_one(); }
 };
 
-/** Permutations of @f$\{0\dots 15\}@f$
- *
- * A permutation is a bijective mapping of a set of n elements onto itself. Here n=16.
+/** Permutations of @f$\{0\dots 15\}@f$:
+ * A permutation is a bijective mapping of a set of n elements onto itself.
+ * Here n is hard-coded to 16.
  */
 struct Perm16 : public Transf16 /* public PPerm : diamond problem */ {
     Perm16() = default;
diff --git a/include/hpcombi/perm_generic.hpp b/include/hpcombi/perm_generic.hpp
index b641abd..9335a3f 100644
--- a/include/hpcombi/perm_generic.hpp
+++ b/include/hpcombi/perm_generic.hpp
@@ -38,6 +38,13 @@
 
 namespace HPCombi {
 
+/** Vanilla (ie NOT optimized) implementation of a permutation, used to check for test correctness and as baseline to measure speedup.
+Implemented as a std::array, so the permutation is not necessarily of size n=16.
+PermGeneric<16> should implement as much as possible of Perm16 (currently not everything due to lack of time/need).
+No optimisation, so prefer to use Perm16.
+
+About Expo, see comment on HPCombi::VectGeneric.
+*/
 template <size_t Size, typename Expo = uint8_t>
 struct PermGeneric : public VectGeneric<Size, Expo> {
     using vect = VectGeneric<Size, Expo>;
diff --git a/include/hpcombi/power.hpp b/include/hpcombi/power.hpp
index eb621c4..a98db24 100644
--- a/include/hpcombi/power.hpp
+++ b/include/hpcombi/power.hpp
@@ -17,23 +17,31 @@
 //  with HP-Combi. If not, see <https://www.gnu.org/licenses/>.               //
 //****************************************************************************//
 
+
 /** @file
-@brief Generic compile-time exponentiation algorithm.
+@brief  Generic compile-time unrolling of the fast exponentiation algorithm.
+
+Allows writing expressions such as
+- @c pow<23>(2.5) : entirely computed at compile time
+- @c pow<n>(x) expanded at compile time to a O(log n) long sequence of multiplications.
 
-The goal of this file is to be able to write expressions such as @c
-pow<23>(2.5) or @c pow<n>(x) where the first expression is entirely
-computed as compile time and the second one is expanded also as compile
-time to a O(log n) long sequence of multiplication. Furthermore such
-expression not only works for numbers for for any type where there is a
+Such expressions work for numbers but also for any type where there is a
 neutral element and an associative (non necessarily commutative) product,
-namely what mathematicians call \e monoids. These include for example,
+ie what mathematicians call \e monoids.
+These include for example
 strings where the neutral element is the empty string and the product is
 the concatenation.
 
-see HPCombi::power_helper::Monoid<std::string>
+See HPCombi::power_helper::Monoid<std::string>
+
+The algorithm used here is based on the base-2 representation of n;
+it is a 2-approximation of the optimum number of multiplications.
+The general problem is called *addition chain* and one can sometimes do better,
+eg on Fibonacci numbers, rather use the Fibonacci recurrence relation
+to choose which products to compute.
 
 @example stringmonoid.cpp
-This is an example of how to use pow with a non numerical Monoid.
+How to use pow with a non-numerical Monoid.
 */
 
 #ifndef HPCOMBI_POWER_HPP_
diff --git a/include/hpcombi/vect16.hpp b/include/hpcombi/vect16.hpp
index 2019704..f9a0b2e 100644
--- a/include/hpcombi/vect16.hpp
+++ b/include/hpcombi/vect16.hpp
@@ -34,6 +34,7 @@
 
 namespace HPCombi {
 
+/** Vector of 16 bytes, with some optimized methods, superclass of HPCombi::Transf16. */
 struct alignas(16) Vect16 {
     static constexpr size_t size() { return 16; }
     using array = typename decltype(Epu8)::array;
diff --git a/include/hpcombi/vect_generic.hpp b/include/hpcombi/vect_generic.hpp
index 381b941..65cf4da 100644
--- a/include/hpcombi/vect_generic.hpp
+++ b/include/hpcombi/vect_generic.hpp
@@ -46,7 +46,12 @@ std::array<Expo, Size> sorted_vect(std::array<Expo, Size> v) {
     return v;
 }
 
-/** A generic class for combinatorial integer vectors.
+/** \ref HPCombi::VectGeneric "VectGeneric" is to \ref HPCombi::Vect16 "Vect16"
+what \ref HPCombi::PermGeneric "PermGeneric" is to \ref HPCombi::Perm16 "Perm16";
+see \ref HPCombi::PermGeneric "PermGeneric".
+
+HPCombi started as a library to manipulate monomials on several variables,
+ie a tuple of *expo*nents. The elements of arrays were thus named Expo.
  */
 template <size_t Size, typename Expo = uint8_t> struct VectGeneric {
     static constexpr size_t size() { return Size; }