From 02044b9a4c9ffc302785c0320ea13aa354274d00 Mon Sep 17 00:00:00 2001 From: Jean-Baptiste Rouquier Date: Tue, 17 Dec 2024 12:07:09 +0100 Subject: [PATCH] proofread the pull request --- include/hpcombi/epu8.hpp | 6 +++--- include/hpcombi/hpcombi.hpp | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/hpcombi/epu8.hpp b/include/hpcombi/epu8.hpp index 8c47b5e..c5f9b3b 100644 --- a/include/hpcombi/epu8.hpp +++ b/include/hpcombi/epu8.hpp @@ -52,9 +52,9 @@ epu8 stands for *Extended Packed Unsigned, grouped by 8 bits*; this is the low level type chosen by Intel for their API to intrinsics, ie a SIMD vector of 16 unsigned bytes (16×8 = 128bits). Functions using this type use semantically equivalent types, -eg a _m128 which is 2 vect of 64bits. -a flag tells the compiler to silently consider those types equivalent. - */ +eg a __m128i which is a vector containing 2 signed 64-bit integers. +A flag tells the compiler to silently consider those types equivalent. +*/ using epu8 = uint8_t __attribute__((vector_size(16))); static_assert(alignof(epu8) == 16, diff --git a/include/hpcombi/hpcombi.hpp b/include/hpcombi/hpcombi.hpp index c4cacad..33c2b1b 100644 --- a/include/hpcombi/hpcombi.hpp +++ b/include/hpcombi/hpcombi.hpp @@ -53,9 +53,9 @@ applying a permutation on a vector only takes a few CPU cycles. Further ideas are: - Vectorization (MMX, SSE, AVX instructions sets) and careful memory alignment, -- Careful memory management: avoiding all dynamic allocation during the computation, -- Avoid all unnecessary copies (often needed to rewrite the containers), -- Due to combinatorial explosion, sets often don’t fit in the computer’s memory or disks and are enumerated on the fly. +- Careful memory management: avoid all dynamic allocation during the computation, +- Avoid all unnecessary copies (this often requires rewriting the containers), +- Due to combinatorial explosion, sets often don’t fit in memory or on disk and are enumerated on the fly. 
Here are some examples, the speedup is in comparison to an implementation without vector instructions: