diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index a344e771c..36dd36379 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -192,8 +192,8 @@ jobs: native-aliases: runs-on: ubuntu-22.04 env: - CFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -march=native -Wall -Wextra -Werror - CXXFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -march=native -Wall -Wextra -Werror + CFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -Wall -Wextra -Werror + CXXFLAGS: -DSIMDE_ENABLE_NATIVE_ALIASES -DSIMDE_NATIVE_ALIASES_TESTING -Wall -Wextra -Werror steps: - uses: actions/checkout@v3 with: diff --git a/simde/mips/msa/madd.h b/simde/mips/msa/madd.h index 5037577a4..15b478eb4 100644 --- a/simde/mips/msa/madd.h +++ b/simde/mips/msa/madd.h @@ -73,7 +73,7 @@ simde_msa_fmadd_w(simde_v4f32 a, simde_v4f32 b, simde_v4f32 c) { } #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_fmadd_w - #define __msa_fmadd_w(a, b) simde_msa_fmadd_w((a), (b)) + #define __msa_fmadd_w(a, b, c) simde_msa_fmadd_w((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -114,7 +114,7 @@ simde_msa_fmadd_d(simde_v2f64 a, simde_v2f64 b, simde_v2f64 c) { } #if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES) #undef __msa_fmadd_d - #define __msa_fmadd_d(a, b) simde_msa_fmadd_d((a), (b)) + #define __msa_fmadd_d(a, b, c) simde_msa_fmadd_d((a), (b), (c)) #endif SIMDE_END_DECLS_ diff --git a/simde/simde-features.h b/simde/simde-features.h index 449c8c5e4..b152abf44 100644 --- a/simde/simde-features.h +++ b/simde/simde-features.h @@ -639,6 +639,9 @@ #if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE) #define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES #endif + #if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) + #define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES + #endif #if !defined(SIMDE_X86_AVX512DQ_NATIVE) #define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES #endif @@ -678,6 +681,10 @@ #define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES #endif + #if !defined(SIMDE_MIPS_MSA_NATIVE) + #define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES + #endif + #if !defined(SIMDE_WASM_SIMD128_NATIVE) #define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES #endif diff --git a/simde/x86/avx512/2intersect.h b/simde/x86/avx512/2intersect.h index 66884f1dd..81b0ee1fb 100644 --- a/simde/x86/avx512/2intersect.h +++ b/simde/x86/avx512/2intersect.h @@ -37,36 +37,35 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES void simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) { - #if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) - _mm_2intersect_epi32(a, b, k1, k2); - #else - simde__m128i_private - a_ = simde__m128i_to_private(a), - b_ = simde__m128i_to_private(b); - simde__mmask8 - k1_ = 0, - k2_ = 0; - - for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { - #if defined(SIMDE_ENABLE_OPENMP) - #pragma omp simd reduction(|:k1_) reduction(|:k2_) - #else - SIMDE_VECTORIZE - #endif - for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { - const int32_t m = a_.i32[i] == b_.i32[j]; - k1_ |= m << i; - k2_ |= m << j; - } + simde__m128i_private + a_ = simde__m128i_to_private(a), + b_ = simde__m128i_to_private(b); + simde__mmask8 + k1_ = 0, + k2_ = 0; + + for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) { + #if defined(SIMDE_ENABLE_OPENMP) + #pragma omp simd reduction(|:k1_) reduction(|:k2_) + #else + SIMDE_VECTORIZE + #endif + for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) { + const int32_t m = a_.i32[i] == b_.i32[j]; + k1_ |= m << i; + k2_ |= m << j; } + } - *k1 = k1_; - *k2 = k2_; - #endif + *k1 = k1_; + *k2 = k2_; } +#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + #define simde_mm_2intersect_epi32(a, b, k1, k2) _mm_2intersect_epi32(a, b, k1, k2) +#endif #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef __mm_2intersect_epi32 - #define __mm_2intersect_epi32(a,b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2) + #undef _mm_2intersect_epi32 + #define _mm_2intersect_epi32(a, b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -100,8 +99,8 @@ simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, sim #endif } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef __mm_2intersect_epi64 - #define __mm_2intersect_epi64(a,b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2) + #undef _mm_2intersect_epi64 + #define _mm_2intersect_epi64(a, b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -136,7 +135,7 @@ simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1, } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_2intersect_epi32 - #define _mm256_2intersect_epi32(a,b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2) + #define _mm256_2intersect_epi32(a, b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -171,7 +170,7 @@ simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1, } #if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_2intersect_epi64 - #define _mm256_2intersect_epi64(a,b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2) + #define _mm256_2intersect_epi64(a, b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2) #endif SIMDE_FUNCTION_ATTRIBUTES diff --git a/simde/x86/avx512/cmp.h b/simde/x86/avx512/cmp.h index b71a4aa1f..6555c2bf9 100644 --- a/simde/x86/avx512/cmp.h +++ b/simde/x86/avx512/cmp.h @@ -1067,7 +1067,7 @@ simde_mm512_cmp_epi16_mask (simde__m512i a, simde__m512i b, const int imm8) #endif #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmp_epi16_mask -#define _mm512_mask_cmp_epi16_mask(a, b, imm8) simde_mm512_mask_cmp_epi16_mask((a), (b), (imm8)) +#define _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epi16_mask((k1), (a), (b), (imm8)) #endif SIMDE_HUGE_FUNCTION_ATTRIBUTES @@ -1592,7 +1592,7 @@ simde_mm512_cmp_epu32_mask (simde__m512i a, simde__m512i b, const int imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmp_epu32_mask -#define _mm512_mask_cmp_epu32_mask(a, b, imm8) simde_mm512_mask_cmp_epu32_mask((a), (b), (imm8)) +#define _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu32_mask((k1), (a), (b), (imm8)) #endif SIMDE_HUGE_FUNCTION_ATTRIBUTES @@ -1701,7 +1701,7 @@ simde_mm512_cmp_epu64_mask (simde__m512i a, simde__m512i b, const int imm8) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmp_epu64_mask -#define _mm512_mask_cmp_epu64_mask(a, b, imm8) simde_mm512_mask_cmp_epu64_mask((a), (b), (imm8)) +#define _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu64_mask((k1), (a), (b), (imm8)) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/cmpge.h b/simde/x86/avx512/cmpge.h index a94a0c410..d0d428790 100644 --- a/simde/x86/avx512/cmpge.h +++ b/simde/x86/avx512/cmpge.h @@ -78,8 +78,8 @@ simde_mm_cmpge_epi8_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi8_mask - #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) + #undef _mm_cmpge_epi8_mask + #define _mm_cmpge_epi8_mask(a, b) simde_mm_cmpge_epi8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -93,7 +93,7 @@ simde_mm_mask_cmpge_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epi8_mask - #define _mm_mask_cmpge_epi8_mask(src, k, a, b) simde_mm_mask_cmpge_epi8_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epi8_mask(k, a, b) simde_mm_mask_cmpge_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -134,8 +134,8 @@ simde_mm256_cmpge_epi8_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi8_mask - #define _mm512_cmpge_epi8_mask(a, b) simde_mm512_cmpge_epi8_mask((a), (b)) + #undef _mm256_cmpge_epi8_mask + #define _mm256_cmpge_epi8_mask(a, b) simde_mm256_cmpge_epi8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -149,7 +149,7 @@ simde_mm256_mask_cmpge_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epi8_mask - #define _mm256_mask_cmpge_epi8_mask(src, k, a, b) simde_mm256_mask_cmpge_epi8_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epi8_mask(k, a, b) simde_mm256_mask_cmpge_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -209,7 +209,7 @@ simde_mm512_mask_cmpge_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epi8_mask - #define _mm512_mask_cmpge_epi8_mask(src, k, a, b) simde_mm512_mask_cmpge_epi8_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epi8_mask(k, a, b) simde_mm512_mask_cmpge_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -252,8 +252,8 @@ simde_mm_cmpge_epu8_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu8_mask - #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) + #undef _mm_cmpge_epu8_mask + #define _mm_cmpge_epu8_mask(a, b) simde_mm_cmpge_epu8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -267,7 +267,7 @@ simde_mm_mask_cmpge_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epu8_mask - #define _mm_mask_cmpge_epu8_mask(src, k, a, b) simde_mm_mask_cmpge_epu8_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epu8_mask(k, a, b) simde_mm_mask_cmpge_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -308,8 +308,8 @@ simde_mm256_cmpge_epu8_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu8_mask - #define _mm512_cmpge_epu8_mask(a, b) simde_mm512_cmpge_epu8_mask((a), (b)) + #undef _mm256_cmpge_epu8_mask + #define _mm256_cmpge_epu8_mask(a, b) simde_mm256_cmpge_epu8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -323,7 +323,7 @@ simde_mm256_mask_cmpge_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epu8_mask - #define _mm256_mask_cmpge_epu8_mask(src, k, a, b) simde_mm256_mask_cmpge_epu8_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epu8_mask(k, a, b) simde_mm256_mask_cmpge_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -383,7 +383,7 @@ simde_mm512_mask_cmpge_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epu8_mask - #define _mm512_mask_cmpge_epu8_mask(src, k, a, b) simde_mm512_mask_cmpge_epu8_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epu8_mask(k, a, b) simde_mm512_mask_cmpge_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -426,8 +426,8 @@ simde_mm_cmpge_epi16_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi16_mask - #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) + #undef _mm_cmpge_epi16_mask + #define _mm_cmpge_epi16_mask(a, b) simde_mm_cmpge_epi16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -441,7 +441,7 @@ simde_mm_mask_cmpge_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epi16_mask - #define _mm_mask_cmpge_epi16_mask(src, k, a, b) simde_mm_mask_cmpge_epi16_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epi16_mask(k, a, b) simde_mm_mask_cmpge_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -482,8 +482,8 @@ simde_mm256_cmpge_epi16_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi16_mask - #define _mm512_cmpge_epi16_mask(a, b) simde_mm512_cmpge_epi16_mask((a), (b)) + #undef _mm256_cmpge_epi16_mask + #define _mm256_cmpge_epi16_mask(a, b) simde_mm256_cmpge_epi16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -497,7 +497,7 @@ simde_mm256_mask_cmpge_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epi16_mask - #define _mm256_mask_cmpge_epi16_mask(src, k, a, b) simde_mm256_mask_cmpge_epi16_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epi16_mask(k, a, b) simde_mm256_mask_cmpge_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -557,7 +557,7 @@ simde_mm512_mask_cmpge_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epi16_mask - #define _mm512_mask_cmpge_epi16_mask(src, k, a, b) simde_mm512_mask_cmpge_epi16_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epi16_mask(k, a, b) simde_mm512_mask_cmpge_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -600,8 +600,8 @@ simde_mm_cmpge_epu16_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu16_mask - #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) + #undef _mm_cmpge_epu16_mask + #define _mm_cmpge_epu16_mask(a, b) simde_mm_cmpge_epu16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -615,7 +615,7 @@ simde_mm_mask_cmpge_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epu16_mask - #define _mm_mask_cmpge_epu16_mask(src, k, a, b) simde_mm_mask_cmpge_epu16_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epu16_mask(k, a, b) simde_mm_mask_cmpge_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -656,8 +656,8 @@ simde_mm256_cmpge_epu16_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu16_mask - #define _mm512_cmpge_epu16_mask(a, b) simde_mm512_cmpge_epu16_mask((a), (b)) + #undef _mm256_cmpge_epu16_mask + #define _mm256_cmpge_epu16_mask(a, b) simde_mm256_cmpge_epu16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -671,7 +671,7 @@ simde_mm256_mask_cmpge_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epu16_mask - #define _mm256_mask_cmpge_epu16_mask(src, k, a, b) simde_mm256_mask_cmpge_epu16_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epu16_mask(k, a, b) simde_mm256_mask_cmpge_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -731,7 +731,7 @@ simde_mm512_mask_cmpge_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epu16_mask - #define _mm512_mask_cmpge_epu16_mask(src, k, a, b) simde_mm512_mask_cmpge_epu16_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epu16_mask(k, a, b) simde_mm512_mask_cmpge_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -774,8 +774,8 @@ simde_mm_cmpge_epi32_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi32_mask - #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) + #undef _mm_cmpge_epi32_mask + #define _mm_cmpge_epi32_mask(a, b) simde_mm_cmpge_epi32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -789,7 +789,7 @@ simde_mm_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epi32_mask - #define _mm_mask_cmpge_epi32_mask(src, k, a, b) simde_mm_mask_cmpge_epi32_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epi32_mask(k, a, b) simde_mm_mask_cmpge_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -830,8 +830,8 @@ simde_mm256_cmpge_epi32_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epi32_mask - #define _mm512_cmpge_epi32_mask(a, b) simde_mm512_cmpge_epi32_mask((a), (b)) + #undef _mm256_cmpge_epi32_mask + #define _mm256_cmpge_epi32_mask(a, b) simde_mm256_cmpge_epi32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -845,7 +845,7 @@ simde_mm256_mask_cmpge_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epi32_mask - #define _mm256_mask_cmpge_epi32_mask(src, k, a, b) simde_mm256_mask_cmpge_epi32_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epi32_mask(k, a, b) simde_mm256_mask_cmpge_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -905,7 +905,7 @@ simde_mm512_mask_cmpge_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epi32_mask - #define _mm512_mask_cmpge_epi32_mask(src, k, a, b) simde_mm512_mask_cmpge_epi32_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epi32_mask(k, a, b) simde_mm512_mask_cmpge_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -948,8 +948,8 @@ simde_mm_cmpge_epu32_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu32_mask - #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) + #undef _mm_cmpge_epu32_mask + #define _mm_cmpge_epu32_mask(a, b) simde_mm_cmpge_epu32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -963,7 +963,7 @@ simde_mm_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epu32_mask - #define _mm_mask_cmpge_epu32_mask(src, k, a, b) simde_mm_mask_cmpge_epu32_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epu32_mask(k, a, b) simde_mm_mask_cmpge_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1004,8 +1004,8 @@ simde_mm256_cmpge_epu32_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu32_mask - #define _mm512_cmpge_epu32_mask(a, b) simde_mm512_cmpge_epu32_mask((a), (b)) + #undef _mm256_cmpge_epu32_mask + #define _mm256_cmpge_epu32_mask(a, b) simde_mm256_cmpge_epu32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1019,7 +1019,7 @@ simde_mm256_mask_cmpge_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epu32_mask - #define _mm256_mask_cmpge_epu32_mask(src, k, a, b) simde_mm256_mask_cmpge_epu32_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epu32_mask(k, a, b) simde_mm256_mask_cmpge_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1079,7 +1079,7 @@ simde_mm512_mask_cmpge_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epu32_mask - #define _mm512_mask_cmpge_epu32_mask(src, k, a, b) simde_mm512_mask_cmpge_epu32_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epu32_mask(k, a, b) simde_mm512_mask_cmpge_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1137,7 +1137,7 @@ simde_mm_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epi64_mask - #define _mm_mask_cmpge_epi64_mask(src, k, a, b) simde_mm_mask_cmpge_epi64_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epi64_mask(k, a, b) simde_mm_mask_cmpge_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1193,7 +1193,7 @@ simde_mm256_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epi64_mask - #define _mm256_mask_cmpge_epi64_mask(src, k, a, b) simde_mm256_mask_cmpge_epi64_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epi64_mask(k, a, b) simde_mm256_mask_cmpge_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1253,7 +1253,7 @@ simde_mm512_mask_cmpge_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epi64_mask - #define _mm512_mask_cmpge_epi64_mask(src, k, a, b) simde_mm512_mask_cmpge_epi64_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epi64_mask(k, a, b) simde_mm512_mask_cmpge_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1294,8 +1294,8 @@ simde_mm_cmpge_epu64_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu64_mask - #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) + #undef _mm_cmpge_epu64_mask + #define _mm_cmpge_epu64_mask(a, b) simde_mm_cmpge_epu64_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1309,7 +1309,7 @@ simde_mm_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpge_epu64_mask - #define _mm_mask_cmpge_epu64_mask(src, k, a, b) simde_mm_mask_cmpge_epu64_mask((src), (k), (a), (b)) + #define _mm_mask_cmpge_epu64_mask(k, a, b) simde_mm_mask_cmpge_epu64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1350,8 +1350,8 @@ simde_mm256_cmpge_epu64_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmpge_epu64_mask - #define _mm512_cmpge_epu64_mask(a, b) simde_mm512_cmpge_epu64_mask((a), (b)) + #undef _mm256_cmpge_epu64_mask + #define _mm256_cmpge_epu64_mask(a, b) simde_mm256_cmpge_epu64_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1365,7 +1365,7 @@ simde_mm256_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpge_epu64_mask - #define _mm256_mask_cmpge_epu64_mask(src, k, a, b) simde_mm256_mask_cmpge_epu64_mask((src), (k), (a), (b)) + #define _mm256_mask_cmpge_epu64_mask(k, a, b) simde_mm256_mask_cmpge_epu64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1425,7 +1425,7 @@ simde_mm512_mask_cmpge_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmpge_epu64_mask - #define _mm512_mask_cmpge_epu64_mask(src, k, a, b) simde_mm512_mask_cmpge_epu64_mask((src), (k), (a), (b)) + #define _mm512_mask_cmpge_epu64_mask(k, a, b) simde_mm512_mask_cmpge_epu64_mask((k), (a), (b)) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/cmple.h b/simde/x86/avx512/cmple.h index c83227f48..9b3c3aad2 100644 --- a/simde/x86/avx512/cmple.h +++ b/simde/x86/avx512/cmple.h @@ -76,8 +76,8 @@ simde_mm_cmple_epi8_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi8_mask - #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) + #undef _mm_cmple_epi8_mask + #define _mm_cmple_epi8_mask(a, b) simde_mm_cmple_epi8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -91,7 +91,7 @@ simde_mm_mask_cmple_epi8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epi8_mask - #define _mm_mask_cmple_epi8_mask(src, k, a, b) simde_mm_mask_cmple_epi8_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epi8_mask(k, a, b) simde_mm_mask_cmple_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -132,8 +132,8 @@ simde_mm256_cmple_epi8_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512VBW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi8_mask - #define _mm512_cmple_epi8_mask(a, b) simde_mm512_cmple_epi8_mask((a), (b)) + #undef _mm256_cmple_epi8_mask + #define _mm256_cmple_epi8_mask(a, b) simde_mm256_cmple_epi8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -147,7 +147,7 @@ simde_mm256_mask_cmple_epi8_mask(simde__mmask32 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epi8_mask - #define _mm256_mask_cmple_epi8_mask(src, k, a, b) simde_mm256_mask_cmple_epi8_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epi8_mask(k, a, b) simde_mm256_mask_cmple_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -207,7 +207,7 @@ simde_mm512_mask_cmple_epi8_mask(simde__mmask64 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epi8_mask - #define _mm512_mask_cmple_epi8_mask(src, k, a, b) simde_mm512_mask_cmple_epi8_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epi8_mask(k, a, b) simde_mm512_mask_cmple_epi8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -250,8 +250,8 @@ simde_mm_cmple_epu8_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu8_mask - #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) + #undef _mm_cmple_epu8_mask + #define _mm_cmple_epu8_mask(a, b) simde_mm_cmple_epu8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -265,7 +265,7 @@ simde_mm_mask_cmple_epu8_mask(simde__mmask16 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epu8_mask - #define _mm_mask_cmple_epu8_mask(src, k, a, b) simde_mm_mask_cmple_epu8_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epu8_mask(k, a, b) simde_mm_mask_cmple_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -306,8 +306,8 @@ simde_mm256_cmple_epu8_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu8_mask - #define _mm512_cmple_epu8_mask(a, b) simde_mm512_cmple_epu8_mask((a), (b)) + #undef _mm256_cmple_epu8_mask + #define _mm256_cmple_epu8_mask(a, b) simde_mm256_cmple_epu8_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -321,7 +321,7 @@ simde_mm256_mask_cmple_epu8_mask(simde__mmask32 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epu8_mask - #define _mm256_mask_cmple_epu8_mask(src, k, a, b) simde_mm256_mask_cmple_epu8_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epu8_mask(k, a, b) simde_mm256_mask_cmple_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -381,7 +381,7 @@ simde_mm512_mask_cmple_epu8_mask(simde__mmask64 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epu8_mask - #define _mm512_mask_cmple_epu8_mask(src, k, a, b) simde_mm512_mask_cmple_epu8_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epu8_mask(k, a, b) simde_mm512_mask_cmple_epu8_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -424,8 +424,8 @@ simde_mm_cmple_epi16_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi16_mask - #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) + #undef _mm_cmple_epi16_mask + #define _mm_cmple_epi16_mask(a, b) simde_mm_cmple_epi16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -439,7 +439,7 @@ simde_mm_mask_cmple_epi16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epi16_mask - #define _mm_mask_cmple_epi16_mask(src, k, a, b) simde_mm_mask_cmple_epi16_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epi16_mask(k, a, b) simde_mm_mask_cmple_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -480,8 +480,8 @@ simde_mm256_cmple_epi16_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi16_mask - #define _mm512_cmple_epi16_mask(a, b) simde_mm512_cmple_epi16_mask((a), (b)) + #undef _mm256_cmple_epi16_mask + #define _mm256_cmple_epi16_mask(a, b) simde_mm256_cmple_epi16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -495,7 +495,7 @@ simde_mm256_mask_cmple_epi16_mask(simde__mmask16 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epi16_mask - #define _mm256_mask_cmple_epi16_mask(src, k, a, b) simde_mm256_mask_cmple_epi16_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epi16_mask(k, a, b) simde_mm256_mask_cmple_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -555,7 +555,7 @@ simde_mm512_mask_cmple_epi16_mask(simde__mmask32 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epi16_mask - #define _mm512_mask_cmple_epi16_mask(src, k, a, b) simde_mm512_mask_cmple_epi16_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epi16_mask(k, a, b) simde_mm512_mask_cmple_epi16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -598,8 +598,8 @@ simde_mm_cmple_epu16_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu16_mask - #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) + #undef _mm_cmple_epu16_mask + #define _mm_cmple_epu16_mask(a, b) simde_mm_cmple_epu16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -613,7 +613,7 @@ simde_mm_mask_cmple_epu16_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epu16_mask - #define _mm_mask_cmple_epu16_mask(src, k, a, b) simde_mm_mask_cmple_epu16_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epu16_mask(k, a, b) simde_mm_mask_cmple_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -654,8 +654,8 @@ simde_mm256_cmple_epu16_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu16_mask - #define _mm512_cmple_epu16_mask(a, b) simde_mm512_cmple_epu16_mask((a), (b)) + #undef _mm256_cmple_epu16_mask + #define _mm256_cmple_epu16_mask(a, b) simde_mm256_cmple_epu16_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -669,7 +669,7 @@ simde_mm256_mask_cmple_epu16_mask(simde__mmask16 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epu16_mask - #define _mm256_mask_cmple_epu16_mask(src, k, a, b) simde_mm256_mask_cmple_epu16_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epu16_mask(k, a, b) simde_mm256_mask_cmple_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -729,7 +729,7 @@ simde_mm512_mask_cmple_epu16_mask(simde__mmask32 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epu16_mask - #define _mm512_mask_cmple_epu16_mask(src, k, a, b) simde_mm512_mask_cmple_epu16_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epu16_mask(k, a, b) simde_mm512_mask_cmple_epu16_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -772,8 +772,8 @@ simde_mm_cmple_epi32_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi32_mask - #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) + #undef _mm_cmple_epi32_mask + #define _mm_cmple_epi32_mask(a, b) simde_mm_cmple_epi32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -787,7 +787,7 @@ simde_mm_mask_cmple_epi32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epi32_mask - #define _mm_mask_cmple_epi32_mask(src, k, a, b) simde_mm_mask_cmple_epi32_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epi32_mask(k, a, b) simde_mm_mask_cmple_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -828,8 +828,8 @@ simde_mm256_cmple_epi32_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epi32_mask - #define _mm512_cmple_epi32_mask(a, b) simde_mm512_cmple_epi32_mask((a), (b)) + #undef _mm256_cmple_epi32_mask + #define _mm256_cmple_epi32_mask(a, b) simde_mm256_cmple_epi32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -843,7 +843,7 @@ simde_mm256_mask_cmple_epi32_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epi32_mask - #define _mm256_mask_cmple_epi32_mask(src, k, a, b) simde_mm256_mask_cmple_epi32_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epi32_mask(k, a, b) simde_mm256_mask_cmple_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -903,7 +903,7 @@ simde_mm512_mask_cmple_epi32_mask(simde__mmask16 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epi32_mask - #define _mm512_mask_cmple_epi32_mask(src, k, a, b) simde_mm512_mask_cmple_epi32_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epi32_mask(k, a, b) simde_mm512_mask_cmple_epi32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -946,8 +946,8 @@ simde_mm_cmple_epu32_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu32_mask - #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) + #undef _mm_cmple_epu32_mask + #define _mm_cmple_epu32_mask(a, b) simde_mm_cmple_epu32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -961,7 +961,7 @@ simde_mm_mask_cmple_epu32_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epu32_mask - #define _mm_mask_cmple_epu32_mask(src, k, a, b) simde_mm_mask_cmple_epu32_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epu32_mask(k, a, b) simde_mm_mask_cmple_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1002,8 +1002,8 @@ simde_mm256_cmple_epu32_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu32_mask - #define _mm512_cmple_epu32_mask(a, b) simde_mm512_cmple_epu32_mask((a), (b)) + #undef _mm256_cmple_epu32_mask + #define _mm256_cmple_epu32_mask(a, b) simde_mm256_cmple_epu32_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1017,7 +1017,7 @@ simde_mm256_mask_cmple_epu32_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epu32_mask - #define _mm256_mask_cmple_epu32_mask(src, k, a, b) simde_mm256_mask_cmple_epu32_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epu32_mask(k, a, b) simde_mm256_mask_cmple_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1077,7 +1077,7 @@ simde_mm512_mask_cmple_epu32_mask(simde__mmask16 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epu32_mask - #define _mm512_mask_cmple_epu32_mask(src, k, a, b) simde_mm512_mask_cmple_epu32_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epu32_mask(k, a, b) simde_mm512_mask_cmple_epu32_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1135,7 +1135,7 @@ simde_mm_mask_cmple_epi64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epi64_mask - #define _mm_mask_cmple_epi64_mask(src, k, a, b) simde_mm_mask_cmple_epi64_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epi64_mask(k, a, b) simde_mm_mask_cmple_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1191,7 +1191,7 @@ simde_mm256_mask_cmple_epi64_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epi64_mask - #define _mm256_mask_cmple_epi64_mask(src, k, a, b) simde_mm256_mask_cmple_epi64_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epi64_mask(k, a, b) simde_mm256_mask_cmple_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1251,7 +1251,7 @@ simde_mm512_mask_cmple_epi64_mask(simde__mmask8 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epi64_mask - #define _mm512_mask_cmple_epi64_mask(src, k, a, b) simde_mm512_mask_cmple_epi64_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epi64_mask(k, a, b) simde_mm512_mask_cmple_epi64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1292,8 +1292,8 @@ simde_mm_cmple_epu64_mask (simde__m128i a, simde__m128i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu64_mask - #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) + #undef _mm_cmple_epu64_mask + #define _mm_cmple_epu64_mask(a, b) simde_mm_cmple_epu64_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1307,7 +1307,7 @@ simde_mm_mask_cmple_epu64_mask(simde__mmask8 k, simde__m128i a, simde__m128i b) } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmple_epu64_mask - #define _mm_mask_cmple_epu64_mask(src, k, a, b) simde_mm_mask_cmple_epu64_mask((src), (k), (a), (b)) + #define _mm_mask_cmple_epu64_mask(k, a, b) simde_mm_mask_cmple_epu64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1348,8 +1348,8 @@ simde_mm256_cmple_epu64_mask (simde__m256i a, simde__m256i b) { #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) - #undef _mm512_cmple_epu64_mask - #define _mm512_cmple_epu64_mask(a, b) simde_mm512_cmple_epu64_mask((a), (b)) + #undef _mm256_cmple_epu64_mask + #define _mm256_cmple_epu64_mask(a, b) simde_mm256_cmple_epu64_mask((a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1363,7 +1363,7 @@ simde_mm256_mask_cmple_epu64_mask(simde__mmask8 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmple_epu64_mask - #define _mm256_mask_cmple_epu64_mask(src, k, a, b) simde_mm256_mask_cmple_epu64_mask((src), (k), (a), (b)) + #define _mm256_mask_cmple_epu64_mask(k, a, b) simde_mm256_mask_cmple_epu64_mask((k), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -1423,7 +1423,7 @@ simde_mm512_mask_cmple_epu64_mask(simde__mmask8 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_cmple_epu64_mask - #define _mm512_mask_cmple_epu64_mask(src, k, a, b) simde_mm512_mask_cmple_epu64_mask((src), (k), (a), (b)) + #define _mm512_mask_cmple_epu64_mask(k, a, b) simde_mm512_mask_cmple_epu64_mask((k), (a), (b)) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/cmpneq.h b/simde/x86/avx512/cmpneq.h index 6583155dd..6e9bf3364 100644 --- a/simde/x86/avx512/cmpneq.h +++ b/simde/x86/avx512/cmpneq.h @@ -61,7 +61,7 @@ simde_mm_mask_cmpneq_epi8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epi8_mask - #define _mm_mask_cmpneq_epi8_mask(a, b) simde_mm_mask_cmpneq_epi8_mask((a), (b)) + #define _mm_mask_cmpneq_epi8_mask(k1, a, b) simde_mm_mask_cmpneq_epi8_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -89,7 +89,7 @@ simde_mm_mask_cmpneq_epu8_mask(simde__mmask16 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epu8_mask - #define _mm_mask_cmpneq_epu8_mask(a, b) simde_mm_mask_cmpneq_epu8_mask((a), (b)) + #define _mm_mask_cmpneq_epu8_mask(k1, a, b) simde_mm_mask_cmpneq_epu8_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -117,7 +117,7 @@ simde_mm_mask_cmpneq_epi16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epi16_mask - #define _mm_mask_cmpneq_epi16_mask(a, b) simde_mm_mask_cmpneq_epi16_mask((a), (b)) + #define _mm_mask_cmpneq_epi16_mask(k1, a, b) simde_mm_mask_cmpneq_epi16_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -145,7 +145,7 @@ simde_mm_mask_cmpneq_epu16_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epu16_mask - #define _mm_mask_cmpneq_epu16_mask(a, b) simde_mm_mask_cmpneq_epu16_mask((a), (b)) + #define _mm_mask_cmpneq_epu16_mask(k1, a, b) simde_mm_mask_cmpneq_epu16_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -173,7 +173,7 @@ simde_mm_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epi32_mask - #define _mm_mask_cmpneq_epi32_mask(a, b) simde_mm_mask_cmpneq_epi32_mask((a), (b)) + #define _mm_mask_cmpneq_epi32_mask(k1, a, b) simde_mm_mask_cmpneq_epi32_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -201,7 +201,7 @@ simde_mm_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epu32_mask - #define _mm_mask_cmpneq_epu32_mask(a, b) simde_mm_mask_cmpneq_epu32_mask((a), (b)) + #define _mm_mask_cmpneq_epu32_mask(k1, a, b) simde_mm_mask_cmpneq_epu32_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -229,7 +229,7 @@ simde_mm_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epi64_mask - #define _mm_mask_cmpneq_epi64_mask(a, b) simde_mm_mask_cmpneq_epi64_mask((a), (b)) + #define _mm_mask_cmpneq_epi64_mask(k1, a, b) simde_mm_mask_cmpneq_epi64_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -257,7 +257,7 @@ simde_mm_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m128i a, simde__m128i b } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_cmpneq_epu64_mask - #define _mm_mask_cmpneq_epu64_mask(a, b) simde_mm_mask_cmpneq_epu64_mask((a), (b)) + #define _mm_mask_cmpneq_epu64_mask(k1, a, b) simde_mm_mask_cmpneq_epu64_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -285,7 +285,7 @@ simde_mm256_mask_cmpneq_epi8_mask(simde__mmask32 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epi8_mask - #define _mm256_mask_cmpneq_epi8_mask(a, b) simde_mm256_mask_cmpneq_epi8_mask((a), (b)) + #define _mm256_mask_cmpneq_epi8_mask(k1, a, b) simde_mm256_mask_cmpneq_epi8_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -313,7 +313,7 @@ simde_mm256_mask_cmpneq_epu8_mask(simde__mmask32 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epu8_mask - #define _mm256_mask_cmpneq_epu8_mask(a, b) simde_mm256_mask_cmpneq_epu8_mask((a), (b)) + #define _mm256_mask_cmpneq_epu8_mask(k1, a, b) simde_mm256_mask_cmpneq_epu8_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -341,7 +341,7 @@ simde_mm256_mask_cmpneq_epi16_mask(simde__mmask16 k1, simde__m256i a, simde__m25 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epi16_mask - #define _mm256_mask_cmpneq_epi16_mask(a, b) simde_mm256_mask_cmpneq_epi16_mask((a), (b)) + #define _mm256_mask_cmpneq_epi16_mask(k1, a, b) simde_mm256_mask_cmpneq_epi16_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -369,7 +369,7 @@ simde_mm256_mask_cmpneq_epu16_mask(simde__mmask16 k1, simde__m256i a, simde__m25 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) || defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epu16_mask - #define _mm256_mask_cmpneq_epu16_mask(a, b) simde_mm256_mask_cmpneq_epu16_mask((a), (b)) + #define _mm256_mask_cmpneq_epu16_mask(k1, a, b) simde_mm256_mask_cmpneq_epu16_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -397,7 +397,7 @@ simde_mm256_mask_cmpneq_epi32_mask(simde__mmask8 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epi32_mask - #define _mm256_mask_cmpneq_epi32_mask(a, b) simde_mm256_mask_cmpneq_epi32_mask((a), (b)) + #define _mm256_mask_cmpneq_epi32_mask(k1, a, b) simde_mm256_mask_cmpneq_epi32_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -425,7 +425,7 @@ simde_mm256_mask_cmpneq_epu32_mask(simde__mmask8 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epu32_mask - #define _mm256_mask_cmpneq_epu32_mask(a, b) simde_mm256_mask_cmpneq_epu32_mask((a), (b)) + #define _mm256_mask_cmpneq_epu32_mask(k1, a, b) simde_mm256_mask_cmpneq_epu32_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -453,7 +453,7 @@ simde_mm256_mask_cmpneq_epi64_mask(simde__mmask8 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epi64_mask - #define _mm256_mask_cmpneq_epi64_mask(a, b) simde_mm256_mask_cmpneq_epi64_mask((a), (b)) + #define _mm256_mask_cmpneq_epi64_mask(k1, a, b) simde_mm256_mask_cmpneq_epi64_mask((k1), (a), (b)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -481,7 +481,7 @@ simde_mm256_mask_cmpneq_epu64_mask(simde__mmask8 k1, simde__m256i a, simde__m256 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_cmpneq_epu64_mask - #define _mm256_mask_cmpneq_epu64_mask(a, b) simde_mm256_mask_cmpneq_epu64_mask((a), (b)) + #define _mm256_mask_cmpneq_epu64_mask(k1, a, b) simde_mm256_mask_cmpneq_epu64_mask((k1), (a), (b)) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/compress.h b/simde/x86/avx512/compress.h index 69affdc07..06fffc733 100644 --- a/simde/x86/avx512/compress.h +++ b/simde/x86/avx512/compress.h @@ -152,7 +152,7 @@ simde_mm256_mask_compressstoreu_ps (void* base_addr, simde__mmask8 k, simde__m25 #endif } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) - #undef _mm256_mask_compressstoreu_pd + #undef _mm256_mask_compressstoreu_ps #define _mm256_mask_compressstoreu_ps(base_addr, k, a) simde_mm256_mask_compressstoreu_ps(base_addr, k, a) #endif diff --git a/simde/x86/avx512/fmsub.h b/simde/x86/avx512/fmsub.h index 626294cb3..4f52d4074 100644 --- a/simde/x86/avx512/fmsub.h +++ b/simde/x86/avx512/fmsub.h @@ -47,7 +47,7 @@ simde_mm256_mask3_fmsub_pd (simde__m256d a, simde__m256d b, simde__m256d c, simd } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask3_fmsub_pd - #define _mm256_mask3_fmsub_pd(a, b, c, k) _mm256_mask3_fmsub_pd(a, b, c, k) + #define _mm256_mask3_fmsub_pd(a, b, c, k) simde_mm256_mask3_fmsub_pd(a, b, c, k) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -61,7 +61,7 @@ simde_mm256_mask_fmsub_pd (simde__m256d a, simde__mmask8 k, simde__m256d b, simd } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_fmsub_pd - #define _mm256_mask_fmsub_pd(a, k, b, c) _mm256_mask_fmsub_pd(a, k, b, c) + #define _mm256_mask_fmsub_pd(a, k, b, c) simde_mm256_mask_fmsub_pd(a, k, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -75,7 +75,7 @@ simde_mm256_maskz_fmsub_pd (simde__mmask8 k, simde__m256d a, simde__m256d b, sim } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_fmsub_pd - #define _mm256_maskz_fmsub_pd(k, a, b, c) _mm256_maskz_fmsub_pd(k, a, b, c) + #define _mm256_maskz_fmsub_pd(k, a, b, c) simde_mm256_maskz_fmsub_pd(k, a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -89,7 +89,7 @@ simde_mm_mask3_fmsub_pd (simde__m128d a, simde__m128d b, simde__m128d c, simde__ } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask3_fmsub_pd - #define _mm_mask3_fmsub_pd(a, b, c, k) _mm_mask3_fmsub_pd(a, b, c, k) + #define _mm_mask3_fmsub_pd(a, b, c, k) simde_mm_mask3_fmsub_pd(a, b, c, k) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -103,7 +103,7 @@ simde_mm_mask_fmsub_pd (simde__m128d a, simde__mmask8 k, simde__m128d b, simde__ } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_fmsub_pd - #define _mm_mask_fmsub_pd(a, k, b, c) _mm_mask_fmsub_pd(a, k, b, c) + #define _mm_mask_fmsub_pd(a, k, b, c) simde_mm_mask_fmsub_pd(a, k, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -117,7 +117,7 @@ simde_mm_maskz_fmsub_pd (simde__mmask8 k, simde__m128d a, simde__m128d b, simde_ } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_fmsub_pd - #define _mm_maskz_fmsub_pd(k, a, b, c) _mm_maskz_fmsub_pd(k, a, b, c) + #define _mm_maskz_fmsub_pd(k, a, b, c) simde_mm_maskz_fmsub_pd(k, a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -131,7 +131,7 @@ simde_mm256_mask3_fmsub_ps (simde__m256 a, simde__m256 b, simde__m256 c, simde__ } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask3_fmsub_ps - #define _mm256_mask3_fmsub_ps(a, b, c, k) _mm256_mask3_fmsub_ps(a, b, c, k) + #define _mm256_mask3_fmsub_ps(a, b, c, k) simde_mm256_mask3_fmsub_ps(a, b, c, k) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -145,7 +145,7 @@ simde_mm256_mask_fmsub_ps (simde__m256 a, simde__mmask8 k, simde__m256 b, simde_ } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_fmsub_ps - #define _mm256_mask_fmsub_ps(a, k, b, c) _mm256_mask_fmsub_ps(a, k, b, c) + #define _mm256_mask_fmsub_ps(a, k, b, c) simde_mm256_mask_fmsub_ps(a, k, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -159,7 +159,7 @@ simde_mm256_maskz_fmsub_ps (simde__mmask8 k, simde__m256 a, simde__m256 b, simde } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_fmsub_ps - #define _mm256_maskz_fmsub_ps(k, a, b, c) _mm256_maskz_fmsub_ps(k, a, b, c) + #define _mm256_maskz_fmsub_ps(k, a, b, c) simde_mm256_maskz_fmsub_ps(k, a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -173,7 +173,7 @@ simde_mm_mask3_fmsub_ps (simde__m128 a, simde__m128 b, simde__m128 c, simde__mma } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask3_fmsub_ps - #define _mm_mask3_fmsub_ps(a, b, c, k) _mm_mask3_fmsub_ps(a, b, c, k) + #define _mm_mask3_fmsub_ps(a, b, c, k) simde_mm_mask3_fmsub_ps(a, b, c, k) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -187,7 +187,7 @@ simde_mm_mask_fmsub_ps (simde__m128 a, simde__mmask8 k, simde__m128 b, simde__m1 } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_fmsub_ps - #define _mm_mask_fmsub_ps(a, k, b, c) _mm_mask_fmsub_ps(a, k, b, c) + #define _mm_mask_fmsub_ps(a, k, b, c) simde_mm_mask_fmsub_ps(a, k, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -201,7 +201,7 @@ simde_mm_maskz_fmsub_ps (simde__mmask8 k, simde__m128 a, simde__m128 b, simde__m } #if defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_fmsub_ps - #define _mm_maskz_fmsub_ps(k, a, b, c) _mm_maskz_fmsub_ps(k, a, b, c) + #define _mm_maskz_fmsub_ps(k, a, b, c) simde_mm_maskz_fmsub_ps(k, a, b, c) #endif SIMDE_FUNCTION_ATTRIBUTES diff --git a/simde/x86/avx512/insert.h b/simde/x86/avx512/insert.h index f6a276417..67120d31c 100644 --- a/simde/x86/avx512/insert.h +++ b/simde/x86/avx512/insert.h @@ -301,7 +301,7 @@ simde_mm512_mask_insertf32x8(simde__m512 src, simde__mmask16 k, simde__m512 a, s } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_insertf32x8 - #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imms8) + #define _mm512_mask_insertf32x8(src, k, a, b, imm8) simde_mm512_mask_insertf32x8(src, k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -319,7 +319,7 @@ simde_mm512_maskz_insertf32x8(simde__mmask16 k, simde__m512 a, simde__m256 b, co } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_insertf32x8 - #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imms8) + #define _mm512_maskz_insertf32x8(k, a, b, imm8) simde_mm512_maskz_insertf32x8(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -355,7 +355,7 @@ simde_mm512_mask_insertf64x2(simde__m512d src, simde__mmask8 k, simde__m512d a, } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_insertf64x2 - #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imms8) + #define _mm512_mask_insertf64x2(src, k, a, b, imm8) simde_mm512_mask_insertf64x2(src, k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -373,7 +373,7 @@ simde_mm512_maskz_insertf64x2(simde__mmask8 k, simde__m512d a, simde__m128d b, c } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_insertf64x2 - #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imms8) + #define _mm512_maskz_insertf64x2(k, a, b, imm8) simde_mm512_maskz_insertf64x2(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -409,7 +409,7 @@ simde_mm512_mask_inserti32x8(simde__m512i src, simde__mmask16 k, simde__m512i a, } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_inserti32x8 - #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imms8) + #define _mm512_mask_inserti32x8(src, k, a, b, imm8) simde_mm512_mask_inserti32x8(src, k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -427,7 +427,7 @@ simde_mm512_maskz_inserti32x8(simde__mmask16 k, simde__m512i a, simde__m256i b, } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_inserti32x8 - #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imms8) + #define _mm512_maskz_inserti32x8(k, a, b, imm8) simde_mm512_maskz_inserti32x8(k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -463,7 +463,7 @@ simde_mm512_mask_inserti64x2(simde__m512i src, simde__mmask8 k, simde__m512i a, } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_inserti64x2 - #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imms8) + #define _mm512_mask_inserti64x2(src, k, a, b, imm8) simde_mm512_mask_inserti64x2(src, k, a, b, imm8) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -481,7 +481,7 @@ simde_mm512_maskz_inserti64x2(simde__mmask8 k, simde__m512i a, simde__m128i b, c } #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_inserti64x2 - #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imms8) + #define _mm512_maskz_inserti64x2(k, a, b, imm8) simde_mm512_maskz_inserti64x2(k, a, b, imm8) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/madd.h b/simde/x86/avx512/madd.h index 29c0fe90d..547d71ce4 100644 --- a/simde/x86/avx512/madd.h +++ b/simde/x86/avx512/madd.h @@ -61,7 +61,7 @@ simde_mm_maskz_madd_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_madd_epi16 - #define _mm_maskz_madd_epi16(src, k, a, b) simde_mm_maskz_madd_epi16(src, k, a, b) + #define _mm_maskz_madd_epi16(k, a, b) simde_mm_maskz_madd_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -89,7 +89,7 @@ simde_mm256_maskz_madd_epi16 (simde__mmask8 k, simde__m256i a, simde__m256i b) { } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_madd_epi16 - #define _mm256_maskz_madd_epi16(src, k, a, b) simde_mm256_maskz_madd_epi16(src, k, a, b) + #define _mm256_maskz_madd_epi16(k, a, b) simde_mm256_maskz_madd_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -148,7 +148,7 @@ simde_mm512_maskz_madd_epi16 (simde__mmask16 k, simde__m512i a, simde__m512i b) } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_madd_epi16 - #define _mm512_maskz_madd_epi16(src, k, a, b) simde_mm512_maskz_madd_epi16(src, k, a, b) + #define _mm512_maskz_madd_epi16(k, a, b) simde_mm512_maskz_madd_epi16(k, a, b) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/maddubs.h b/simde/x86/avx512/maddubs.h index 4b3d73917..43b5594cf 100644 --- a/simde/x86/avx512/maddubs.h +++ b/simde/x86/avx512/maddubs.h @@ -48,7 +48,7 @@ simde_mm_mask_maddubs_epi16 (simde__m128i src, simde__mmask8 k, simde__m128i a, } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_mask_maddubs_epi16 - #define _mm_mask_maddubs_epi16(a, b) simde_mm_mask_maddubs_epi16(a, b) + #define _mm_mask_maddubs_epi16(src, k, a, b) simde_mm_mask_maddubs_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -62,7 +62,7 @@ simde_mm_maskz_maddubs_epi16 (simde__mmask8 k, simde__m128i a, simde__m128i b) { } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_maddubs_epi16 - #define _mm_maskz_maddubs_epi16(a, b) simde_mm_maskz_maddubs_epi16(a, b) + #define _mm_maskz_maddubs_epi16(k, a, b) simde_mm_maskz_maddubs_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -76,7 +76,7 @@ simde_mm256_mask_maddubs_epi16 (simde__m256i src, simde__mmask16 k, simde__m256i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_mask_maddubs_epi16 - #define _mm256_mask_maddubs_epi16(a, b) simde_mm256_mask_maddubs_epi16(a, b) + #define _mm256_mask_maddubs_epi16(src, k, a, b) simde_mm256_mask_maddubs_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -90,7 +90,7 @@ simde_mm256_maskz_maddubs_epi16 (simde__mmask16 k, simde__m256i a, simde__m256i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_maddubs_epi16 - #define _mm256_maskz_maddubs_epi16(a, b) simde_mm256_maskz_maddubs_epi16(a, b) + #define _mm256_maskz_maddubs_epi16(k, a, b) simde_mm256_maskz_maddubs_epi16(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -136,7 +136,7 @@ simde_mm512_mask_maddubs_epi16 (simde__m512i src, simde__mmask32 k, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_maddubs_epi16 - #define _mm512_mask_maddubs_epi16(a, b) simde_mm512_mask_maddubs_epi16(a, b) + #define _mm512_mask_maddubs_epi16(src, k, a, b) simde_mm512_mask_maddubs_epi16(src, k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -150,7 +150,7 @@ simde_mm512_maskz_maddubs_epi16 (simde__mmask32 k, simde__m512i a, simde__m512i } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_maddubs_epi16 - #define _mm512_maskz_maddubs_epi16(a, b) simde_mm512_maskz_maddubs_epi16(a, b) + #define _mm512_maskz_maddubs_epi16(k, a, b) simde_mm512_maskz_maddubs_epi16(k, a, b) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/multishift.h b/simde/x86/avx512/multishift.h index e6a6c0979..5388d0d07 100644 --- a/simde/x86/avx512/multishift.h +++ b/simde/x86/avx512/multishift.h @@ -57,7 +57,7 @@ simde_mm_maskz_multishift_epi64_epi8 (simde__mmask16 k, simde__m128i a, simde__m } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_multishift_epi64_epi8 - #define _mm_maskz_multishift_epi64_epi8(src, k, a, b) simde_mm_maskz_multishift_epi64_epi8(src, k, a, b) + #define _mm_maskz_multishift_epi64_epi8(k, a, b) simde_mm_maskz_multishift_epi64_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -109,7 +109,7 @@ simde_mm256_maskz_multishift_epi64_epi8 (simde__mmask32 k, simde__m256i a, simde } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_maskz_multishift_epi64_epi8 - #define _mm256_maskz_multishift_epi64_epi8(src, k, a, b) simde_mm256_maskz_multishift_epi64_epi8(src, k, a, b) + #define _mm256_maskz_multishift_epi64_epi8(k, a, b) simde_mm256_maskz_multishift_epi64_epi8(k, a, b) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -161,7 +161,7 @@ simde_mm512_maskz_multishift_epi64_epi8 (simde__mmask64 k, simde__m512i a, simde } #if defined(SIMDE_X86_AVX512VBMI_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_multishift_epi64_epi8 - #define _mm512_maskz_multishift_epi64_epi8(src, k, a, b) simde_mm512_maskz_multishift_epi64_epi8(src, k, a, b) + #define _mm512_maskz_multishift_epi64_epi8(k, a, b) simde_mm512_maskz_multishift_epi64_epi8(k, a, b) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/range.h b/simde/x86/avx512/range.h index 10daa10e1..1d8c0fb49 100644 --- a/simde/x86/avx512/range.h +++ b/simde/x86/avx512/range.h @@ -615,7 +615,7 @@ simde_mm512_range_pd (simde__m512d a, simde__m512d b, int imm8) #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_range_ss - #define _mm_maskz_range_ss(k, a, b, imm8) simde_mm_mask_range_ss(k, a, b, imm8) + #define _mm_maskz_range_ss(k, a, b, imm8) simde_mm_maskz_range_ss(k, a, b, imm8) #endif #if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_FAST_EXCEPTIONS) @@ -736,7 +736,7 @@ simde_mm512_range_pd (simde__m512d a, simde__m512d b, int imm8) #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_range_sd - #define _mm_maskz_range_sd(k, a, b, imm8) simde_mm_mask_range_sd(k, a, b, imm8) + #define _mm_maskz_range_sd(k, a, b, imm8) simde_mm_maskz_range_sd(k, a, b, imm8) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/range_round.h b/simde/x86/avx512/range_round.h index 6f4a7b6b8..7bf132075 100644 --- a/simde/x86/avx512/range_round.h +++ b/simde/x86/avx512/range_round.h @@ -117,7 +117,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_range_round_ps - #define _mm512_mask_range_round_ps(src, k, a, b, imm8) simde_mm512_mask_range_round_ps(src, k, a, b, imm8) + #define _mm512_mask_range_round_ps(src, k, a, b, imm8, sae) simde_mm512_mask_range_round_ps(src, k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -173,7 +173,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_range_round_ps - #define _mm512_maskz_range_round_ps(k, a, b, imm8) simde_mm512_maskz_range_round_ps(k, a, b, imm8) + #define _mm512_maskz_range_round_ps(k, a, b, imm8, sae) simde_mm512_maskz_range_round_ps(k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -285,7 +285,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_mask_range_round_pd - #define _mm512_mask_range_round_pd(src, k, a, b, imm8) simde_mm512_mask_range_round_pd(src, k, a, b, imm8) + #define _mm512_mask_range_round_pd(src, k, a, b, imm8, sae) simde_mm512_mask_range_round_pd(src, k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -341,7 +341,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_range_round_pd - #define _mm512_maskz_range_round_pd(k, a, b, imm8) simde_mm512_maskz_range_round_pd(k, a, b, imm8) + #define _mm512_maskz_range_round_pd(k, a, b, imm8, sae) simde_mm512_maskz_range_round_pd(k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -453,7 +453,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_mask_range_round_ss - #define _mm_mask_range_round_ss(src, k, a, b, imm8) simde_mm_mask_range_round_ss(src, k, a, b, imm8) + #define _mm_mask_range_round_ss(src, k, a, b, imm8, sae) simde_mm_mask_range_round_ss(src, k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -509,7 +509,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_range_round_ss - #define _mm_maskz_range_round_ss(k, a, b, imm8) simde_mm_maskz_range_round_ss(k, a, b, imm8) + #define _mm_maskz_range_round_ss(k, a, b, imm8, sae) simde_mm_maskz_range_round_ss(k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -621,7 +621,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_mask_range_round_sd - #define _mm_mask_range_round_sd(src, k, a, b, imm8) simde_mm_mask_range_round_sd(src, k, a, b, imm8) + #define _mm_mask_range_round_sd(src, k, a, b, imm8, sae) simde_mm_mask_range_round_sd(src, k, a, b, imm8, sae) #endif #if defined(SIMDE_X86_AVX512DQ_NATIVE) @@ -677,7 +677,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_range_round_sd - #define _mm_maskz_range_round_sd(k, a, b, imm8) simde_mm_maskz_range_round_sd(k, a, b, imm8) + #define _mm_maskz_range_round_sd(k, a, b, imm8, sae) simde_mm_maskz_range_round_sd(k, a, b, imm8, sae) #endif SIMDE_END_DECLS_ diff --git a/simde/x86/avx512/rol.h b/simde/x86/avx512/rol.h index 835bf6bbb..5bdf98bc1 100644 --- a/simde/x86/avx512/rol.h +++ b/simde/x86/avx512/rol.h @@ -73,7 +73,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_rol_epi32 - #define _mm_maskz_rol_epi32(src, k, a, imm8) simde_mm_maskz_rol_epi32(src, k, a, imm8) + #define _mm_maskz_rol_epi32(k, a, imm8) simde_mm_maskz_rol_epi32(k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) diff --git a/simde/x86/avx512/ror.h b/simde/x86/avx512/ror.h index 464f71f0f..7cac56c7e 100644 --- a/simde/x86/avx512/ror.h +++ b/simde/x86/avx512/ror.h @@ -73,7 +73,7 @@ SIMDE_BEGIN_DECLS_ #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm_maskz_ror_epi32 - #define _mm_maskz_ror_epi32(src, k, a, imm8) simde_mm_maskz_ror_epi32(src, k, a, imm8) + #define _mm_maskz_ror_epi32(k, a, imm8) simde_mm_maskz_ror_epi32(k, a, imm8) #endif #if defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) diff --git a/simde/x86/avx512/shuffle.h b/simde/x86/avx512/shuffle.h index 4dc3037a6..d1c537f34 100644 --- a/simde/x86/avx512/shuffle.h +++ b/simde/x86/avx512/shuffle.h @@ -162,6 +162,34 @@ simde_mm256_shuffle_i32x4 (simde__m256i a, simde__m256i b, const int imm8) #define simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_mov_pd(k, simde_mm256_shuffle_f64x2(a, b, imm8)) #define simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_mov_pd(src, k, simde_mm256_shuffle_f64x2(a, b, imm8)) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) + #undef _mm256_maskz_shuffle_i32x4 + #undef _mm256_mask_shuffle_i32x4 + #define _mm256_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_i32x4(k, a, b, imm8) + #define _mm256_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_i32x4(src, k, a, b, imm8) + + #undef _mm256_shuffle_f32x4 + #undef _mm256_maskz_shuffle_f32x4 + #undef _mm256_mask_shuffle_f32x4 + #define _mm256_shuffle_f32x4(a, b, imm8) simde_mm256_shuffle_f32x4(a, b, imm8) + #define _mm256_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm256_maskz_shuffle_f32x4(k, a, b, imm8) + #define _mm256_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm256_mask_shuffle_f32x4(src, k, a, b, imm8) + + #undef _mm256_shuffle_i64x2 + #undef _mm256_maskz_shuffle_i64x2 + #undef _mm256_mask_shuffle_i64x2 + #define _mm256_shuffle_i64x2(a, b, imm8) simde_mm256_shuffle_i64x2(a, b, imm8) + #define _mm256_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_i64x2(k, a, b, imm8) + #define _mm256_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_i64x2(src, k, a, b, imm8) + + #undef _mm256_shuffle_f64x2 + #undef _mm256_maskz_shuffle_f64x2 + #undef _mm256_mask_shuffle_f64x2 + #define _mm256_shuffle_f64x2(a, b, imm8) simde_mm256_shuffle_f64x2(a, b, imm8) + #define _mm256_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm256_maskz_shuffle_f64x2(k, a, b, imm8) + #define _mm256_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm256_mask_shuffle_f64x2(src, k, a, b, imm8) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde__m512i simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) @@ -201,6 +229,34 @@ simde_mm512_shuffle_i32x4 (simde__m512i a, simde__m512i b, const int imm8) #define simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_mov_pd(k, simde_mm512_shuffle_f64x2(a, b, imm8)) #define simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_mov_pd(src, k, simde_mm512_shuffle_f64x2(a, b, imm8)) +#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) + #undef _mm512_maskz_shuffle_i32x4 + #undef _mm512_mask_shuffle_i32x4 + #define _mm512_maskz_shuffle_i32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_i32x4(k, a, b, imm8) + #define _mm512_mask_shuffle_i32x4(src, k, a, b, imm8) simde_mm512_mask_shuffle_i32x4(src, k, a, b, imm8) + + #undef _mm512_shuffle_f32x4 + #undef _mm512_maskz_shuffle_f32x4 + #undef _mm512_mask_shuffle_f32x4 + #define _mm512_shuffle_f32x4(a, b, imm8) simde_mm512_shuffle_f32x4(a, b, imm8) + #define _mm512_maskz_shuffle_f32x4(k, a, b, imm8) simde_mm512_maskz_shuffle_f32x4(k, a, b, imm8) + #define _mm512_mask_shuffle_f32x4(src, k, a, b, imm8) simde_mm512_mask_shuffle_f32x4(src, k, a, b, imm8) + + #undef _mm512_shuffle_i64x2 + #undef _mm512_maskz_shuffle_i64x2 + #undef _mm512_mask_shuffle_i64x2 + #define _mm512_shuffle_i64x2(a, b, imm8) simde_mm512_shuffle_i64x2(a, b, imm8) + #define _mm512_maskz_shuffle_i64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_i64x2(k, a, b, imm8) + #define _mm512_mask_shuffle_i64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_i64x2(src, k, a, b, imm8) + + #undef _mm512_shuffle_f64x2 + #undef _mm512_maskz_shuffle_f64x2 + #undef _mm512_mask_shuffle_f64x2 + #define _mm512_shuffle_f64x2(a, b, imm8) simde_mm512_shuffle_f64x2(a, b, imm8) + #define _mm512_maskz_shuffle_f64x2(k, a, b, imm8) simde_mm512_maskz_shuffle_f64x2(k, a, b, imm8) + #define _mm512_mask_shuffle_f64x2(src, k, a, b, imm8) simde_mm512_mask_shuffle_f64x2(src, k, a, b, imm8) +#endif + #if defined(SIMDE_X86_AVX512F_NATIVE) #define simde_mm512_shuffle_ps(a, b, imm8) _mm512_shuffle_ps(a, b, imm8) #elif SIMDE_NATURAL_VECTOR_SIZE_LE(256) && defined(SIMDE_STATEMENT_EXPR_) diff --git a/simde/x86/avx512/sll.h b/simde/x86/avx512/sll.h index 8cc944648..18fbbb8ce 100644 --- a/simde/x86/avx512/sll.h +++ b/simde/x86/avx512/sll.h @@ -102,7 +102,7 @@ simde_mm512_maskz_sll_epi16 (simde__mmask32 k, simde__m512i a, simde__m128i coun } #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) #undef _mm512_maskz_sll_epi16 - #define _mm512_maskz_sll_epi16(src, k, a, count) simde_mm512_maskz_sll_epi16(src, k, a, count) + #define _mm512_maskz_sll_epi16(k, a, count) simde_mm512_maskz_sll_epi16(k, a, count) #endif SIMDE_FUNCTION_ATTRIBUTES diff --git a/simde/x86/avx512/storeu.h b/simde/x86/avx512/storeu.h index 700456cf5..e00801faf 100644 --- a/simde/x86/avx512/storeu.h +++ b/simde/x86/avx512/storeu.h @@ -42,14 +42,14 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu_epi8 #undef _mm256_storeu_epi16 - #define _mm256_storeu_epi8(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) - #define _mm256_storeu_epi16(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi8(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi16(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) #endif #if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES) #undef _mm256_storeu_epi32 #undef _mm256_storeu_epi64 - #define _mm256_storeu_epi32(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) - #define _mm256_storeu_epi64(mem_addr, a) simde_mm512_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi32(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) + #define _mm256_storeu_epi64(mem_addr, a) simde_mm256_storeu_si256(mem_addr, a) #endif SIMDE_FUNCTION_ATTRIBUTES diff --git a/test/x86/avx512/cmp.c b/test/x86/avx512/cmp.c index 1729d8af5..7e67db945 100644 --- a/test/x86/avx512/cmp.c +++ b/test/x86/avx512/cmp.c @@ -334,43 +334,43 @@ test_simde_mm256_cmp_epi32_mask (SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, b; simde__mmask8 r; - a = simde_mm256_loadu_si256(test_vec[0].a); - b = simde_mm256_loadu_si256(test_vec[0].b); + a = simde_mm256_loadu_epi32(test_vec[0].a); + b = simde_mm256_loadu_epi32(test_vec[0].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_EQ); simde_assert_equal_mmask8(r, test_vec[0].r); - a = simde_mm256_loadu_si256(test_vec[1].a); - b = simde_mm256_loadu_si256(test_vec[1].b); + a = simde_mm256_loadu_epi32(test_vec[1].a); + b = simde_mm256_loadu_epi32(test_vec[1].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_LT); simde_assert_equal_mmask8(r, test_vec[1].r); - a = simde_mm256_loadu_si256(test_vec[2].a); - b = simde_mm256_loadu_si256(test_vec[2].b); + a = simde_mm256_loadu_epi32(test_vec[2].a); + b = simde_mm256_loadu_epi32(test_vec[2].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_LE); simde_assert_equal_mmask8(r, test_vec[2].r); - a = simde_mm256_loadu_si256(test_vec[3].a); - b = simde_mm256_loadu_si256(test_vec[3].b); + a = simde_mm256_loadu_epi32(test_vec[3].a); + b = simde_mm256_loadu_epi32(test_vec[3].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_FALSE); simde_assert_equal_mmask8(r, test_vec[3].r); - a = simde_mm256_loadu_si256(test_vec[4].a); - b = simde_mm256_loadu_si256(test_vec[4].b); + a = simde_mm256_loadu_epi32(test_vec[4].a); + b = simde_mm256_loadu_epi32(test_vec[4].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_NE); simde_assert_equal_mmask8(r, test_vec[4].r); - a = simde_mm256_loadu_si256(test_vec[5].a); - b = simde_mm256_loadu_si256(test_vec[5].b); + a = simde_mm256_loadu_epi32(test_vec[5].a); + b = simde_mm256_loadu_epi32(test_vec[5].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_NLT); simde_assert_equal_mmask8(r, test_vec[5].r); - a = simde_mm256_loadu_si256(test_vec[6].a); - b = simde_mm256_loadu_si256(test_vec[6].b); + a = simde_mm256_loadu_epi32(test_vec[6].a); + b = simde_mm256_loadu_epi32(test_vec[6].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_NLE); simde_assert_equal_mmask8(r, test_vec[6].r); - a = simde_mm256_loadu_si256(test_vec[7].a); - b = simde_mm256_loadu_si256(test_vec[7].b); + a = simde_mm256_loadu_epi32(test_vec[7].a); + b = simde_mm256_loadu_epi32(test_vec[7].b); r = simde_mm256_cmp_epi32_mask(a, b, SIMDE_MM_CMPINT_TRUE); simde_assert_equal_mmask8(r, test_vec[7].r); @@ -1676,43 +1676,43 @@ test_simde_mm256_cmp_epu32_mask (SIMDE_MUNIT_TEST_ARGS) { simde__m256i a, b; simde__mmask8 r; - a = simde_mm256_loadu_si256(test_vec[0].a); - b = simde_mm256_loadu_si256(test_vec[0].b); + a = simde_mm256_loadu_epi32(test_vec[0].a); + b = simde_mm256_loadu_epi32(test_vec[0].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_EQ); simde_assert_equal_mmask8(r, test_vec[0].r); - a = simde_mm256_loadu_si256(test_vec[1].a); - b = simde_mm256_loadu_si256(test_vec[1].b); + a = simde_mm256_loadu_epi32(test_vec[1].a); + b = simde_mm256_loadu_epi32(test_vec[1].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_LT); simde_assert_equal_mmask8(r, test_vec[1].r); - a = simde_mm256_loadu_si256(test_vec[2].a); - b = simde_mm256_loadu_si256(test_vec[2].b); + a = simde_mm256_loadu_epi32(test_vec[2].a); + b = simde_mm256_loadu_epi32(test_vec[2].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_LE); simde_assert_equal_mmask8(r, test_vec[2].r); - a = simde_mm256_loadu_si256(test_vec[3].a); - b = simde_mm256_loadu_si256(test_vec[3].b); + a = simde_mm256_loadu_epi32(test_vec[3].a); + b = simde_mm256_loadu_epi32(test_vec[3].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_FALSE); simde_assert_equal_mmask8(r, test_vec[3].r); - a = simde_mm256_loadu_si256(test_vec[4].a); - b = simde_mm256_loadu_si256(test_vec[4].b); + a = simde_mm256_loadu_epi32(test_vec[4].a); + b = simde_mm256_loadu_epi32(test_vec[4].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_NE); simde_assert_equal_mmask8(r, test_vec[4].r); - a = simde_mm256_loadu_si256(test_vec[5].a); - b = simde_mm256_loadu_si256(test_vec[5].b); + a = simde_mm256_loadu_epi32(test_vec[5].a); + b = simde_mm256_loadu_epi32(test_vec[5].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_NLT); simde_assert_equal_mmask8(r, test_vec[5].r); - a = simde_mm256_loadu_si256(test_vec[6].a); - b = simde_mm256_loadu_si256(test_vec[6].b); + a = simde_mm256_loadu_epi32(test_vec[6].a); + b = simde_mm256_loadu_epi32(test_vec[6].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_NLE); simde_assert_equal_mmask8(r, test_vec[6].r); - a = simde_mm256_loadu_si256(test_vec[7].a); - b = simde_mm256_loadu_si256(test_vec[7].b); + a = simde_mm256_loadu_epi32(test_vec[7].a); + b = simde_mm256_loadu_epi32(test_vec[7].b); r = simde_mm256_cmp_epu32_mask(a, b, SIMDE_MM_CMPINT_TRUE); simde_assert_equal_mmask8(r, test_vec[7].r);