Skip to content

Commit

Permalink
avx512: fix many native aliases
Browse files (browse the repository at this point in the history)
  • Loading branch information
mr-c committed Oct 13, 2023
1 parent 00e1b40 commit ef0a80d
Show file tree
Hide file tree
Showing 21 changed files with 299 additions and 237 deletions.
4 changes: 2 additions & 2 deletions simde/mips/msa/madd.h
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ simde_msa_fmadd_w(simde_v4f32 a, simde_v4f32 b, simde_v4f32 c) {
}
#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES)
#undef __msa_fmadd_w
-#define __msa_fmadd_w(a, b) simde_msa_fmadd_w((a), (b))
+#define __msa_fmadd_w(a, b, c) simde_msa_fmadd_w((a), (b), (c))
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -114,7 +114,7 @@ simde_msa_fmadd_d(simde_v2f64 a, simde_v2f64 b, simde_v2f64 c) {
}
#if defined(SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES)
#undef __msa_fmadd_d
-#define __msa_fmadd_d(a, b) simde_msa_fmadd_d((a), (b))
+#define __msa_fmadd_d(a, b, c) simde_msa_fmadd_d((a), (b), (c))
#endif

SIMDE_END_DECLS_
Expand Down
7 changes: 7 additions & 0 deletions simde/simde-features.h
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,9 @@
#if !defined(SIMDE_X86_AVX512VPOPCNTDQ_NATIVE)
#define SIMDE_X86_AVX512VPOPCNTDQ_ENABLE_NATIVE_ALIASES
#endif
+#if !defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE)
+#define SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES
+#endif
#if !defined(SIMDE_X86_AVX512DQ_NATIVE)
#define SIMDE_X86_AVX512DQ_ENABLE_NATIVE_ALIASES
#endif
Expand Down Expand Up @@ -678,6 +681,10 @@
#define SIMDE_ARM_SVE_ENABLE_NATIVE_ALIASES
#endif

+#if !defined(SIMDE_MIPS_MSA_NATIVE)
+#define SIMDE_MIPS_MSA_ENABLE_NATIVE_ALIASES
+#endif

#if !defined(SIMDE_WASM_SIMD128_NATIVE)
#define SIMDE_WASM_SIMD128_ENABLE_NATIVE_ALIASES
#endif
Expand Down
59 changes: 29 additions & 30 deletions simde/x86/avx512/2intersect.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,36 +37,35 @@ SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
void
simde_mm_2intersect_epi32(simde__m128i a, simde__m128i b, simde__mmask8 *k1, simde__mmask8 *k2) {
-#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
-_mm_2intersect_epi32(a, b, k1, k2);
-#else
-simde__m128i_private
-a_ = simde__m128i_to_private(a),
-b_ = simde__m128i_to_private(b);
-simde__mmask8
-k1_ = 0,
-k2_ = 0;
-
-for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
-#if defined(SIMDE_ENABLE_OPENMP)
-#pragma omp simd reduction(|:k1_) reduction(|:k2_)
-#else
-SIMDE_VECTORIZE
-#endif
-for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
-const int32_t m = a_.i32[i] == b_.i32[j];
-k1_ |= m << i;
-k2_ |= m << j;
-}
+simde__m128i_private
+a_ = simde__m128i_to_private(a),
+b_ = simde__m128i_to_private(b);
+simde__mmask8
+k1_ = 0,
+k2_ = 0;
+
+for (size_t i = 0 ; i < sizeof(a_.i32) / sizeof(a_.i32[0]) ; i++) {
+#if defined(SIMDE_ENABLE_OPENMP)
+#pragma omp simd reduction(|:k1_) reduction(|:k2_)
+#else
+SIMDE_VECTORIZE
+#endif
+for (size_t j = 0 ; j < sizeof(b_.i32) / sizeof(b_.i32[0]) ; j++) {
+const int32_t m = a_.i32[i] == b_.i32[j];
+k1_ |= m << i;
+k2_ |= m << j;
+}
}

-*k1 = k1_;
-*k2 = k2_;
-#endif
+*k1 = k1_;
+*k2 = k2_;
}
+#if defined(SIMDE_X86_AVX512VP2INTERSECT_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE)
+#define simde_mm_2intersect_epi32(a, b, k1, k2) _mm_2intersect_epi32(a, b, k1, k2)
+#endif
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
-#undef __mm_2intersect_epi32
-#define __mm_2intersect_epi32(a,b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2)
+#undef _mm_2intersect_epi32
+#define _mm_2intersect_epi32(a, b, k1, k2) simde_mm_2intersect_epi32(a, b, k1, k2)
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -100,8 +99,8 @@ simde_mm_2intersect_epi64(simde__m128i a, simde__m128i b, simde__mmask8 *k1, sim
#endif
}
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
-#undef __mm_2intersect_epi64
-#define __mm_2intersect_epi64(a,b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2)
+#undef _mm_2intersect_epi64
+#define _mm_2intersect_epi64(a, b, k1, k2) simde_mm_2intersect_epi64(a, b, k1, k2)
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -136,7 +135,7 @@ simde_mm256_2intersect_epi32(simde__m256i a, simde__m256i b, simde__mmask8 *k1,
}
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_2intersect_epi32
-#define _mm256_2intersect_epi32(a,b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2)
+#define _mm256_2intersect_epi32(a, b, k1, k2) simde_mm256_2intersect_epi32(a, b, k1, k2)
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -171,7 +170,7 @@ simde_mm256_2intersect_epi64(simde__m256i a, simde__m256i b, simde__mmask8 *k1,
}
#if defined(SIMDE_X86_AVX512VP2INTERSECT_ENABLE_NATIVE_ALIASES) && defined(SIMDE_X86_AVX512VL_ENABLE_NATIVE_ALIASES)
#undef _mm256_2intersect_epi64
-#define _mm256_2intersect_epi64(a,b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2)
+#define _mm256_2intersect_epi64(a, b, k1, k2) simde_mm256_2intersect_epi64(a, b, k1, k2)
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down
6 changes: 3 additions & 3 deletions simde/x86/avx512/cmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -1067,7 +1067,7 @@ simde_mm512_cmp_epi16_mask (simde__m512i a, simde__m512i b, const int imm8)
#endif
#if defined(SIMDE_X86_AVX512BW_ENABLE_NATIVE_ALIASES)
#undef _mm512_mask_cmp_epi16_mask
-#define _mm512_mask_cmp_epi16_mask(a, b, imm8) simde_mm512_mask_cmp_epi16_mask((a), (b), (imm8))
+#define _mm512_mask_cmp_epi16_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epi16_mask((k1), (a), (b), (imm8))
#endif

SIMDE_HUGE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -1592,7 +1592,7 @@ simde_mm512_cmp_epu32_mask (simde__m512i a, simde__m512i b, const int imm8)
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_mask_cmp_epu32_mask
-#define _mm512_mask_cmp_epu32_mask(a, b, imm8) simde_mm512_mask_cmp_epu32_mask((a), (b), (imm8))
+#define _mm512_mask_cmp_epu32_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu32_mask((k1), (a), (b), (imm8))
#endif

SIMDE_HUGE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -1701,7 +1701,7 @@ simde_mm512_cmp_epu64_mask (simde__m512i a, simde__m512i b, const int imm8)
#endif
#if defined(SIMDE_X86_AVX512F_ENABLE_NATIVE_ALIASES)
#undef _mm512_mask_cmp_epu64_mask
-#define _mm512_mask_cmp_epu64_mask(a, b, imm8) simde_mm512_mask_cmp_epu64_mask((a), (b), (imm8))
+#define _mm512_mask_cmp_epu64_mask(k1, a, b, imm8) simde_mm512_mask_cmp_epu64_mask((k1), (a), (b), (imm8))
#endif

SIMDE_END_DECLS_
Expand Down
Loading

0 comments on commit ef0a80d

Please sign in to comment.