Skip to content

Commit

Permalink
arm: use SIMDE_ARCH_ARM_FMA
Browse files Browse the repository at this point in the history
  • Loading branch information
mr-c committed Oct 16, 2023
1 parent 75014b9 commit 7198d6d
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 26 deletions.
10 changes: 5 additions & 5 deletions simde/arm/neon/fma.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
return vfma_f32(a, b, c);
#else
return simde_vadd_f32(a, simde_vmul_f32(b, c));
Expand All @@ -52,7 +52,7 @@ simde_vfma_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32x2_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
return vfma_f64(a, b, c);
#else
return simde_vadd_f64(a, simde_vmul_f64(b, c));
Expand All @@ -66,7 +66,7 @@ simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vfmaq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16)
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && defined(SIMDE_ARM_NEON_FP16)
return vfmaq_f16(a, b, c);
#else
return simde_vaddq_f16(a, simde_vmulq_f16(b, c));
Expand All @@ -80,7 +80,7 @@ simde_vfmaq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
return vfmaq_f32(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P6_NATIVE)
return vec_madd(b, c, a);
Expand Down Expand Up @@ -109,7 +109,7 @@ simde_vfmaq_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32x4_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vfmaq_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64x2_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
return vfmaq_f64(a, b, c);
#elif defined(SIMDE_POWER_ALTIVEC_P7_NATIVE) || defined(SIMDE_ZARCH_ZVECTOR_13_NATIVE)
return vec_madd(b, c, a);
Expand Down
24 changes: 12 additions & 12 deletions simde/arm/neon/fma_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* simde_vfmad_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmad_lane_f64(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_lane_f64(a, b, v, lane))
Expand All @@ -61,7 +61,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmad_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmad_laneq_f64(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmad_laneq_f64(a, b, v, lane))
Expand All @@ -84,7 +84,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmas_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmas_lane_f32(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_lane_f32(a, b, v, lane))
Expand All @@ -107,7 +107,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmas_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0)
#define simde_vfmas_laneq_f32(a, b, v, lane) \
SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vfmas_laneq_f32(a, b, v, lane))
Expand All @@ -130,7 +130,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfma_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfma_lane_f32(a, b, v, lane) vfma_lane_f32(a, b, v, lane)
#else
#define simde_vfma_lane_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_lane_f32(b, v, lane))
Expand All @@ -141,7 +141,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfma_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfma_lane_f64(a, b, v, lane) vfma_lane_f64((a), (b), (v), (lane))
#else
#define simde_vfma_lane_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_lane_f64(b, v, lane))
Expand All @@ -152,7 +152,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfma_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfma_laneq_f32(a, b, v, lane) vfma_laneq_f32((a), (b), (v), (lane))
#else
#define simde_vfma_laneq_f32(a, b, v, lane) simde_vadd_f32(a, simde_vmul_laneq_f32(b, v, lane))
Expand All @@ -163,7 +163,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfma_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfma_laneq_f64(a, b, v, lane) vfma_laneq_f64((a), (b), (v), (lane))
#else
#define simde_vfma_laneq_f64(a, b, v, lane) simde_vadd_f64(a, simde_vmul_laneq_f64(b, v, lane))
Expand All @@ -174,7 +174,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmaq_lane_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfmaq_lane_f64(a, b, v, lane) vfmaq_lane_f64((a), (b), (v), (lane))
#else
#define simde_vfmaq_lane_f64(a, b, v, lane) simde_vaddq_f64(a, simde_vmulq_lane_f64(b, v, lane))
Expand All @@ -185,7 +185,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmaq_lane_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfmaq_lane_f32(a, b, v, lane) vfmaq_lane_f32((a), (b), (v), (lane))
#else
#define simde_vfmaq_lane_f32(a, b, v, lane) simde_vaddq_f32(a, simde_vmulq_lane_f32(b, v, lane))
Expand All @@ -196,7 +196,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmaq_laneq_f32 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfmaq_laneq_f32(a, b, v, lane) vfmaq_laneq_f32((a), (b), (v), (lane))
#else
#define simde_vfmaq_laneq_f32(a, b, v, lane) \
Expand All @@ -208,7 +208,7 @@ SIMDE_BEGIN_DECLS_
#endif

/* simde_vfmaq_laneq_f64 */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
#define simde_vfmaq_laneq_f64(a, b, v, lane) vfmaq_laneq_f64((a), (b), (v), (lane))
#else
#define simde_vfmaq_laneq_f64(a, b, v, lane) \
Expand Down
8 changes: 4 additions & 4 deletions simde/arm/neon/fma_n.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ SIMDE_BEGIN_DECLS_
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
return vfma_n_f32(a, b, c);
#else
return simde_vfma_f32(a, b, simde_vdup_n_f32(c));
Expand All @@ -52,7 +52,7 @@ simde_vfma_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x1_t
simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vfma_n_f64(a, b, c);
#else
return simde_vfma_f64(a, b, simde_vdup_n_f64(c));
Expand All @@ -66,7 +66,7 @@ simde_vfma_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399)
return vfmaq_n_f32(a, b, c);
#else
return simde_vfmaq_f32(a, b, simde_vdupq_n_f32(c));
Expand All @@ -80,7 +80,7 @@ simde_vfmaq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) {
SIMDE_FUNCTION_ATTRIBUTES
simde_float64x2_t
simde_vfmaq_n_f64(simde_float64x2_t a, simde_float64x2_t b, simde_float64_t c) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0))
return vfmaq_n_f64(a, b, c);
#else
return simde_vfmaq_f64(a, b, simde_vdupq_n_f64(c));
Expand Down
2 changes: 1 addition & 1 deletion simde/mips/msa/madd.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ simde_v4f32
simde_msa_fmadd_w(simde_v4f32 a, simde_v4f32 b, simde_v4f32 c) {
#if defined(SIMDE_MIPS_MSA_NATIVE)
return __msa_fmadd_w(a, b, c);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
return vfmaq_f32(a, c, b);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vmlaq_f32(a, b, c);
Expand Down
3 changes: 3 additions & 0 deletions simde/simde-arch.h
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@
#if defined(__ARM_FEATURE_SVE)
# define SIMDE_ARCH_ARM_SVE
#endif
#if defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA
# define SIMDE_ARCH_ARM_FMA
#endif

/* Blackfin
<https://en.wikipedia.org/wiki/Blackfin> */
Expand Down
4 changes: 2 additions & 2 deletions simde/wasm/relaxed-simd.h
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ simde_wasm_f32x4_fma (simde_v128_t a, simde_v128_t b, simde_v128_t c) {

#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
r_.altivec_f32 = vec_madd(c_.altivec_f32, b_.altivec_f32, a_.altivec_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
r_.neon_f32 = vfmaq_f32(a_.neon_f32, c_.neon_f32, b_.neon_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_f32 = vmlaq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32);
Expand Down Expand Up @@ -443,7 +443,7 @@ simde_wasm_f32x4_fms (simde_v128_t a, simde_v128_t b, simde_v128_t c) {

#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
r_.altivec_f32 = vec_nmsub(c_.altivec_f32, b_.altivec_f32, a_.altivec_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
r_.neon_f32 = vfmsq_f32(a_.neon_f32, c_.neon_f32, b_.neon_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_f32 = vmlsq_f32(a_.neon_f32, b_.neon_f32, c_.neon_f32);
Expand Down
2 changes: 2 additions & 0 deletions simde/x86/aes.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ uint8_t gmult(uint8_t a, uint8_t b) {
}
*/

#if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))
static uint8_t simde_x_aes_gmult_lookup_table[8][256] = {
{ // gmult(0x02, b);
0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e,
Expand Down Expand Up @@ -509,6 +510,7 @@ void simde_x_aes_dec(uint8_t *in, uint8_t *out, uint8_t *w, int is_last) {
}
}
}
#endif // if !(defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_CRYPTO))

SIMDE_FUNCTION_ATTRIBUTES
simde__m128i simde_mm_aesenc_si128(simde__m128i a, simde__m128i round_key) {
Expand Down
4 changes: 2 additions & 2 deletions simde/x86/fma.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ simde_mm_fmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {

#if defined(SIMDE_POWER_ALTIVEC_P7_NATIVE)
r_.altivec_f32 = vec_madd(a_.altivec_f32, b_.altivec_f32, c_.altivec_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA)
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
r_.neon_f32 = vfmaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_f32 = vmlaq_f32(c_.neon_f32, b_.neon_f32, a_.neon_f32);
Expand Down Expand Up @@ -489,7 +489,7 @@ simde_mm_fnmadd_ps (simde__m128 a, simde__m128 b, simde__m128 c) {
b_ = simde__m128_to_private(b),
c_ = simde__m128_to_private(c);

#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(__ARM_FEATURE_FMA)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA)
r_.neon_f32 = vfmsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
#elif defined(SIMDE_ARM_NEON_A32V7_NATIVE)
r_.neon_f32 = vmlsq_f32(c_.neon_f32, a_.neon_f32, b_.neon_f32);
Expand Down

0 comments on commit 7198d6d

Please sign in to comment.