From e6060b0221649ba5c0a75a0009a4b91d807130a4 Mon Sep 17 00:00:00 2001 From: yyctw Date: Wed, 11 Oct 2023 13:08:44 +0800 Subject: [PATCH] [Fix] Add conditions for fp16 intrinsics --- simde/arm/neon.h | 4 + simde/arm/neon/create.h | 2 +- simde/arm/neon/cvt.h | 66 +++------ simde/arm/neon/cvt_n.h | 163 ++++++++++----------- simde/arm/neon/cvtn.h | 90 +++++++----- simde/arm/neon/dup_lane.h | 6 +- simde/arm/neon/ext.h | 2 +- simde/arm/neon/fma.h | 4 +- simde/arm/neon/fma_n.h | 4 +- simde/arm/neon/fms.h | 6 +- simde/arm/neon/fms_n.h | 4 +- simde/arm/neon/ld1_lane.h | 4 +- simde/arm/neon/ld1_x2.h | 5 +- simde/arm/neon/ld1_x3.h | 2 +- simde/arm/neon/ld1_x4.h | 2 +- simde/arm/neon/ld1q_x2.h | 2 +- simde/arm/neon/ld1q_x3.h | 2 +- simde/arm/neon/ld1q_x4.h | 2 +- simde/arm/neon/ld2.h | 8 +- simde/arm/neon/ld2_dup.h | 40 ++--- simde/arm/neon/ld2_lane.h | 4 +- simde/arm/neon/ld3.h | 2 +- simde/arm/neon/ld3_dup.h | 42 +++--- simde/arm/neon/ld3_lane.h | 4 +- simde/arm/neon/ld4.h | 2 +- simde/arm/neon/ld4_dup.h | 42 +++--- simde/arm/neon/ld4_lane.h | 4 +- simde/arm/neon/mlal_high_lane.h | 16 +- simde/arm/neon/mlsl_high_lane.h | 16 +- simde/arm/neon/mul_lane.h | 17 ++- simde/arm/neon/neg.h | 26 ++-- simde/arm/neon/qdmlal.h | 21 ++- simde/arm/neon/qdmlal_high.h | 18 ++- simde/arm/neon/qdmlal_high_lane.h | 29 +++- simde/arm/neon/qdmlal_high_n.h | 19 ++- simde/arm/neon/qdmlal_lane.h | 9 +- simde/arm/neon/qdmlsl.h | 21 ++- simde/arm/neon/qdmlsl_high.h | 15 +- simde/arm/neon/qdmlsl_high_lane.h | 29 +++- simde/arm/neon/qdmlsl_high_n.h | 19 ++- simde/arm/neon/qdmlsl_lane.h | 9 +- simde/arm/neon/reinterpret.h | 2 +- simde/arm/neon/types.h | 55 +++---- test/arm/neon/cvt.c | 70 +++++++++ test/arm/neon/cvt_n.c | 220 +++++++++++++++++----------- test/arm/neon/cvtn.c | 236 +++++++++++++++--------------- test/arm/neon/dup_lane.c | 80 +++++----- test/arm/neon/ext.c | 62 ++++---- test/arm/neon/fma.c | 164 ++++++++++----------- test/arm/neon/fma_lane.c | 12 +- test/arm/neon/fma_n.c | 4 +- test/arm/neon/fms.c | 6 +- test/arm/neon/fms_lane.c | 12 +- test/arm/neon/fms_n.c | 4 +- test/arm/neon/ld1_dup.c | 2 +- test/arm/neon/ld1_lane.c | 4 +- test/arm/neon/ld1_x2.c | 2 +- test/arm/neon/ld1_x3.c | 2 +- test/arm/neon/ld1_x4.c | 2 +- test/arm/neon/ld1q_x2.c | 2 +- test/arm/neon/ld1q_x3.c | 2 +- test/arm/neon/ld1q_x4.c | 2 +- test/arm/neon/ld2.c | 2 +- test/arm/neon/ld2_dup.c | 4 +- test/arm/neon/ld2_lane.c | 4 +- test/arm/neon/ld3.c | 2 +- test/arm/neon/ld3_dup.c | 4 +- test/arm/neon/ld3_lane.c | 4 +- test/arm/neon/ld4.c | 2 +- test/arm/neon/ld4_dup.c | 4 +- test/arm/neon/ld4_lane.c | 4 +- test/arm/neon/mla_lane.c | 96 ++++++++++-- test/arm/neon/mul_lane.c | 14 +- test/arm/neon/neg.c | 6 +- test/arm/neon/qdmull_high_lane.c | 32 ++++ test/arm/neon/test-neon.h | 129 ++++++++-------- 76 files changed, 1170 insertions(+), 864 deletions(-) diff --git a/simde/arm/neon.h b/simde/arm/neon.h index 8aac128d1..82b027531 100644 --- a/simde/arm/neon.h +++ b/simde/arm/neon.h @@ -31,6 +31,8 @@ #include "neon/types.h" #include "neon/aba.h" +#include "neon/abal.h" +#include "neon/abal_high.h" #include "neon/abd.h" #include "neon/abdl.h" #include "neon/abs.h" @@ -48,6 +50,8 @@ #include "neon/bsl.h" #include "neon/cage.h" #include "neon/cagt.h" +#include "neon/cale.h" +#include "neon/calt.h" #include "neon/ceq.h" #include "neon/ceqz.h" #include "neon/cge.h" diff --git a/simde/arm/neon/create.h b/simde/arm/neon/create.h index 2ff5a172f..adca6779e 100644 --- a/simde/arm/neon/create.h +++ b/simde/arm/neon/create.h @@ -157,7 +157,7 @@ simde_vcreate_u64(uint64_t a) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vcreate_f16(uint64_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcreate_f16(a); #else return simde_vreinterpret_f16_u64(simde_vdup_n_u64(a)); diff --git a/simde/arm/neon/cvt.h b/simde/arm/neon/cvt.h index 76412002d..9dec4a1ed 100644 --- a/simde/arm/neon/cvt.h +++ b/simde/arm/neon/cvt.h @@ -141,9 +141,9 @@ simde_vcvt_f64_f32(simde_float32x2_t a) { SIMDE_FUNCTION_ATTRIBUTES int16_t -simde_x_vcvts_s16_f16(simde_float16 a) { - #if defined(SIMDE_FAST_CONVERSION_RANGE) && defined(SIMDE_ARM_NEON_FP16) - return HEDLEY_STATIC_CAST(int16_t, a); +simde_vcvth_s16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_s16_f16(a); #else simde_float32 af = simde_float16_to_float32(a); if (HEDLEY_UNLIKELY(af < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { @@ -157,12 +157,16 @@ simde_x_vcvts_s16_f16(simde_float16 a) { } #endif } +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_s16_f16 + #define vcvth_s16_f16(a) simde_vcvth_s16_f16(a) +#endif SIMDE_FUNCTION_ATTRIBUTES uint16_t -simde_x_vcvts_u16_f16(simde_float16 a) { - #if defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint16_t, simde_float16_to_float32(a)); +simde_vcvth_u16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_u16_f16(a); #else simde_float32 af = simde_float16_to_float32(a); if (HEDLEY_UNLIKELY(af < SIMDE_FLOAT32_C(0.0))) { @@ -176,6 +180,10 @@ simde_x_vcvts_u16_f16(simde_float16 a) { } #endif } +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_u16_f16 + #define vcvth_u16_f16(a) simde_vcvth_u16_f16(a) +#endif SIMDE_FUNCTION_ATTRIBUTES int32_t @@ -266,7 +274,7 @@ simde_vcvtd_s64_f64(simde_float64 a) { return INT64_MIN; } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { return INT64_MAX; - } else if (simde_math_isnanf(a)) { + } else if (simde_math_isnan(a)) { return 0; } else { return HEDLEY_STATIC_CAST(int64_t, a); @@ -330,38 +338,6 @@ simde_vcvtd_f64_u64(uint64_t a) { #define vcvtd_f64_u64(a) simde_vcvtd_f64_u64(a) #endif -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vcvth_f16_s16(int16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvth_f16_s16(a); - #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - return HEDLEY_STATIC_CAST(simde_float16_t, a); - #else - return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvth_f16_s16 - #define vcvth_f16_s16(a) simde_vcvth_f16_s16(a) -#endif - -SIMDE_FUNCTION_ATTRIBUTES -simde_float16_t -simde_vcvth_f16_u16(uint16_t a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - return vcvth_f16_u16(a); - #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - return HEDLEY_STATIC_CAST(simde_float16_t, a); - #else - return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); - #endif -} -#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) - #undef vcvth_f16_u16 - #define vcvth_f16_u16(a) simde_vcvth_f16_u16(a) -#endif - SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vcvt_s16_f16(simde_float16x4_t a) { @@ -376,7 +352,7 @@ simde_vcvt_s16_f16(simde_float16x4_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_s16_f16(a_.values[i]); } #endif @@ -428,7 +404,7 @@ simde_vcvt_u16_f16(simde_float16x4_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_u16_f16(a_.values[i]); } #endif @@ -533,7 +509,7 @@ simde_vcvtq_s16_f16(simde_float16x8_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_s16_f16(a_.values[i]); } #endif @@ -637,7 +613,7 @@ simde_vcvtq_u16_f16(simde_float16x8_t a) { #else SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i]); + r_.values[i] = simde_vcvth_u16_f16(a_.values[i]); } #endif @@ -1214,7 +1190,7 @@ simde_vcvtas_s32_f32(simde_float32 a) { } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { return 0; } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_round(a)); + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(a)); } #endif } @@ -1237,7 +1213,7 @@ simde_vcvtas_u32_f32(simde_float32 a) { return 0; } else { if(a < 0) return 0; - return HEDLEY_STATIC_CAST(int32_t, simde_math_round(a)); + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundf(a)); } #endif } diff --git a/simde/arm/neon/cvt_n.h b/simde/arm/neon/cvt_n.h index 7a224b68e..eabf6d1b1 100644 --- a/simde/arm/neon/cvt_n.h +++ b/simde/arm/neon/cvt_n.h @@ -43,17 +43,19 @@ simde_vcvt_n_s16_f16(simde_float16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); } return simde_int16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvt_n_s16_f16(a, n) vcvt_n_s16_f16(a, n) + #define simde_vcvt_n_s16_f16(a, n) vcvt_n_s16_f16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_s16_f16 - #define vcvt_n_s16_f16(a, n) simde_vcvt_n_s16_f16(a, n) + #define vcvt_n_s16_f16(a, n) simde_vcvt_n_s16_f16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -65,17 +67,17 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); } return simde_int32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvt_n_s32_f32(a, n) vcvt_n_s32_f32(a, n) + #define simde_vcvt_n_s32_f32(a, n) vcvt_n_s32_f32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_n_s32_f32 - #define vcvt_n_s32_f32(a, n) simde_vcvt_n_s32_f32(a, n) + #define vcvt_n_s32_f32(a, n) simde_vcvt_n_s32_f32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -85,7 +87,6 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n) simde_float64x1_private a_ = simde_float64x1_to_private(a); simde_int64x1_private r_; - SIMDE_CONVERT_VECTOR_(r_.values, a_.values); SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvtd_s64_f64(a_.values[i] * pow(2, n)); @@ -94,11 +95,11 @@ simde_vcvt_n_s64_f64(simde_float64x1_t a, const int n) return simde_int64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcvt_n_s64_f64(a, n) vcvt_n_s64_f64(a, n) + #define simde_vcvt_n_s64_f64(a, n) vcvt_n_s64_f64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_s64_f64 - #define vcvt_n_s64_f64(a, n) simde_vcvt_n_s64_f64(a, n) + #define vcvt_n_s64_f64(a, n) simde_vcvt_n_s64_f64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -110,17 +111,19 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); } return simde_uint16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvt_n_u16_f16(a, n) vcvt_n_u16_f16(a, n) + #define simde_vcvt_n_u16_f16(a, n) vcvt_n_u16_f16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_u16_f16 - #define vcvt_n_u16_f16(a, n) simde_vcvt_n_u16_f16(a, n) + #define vcvt_n_u16_f16(a, n) simde_vcvt_n_u16_f16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -132,17 +135,17 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); } return simde_uint32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvt_n_u32_f32(a, n) vcvt_n_u32_f32(a, n) + #define simde_vcvt_n_u32_f32(a, n) vcvt_n_u32_f32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_n_u32_f32 - #define vcvt_n_u32_f32(a, n) simde_vcvt_n_u32_f32(a, n) + #define vcvt_n_u32_f32(a, n) simde_vcvt_n_u32_f32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -160,11 +163,11 @@ simde_vcvt_n_u64_f64(simde_float64x1_t a, const int n) return simde_uint64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) - #define simde_vcvt_n_u64_f64(a, n) vcvt_n_u64_f64(a, n) + #define simde_vcvt_n_u64_f64(a, n) vcvt_n_u64_f64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_u64_f64 - #define vcvt_n_u64_f64(a, n) simde_vcvt_n_u64_f64(a, n) + #define vcvt_n_u64_f64(a, n) simde_vcvt_n_u64_f64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -176,17 +179,19 @@ simde_vcvtq_n_s16_f16(simde_float16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_s16_f16(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); } return simde_int16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvtq_n_s16_f16(a, n) vcvtq_n_s16_f16(a, n) + #define simde_vcvtq_n_s16_f16(a, n) vcvtq_n_s16_f16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_s16_f16 - #define vcvtq_n_s16_f16(a, n) simde_vcvtq_n_s16_f16(a, n) + #define vcvtq_n_s16_f16(a, n) simde_vcvtq_n_s16_f16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -198,17 +203,17 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); } return simde_int32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvtq_n_s32_f32(a, n) vcvtq_n_s32_f32(a, n) + #define simde_vcvtq_n_s32_f32(a, n) vcvtq_n_s32_f32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_s32_f32 - #define vcvtq_n_s32_f32(a, n) simde_vcvtq_n_s32_f32(a, n) + #define vcvtq_n_s32_f32(a, n) simde_vcvtq_n_s32_f32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -226,11 +231,11 @@ simde_vcvtq_n_s64_f64(simde_float64x2_t a, const int n) return simde_int64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define vcvtq_n_s64_f64(a, n) vcvtq_n_s64_f64(a, n) + #define simde_vcvtq_n_s64_f64(a, n) vcvtq_n_s64_f64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_s64_f64 - #define vcvtq_n_s64_f64(a, n) simde_vcvtq_n_s64_f64(a, n) + #define vcvtq_n_s64_f64(a, n) simde_vcvtq_n_s64_f64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -242,17 +247,19 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_x_vcvts_u16_f16(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( + simde_float16_to_float32(a_.values[i]) * + HEDLEY_STATIC_CAST(float, pow(2, n)))); } return simde_uint16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) -#define simde_vcvtq_n_u16_f16(a, n) vcvtq_n_u16_f16(a, n) +#define simde_vcvtq_n_u16_f16(a, n) vcvtq_n_u16_f16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_u16_f16 - #define vcvtq_n_u16_f16(a, n) simde_vcvtq_n_u16_f16(a, n) + #define vcvtq_n_u16_f16(a, n) simde_vcvtq_n_u16_f16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -264,17 +271,17 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * pow(2, n)); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); } return simde_uint32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) - #define simde_vcvtq_n_u32_f32(a, n) vcvtq_n_u32_f32(a, n) + #define simde_vcvtq_n_u32_f32(a, n) vcvtq_n_u32_f32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_u32_f32 - #define vcvtq_n_u32_f32(a, n) simde_vcvtq_n_u32_f32(a, n) + #define vcvtq_n_u32_f32(a, n) simde_vcvtq_n_u32_f32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -292,11 +299,11 @@ simde_vcvtq_n_u64_f64(simde_float64x2_t a, const int n) return simde_uint64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) - #define simde_vcvtq_n_u64_f64(a, n) vcvtq_n_u64_f64(a, n) + #define simde_vcvtq_n_u64_f64(a, n) vcvtq_n_u64_f64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_u64_f64 - #define vcvtq_n_u64_f64(a, n) simde_vcvtq_n_u64_f64(a, n) + #define vcvtq_n_u64_f64(a, n) simde_vcvtq_n_u64_f64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -308,21 +315,17 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, (a_.values[i] / pow(2, n))); - #else - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / pow(2, n)))); - #endif + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); } return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvt_n_f16_u16(a, n) vcvt_n_f16_u16(a, n) + #define simde_vcvt_n_f16_u16(a, n) vcvt_n_f16_u16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f16_u16 - #define vcvt_n_f16_u16(a, n) simde_vcvt_n_f16_u16(a, n) + #define vcvt_n_f16_u16(a, n) simde_vcvt_n_f16_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -334,21 +337,17 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, (a_.values[i] / pow(2, n))); - #else - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / pow(2, n)))); - #endif + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); } return simde_float16x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvt_n_f16_s16(a, n) vcvt_n_f16_s16(a, n) + #define simde_vcvt_n_f16_s16(a, n) vcvt_n_f16_s16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f16_s16 - #define vcvt_n_f16_s16(a, n) simde_vcvt_n_f16_s16(a, n) + #define vcvt_n_f16_s16(a, n) simde_vcvt_n_f16_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -360,21 +359,17 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, (a_.values[i] / pow(2, n))); - #else - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / pow(2, n)))); - #endif + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); } return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvtq_n_f16_u16(a, n) vcvtq_n_f16_u16(a, n) + #define simde_vcvtq_n_f16_u16(a, n) vcvtq_n_f16_u16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f16_u16 - #define vcvtq_n_f16_u16(a, n) simde_vcvtq_n_f16_u16(a, n) + #define vcvtq_n_f16_u16(a, n) simde_vcvtq_n_f16_u16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -386,21 +381,17 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - #if SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI - r_.values[i] = HEDLEY_STATIC_CAST(simde_float16_t, (a_.values[i] / pow(2, n))); - #else - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / pow(2, n)))); - #endif + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, (a_.values[i] / pow(2, n)))); } return simde_float16x8_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) - #define simde_vcvtq_n_f16_s16(a, n) vcvtq_n_f16_s16(a, n) + #define simde_vcvtq_n_f16_s16(a, n) vcvtq_n_f16_s16((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f16_s16 - #define vcvtq_n_f16_s16(a, n) simde_vcvtq_n_f16_s16(a, n) + #define vcvtq_n_f16_s16(a, n) simde_vcvtq_n_f16_s16((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -412,17 +403,17 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvt_n_f32_u32(a, n) vcvt_n_f32_u32(a, n) + #define simde_vcvt_n_f32_u32(a, n) vcvt_n_f32_u32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f32_u32 - #define vcvt_n_f32_u32(a, n) simde_vcvt_n_f32_u32(a, n) + #define vcvt_n_f32_u32(a, n) simde_vcvt_n_f32_u32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -434,17 +425,17 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float32x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvt_n_f32_s32(a, n) vcvt_n_f32_s32(a, n) + #define simde_vcvt_n_f32_s32(a, n) vcvt_n_f32_s32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f32_s32 - #define vcvt_n_f32_s32(a, n) simde_vcvt_n_f32_s32(a, n) + #define vcvt_n_f32_s32(a, n) simde_vcvt_n_f32_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -456,17 +447,17 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcvt_n_f64_u64(a, n) vcvt_n_f64_u64(a, n) + #define simde_vcvt_n_f64_u64(a, n) vcvt_n_f64_u64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f64_u64 - #define vcvt_n_f64_u64(a, n) simde_vcvt_n_f64_u64(a, n) + #define vcvt_n_f64_u64(a, n) simde_vcvt_n_f64_u64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -478,17 +469,17 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcvtq_n_f64_u64(a, n) vcvtq_n_f64_u64(a, n) + #define simde_vcvtq_n_f64_u64(a, n) vcvtq_n_f64_u64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f64_u64 - #define vcvtq_n_f64_u64(a, n) simde_vcvtq_n_f64_u64(a, n) + #define vcvtq_n_f64_u64(a, n) simde_vcvtq_n_f64_u64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -500,17 +491,17 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float64x1_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcvt_n_f64_s64(a, n) vcvt_n_f64_s64(a, n) + #define simde_vcvt_n_f64_s64(a, n) vcvt_n_f64_s64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvt_n_f64_s64 - #define vcvt_n_f64_s64(a, n) simde_vcvt_n_f64_s64(a, n) + #define vcvt_n_f64_s64(a, n) simde_vcvt_n_f64_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -522,17 +513,17 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float64x2_from_private(r_); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) - #define simde_vcvtq_n_f64_s64(a, n) vcvtq_n_f64_s64(a, n) + #define simde_vcvtq_n_f64_s64(a, n) vcvtq_n_f64_s64((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f64_s64 - #define vcvtq_n_f64_s64(a, n) simde_vcvtq_n_f64_s64(a, n) + #define vcvtq_n_f64_s64(a, n) simde_vcvtq_n_f64_s64((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -544,17 +535,17 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvtq_n_f32_s32(a, n) vcvtq_n_f32_s32(a, n) + #define simde_vcvtq_n_f32_s32(a, n) vcvtq_n_f32_s32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f32_s32 - #define vcvtq_n_f32_s32(a, n) simde_vcvtq_n_f32_s32(a, n) + #define vcvtq_n_f32_s32(a, n) simde_vcvtq_n_f32_s32((a), (n)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -566,21 +557,21 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i] / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); } return simde_float32x4_from_private(r_); } #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) - #define simde_vcvtq_n_f32_u32(a, n) vcvtq_n_f32_u32(a, n) + #define simde_vcvtq_n_f32_u32(a, n) vcvtq_n_f32_u32((a), (n)) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vcvtq_n_f32_u32 - #define vcvtq_n_f32_u32(a, n) simde_vcvtq_n_f32_u32(a, n) + #define vcvtq_n_f32_u32(a, n) simde_vcvtq_n_f32_u32((a), (n)) #endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* SIMDE_ARM_NEON_CVTN_H */ +#endif /* SIMDE_ARM_NEON_CVT_N_H */ diff --git a/simde/arm/neon/cvtn.h b/simde/arm/neon/cvtn.h index 0d15f2501..651dee274 100644 --- a/simde/arm/neon/cvtn.h +++ b/simde/arm/neon/cvtn.h @@ -30,6 +30,8 @@ #include "types.h" #include "cvt.h" +#include "calt.h" +#include "cagt.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -107,16 +109,17 @@ simde_vcvtnh_s64_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_s64_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float16, INT64_MIN))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { return INT64_MIN; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float16, INT64_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { return INT64_MAX; - } else if (simde_math_isnan(a)) { + } else if (simde_math_isnanf(a_)) { return 0; } else { - return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundevenf(a_)); } #endif } @@ -131,16 +134,17 @@ simde_vcvtnh_s32_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_s32_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float16, INT32_MIN))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { return INT32_MIN; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float16, INT32_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { return INT32_MAX; - } else if (simde_math_isnan(a)) { + } else if (simde_math_isnanf(a_)) { return 0; } else { - return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundevenf(a_)); } #endif } @@ -155,14 +159,17 @@ simde_vcvtnh_s16_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_s16_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float16, INT16_MIN))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { return INT16_MIN; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float16, INT16_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { return INT16_MAX; + } else if (simde_math_isnanf(a_)) { + return 0; } else { - return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(int16_t, simde_math_roundevenf(a_)); } #endif } @@ -174,19 +181,20 @@ simde_vcvtnh_s16_f16(simde_float16 a) { SIMDE_FUNCTION_ATTRIBUTES uint64_t simde_vcvtnh_u64_f16(simde_float16 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_u64_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT16_C(0.0))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { return 0; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float16, UINT64_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { return UINT64_MAX; - } else if (simde_math_isnan(a)) { + } else if (simde_math_isnanf(a_)) { return 0; } else { - return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundevenf(a_)); } #endif } @@ -198,19 +206,20 @@ simde_vcvtnh_u64_f16(simde_float16 a) { SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcvtnh_u32_f16(simde_float16 a) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_u32_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT16_C(0.0))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { return 0; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float16, UINT32_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { return UINT32_MAX; - } else if (simde_math_isnan(a)) { + } else if (simde_math_isnanf(a_)) { return 0; } else { - return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundevenf(a_)); } #endif } @@ -222,19 +231,20 @@ simde_vcvtnh_u32_f16(simde_float16 a) { SIMDE_FUNCTION_ATTRIBUTES uint16_t simde_vcvtnh_u16_f16(simde_float16 a) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvtnh_u16_f16(a); #elif defined(SIMDE_FAST_CONVERSION_RANGE) - return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(simde_float16_to_float32(a))); #else - if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT16_C(0.0))) { + simde_float32 a_ = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(a_ < HEDLEY_STATIC_CAST(simde_float32, 0))) { return 0; - } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float16, UINT16_MAX))) { + } else if (HEDLEY_UNLIKELY(a_ > HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { return UINT16_MAX; - } else if (simde_math_isnan(a)) { + } else if (simde_math_isnanf(a_)) { return 0; } else { - return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(a)); + return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundevenf(a_)); } #endif } @@ -426,7 +436,7 @@ simde_vcvtnq_s16_f16(simde_float16x8_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtnq_s16_f16 - #define vcvtnq_s16_f16(a, n) simde_vcvtnq_s16_f16(a, n) + #define vcvtnq_s16_f16(a) simde_vcvtnq_s16_f16(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -448,7 +458,7 @@ simde_vcvtn_s16_f16(simde_float16x4_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_s16_f16 - #define vcvtn_s16_f16(a, n) simde_vcvtn_s16_f16(a, n) + #define vcvtn_s16_f16(a) simde_vcvtn_s16_f16(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -470,7 +480,7 @@ simde_vcvtnq_u16_f16(simde_float16x8_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtnq_u16_f16 - #define vcvtnq_u16_f16(a, n) simde_vcvtnq_u16_f16(a, n) + #define vcvtnq_u16_f16(a) simde_vcvtnq_u16_f16(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -492,7 +502,7 @@ simde_vcvtn_u16_f16(simde_float16x4_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_u16_f16 - #define vcvtn_u16_f16(a, n) simde_vcvtn_u16_f16(a, n) + #define vcvtn_u16_f16(a) simde_vcvtn_u16_f16(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -514,7 +524,7 @@ simde_vcvtn_u32_f32(simde_float32x2_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_u32_f32 - #define vcvtn_u32_f32(a, n) simde_vcvtn_u32_f32(a, n) + #define vcvtn_u32_f32(a) simde_vcvtn_u32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -536,7 +546,7 @@ simde_vcvtn_s32_f32(simde_float32x2_t a) { } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_s32_f32 - #define vcvtn_s32_f32(a, n) simde_vcvtn_s32_f32(a, n) + #define vcvtn_s32_f32(a) simde_vcvtn_s32_f32(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -558,7 +568,7 @@ simde_vcvtn_s64_f64(simde_float64x1_t a) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_s64_f64 - #define vcvtn_s64_f64(a, n) simde_vcvtn_s64_f64(a, n) + #define vcvtn_s64_f64(a) simde_vcvtn_s64_f64(a) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -580,7 +590,7 @@ simde_vcvtn_u64_f64(simde_float64x1_t a) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vcvtn_u64_f64 - #define vcvtn_u64_f64(a, n) simde_vcvtn_u64_f64(a, n) + #define vcvtn_u64_f64(a) simde_vcvtn_u64_f64(a) #endif SIMDE_END_DECLS_ diff --git a/simde/arm/neon/dup_lane.h b/simde/arm/neon/dup_lane.h index 90f40065c..61a88503c 100644 --- a/simde/arm/neon/dup_lane.h +++ b/simde/arm/neon/dup_lane.h @@ -153,7 +153,7 @@ simde_vduph_lane_f16(simde_float16x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { return simde_float16x4_to_private(vec).values[lane]; } -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vduph_lane_f16(vec, lane) vduph_lane_f16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -162,7 +162,7 @@ simde_vduph_lane_f16(simde_float16x4_t vec, const int lane) #endif // simde_vdup_lane_f16 -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vdup_lane_f16(vec, lane) vdup_lane_f16(vec, lane) #else #define simde_vdup_lane_f16(vec, lane) simde_vdup_n_f16(simde_vduph_lane_f16(vec, lane)) @@ -179,7 +179,7 @@ simde_vdupq_lane_f16(simde_float16x4_t vec, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { return simde_vdupq_n_f16(simde_float16x4_to_private(vec).values[lane]); } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vdupq_lane_f16(vec, lane) vdupq_lane_f16(vec, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/ext.h b/simde/arm/neon/ext.h index 6392682e1..3b1c3c9ab 100644 --- a/simde/arm/neon/ext.h +++ b/simde/arm/neon/ext.h @@ -38,7 +38,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vext_f16(simde_float16x4_t a, simde_float16x4_t b, const int n) SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 3) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) simde_float16x4_t r; SIMDE_CONSTIFY_4_(vext_f16, r, (HEDLEY_UNREACHABLE(), a), n, a, b); return r; diff --git a/simde/arm/neon/fma.h b/simde/arm/neon/fma.h index 10f453805..56868a681 100644 --- a/simde/arm/neon/fma.h +++ b/simde/arm/neon/fma.h @@ -38,7 +38,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16_t simde_vfmah_f16(simde_float16_t a, simde_float16_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16) return vfmah_f16(a, b, c); #else return simde_vaddh_f16(a, simde_vmulh_f16(b, c)); @@ -80,7 +80,7 @@ simde_vfma_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vfma_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16) return vfma_f16(a, b, c); #else return simde_vadd_f16(a, simde_vmul_f16(b, c)); diff --git a/simde/arm/neon/fma_n.h b/simde/arm/neon/fma_n.h index f79d6304a..b92b0590b 100644 --- a/simde/arm/neon/fma_n.h +++ b/simde/arm/neon/fma_n.h @@ -39,7 +39,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vfma_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) return vfma_n_f16(a, b, c); #else return simde_vfma_f16(a, b, simde_vdup_n_f16(c)); @@ -53,7 +53,7 @@ simde_vfma_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vfmaq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) return vfmaq_n_f16(a, b, c); #else return simde_vfmaq_f16(a, b, simde_vdupq_n_f16(c)); diff --git a/simde/arm/neon/fms.h b/simde/arm/neon/fms.h index e03942339..bea67ef11 100644 --- a/simde/arm/neon/fms.h +++ b/simde/arm/neon/fms.h @@ -38,7 +38,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16_t simde_vfmsh_f16(simde_float16_t a, simde_float16_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16) return vfmsh_f16(a, b, c); #else return simde_vaddh_f16(a, simde_vnegh_f16(simde_vmulh_f16(b, c))); @@ -80,7 +80,7 @@ simde_vfms_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64x1_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vfms_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16) return vfms_f16(a, b, c); #else return simde_vadd_f16(a, simde_vneg_f16(simde_vmul_f16(b, c))); @@ -94,7 +94,7 @@ simde_vfms_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16x4_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vfmsq_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16x8_t c) { - #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && defined(SIMDE_ARM_NEON_FP16) return vfmsq_f16(a, b, c); #else return simde_vaddq_f16(a, simde_vnegq_f16(simde_vmulq_f16(b, c))); diff --git a/simde/arm/neon/fms_n.h b/simde/arm/neon/fms_n.h index 8a1cfc108..5d4ee27b1 100644 --- a/simde/arm/neon/fms_n.h +++ b/simde/arm/neon/fms_n.h @@ -38,7 +38,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4_t simde_vfms_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) return vfms_n_f16(a, b, c); #else return simde_vfms_f16(a, b, simde_vdup_n_f16(c)); @@ -52,7 +52,7 @@ simde_vfms_n_f16(simde_float16x4_t a, simde_float16x4_t b, simde_float16_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8_t simde_vfmsq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { - #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && (defined(__ARM_FEATURE_FMA) && __ARM_FEATURE_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) && defined(SIMDE_ARM_NEON_FP16) return vfmsq_n_f16(a, b, c); #else return simde_vfmsq_f16(a, b, simde_vdupq_n_f16(c)); diff --git a/simde/arm/neon/ld1_lane.h b/simde/arm/neon/ld1_lane.h index 5e2117ff1..8332703aa 100644 --- a/simde/arm/neon/ld1_lane.h +++ b/simde/arm/neon/ld1_lane.h @@ -170,7 +170,7 @@ simde_float16x4_t simde_vld1_lane_f16(simde_float16_t const *ptr, simde_float16x r.values[lane] = *ptr; return simde_float16x4_from_private(r); } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld1_lane_f16(ptr, src, lane) vld1_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -346,7 +346,7 @@ simde_float16x8_t simde_vld1q_lane_f16(simde_float16_t const *ptr, simde_float16 r.values[lane] = *ptr; return simde_float16x8_from_private(r); } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld1q_lane_f16(ptr, src, lane) vld1q_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/ld1_x2.h b/simde/arm/neon/ld1_x2.h index 8b7fdf034..10c858e74 100644 --- a/simde/arm/neon/ld1_x2.h +++ b/simde/arm/neon/ld1_x2.h @@ -45,10 +45,9 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x2_t simde_vld1_f16_x2(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(8)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ - defined(SIMDE_ARM_NEON_FP16) + (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) return vld1_f16_x2(ptr); #else simde_float16x4_private a_[2]; diff --git a/simde/arm/neon/ld1_x3.h b/simde/arm/neon/ld1_x3.h index 25381e176..52d864d6e 100644 --- a/simde/arm/neon/ld1_x3.h +++ b/simde/arm/neon/ld1_x3.h @@ -44,7 +44,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x3_t simde_vld1_f16_x3(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(12)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) return vld1_f16_x3(ptr); diff --git a/simde/arm/neon/ld1_x4.h b/simde/arm/neon/ld1_x4.h index 2ecfd8a89..3b7edb6bb 100644 --- a/simde/arm/neon/ld1_x4.h +++ b/simde/arm/neon/ld1_x4.h @@ -45,7 +45,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x4_t simde_vld1_f16_x4(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) return vld1_f16_x4(ptr); diff --git a/simde/arm/neon/ld1q_x2.h b/simde/arm/neon/ld1q_x2.h index 9c5f6009c..9f16aec54 100644 --- a/simde/arm/neon/ld1q_x2.h +++ b/simde/arm/neon/ld1q_x2.h @@ -45,7 +45,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x2_t simde_vld1q_f16_x2(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ defined(SIMDE_ARM_NEON_FP16) diff --git a/simde/arm/neon/ld1q_x3.h b/simde/arm/neon/ld1q_x3.h index be36c1cb3..01242d5ff 100644 --- a/simde/arm/neon/ld1q_x3.h +++ b/simde/arm/neon/ld1q_x3.h @@ -44,7 +44,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x3_t simde_vld1q_f16_x3(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(24)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) return vld1q_f16_x3(ptr); diff --git a/simde/arm/neon/ld1q_x4.h b/simde/arm/neon/ld1q_x4.h index 80aecd935..cd97d1479 100644 --- a/simde/arm/neon/ld1q_x4.h +++ b/simde/arm/neon/ld1q_x4.h @@ -45,7 +45,7 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x4_t simde_vld1q_f16_x4(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(32)]) { #if \ - defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \ + defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && \ (!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) && \ (!defined(__clang__) || (SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) return vld1q_f16_x4(ptr); diff --git a/simde/arm/neon/ld2.h b/simde/arm/neon/ld2.h index 8dd2d6223..a304004f4 100644 --- a/simde/arm/neon/ld2.h +++ b/simde/arm/neon/ld2.h @@ -345,14 +345,8 @@ simde_vld2_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x2_t simde_vld2_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(8)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld2_f16(ptr); - #elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && defined(SIMDE_SHUFFLE_VECTOR_) - simde_float16x8_private a_ = simde_float16x8_to_private(simde_vld1q_f16(ptr)); - a_.values = SIMDE_SHUFFLE_VECTOR_(16, 16, a_.values, a_.values, 0, 2, 4, 6, 1, 3, 5, 7); - simde_float16x4x2_t r; - simde_memcpy(&r, &a_, sizeof(r)); - return r; #else simde_float16x4_private r_[2]; diff --git a/simde/arm/neon/ld2_dup.h b/simde/arm/neon/ld2_dup.h index 16cc05e72..8d30ce080 100644 --- a/simde/arm/neon/ld2_dup.h +++ b/simde/arm/neon/ld2_dup.h @@ -246,7 +246,7 @@ simde_vld2_dup_u64(uint64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x2_t simde_vld2q_dup_f16(simde_float16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld2q_dup_f16(ptr); #else simde_float16x8x2_t r; @@ -257,7 +257,7 @@ simde_vld2q_dup_f16(simde_float16 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_f16 #define vld2q_dup_f16(a) simde_vld2q_dup_f16((a)) #endif @@ -265,7 +265,7 @@ simde_vld2q_dup_f16(simde_float16 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vld2q_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_f32(ptr); #else simde_float32x4x2_t r; @@ -276,7 +276,7 @@ simde_vld2q_dup_f32(simde_float32 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_f32 #define vld2q_dup_f32(a) simde_vld2q_dup_f32((a)) #endif @@ -303,7 +303,7 @@ simde_vld2q_dup_f64(simde_float64 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x2_t simde_vld2q_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_s8(ptr); #else simde_int8x16x2_t r; @@ -314,7 +314,7 @@ simde_vld2q_dup_s8(int8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_s8 #define vld2q_dup_s8(a) simde_vld2q_dup_s8((a)) #endif @@ -322,7 +322,7 @@ simde_vld2q_dup_s8(int8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x2_t simde_vld2q_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_s16(ptr); #else simde_int16x8x2_t r; @@ -333,7 +333,7 @@ simde_vld2q_dup_s16(int16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_s16 #define vld2q_dup_s16(a) simde_vld2q_dup_s16((a)) #endif @@ -341,7 +341,7 @@ simde_vld2q_dup_s16(int16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x2_t simde_vld2q_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_s32(ptr); #else simde_int32x4x2_t r; @@ -352,7 +352,7 @@ simde_vld2q_dup_s32(int32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_s32 #define vld2q_dup_s32(a) simde_vld2q_dup_s32((a)) #endif @@ -360,7 +360,7 @@ simde_vld2q_dup_s32(int32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int64x2x2_t simde_vld2q_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_s64(ptr); #else simde_int64x2x2_t r; @@ -371,7 +371,7 @@ simde_vld2q_dup_s64(int64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_s64 #define vld2q_dup_s64(a) simde_vld2q_dup_s64((a)) #endif @@ -379,7 +379,7 @@ simde_vld2q_dup_s64(int64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x2_t simde_vld2q_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_u8(ptr); #else simde_uint8x16x2_t r; @@ -390,7 +390,7 @@ simde_vld2q_dup_u8(uint8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_u8 #define vld2q_dup_u8(a) simde_vld2q_dup_u8((a)) #endif @@ -398,7 +398,7 @@ simde_vld2q_dup_u8(uint8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x2_t simde_vld2q_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_u16(ptr); #else simde_uint16x8x2_t r; @@ -409,7 +409,7 @@ simde_vld2q_dup_u16(uint16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_u16 #define vld2q_dup_u16(a) simde_vld2q_dup_u16((a)) #endif @@ -417,7 +417,7 @@ simde_vld2q_dup_u16(uint16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x2_t simde_vld2q_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_u32(ptr); #else simde_uint32x4x2_t r; @@ -428,7 +428,7 @@ simde_vld2q_dup_u32(uint32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_u32 #define vld2q_dup_u32(a) simde_vld2q_dup_u32((a)) #endif @@ -436,7 +436,7 @@ simde_vld2q_dup_u32(uint32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2x2_t simde_vld2q_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld2q_dup_u64(ptr); #else simde_uint64x2x2_t r; @@ -447,7 +447,7 @@ simde_vld2q_dup_u64(uint64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld2q_dup_u64 #define vld2q_dup_u64(a) simde_vld2q_dup_u64((a)) #endif diff --git a/simde/arm/neon/ld2_lane.h b/simde/arm/neon/ld2_lane.h index f35a1c212..9c9adb6ef 100644 --- a/simde/arm/neon/ld2_lane.h +++ b/simde/arm/neon/ld2_lane.h @@ -205,7 +205,7 @@ simde_float16x4x2_t simde_vld2_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_P } return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld2_lane_f16(ptr, src, lane) vld2_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -424,7 +424,7 @@ simde_float16x8x2_t simde_vld2q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_ } return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld2q_lane_f16(ptr, src, lane) vld2q_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/ld3.h b/simde/arm/neon/ld3.h index 0bfb827cb..6ab02f321 100644 --- a/simde/arm/neon/ld3.h +++ b/simde/arm/neon/ld3.h @@ -44,7 +44,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x3_t simde_vld3_f16(simde_float16 const *ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld3_f16(ptr); #else simde_float16x4_private r_[3]; diff --git a/simde/arm/neon/ld3_dup.h b/simde/arm/neon/ld3_dup.h index 70e044c7d..49507d69c 100644 --- a/simde/arm/neon/ld3_dup.h +++ b/simde/arm/neon/ld3_dup.h @@ -36,7 +36,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x3_t -simde_vld3_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { +simde_vld3_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(3)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld3_dup_f16(ptr); #else @@ -246,7 +246,7 @@ simde_vld3_dup_u64(uint64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x3_t simde_vld3q_dup_f16(simde_float16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld3q_dup_f16(ptr); #else simde_float16x8x3_t r; @@ -257,7 +257,7 @@ simde_vld3q_dup_f16(simde_float16 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_f16 #define vld3q_dup_f16(a) simde_vld3q_dup_f16((a)) #endif @@ -265,7 +265,7 @@ simde_vld3q_dup_f16(simde_float16 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x3_t simde_vld3q_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_f32(ptr); #else simde_float32x4x3_t r; @@ -276,7 +276,7 @@ simde_vld3q_dup_f32(simde_float32 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_f32 #define vld3q_dup_f32(a) simde_vld3q_dup_f32((a)) #endif @@ -303,7 +303,7 @@ simde_vld3q_dup_f64(simde_float64 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x3_t simde_vld3q_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_s8(ptr); #else simde_int8x16x3_t r; @@ -314,7 +314,7 @@ simde_vld3q_dup_s8(int8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_s8 #define vld3q_dup_s8(a) simde_vld3q_dup_s8((a)) #endif @@ -322,7 +322,7 @@ simde_vld3q_dup_s8(int8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x3_t simde_vld3q_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_s16(ptr); #else simde_int16x8x3_t r; @@ -333,7 +333,7 @@ simde_vld3q_dup_s16(int16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_s16 #define vld3q_dup_s16(a) simde_vld3q_dup_s16((a)) #endif @@ -341,7 +341,7 @@ simde_vld3q_dup_s16(int16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x3_t simde_vld3q_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_s32(ptr); #else simde_int32x4x3_t r; @@ -352,7 +352,7 @@ simde_vld3q_dup_s32(int32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_s32 #define vld3q_dup_s32(a) simde_vld3q_dup_s32((a)) #endif @@ -360,7 +360,7 @@ simde_vld3q_dup_s32(int32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int64x2x3_t simde_vld3q_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_s64(ptr); #else simde_int64x2x3_t r; @@ -371,7 +371,7 @@ simde_vld3q_dup_s64(int64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_s64 #define vld3q_dup_s64(a) simde_vld3q_dup_s64((a)) #endif @@ -379,7 +379,7 @@ simde_vld3q_dup_s64(int64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x3_t simde_vld3q_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_u8(ptr); #else simde_uint8x16x3_t r; @@ -390,7 +390,7 @@ simde_vld3q_dup_u8(uint8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_u8 #define vld3q_dup_u8(a) simde_vld3q_dup_u8((a)) #endif @@ -398,7 +398,7 @@ simde_vld3q_dup_u8(uint8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x3_t simde_vld3q_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_u16(ptr); #else simde_uint16x8x3_t r; @@ -409,7 +409,7 @@ simde_vld3q_dup_u16(uint16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_u16 #define vld3q_dup_u16(a) simde_vld3q_dup_u16((a)) #endif @@ -417,7 +417,7 @@ simde_vld3q_dup_u16(uint16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x3_t simde_vld3q_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_u32(ptr); #else simde_uint32x4x3_t r; @@ -428,7 +428,7 @@ simde_vld3q_dup_u32(uint32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_u32 #define vld3q_dup_u32(a) simde_vld3q_dup_u32((a)) #endif @@ -436,7 +436,7 @@ simde_vld3q_dup_u32(uint32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2x3_t simde_vld3q_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld3q_dup_u64(ptr); #else simde_uint64x2x3_t r; @@ -447,7 +447,7 @@ simde_vld3q_dup_u64(uint64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld3q_dup_u64 #define vld3q_dup_u64(a) simde_vld3q_dup_u64((a)) #endif diff --git a/simde/arm/neon/ld3_lane.h b/simde/arm/neon/ld3_lane.h index c9a19a69e..87f803eb6 100644 --- a/simde/arm/neon/ld3_lane.h +++ b/simde/arm/neon/ld3_lane.h @@ -205,7 +205,7 @@ simde_float16x4x3_t simde_vld3_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_P } return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld3_lane_f16(ptr, src, lane) vld3_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -424,7 +424,7 @@ simde_float16x8x3_t simde_vld3q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_ } return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #define simde_vld3q_lane_f16(ptr, src, lane) vld3q_lane_f16(ptr, src, lane) #endif #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/ld4.h b/simde/arm/neon/ld4.h index 02d7ce3b7..4eb2f3a47 100644 --- a/simde/arm/neon/ld4.h +++ b/simde/arm/neon/ld4.h @@ -43,7 +43,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x4_t simde_vld4_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(16)]) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld4_f16(ptr); #else simde_float16x4_private a_[4]; diff --git a/simde/arm/neon/ld4_dup.h b/simde/arm/neon/ld4_dup.h index de5a01119..5e297a35f 100644 --- a/simde/arm/neon/ld4_dup.h +++ b/simde/arm/neon/ld4_dup.h @@ -36,7 +36,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_float16x4x4_t -simde_vld4_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(2)]) { +simde_vld4_dup_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld4_dup_f16(ptr); #else @@ -246,7 +246,7 @@ simde_vld4_dup_u64(uint64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float16x8x4_t simde_vld4q_dup_f16(simde_float16 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vld4q_dup_f16(ptr); #else simde_float16x8x4_t r; @@ -257,7 +257,7 @@ simde_vld4q_dup_f16(simde_float16 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_f16 #define vld4q_dup_f16(a) simde_vld4q_dup_f16((a)) #endif @@ -265,7 +265,7 @@ simde_vld4q_dup_f16(simde_float16 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x4_t simde_vld4q_dup_f32(simde_float32 const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_f32(ptr); #else simde_float32x4x4_t r; @@ -276,7 +276,7 @@ simde_vld4q_dup_f32(simde_float32 const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_f32 #define vld4q_dup_f32(a) simde_vld4q_dup_f32((a)) #endif @@ -303,7 +303,7 @@ simde_vld4q_dup_f64(simde_float64 const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int8x16x4_t simde_vld4q_dup_s8(int8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_s8(ptr); #else simde_int8x16x4_t r; @@ -314,7 +314,7 @@ simde_vld4q_dup_s8(int8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_s8 #define vld4q_dup_s8(a) simde_vld4q_dup_s8((a)) #endif @@ -322,7 +322,7 @@ simde_vld4q_dup_s8(int8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int16x8x4_t simde_vld4q_dup_s16(int16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_s16(ptr); #else simde_int16x8x4_t r; @@ -333,7 +333,7 @@ simde_vld4q_dup_s16(int16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_s16 #define vld4q_dup_s16(a) simde_vld4q_dup_s16((a)) #endif @@ -341,7 +341,7 @@ simde_vld4q_dup_s16(int16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int32x4x4_t simde_vld4q_dup_s32(int32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_s32(ptr); #else simde_int32x4x4_t r; @@ -352,7 +352,7 @@ simde_vld4q_dup_s32(int32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_s32 #define vld4q_dup_s32(a) simde_vld4q_dup_s32((a)) #endif @@ -360,7 +360,7 @@ simde_vld4q_dup_s32(int32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_int64x2x4_t simde_vld4q_dup_s64(int64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_s64(ptr); #else simde_int64x2x4_t r; @@ -371,7 +371,7 @@ simde_vld4q_dup_s64(int64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_s64 #define vld4q_dup_s64(a) simde_vld4q_dup_s64((a)) #endif @@ -379,7 +379,7 @@ simde_vld4q_dup_s64(int64_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint8x16x4_t simde_vld4q_dup_u8(uint8_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_u8(ptr); #else simde_uint8x16x4_t r; @@ -390,7 +390,7 @@ simde_vld4q_dup_u8(uint8_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_u8 #define vld4q_dup_u8(a) simde_vld4q_dup_u8((a)) #endif @@ -398,7 +398,7 @@ simde_vld4q_dup_u8(uint8_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint16x8x4_t simde_vld4q_dup_u16(uint16_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_u16(ptr); #else simde_uint16x8x4_t r; @@ -409,7 +409,7 @@ simde_vld4q_dup_u16(uint16_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_u16 #define vld4q_dup_u16(a) simde_vld4q_dup_u16((a)) #endif @@ -417,7 +417,7 @@ simde_vld4q_dup_u16(uint16_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4x4_t simde_vld4q_dup_u32(uint32_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_u32(ptr); #else simde_uint32x4x4_t r; @@ -428,7 +428,7 @@ simde_vld4q_dup_u32(uint32_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_u32 #define vld4q_dup_u32(a) simde_vld4q_dup_u32((a)) #endif @@ -436,7 +436,7 @@ simde_vld4q_dup_u32(uint32_t const * ptr) { SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2x4_t simde_vld4q_dup_u64(uint64_t const * ptr) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) return vld4q_dup_u64(ptr); #else simde_uint64x2x4_t r; @@ -447,7 +447,7 @@ simde_vld4q_dup_u64(uint64_t const * ptr) { return r; #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vld4q_dup_u64 #define vld4q_dup_u64(a) simde_vld4q_dup_u64((a)) #endif diff --git a/simde/arm/neon/ld4_lane.h b/simde/arm/neon/ld4_lane.h index 56d5741ff..b3424c7d7 100644 --- a/simde/arm/neon/ld4_lane.h +++ b/simde/arm/neon/ld4_lane.h @@ -277,7 +277,7 @@ simde_vld4_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_floa return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) #define simde_vld4_lane_f16(ptr, src, lane) \ SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4_lane_f16(ptr, src, lane)) @@ -574,7 +574,7 @@ simde_vld4q_lane_f16(simde_float16_t const ptr[HEDLEY_ARRAY_PARAM(4)], simde_flo return r; } -#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(10,0,0) #define simde_vld4q_lane_f16(ptr, src, lane) \ SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vld4q_lane_f16(ptr, src, lane)) diff --git a/simde/arm/neon/mlal_high_lane.h b/simde/arm/neon/mlal_high_lane.h index 3c9688ea2..50018a95d 100644 --- a/simde/arm/neon/mlal_high_lane.h +++ b/simde/arm/neon/mlal_high_lane.h @@ -40,7 +40,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(v[lane])); + return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_lane_s16(a, b, v, lane) vmlal_high_lane_s16(a, b, v, lane) @@ -53,7 +53,7 @@ simde_vmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(v[lane])); + return simde_vmlal_high_s16(a, b, simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_laneq_s16(a, b, v, lane) vmlal_high_laneq_s16(a, b, v, lane) @@ -66,7 +66,7 @@ simde_vmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(v[lane])); + return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_lane_s32(a, b, v, lane) vmlal_high_lane_s32(a, b, v, lane) @@ -79,7 +79,7 @@ simde_vmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(v[lane])); + return simde_vmlal_high_s32(a, b, simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_laneq_s32(a, b, v, lane) vmlal_high_laneq_s32(a, b, v, lane) @@ -92,7 +92,7 @@ simde_vmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlal_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(v[lane])); + return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_lane_u16(a, b, v, lane) vmlal_high_lane_u16(a, b, v, lane) @@ -105,7 +105,7 @@ simde_vmlal_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4 SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlal_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(v[lane])); + return simde_vmlal_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x8_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_laneq_u16(a, b, v, lane) vmlal_high_laneq_u16(a, b, v, lane) @@ -118,7 +118,7 @@ simde_vmlal_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlal_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(v[lane])); + return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x2_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_lane_u32(a, b, v, lane) vmlal_high_lane_u32(a, b, v, lane) @@ -131,7 +131,7 @@ simde_vmlal_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2 SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlal_high_laneq_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(v[lane])); + return simde_vmlal_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlal_high_laneq_u32(a, b, v, lane) vmlal_high_laneq_u32(a, b, v, lane) diff --git a/simde/arm/neon/mlsl_high_lane.h b/simde/arm/neon/mlsl_high_lane.h index 15a640ddf..f45b7d989 100644 --- a/simde/arm/neon/mlsl_high_lane.h +++ b/simde/arm/neon/mlsl_high_lane.h @@ -40,7 +40,7 @@ SIMDE_BEGIN_DECLS_ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(v[lane])); + return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_lane_s16(a, b, v, lane) vmlsl_high_lane_s16(a, b, v, lane) @@ -53,7 +53,7 @@ simde_vmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(v[lane])); + return simde_vmlsl_high_s16(a, b, simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_laneq_s16(a, b, v, lane) vmlsl_high_laneq_s16(a, b, v, lane) @@ -66,7 +66,7 @@ simde_vmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(v[lane])); + return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_lane_s32(a, b, v, lane) vmlsl_high_lane_s32(a, b, v, lane) @@ -79,7 +79,7 @@ simde_vmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(v[lane])); + return simde_vmlsl_high_s32(a, b, simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_laneq_s32(a, b, v, lane) vmlsl_high_laneq_s32(a, b, v, lane) @@ -92,7 +92,7 @@ simde_vmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsl_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(v[lane])); + return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_lane_u16(a, b, v, lane) vmlsl_high_lane_u16(a, b, v, lane) @@ -105,7 +105,7 @@ simde_vmlsl_high_lane_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x4 SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vmlsl_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(v[lane])); + return simde_vmlsl_high_u16(a, b, simde_vdupq_n_u16(simde_uint16x8_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_laneq_u16(a, b, v, lane) vmlsl_high_laneq_u16(a, b, v, lane) @@ -118,7 +118,7 @@ simde_vmlsl_high_laneq_u16(simde_uint32x4_t a, simde_uint16x8_t b, simde_uint16x SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlsl_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(v[lane])); + return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x2_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_lane_u32(a, b, v, lane) vmlsl_high_lane_u32(a, b, v, lane) @@ -131,7 +131,7 @@ simde_vmlsl_high_lane_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x2 SIMDE_FUNCTION_ATTRIBUTES simde_uint64x2_t simde_vmlsl_high_laneq_u32(simde_uint64x2_t a, simde_uint32x4_t b, simde_uint32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(v[lane])); + return simde_vmlsl_high_u32(a, b, simde_vdupq_n_u32(simde_uint32x4_to_private(v).values[lane])); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vmlsl_high_laneq_u32(a, b, v, lane) vmlsl_high_laneq_u32(a, b, v, lane) diff --git a/simde/arm/neon/mul_lane.h b/simde/arm/neon/mul_lane.h index a727366ae..1ac2e9420 100644 --- a/simde/arm/neon/mul_lane.h +++ b/simde/arm/neon/mul_lane.h @@ -39,9 +39,9 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16_t simde_vmulh_lane_f16(simde_float16_t a, simde_float16x4_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return a * simde_float16x4_to_private(b).values[lane]; + return simde_vmulh_f16(a, simde_float16x4_to_private(b).values[lane]); } -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) #define simde_vmulh_lane_f16(a, b, lane) \ SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_lane_f16(a, b, lane)) @@ -115,9 +115,9 @@ SIMDE_FUNCTION_ATTRIBUTES simde_float16_t simde_vmulh_laneq_f16(simde_float16_t a, simde_float16x8_t b, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return a * simde_float16x8_to_private(b).values[lane]; + return simde_vmulh_f16(a, simde_float16x8_to_private(b).values[lane]); } -#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) #if defined(__clang__) && !SIMDE_DETECT_CLANG_VERSION_CHECK(11,0,0) #define simde_vmulh_laneq_f16(a, b, lane) \ SIMDE_DISABLE_DIAGNOSTIC_EXPR_(SIMDE_DIAGNOSTIC_DISABLE_VECTOR_CONVERSION_, vmulh_laneq_f16(a, b, lane)) @@ -160,7 +160,7 @@ simde_vmul_lane_f16(simde_float16x4_t a, simde_float16x4_t b, const int lane) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); } return simde_float16x4_from_private(r_); @@ -433,6 +433,9 @@ simde_vmulq_lane_f16(simde_float16x8_t a, simde_float16x4_t b, const int lane) return simde_float16x8_from_private(r_); } +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vmulq_lane_f16(a, b, lane) vmulq_lane_f16((a), (b), (lane)) +#endif #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vmulq_lane_f16 #define vmulq_lane_f16(a, b, lane) simde_vmulq_lane_f16((a), (b), (lane)) @@ -593,7 +596,7 @@ simde_vmulq_laneq_f16(simde_float16x8_t a, simde_float16x8_t b, const int lane) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); } return simde_float16x8_from_private(r_); @@ -761,7 +764,7 @@ simde_vmul_laneq_f16(simde_float16x4_t a, simde_float16x8_t b, const int lane) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = a_.values[i] * b_.values[lane]; + r_.values[i] = simde_vmulh_f16(a_.values[i], b_.values[lane]); } return simde_float16x4_from_private(r_); diff --git a/simde/arm/neon/neg.h b/simde/arm/neon/neg.h index d9f46c3cc..e6b2a8e48 100644 --- a/simde/arm/neon/neg.h +++ b/simde/arm/neon/neg.h @@ -54,7 +54,7 @@ simde_vnegh_f16(simde_float16_t a) { #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vnegh_f16(a); #else - return -a; + return simde_float16_from_float32(-simde_float16_to_float32(a)); #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) @@ -72,14 +72,10 @@ simde_vneg_f16(simde_float16x4_t a) { r_, a_ = simde_float16x4_to_private(a); - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = -a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = -(a_.values[i]); - } - #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegh_f16(a_.values[i]); + } return simde_float16x4_from_private(r_); #endif @@ -261,14 +257,10 @@ simde_vnegq_f16(simde_float16x8_t a) { r_, a_ = simde_float16x8_to_private(a); - #if defined(SIMDE_VECTOR_SUBSCRIPT_OPS) - r_.values = -a_.values; - #else - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = -(a_.values[i]); - } - #endif + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vnegh_f16(a_.values[i]); + } return simde_float16x8_from_private(r_); #endif diff --git a/simde/arm/neon/qdmlal.h b/simde/arm/neon/qdmlal.h index b57d9c6a2..b23ab6fca 100644 --- a/simde/arm/neon/qdmlal.h +++ b/simde/arm/neon/qdmlal.h @@ -29,6 +29,7 @@ #include "add.h" #include "mul.h" +#include "mul_n.h" #include "movl.h" #include "types.h" @@ -47,7 +48,7 @@ simde_vqdmlalh_s16(int32_t a, int16_t b, int16_t c) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlalh_s16 - #define vqdmlalh_s16(a, b, c) simde_qdvmlalh_s16((a), (b), (c)) + #define vqdmlalh_s16(a, b, c) simde_vqdmlalh_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -61,7 +62,7 @@ simde_vqdmlals_s32(int64_t a, int32_t b, int32_t c) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlals_s32 - #define vqdmlals_s32(a, b, c) simde_qdvmlals_s32((a), (b), (c)) + #define vqdmlals_s32(a, b, c) simde_vqdmlals_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -70,12 +71,12 @@ simde_vqdmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlal_s16(a, b, c); #else - return simde_vaddq_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)) * 2, a); + return simde_vaddq_s32(simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2), a); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmlal_s16 - #define vqdmlal_s16(a, b, c) simde_qdvmlal_s16((a), (b), (c)) + #define vqdmlal_s16(a, b, c) simde_vqdmlal_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -84,7 +85,17 @@ simde_vqdmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlal_s32(a, b, c); #else - return simde_vaddq_s64(simde_x_vmulq_s64(simde_vmovl_s32(b), simde_vmovl_s32(c)) * 2, a); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_s32(b), + simde_vmovl_s32(c))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlal_high.h b/simde/arm/neon/qdmlal_high.h index 994b0f6ae..016deb011 100644 --- a/simde/arm/neon/qdmlal_high.h +++ b/simde/arm/neon/qdmlal_high.h @@ -29,6 +29,7 @@ #include "movl_high.h" #include "mla.h" +#include "mul_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -41,7 +42,10 @@ simde_vqdmlal_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlal_high_s16(a, b, c); #else - return simde_vaddq_s32(simde_vmulq_s32(simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)) * 2, a); + return simde_vaddq_s32( + simde_vmulq_n_s32( + simde_vmulq_s32( + simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)), 2), a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -55,7 +59,17 @@ simde_vqdmlal_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlal_high_s32(a, b, c); #else - return simde_vaddq_s64(simde_x_vmulq_s64(simde_vmovl_high_s32(b), simde_vmovl_high_s32(c)) * 2, a); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_high_s32(b), + simde_vmovl_high_s32(c))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlal_high_lane.h b/simde/arm/neon/qdmlal_high_lane.h index db7245b36..b2d6a8b42 100644 --- a/simde/arm/neon/qdmlal_high_lane.h +++ b/simde/arm/neon/qdmlal_high_lane.h @@ -30,6 +30,7 @@ #include "movl_high.h" #include "add.h" #include "mul.h" +#include "mul_n.h" #include "dup_n.h" #include "mla.h" #include "types.h" @@ -42,9 +43,10 @@ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmlal_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { return simde_vaddq_s32( + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(v[lane]))) * 2, a); + simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane]))), 2), a); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlal_high_lane_s16(a, b, v, lane) vqdmlal_high_lane_s16(a, b, v, lane) @@ -58,9 +60,10 @@ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { return simde_vaddq_s32( + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(v[lane]))) * 2, a); + simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane]))), 2), a); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlal_high_laneq_s16(a, b, v, lane) vqdmlal_high_laneq_s16(a, b, v, lane) @@ -73,10 +76,17 @@ simde_vqdmlal_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8 SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqdmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vaddq_s64( + simde_int64x2_private r_ = simde_int64x2_to_private( simde_x_vmulq_s64( simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(v[lane]))) * 2, a); + simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlal_high_lane_s32(a, b, v, lane) vqdmlal_high_lane_s32(a, b, v, lane) @@ -89,10 +99,17 @@ simde_vqdmlal_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_ SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqdmlal_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vaddq_s64( + simde_int64x2_private r_ = simde_int64x2_to_private( simde_x_vmulq_s64( simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(v[lane]))) * 2, a); + simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlal_high_laneq_s32(a, b, v, lane) vqdmlal_high_laneq_s32(a, b, v, lane) diff --git a/simde/arm/neon/qdmlal_high_n.h b/simde/arm/neon/qdmlal_high_n.h index b8e9c43ab..205cafbcc 100644 --- a/simde/arm/neon/qdmlal_high_n.h +++ b/simde/arm/neon/qdmlal_high_n.h @@ -31,6 +31,7 @@ #include "dup_n.h" #include "add.h" #include "mul.h" +#include "mul_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -44,9 +45,10 @@ simde_vqdmlal_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { return vqdmlal_high_n_s16(a, b, c); #else return simde_vaddq_s32( + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(c))) * 2, a); + simde_vmovl_high_s16(simde_vdupq_n_s16(c))), 2), a); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -60,10 +62,17 @@ simde_vqdmlal_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlal_high_n_s32(a, b, c); #else - return simde_vaddq_s64( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(c))) * 2, a); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_high_s32(b), + simde_vmovl_high_s32(simde_vdupq_n_s32(c)))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vaddq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlal_lane.h b/simde/arm/neon/qdmlal_lane.h index 744fa7091..14a663cd6 100644 --- a/simde/arm/neon/qdmlal_lane.h +++ b/simde/arm/neon/qdmlal_lane.h @@ -29,6 +29,7 @@ #include "qdmlal.h" #include "dup_lane.h" +#include "get_lane.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -78,7 +79,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlalh_lane_s16(a, b, v, lane) vqdmlalh_lane_s16((a), (b), (v), (lane)) #else - #define simde_vqdmlalh_lane_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), (v[lane])) + #define simde_vqdmlalh_lane_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vget_lane_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlalh_lane_s16 @@ -88,7 +89,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlalh_laneq_s16(a, b, v, lane) vqdmlalh_laneq_s16((a), (b), (v), (lane)) #else - #define simde_vqdmlalh_laneq_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), (v[lane])) + #define simde_vqdmlalh_laneq_s16(a, b, v, lane) simde_vqdmlalh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlalh_laneq_s16 @@ -98,7 +99,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlals_lane_s32(a, b, v, lane) vqdmlals_lane_s32((a), (b), (v), (lane)) #else - #define simde_vqdmlals_lane_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), (v[lane])) + #define simde_vqdmlals_lane_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vget_lane_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlals_lane_s32 @@ -108,7 +109,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlals_laneq_s32(a, b, v, lane) vqdmlals_laneq_s32((a), (b), (v), (lane)) #else - #define simde_vqdmlals_laneq_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), (v[lane])) + #define simde_vqdmlals_laneq_s32(a, b, v, lane) simde_vqdmlals_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlals_laneq_s32 diff --git a/simde/arm/neon/qdmlsl.h b/simde/arm/neon/qdmlsl.h index ed889a204..e7770ac61 100644 --- a/simde/arm/neon/qdmlsl.h +++ b/simde/arm/neon/qdmlsl.h @@ -29,6 +29,7 @@ #include "sub.h" #include "mul.h" +#include "mul_n.h" #include "movl.h" #include "types.h" @@ -47,7 +48,7 @@ simde_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlslh_s16 - #define vqdmlslh_s16(a, b, c) simde_qdvmlslh_s16((a), (b), (c)) + #define vqdmlslh_s16(a, b, c) simde_vqdmlslh_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -61,7 +62,7 @@ simde_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsls_s32 - #define vqdmlsls_s32(a, b, c) simde_qdvmlsls_s32((a), (b), (c)) + #define vqdmlsls_s32(a, b, c) simde_vqdmlsls_s32((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -70,12 +71,12 @@ simde_vqdmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlsl_s16(a, b, c); #else - return simde_vsubq_s32(a, simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)) * 2); + return simde_vsubq_s32(a, simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) #undef vqdmlsl_s16 - #define vqdmlsl_s16(a, b, c) simde_qdvmlsl_s16((a), (b), (c)) + #define vqdmlsl_s16(a, b, c) simde_vqdmlsl_s16((a), (b), (c)) #endif SIMDE_FUNCTION_ATTRIBUTES @@ -84,7 +85,17 @@ simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlsl_s32(a, b, c); #else - return simde_vsubq_s64(a, simde_x_vmulq_s64(simde_vmovl_s32(b), simde_vmovl_s32(c)) * 2); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_s32(b), + simde_vmovl_s32(c))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlsl_high.h b/simde/arm/neon/qdmlsl_high.h index 96b8095d0..18a6f47fe 100644 --- a/simde/arm/neon/qdmlsl_high.h +++ b/simde/arm/neon/qdmlsl_high.h @@ -30,6 +30,7 @@ #include "movl_high.h" #include "sub.h" #include "mul.h" +#include "mul_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -42,7 +43,7 @@ simde_vqdmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_s16(a, b, c); #else - return simde_vsubq_s32(a, simde_vmulq_s32(simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)) * 2); + return simde_vsubq_s32(a, simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)), 2)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -56,7 +57,17 @@ simde_vqdmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_s32(a, b, c); #else - return simde_vsubq_s64(a, simde_x_vmulq_s64(simde_vmovl_high_s32(b), simde_vmovl_high_s32(c)) * 2); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_high_s32(b), + simde_vmovl_high_s32(c))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlsl_high_lane.h b/simde/arm/neon/qdmlsl_high_lane.h index 6f09ca3c9..877c72a2a 100644 --- a/simde/arm/neon/qdmlsl_high_lane.h +++ b/simde/arm/neon/qdmlsl_high_lane.h @@ -30,6 +30,7 @@ #include "movl_high.h" #include "sub.h" #include "mul.h" +#include "mul_n.h" #include "dup_n.h" #include "types.h" @@ -41,9 +42,10 @@ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { return simde_vsubq_s32(a, + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(v[lane]))) * 2); + simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane]))), 2)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_lane_s16(a, b, v, lane) vqdmlsl_high_lane_s16(a, b, v, lane) @@ -57,9 +59,10 @@ SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t simde_vqdmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { return simde_vsubq_s32(a, + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(v[lane]))) * 2); + simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane]))), 2)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_laneq_s16(a, b, v, lane) vqdmlsl_high_laneq_s16(a, b, v, lane) @@ -72,10 +75,17 @@ simde_vqdmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8 SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqdmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - return simde_vsubq_s64(a, + simde_int64x2_private r_ = simde_int64x2_to_private( simde_x_vmulq_s64( simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(v[lane]))) * 2); + simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_lane_s32(a, b, v, lane) vqdmlsl_high_lane_s32(a, b, v, lane) @@ -88,10 +98,17 @@ simde_vqdmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_ SIMDE_FUNCTION_ATTRIBUTES simde_int64x2_t simde_vqdmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vsubq_s64(a, + simde_int64x2_private r_ = simde_int64x2_to_private( simde_x_vmulq_s64( simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(v[lane]))) * 2); + simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); } #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_laneq_s32(a, b, v, lane) vqdmlsl_high_laneq_s32(a, b, v, lane) diff --git a/simde/arm/neon/qdmlsl_high_n.h b/simde/arm/neon/qdmlsl_high_n.h index 495898bae..9db3d7e04 100644 --- a/simde/arm/neon/qdmlsl_high_n.h +++ b/simde/arm/neon/qdmlsl_high_n.h @@ -31,6 +31,7 @@ #include "dup_n.h" #include "sub.h" #include "mul.h" +#include "mul_n.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -44,9 +45,10 @@ simde_vqdmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { return vqdmlsl_high_n_s16(a, b, c); #else return simde_vsubq_s32(a, + simde_vmulq_n_s32( simde_vmulq_s32( simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(c))) * 2); + simde_vmovl_high_s16(simde_vdupq_n_s16(c))), 2)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -60,10 +62,17 @@ simde_vqdmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_n_s32(a, b, c); #else - return simde_vsubq_s64(a, - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(c))) * 2); + simde_int64x2_private r_ = simde_int64x2_to_private( + simde_x_vmulq_s64( + simde_vmovl_high_s32(b), + simde_vmovl_high_s32(simde_vdupq_n_s32(c)))); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); + } + + return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlsl_lane.h b/simde/arm/neon/qdmlsl_lane.h index 8d9487f16..d93677da0 100644 --- a/simde/arm/neon/qdmlsl_lane.h +++ b/simde/arm/neon/qdmlsl_lane.h @@ -29,6 +29,7 @@ #include "qdmlsl.h" #include "dup_lane.h" +#include "get_lane.h" #include "types.h" HEDLEY_DIAGNOSTIC_PUSH @@ -78,7 +79,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlslh_lane_s16(a, b, v, lane) vqdmlslh_lane_s16((a), (b), (v), (lane)) #else - #define simde_vqdmlslh_lane_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), (v[lane])) + #define simde_vqdmlslh_lane_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), simde_vget_lane_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlslh_lane_s16 @@ -88,7 +89,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlslh_laneq_s16(a, b, v, lane) vqdmlslh_laneq_s16((a), (b), (v), (lane)) #else - #define simde_vqdmlslh_laneq_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), (v[lane])) + #define simde_vqdmlslh_laneq_s16(a, b, v, lane) simde_vqdmlslh_s16((a), (b), simde_vgetq_lane_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlslh_laneq_s16 @@ -98,7 +99,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsls_lane_s32(a, b, v, lane) vqdmlsls_lane_s32((a), (b), (v), (lane)) #else - #define simde_vqdmlsls_lane_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), (v[lane])) + #define simde_vqdmlsls_lane_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), simde_vget_lane_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsls_lane_s32 @@ -108,7 +109,7 @@ SIMDE_BEGIN_DECLS_ #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsls_laneq_s32(a, b, v, lane) vqdmlsls_laneq_s32((a), (b), (v), (lane)) #else - #define simde_vqdmlsls_laneq_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), (v[lane])) + #define simde_vqdmlsls_laneq_s32(a, b, v, lane) simde_vqdmlsls_s32((a), (b), simde_vgetq_lane_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsls_laneq_s32 diff --git a/simde/arm/neon/reinterpret.h b/simde/arm/neon/reinterpret.h index 474c348d2..d7efddfbb 100644 --- a/simde/arm/neon/reinterpret.h +++ b/simde/arm/neon/reinterpret.h @@ -2334,7 +2334,7 @@ simde_vreinterpret_u64_u32(simde_uint32x2_t a) { SIMDE_FUNCTION_ATTRIBUTES simde_uint64x1_t simde_vreinterpret_u64_f16(simde_float16x4_t a) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vreinterpret_u64_f16(a); #else simde_uint64x1_private r_; diff --git a/simde/arm/neon/types.h b/simde/arm/neon/types.h index cdded3ee0..623300635 100644 --- a/simde/arm/neon/types.h +++ b/simde/arm/neon/types.h @@ -403,9 +403,13 @@ typedef union { #if defined(SIMDE_ARM_NEON_FP16) typedef float16_t simde_float16_t; typedef float16x4_t simde_float16x4_t; - typedef float16x8_t simde_float16x8_t; typedef float16x4x2_t simde_float16x4x2_t; + typedef float16x4x3_t simde_float16x4x3_t; + typedef float16x4x4_t simde_float16x4x4_t; + typedef float16x8_t simde_float16x8_t; typedef float16x8x2_t simde_float16x8x2_t; + typedef float16x8x3_t simde_float16x8x3_t; + typedef float16x8x4_t simde_float16x8x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 #endif @@ -428,19 +432,6 @@ typedef union { #define SIMDE_ARM_NEON_NEED_PORTABLE_F64X2XN #endif - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 - typedef float16_t simde_float16_t; - typedef float16x4_t simde_float16x4_t; - typedef float16x4x2_t simde_float16x4x2_t; - typedef float16x4x3_t simde_float16x4x3_t; - typedef float16x4x4_t simde_float16x4x4_t; - typedef float16x8_t simde_float16x8_t; - typedef float16x8x2_t simde_float16x8x2_t; - typedef float16x8x3_t simde_float16x8x3_t; - typedef float16x8x4_t simde_float16x8x4_t; - #else - #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 - #endif #elif (defined(SIMDE_X86_MMX_NATIVE) || defined(SIMDE_X86_SSE_NATIVE)) && defined(SIMDE_ARM_NEON_FORCE_NATIVE_TYPES) #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 #define SIMDE_ARM_NEON_NEED_PORTABLE_F32 @@ -580,9 +571,21 @@ typedef union { typedef struct simde_float16x4x2_t { simde_float16x4_t val[2]; } simde_float16x4x2_t; + typedef struct simde_float16x4x3_t { + simde_float16x4_t val[3]; + } simde_float16x4x3_t; + typedef struct simde_float16x4x4_t { + simde_float16x4_t val[4]; + } simde_float16x4x4_t; typedef struct simde_float16x8x2_t { simde_float16x8_t val[2]; } simde_float16x8x2_t; + typedef struct simde_float16x8x3_t { + simde_float16x8_t val[3]; + } simde_float16x8x3_t; + typedef struct simde_float16x8x4_t { + simde_float16x8_t val[4]; + } simde_float16x8x4_t; #else #define SIMDE_ARM_NEON_NEED_PORTABLE_F16 #endif @@ -672,9 +675,21 @@ typedef union { typedef struct simde_float16x4x2_t { simde_float16x4_t val[2]; } simde_float16x4x2_t; + typedef struct simde_float16x4x3_t { + simde_float16x4_t val[3]; + } simde_float16x4x3_t; + typedef struct simde_float16x4x4_t { + simde_float16x4_t val[4]; + } simde_float16x4x4_t; typedef struct simde_float16x8x2_t { simde_float16x8_t val[2]; } simde_float16x8x2_t; + typedef struct simde_float16x8x3_t { + simde_float16x8_t val[3]; + } simde_float16x8x3_t; + typedef struct simde_float16x8x4_t { + simde_float16x8_t val[4]; + } simde_float16x8x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F32) typedef simde_float32 simde_float32_t; @@ -767,9 +782,6 @@ typedef union { typedef struct simde_float32x2x3_t { simde_float32x2_t val[3]; } simde_float32x2x3_t; - typedef struct simde_float16x4x3_t { - simde_float16x4_t val[3]; - } simde_float16x4x3_t; typedef struct simde_int8x16x3_t { simde_int8x16_t val[3]; @@ -798,9 +810,6 @@ typedef union { typedef struct simde_float32x4x3_t { simde_float32x4_t val[3]; } simde_float32x4x3_t; - typedef struct simde_float16x8x3_t { - simde_float16x8_t val[3]; - } simde_float16x8x3_t; typedef struct simde_int8x8x4_t { simde_int8x8_t val[4]; @@ -829,9 +838,6 @@ typedef union { typedef struct simde_float32x2x4_t { simde_float32x2_t val[4]; } simde_float32x2x4_t; - typedef struct simde_float16x4x4_t { - simde_float16x4_t val[4]; - } simde_float16x4x4_t; typedef struct simde_int8x16x4_t { simde_int8x16_t val[4]; @@ -860,9 +866,6 @@ typedef union { typedef struct simde_float32x4x4_t { simde_float32x4_t val[4]; } simde_float32x4x4_t; - typedef struct simde_float16x8x4_t { - simde_float16x8_t val[4]; - } simde_float16x8x4_t; #endif #if defined(SIMDE_ARM_NEON_NEED_PORTABLE_F64X1XN) diff --git a/test/arm/neon/cvt.c b/test/arm/neon/cvt.c index 197e5baa2..6b20d842d 100644 --- a/test/arm/neon/cvt.c +++ b/test/arm/neon/cvt.c @@ -3,6 +3,74 @@ #include "test-neon.h" #include "../../../simde/arm/neon/cvt.h" +static int +test_simde_vcvth_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + int16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( -0.604), + INT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 24.671), + INT16_C( 24) }, + { SIMDE_FLOAT16_VALUE( -23.744), + -INT16_C( 23) }, + { SIMDE_FLOAT16_VALUE( -7.939), + -INT16_C( 7) }, + { SIMDE_FLOAT16_VALUE( -18.393), + -INT16_C( 18) }, + { SIMDE_FLOAT16_VALUE( 29.124), + INT16_C( 29) }, + { SIMDE_FLOAT16_VALUE( 26.359), + INT16_C( 26) }, + { SIMDE_FLOAT16_VALUE( 19.447), + INT16_C( 19) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int16_t r = simde_vcvth_s16_f16(a); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vcvth_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 25.639), + UINT16_C( 25) }, + { SIMDE_FLOAT16_VALUE( -25.081), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 15.061), + UINT16_C( 15) }, + { SIMDE_FLOAT16_VALUE( -21.777), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -26.635), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -9.047), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -27.803), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 3.276), + UINT16_C( 3) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint16_t r = simde_vcvth_u16_f16(a); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +} + static int test_simde_vcvts_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1797,6 +1865,8 @@ test_simde_vcvtaq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_u16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_u32_f32) diff --git a/test/arm/neon/cvt_n.c b/test/arm/neon/cvt_n.c index 54b9a86a8..0452fad4c 100644 --- a/test/arm/neon/cvt_n.c +++ b/test/arm/neon/cvt_n.c @@ -14,7 +14,7 @@ test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { int16_t r13[4]; int16_t r16[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(-0.2), SIMDE_FLOAT16_C(-4.8), SIMDE_FLOAT16_C(9.9), SIMDE_FLOAT16_C(1.1), }, + { { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(1.1), }, { -INT16_C(1), -INT16_C(38), INT16_C(79), INT16_C(8), }, { -INT16_C(12), -INT16_C(307), INT16_C(633), INT16_C(70), }, { -INT16_C(204), -INT16_C(4916), INT16_C(10136), INT16_C(1126), }, @@ -27,14 +27,18 @@ test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_int16x4_t r3 = simde_vcvt_n_s16_f16(a, 3); simde_int16x4_t r6 = simde_vcvt_n_s16_f16(a, 6); simde_int16x4_t r10 = simde_vcvt_n_s16_f16(a, 10); - simde_int16x4_t r13 = simde_vcvt_n_s16_f16(a, 13); - simde_int16x4_t r16 = simde_vcvt_n_s16_f16(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int16x4_t r13 = simde_vcvt_n_s16_f16(a, 13); + simde_int16x4_t r16 = simde_vcvt_n_s16_f16(a, 16); + #endif simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); - simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); - simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x4(r16, simde_vld1_s16(test_vec[i].r16)); + #endif } return 0; @@ -42,7 +46,7 @@ test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvt_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { - struct { + static const struct { simde_float32 a[2]; int32_t r3[2]; int32_t r10[2]; @@ -63,14 +67,18 @@ test_simde_vcvt_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_int32x2_t r3 = simde_vcvt_n_s32_f32(a, 3); simde_int32x2_t r10 = simde_vcvt_n_s32_f32(a, 10); simde_int32x2_t r16 = simde_vcvt_n_s32_f32(a, 16); - simde_int32x2_t r23 = simde_vcvt_n_s32_f32(a, 23); - simde_int32x2_t r32 = simde_vcvt_n_s32_f32(a, 32); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int32x2_t r23 = simde_vcvt_n_s32_f32(a, 23); + simde_int32x2_t r32 = simde_vcvt_n_s32_f32(a, 32); + #endif simde_test_arm_neon_assert_equal_i32x2(r3, simde_vld1_s32(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i32x2(r10, simde_vld1_s32(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i32x2(r16, simde_vld1_s32(test_vec[i].r16)); - simde_test_arm_neon_assert_equal_i32x2(r23, simde_vld1_s32(test_vec[i].r23)); - simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i32x2(r23, simde_vld1_s32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_i32x2(r32, simde_vld1_s32(test_vec[i].r32)); + #endif } return 0; @@ -109,15 +117,19 @@ test_simde_vcvt_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { simde_int64x1_t r17 = simde_vcvt_n_s64_f64(a, 17); simde_int64x1_t r23 = simde_vcvt_n_s64_f64(a, 23); simde_int64x1_t r38 = simde_vcvt_n_s64_f64(a, 38); - simde_int64x1_t r55 = simde_vcvt_n_s64_f64(a, 55); - simde_int64x1_t r64 = simde_vcvt_n_s64_f64(a, 64); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int64x1_t r55 = simde_vcvt_n_s64_f64(a, 55); + simde_int64x1_t r64 = simde_vcvt_n_s64_f64(a, 64); + #endif simde_test_arm_neon_assert_equal_i64x1(r3, simde_vld1_s64(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i64x1(r17, simde_vld1_s64(test_vec[i].r17)); simde_test_arm_neon_assert_equal_i64x1(r23, simde_vld1_s64(test_vec[i].r23)); simde_test_arm_neon_assert_equal_i64x1(r38, simde_vld1_s64(test_vec[i].r38)); - simde_test_arm_neon_assert_equal_i64x1(r55, simde_vld1_s64(test_vec[i].r55)); - simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i64x1(r55, simde_vld1_s64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_i64x1(r64, simde_vld1_s64(test_vec[i].r64)); + #endif } return 0; @@ -133,7 +145,7 @@ test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { uint16_t r13[4]; uint16_t r16[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(9.1), SIMDE_FLOAT16_C(5.4), SIMDE_FLOAT16_C(3.2), }, + { { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(3.2), }, { UINT16_C(11), UINT16_C(72), UINT16_C(43), UINT16_C(25), }, { UINT16_C(89), UINT16_C(582), UINT16_C(345), UINT16_C(204), }, { UINT16_C(1434), UINT16_C(9320), UINT16_C(5528), UINT16_C(3276), }, @@ -146,14 +158,18 @@ test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_uint16x4_t r3 = simde_vcvt_n_u16_f16(a, 3); simde_uint16x4_t r6 = simde_vcvt_n_u16_f16(a, 6); simde_uint16x4_t r10 = simde_vcvt_n_u16_f16(a, 10); - simde_uint16x4_t r13 = simde_vcvt_n_u16_f16(a, 13); - simde_uint16x4_t r16 = simde_vcvt_n_u16_f16(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint16x4_t r13 = simde_vcvt_n_u16_f16(a, 13); + simde_uint16x4_t r16 = simde_vcvt_n_u16_f16(a, 16); + #endif simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); - simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); - simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x4(r16, simde_vld1_u16(test_vec[i].r16)); + #endif } return 0; @@ -161,7 +177,7 @@ test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvt_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { - struct { + static const struct { simde_float32 a[2]; uint32_t r3[2]; uint32_t r10[2]; @@ -182,14 +198,18 @@ test_simde_vcvt_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_uint32x2_t r3 = simde_vcvt_n_u32_f32(a, 3); simde_uint32x2_t r10 = simde_vcvt_n_u32_f32(a, 10); simde_uint32x2_t r16 = simde_vcvt_n_u32_f32(a, 16); - simde_uint32x2_t r23 = simde_vcvt_n_u32_f32(a, 23); - simde_uint32x2_t r32 = simde_vcvt_n_u32_f32(a, 32); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint32x2_t r23 = simde_vcvt_n_u32_f32(a, 23); + simde_uint32x2_t r32 = simde_vcvt_n_u32_f32(a, 32); + #endif simde_test_arm_neon_assert_equal_u32x2(r3, simde_vld1_u32(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u32x2(r10, simde_vld1_u32(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u32x2(r16, simde_vld1_u32(test_vec[i].r16)); - simde_test_arm_neon_assert_equal_u32x2(r23, simde_vld1_u32(test_vec[i].r23)); - simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u32x2(r23, simde_vld1_u32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u32x2(r32, simde_vld1_u32(test_vec[i].r32)); + #endif } return 0; @@ -228,15 +248,19 @@ test_simde_vcvt_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { simde_uint64x1_t r17 = simde_vcvt_n_u64_f64(a, 17); simde_uint64x1_t r23 = simde_vcvt_n_u64_f64(a, 23); simde_uint64x1_t r38 = simde_vcvt_n_u64_f64(a, 38); - simde_uint64x1_t r55 = simde_vcvt_n_u64_f64(a, 55); - simde_uint64x1_t r64 = simde_vcvt_n_u64_f64(a, 64); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint64x1_t r55 = simde_vcvt_n_u64_f64(a, 55); + simde_uint64x1_t r64 = simde_vcvt_n_u64_f64(a, 64); + #endif simde_test_arm_neon_assert_equal_u64x1(r3, simde_vld1_u64(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u64x1(r17, simde_vld1_u64(test_vec[i].r17)); simde_test_arm_neon_assert_equal_u64x1(r23, simde_vld1_u64(test_vec[i].r23)); simde_test_arm_neon_assert_equal_u64x1(r38, simde_vld1_u64(test_vec[i].r38)); - simde_test_arm_neon_assert_equal_u64x1(r55, simde_vld1_u64(test_vec[i].r55)); - simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u64x1(r55, simde_vld1_u64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_u64x1(r64, simde_vld1_u64(test_vec[i].r64)); + #endif } return 0; @@ -252,7 +276,7 @@ test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { int16_t r13[8]; int16_t r16[8]; } test_vec[] = { - { { SIMDE_FLOAT16_C(-0.7), SIMDE_FLOAT16_C(-4.5), SIMDE_FLOAT16_C(0.8), SIMDE_FLOAT16_C(-9.3), SIMDE_FLOAT16_C(-4.4), SIMDE_FLOAT16_C(9.3), SIMDE_FLOAT16_C(6.9), SIMDE_FLOAT16_C(-5.9), }, + { { SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(-4.5), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(-9.3), SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(6.9), SIMDE_FLOAT16_VALUE(-5.9), }, { -INT16_C(5), -INT16_C(36), INT16_C(6), -INT16_C(74), -INT16_C(35), INT16_C(74), INT16_C(55), -INT16_C(47), }, { -INT16_C(44), -INT16_C(288), INT16_C(51), -INT16_C(595), -INT16_C(281), INT16_C(595), INT16_C(441), -INT16_C(377), }, { -INT16_C(717), -INT16_C(4608), INT16_C(819), -INT16_C(9520), -INT16_C(4504), INT16_C(9520), INT16_C(7064), -INT16_C(6040), }, @@ -265,14 +289,18 @@ test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_int16x8_t r3 = simde_vcvtq_n_s16_f16(a, 3); simde_int16x8_t r6 = simde_vcvtq_n_s16_f16(a, 6); simde_int16x8_t r10 = simde_vcvtq_n_s16_f16(a, 10); - simde_int16x8_t r13 = simde_vcvtq_n_s16_f16(a, 13); - simde_int16x8_t r16 = simde_vcvtq_n_s16_f16(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int16x8_t r13 = simde_vcvtq_n_s16_f16(a, 13); + simde_int16x8_t r16 = simde_vcvtq_n_s16_f16(a, 16); + #endif simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); - simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); - simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + #endif } return 0; @@ -280,7 +308,7 @@ test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtq_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { - struct { + static const struct { simde_float32 a[4]; int32_t r3[4]; int32_t r10[4]; @@ -301,14 +329,18 @@ test_simde_vcvtq_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_int32x4_t r3 = simde_vcvtq_n_s32_f32(a, 3); simde_int32x4_t r10 = simde_vcvtq_n_s32_f32(a, 10); simde_int32x4_t r16 = simde_vcvtq_n_s32_f32(a, 16); - simde_int32x4_t r23 = simde_vcvtq_n_s32_f32(a, 23); - simde_int32x4_t r32 = simde_vcvtq_n_s32_f32(a, 32); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int32x4_t r23 = simde_vcvtq_n_s32_f32(a, 23); + simde_int32x4_t r32 = simde_vcvtq_n_s32_f32(a, 32); + #endif simde_test_arm_neon_assert_equal_i32x4(r3, simde_vld1q_s32(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i32x4(r10, simde_vld1q_s32(test_vec[i].r10)); simde_test_arm_neon_assert_equal_i32x4(r16, simde_vld1q_s32(test_vec[i].r16)); - simde_test_arm_neon_assert_equal_i32x4(r23, simde_vld1q_s32(test_vec[i].r23)); - simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i32x4(r23, simde_vld1q_s32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); + #endif } return 0; @@ -340,15 +372,19 @@ test_simde_vcvtq_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { simde_int64x2_t r17 = simde_vcvtq_n_s64_f64(a, 17); simde_int64x2_t r23 = simde_vcvtq_n_s64_f64(a, 23); simde_int64x2_t r38 = simde_vcvtq_n_s64_f64(a, 38); - simde_int64x2_t r55 = simde_vcvtq_n_s64_f64(a, 55); - simde_int64x2_t r64 = simde_vcvtq_n_s64_f64(a, 64); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_int64x2_t r55 = simde_vcvtq_n_s64_f64(a, 55); + simde_int64x2_t r64 = simde_vcvtq_n_s64_f64(a, 64); + #endif simde_test_arm_neon_assert_equal_i64x2(r3, simde_vld1q_s64(test_vec[i].r3)); simde_test_arm_neon_assert_equal_i64x2(r17, simde_vld1q_s64(test_vec[i].r17)); simde_test_arm_neon_assert_equal_i64x2(r23, simde_vld1q_s64(test_vec[i].r23)); simde_test_arm_neon_assert_equal_i64x2(r38, simde_vld1q_s64(test_vec[i].r38)); - simde_test_arm_neon_assert_equal_i64x2(r55, simde_vld1q_s64(test_vec[i].r55)); - simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_i64x2(r55, simde_vld1q_s64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_i64x2(r64, simde_vld1q_s64(test_vec[i].r64)); + #endif } return 0; @@ -364,7 +400,7 @@ test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { uint16_t r13[8]; uint16_t r16[8]; } test_vec[] = { - { { SIMDE_FLOAT16_C(4.8), SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(0.6), SIMDE_FLOAT16_C(2.6), SIMDE_FLOAT16_C(1.8), SIMDE_FLOAT16_C(6.5), SIMDE_FLOAT16_C(9.8), SIMDE_FLOAT16_C(7.5), }, + { { SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(2.6), SIMDE_FLOAT16_VALUE(1.8), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(7.5), }, { UINT16_C(38), UINT16_C(11), UINT16_C(4), UINT16_C(20), UINT16_C(14), UINT16_C(52), UINT16_C(78), UINT16_C(60), }, { UINT16_C(307), UINT16_C(89), UINT16_C(38), UINT16_C(166), UINT16_C(115), UINT16_C(416), UINT16_C(627), UINT16_C(480), }, { UINT16_C(4916), UINT16_C(1434), UINT16_C(614), UINT16_C(2662), UINT16_C(1843), UINT16_C(6656), UINT16_C(10032), UINT16_C(7680), }, @@ -377,14 +413,18 @@ test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_uint16x8_t r3 = simde_vcvtq_n_u16_f16(a, 3); simde_uint16x8_t r6 = simde_vcvtq_n_u16_f16(a, 6); simde_uint16x8_t r10 = simde_vcvtq_n_u16_f16(a, 10); - simde_uint16x8_t r13 = simde_vcvtq_n_u16_f16(a, 13); - simde_uint16x8_t r16 = simde_vcvtq_n_u16_f16(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint16x8_t r13 = simde_vcvtq_n_u16_f16(a, 13); + simde_uint16x8_t r16 = simde_vcvtq_n_u16_f16(a, 16); + #endif simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); - simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); - simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + #endif } return 0; @@ -392,7 +432,7 @@ test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtq_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { - struct { + static const struct { simde_float32 a[4]; uint32_t r3[4]; uint32_t r10[4]; @@ -413,14 +453,18 @@ test_simde_vcvtq_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { simde_uint32x4_t r3 = simde_vcvtq_n_u32_f32(a, 3); simde_uint32x4_t r10 = simde_vcvtq_n_u32_f32(a, 10); simde_uint32x4_t r16 = simde_vcvtq_n_u32_f32(a, 16); - simde_uint32x4_t r23 = simde_vcvtq_n_u32_f32(a, 23); - simde_uint32x4_t r32 = simde_vcvtq_n_u32_f32(a, 32); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint32x4_t r23 = simde_vcvtq_n_u32_f32(a, 23); + simde_uint32x4_t r32 = simde_vcvtq_n_u32_f32(a, 32); + #endif simde_test_arm_neon_assert_equal_u32x4(r3, simde_vld1q_u32(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u32x4(r10, simde_vld1q_u32(test_vec[i].r10)); simde_test_arm_neon_assert_equal_u32x4(r16, simde_vld1q_u32(test_vec[i].r16)); - simde_test_arm_neon_assert_equal_u32x4(r23, simde_vld1q_u32(test_vec[i].r23)); - simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u32x4(r23, simde_vld1q_u32(test_vec[i].r23)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + #endif } return 0; @@ -452,15 +496,19 @@ test_simde_vcvtq_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { simde_uint64x2_t r17 = simde_vcvtq_n_u64_f64(a, 17); simde_uint64x2_t r23 = simde_vcvtq_n_u64_f64(a, 23); simde_uint64x2_t r38 = simde_vcvtq_n_u64_f64(a, 38); - simde_uint64x2_t r55 = simde_vcvtq_n_u64_f64(a, 55); - simde_uint64x2_t r64 = simde_vcvtq_n_u64_f64(a, 64); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_uint64x2_t r55 = simde_vcvtq_n_u64_f64(a, 55); + simde_uint64x2_t r64 = simde_vcvtq_n_u64_f64(a, 64); + #endif simde_test_arm_neon_assert_equal_u64x2(r3, simde_vld1q_u64(test_vec[i].r3)); simde_test_arm_neon_assert_equal_u64x2(r17, simde_vld1q_u64(test_vec[i].r17)); simde_test_arm_neon_assert_equal_u64x2(r23, simde_vld1q_u64(test_vec[i].r23)); simde_test_arm_neon_assert_equal_u64x2(r38, simde_vld1q_u64(test_vec[i].r38)); - simde_test_arm_neon_assert_equal_u64x2(r55, simde_vld1q_u64(test_vec[i].r55)); - simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_test_arm_neon_assert_equal_u64x2(r55, simde_vld1q_u64(test_vec[i].r55)); + simde_test_arm_neon_assert_equal_u64x2(r64, simde_vld1q_u64(test_vec[i].r64)); + #endif } return 0; @@ -477,17 +525,17 @@ test_simde_vcvt_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16_t r16[4]; } test_vec[] = { { { UINT16_C(19849), UINT16_C(26147), UINT16_C(40838), UINT16_C(40781) }, - { SIMDE_FLOAT16_C(2482.0), SIMDE_FLOAT16_C(3268.4), SIMDE_FLOAT16_C(5104.8), SIMDE_FLOAT16_C(5097.6) }, - { SIMDE_FLOAT16_C(310.2), SIMDE_FLOAT16_C(408.5), SIMDE_FLOAT16_C(638.1), SIMDE_FLOAT16_C(637.2) }, - { SIMDE_FLOAT16_C(19.4), SIMDE_FLOAT16_C(25.5), SIMDE_FLOAT16_C(39.9), SIMDE_FLOAT16_C(39.8) }, - { SIMDE_FLOAT16_C(2.4), SIMDE_FLOAT16_C(3.2), SIMDE_FLOAT16_C(5.0), SIMDE_FLOAT16_C(5.0) }, - { SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(0.6), SIMDE_FLOAT16_C(0.6) } }, + { SIMDE_FLOAT16_VALUE(2482.0), SIMDE_FLOAT16_VALUE(3268.4), SIMDE_FLOAT16_VALUE(5104.8), SIMDE_FLOAT16_VALUE(5097.6) }, + { SIMDE_FLOAT16_VALUE(310.2), SIMDE_FLOAT16_VALUE(408.5), SIMDE_FLOAT16_VALUE(638.1), SIMDE_FLOAT16_VALUE(637.2) }, + { SIMDE_FLOAT16_VALUE(19.4), SIMDE_FLOAT16_VALUE(25.5), SIMDE_FLOAT16_VALUE(39.9), SIMDE_FLOAT16_VALUE(39.8) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(5.0) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.6) } }, { { UINT16_C(10037), UINT16_C(52658), UINT16_C(27371), UINT16_C(28364) }, - { SIMDE_FLOAT16_C(1254.6), SIMDE_FLOAT16_C(6582.3), SIMDE_FLOAT16_C(3421.4), SIMDE_FLOAT16_C(3545.5) }, - { SIMDE_FLOAT16_C(156.875), SIMDE_FLOAT16_C(822.8), SIMDE_FLOAT16_C(427.7), SIMDE_FLOAT16_C(443.2) }, - { SIMDE_FLOAT16_C(9.8), SIMDE_FLOAT16_C(51.4), SIMDE_FLOAT16_C(26.7), SIMDE_FLOAT16_C(27.7) }, - { SIMDE_FLOAT16_C(1.2), SIMDE_FLOAT16_C(6.4), SIMDE_FLOAT16_C(3.3), SIMDE_FLOAT16_C(3.5) }, - { SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(0.8), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(0.4) } }, + { SIMDE_FLOAT16_VALUE(1254.6), SIMDE_FLOAT16_VALUE(6582.3), SIMDE_FLOAT16_VALUE(3421.4), SIMDE_FLOAT16_VALUE(3545.5) }, + { SIMDE_FLOAT16_VALUE(156.875), SIMDE_FLOAT16_VALUE(822.8), SIMDE_FLOAT16_VALUE(427.7), SIMDE_FLOAT16_VALUE(443.2) }, + { SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(51.4), SIMDE_FLOAT16_VALUE(26.7), SIMDE_FLOAT16_VALUE(27.7) }, + { SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(3.5) }, + { SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.4) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -519,17 +567,17 @@ test_simde_vcvt_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16_t r16[4]; } test_vec[] = { { { INT16_C(-1573), INT16_C(-19221), INT16_C(23775), INT16_C(-21379) }, - { SIMDE_FLOAT16_C(-196.625), SIMDE_FLOAT16_C(-2402.6), SIMDE_FLOAT16_C(2971.9), SIMDE_FLOAT16_C(-2672.4) }, - { SIMDE_FLOAT16_C(-24.578125), SIMDE_FLOAT16_C(-300.25), SIMDE_FLOAT16_C(371.5), SIMDE_FLOAT16_C(-334.0) }, - { SIMDE_FLOAT16_C(-1.536133), SIMDE_FLOAT16_C(-18.765625), SIMDE_FLOAT16_C(23.218750), SIMDE_FLOAT16_C(-20.8750) }, - { SIMDE_FLOAT16_C(-0.192017), SIMDE_FLOAT16_C(-2.345703), SIMDE_FLOAT16_C(2.902344), SIMDE_FLOAT16_C(-2.609375) }, - { SIMDE_FLOAT16_C(0.0), SIMDE_FLOAT16_C(-0.293213), SIMDE_FLOAT16_C(0.362793), SIMDE_FLOAT16_C(-0.326172) } }, + { SIMDE_FLOAT16_VALUE(-196.625), SIMDE_FLOAT16_VALUE(-2402.6), SIMDE_FLOAT16_VALUE(2971.9), SIMDE_FLOAT16_VALUE(-2672.4) }, + { SIMDE_FLOAT16_VALUE(-24.578125), SIMDE_FLOAT16_VALUE(-300.25), SIMDE_FLOAT16_VALUE(371.5), SIMDE_FLOAT16_VALUE(-334.0) }, + { SIMDE_FLOAT16_VALUE(-1.536133), SIMDE_FLOAT16_VALUE(-18.765625), SIMDE_FLOAT16_VALUE(23.218750), SIMDE_FLOAT16_VALUE(-20.8750) }, + { SIMDE_FLOAT16_VALUE(-0.192017), SIMDE_FLOAT16_VALUE(-2.345703), SIMDE_FLOAT16_VALUE(2.902344), SIMDE_FLOAT16_VALUE(-2.609375) }, + { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(-0.293213), SIMDE_FLOAT16_VALUE(0.362793), SIMDE_FLOAT16_VALUE(-0.326172) } }, { { INT16_C(-19672), INT16_C(2663), INT16_C(31268), INT16_C(-11631) }, - { SIMDE_FLOAT16_C(-2460.0), SIMDE_FLOAT16_C(333.0), SIMDE_FLOAT16_C(3908.5), SIMDE_FLOAT16_C(-1454.0) }, - { SIMDE_FLOAT16_C(-307.5), SIMDE_FLOAT16_C(41.625), SIMDE_FLOAT16_C(488.5), SIMDE_FLOAT16_C(-181.75) }, - { SIMDE_FLOAT16_C(-19.21875), SIMDE_FLOAT16_C(2.601562), SIMDE_FLOAT16_C(30.531250), SIMDE_FLOAT16_C(-11.359375) }, - { SIMDE_FLOAT16_C(-2.402344), SIMDE_FLOAT16_C(0.325195), SIMDE_FLOAT16_C(3.816406), SIMDE_FLOAT16_C(-1.419922) }, - { SIMDE_FLOAT16_C(-0.300293), SIMDE_FLOAT16_C(0.040649), SIMDE_FLOAT16_C(0.477051), SIMDE_FLOAT16_C(-0.177490) } }, + { SIMDE_FLOAT16_VALUE(-2460.0), SIMDE_FLOAT16_VALUE(333.0), SIMDE_FLOAT16_VALUE(3908.5), SIMDE_FLOAT16_VALUE(-1454.0) }, + { SIMDE_FLOAT16_VALUE(-307.5), SIMDE_FLOAT16_VALUE(41.625), SIMDE_FLOAT16_VALUE(488.5), SIMDE_FLOAT16_VALUE(-181.75) }, + { SIMDE_FLOAT16_VALUE(-19.21875), SIMDE_FLOAT16_VALUE(2.601562), SIMDE_FLOAT16_VALUE(30.531250), SIMDE_FLOAT16_VALUE(-11.359375) }, + { SIMDE_FLOAT16_VALUE(-2.402344), SIMDE_FLOAT16_VALUE(0.325195), SIMDE_FLOAT16_VALUE(3.816406), SIMDE_FLOAT16_VALUE(-1.419922) }, + { SIMDE_FLOAT16_VALUE(-0.300293), SIMDE_FLOAT16_VALUE(0.040649), SIMDE_FLOAT16_VALUE(0.477051), SIMDE_FLOAT16_VALUE(-0.177490) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -561,11 +609,11 @@ test_simde_vcvtq_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16_t r16[8]; } test_vec[] = { { { UINT16_C(19849), UINT16_C(26147), UINT16_C(40838), UINT16_C(40781), UINT16_C(10037), UINT16_C(52658), UINT16_C(27371), UINT16_C(28364) }, - { SIMDE_FLOAT16_C(2481.1), SIMDE_FLOAT16_C(3268.4), SIMDE_FLOAT16_C(5104.8), SIMDE_FLOAT16_C(5097.6), SIMDE_FLOAT16_C(1254.6), SIMDE_FLOAT16_C(6582.3), SIMDE_FLOAT16_C(3421.4), SIMDE_FLOAT16_C(3545.5) }, - { SIMDE_FLOAT16_C(310.2), SIMDE_FLOAT16_C(408.5), SIMDE_FLOAT16_C(638.1), SIMDE_FLOAT16_C(637.2), SIMDE_FLOAT16_C(156.875), SIMDE_FLOAT16_C(822.8), SIMDE_FLOAT16_C(427.7), SIMDE_FLOAT16_C(443.2) }, - { SIMDE_FLOAT16_C(19.4), SIMDE_FLOAT16_C(25.5), SIMDE_FLOAT16_C(39.9), SIMDE_FLOAT16_C(39.8), SIMDE_FLOAT16_C(9.8), SIMDE_FLOAT16_C(51.4), SIMDE_FLOAT16_C(26.7), SIMDE_FLOAT16_C(27.7) }, - { SIMDE_FLOAT16_C(2.4), SIMDE_FLOAT16_C(3.2), SIMDE_FLOAT16_C(5.0), SIMDE_FLOAT16_C(5.0), SIMDE_FLOAT16_C(1.2), SIMDE_FLOAT16_C(6.4), SIMDE_FLOAT16_C(3.3), SIMDE_FLOAT16_C(3.5) }, - { SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(0.6), SIMDE_FLOAT16_C(0.6), SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(0.8), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(0.4) } }, + { SIMDE_FLOAT16_VALUE(2481.1), SIMDE_FLOAT16_VALUE(3268.4), SIMDE_FLOAT16_VALUE(5104.8), SIMDE_FLOAT16_VALUE(5097.6), SIMDE_FLOAT16_VALUE(1254.6), SIMDE_FLOAT16_VALUE(6582.3), SIMDE_FLOAT16_VALUE(3421.4), SIMDE_FLOAT16_VALUE(3545.5) }, + { SIMDE_FLOAT16_VALUE(310.2), SIMDE_FLOAT16_VALUE(408.5), SIMDE_FLOAT16_VALUE(638.1), SIMDE_FLOAT16_VALUE(637.2), SIMDE_FLOAT16_VALUE(156.875), SIMDE_FLOAT16_VALUE(822.8), SIMDE_FLOAT16_VALUE(427.7), SIMDE_FLOAT16_VALUE(443.2) }, + { SIMDE_FLOAT16_VALUE(19.4), SIMDE_FLOAT16_VALUE(25.5), SIMDE_FLOAT16_VALUE(39.9), SIMDE_FLOAT16_VALUE(39.8), SIMDE_FLOAT16_VALUE(9.8), SIMDE_FLOAT16_VALUE(51.4), SIMDE_FLOAT16_VALUE(26.7), SIMDE_FLOAT16_VALUE(27.7) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(3.2), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(5.0), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(3.5) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.6), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.8), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.4) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -597,11 +645,11 @@ test_simde_vcvtq_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16_t r16[8]; } test_vec[] = { { { INT16_C(-1573), INT16_C(-19221), INT16_C(23775), INT16_C(-21379), INT16_C(-19672), INT16_C(2663), INT16_C(31268), INT16_C(-11631) }, - { SIMDE_FLOAT16_C(-196.625), SIMDE_FLOAT16_C(-2402.6), SIMDE_FLOAT16_C(2971.9), SIMDE_FLOAT16_C(-2672.4), SIMDE_FLOAT16_C(-2460.0), SIMDE_FLOAT16_C(333.0), SIMDE_FLOAT16_C(3908.5), SIMDE_FLOAT16_C(-1454.0) }, - { SIMDE_FLOAT16_C(-24.578125), SIMDE_FLOAT16_C(-300.25), SIMDE_FLOAT16_C(371.5), SIMDE_FLOAT16_C(-334.0), SIMDE_FLOAT16_C(-307.5), SIMDE_FLOAT16_C(41.625), SIMDE_FLOAT16_C(488.5), SIMDE_FLOAT16_C(-181.75) }, - { SIMDE_FLOAT16_C(-1.536133), SIMDE_FLOAT16_C(-18.765625), SIMDE_FLOAT16_C(23.218750), SIMDE_FLOAT16_C(-20.8750), SIMDE_FLOAT16_C(-19.21875), SIMDE_FLOAT16_C(2.601562), SIMDE_FLOAT16_C(30.531250), SIMDE_FLOAT16_C(-11.359375) }, - { SIMDE_FLOAT16_C(-0.192017), SIMDE_FLOAT16_C(-2.345703), SIMDE_FLOAT16_C(2.902344), SIMDE_FLOAT16_C(-2.609375), SIMDE_FLOAT16_C(-2.402344), SIMDE_FLOAT16_C(0.325195), SIMDE_FLOAT16_C(3.816406), SIMDE_FLOAT16_C(-1.419922) }, - { SIMDE_FLOAT16_C(0.0), SIMDE_FLOAT16_C(-0.293213), SIMDE_FLOAT16_C(0.362793), SIMDE_FLOAT16_C(-0.326172), SIMDE_FLOAT16_C(-0.300293), SIMDE_FLOAT16_C(0.040649), SIMDE_FLOAT16_C(0.477051), SIMDE_FLOAT16_C(-0.177490) } }, + { SIMDE_FLOAT16_VALUE(-196.625), SIMDE_FLOAT16_VALUE(-2402.6), SIMDE_FLOAT16_VALUE(2971.9), SIMDE_FLOAT16_VALUE(-2672.4), SIMDE_FLOAT16_VALUE(-2460.0), SIMDE_FLOAT16_VALUE(333.0), SIMDE_FLOAT16_VALUE(3908.5), SIMDE_FLOAT16_VALUE(-1454.0) }, + { SIMDE_FLOAT16_VALUE(-24.578125), SIMDE_FLOAT16_VALUE(-300.25), SIMDE_FLOAT16_VALUE(371.5), SIMDE_FLOAT16_VALUE(-334.0), SIMDE_FLOAT16_VALUE(-307.5), SIMDE_FLOAT16_VALUE(41.625), SIMDE_FLOAT16_VALUE(488.5), SIMDE_FLOAT16_VALUE(-181.75) }, + { SIMDE_FLOAT16_VALUE(-1.536133), SIMDE_FLOAT16_VALUE(-18.765625), SIMDE_FLOAT16_VALUE(23.218750), SIMDE_FLOAT16_VALUE(-20.8750), SIMDE_FLOAT16_VALUE(-19.21875), SIMDE_FLOAT16_VALUE(2.601562), SIMDE_FLOAT16_VALUE(30.531250), SIMDE_FLOAT16_VALUE(-11.359375) }, + { SIMDE_FLOAT16_VALUE(-0.192017), SIMDE_FLOAT16_VALUE(-2.345703), SIMDE_FLOAT16_VALUE(2.902344), SIMDE_FLOAT16_VALUE(-2.609375), SIMDE_FLOAT16_VALUE(-2.402344), SIMDE_FLOAT16_VALUE(0.325195), SIMDE_FLOAT16_VALUE(3.816406), SIMDE_FLOAT16_VALUE(-1.419922) }, + { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(-0.293213), SIMDE_FLOAT16_VALUE(0.362793), SIMDE_FLOAT16_VALUE(-0.326172), SIMDE_FLOAT16_VALUE(-0.300293), SIMDE_FLOAT16_VALUE(0.040649), SIMDE_FLOAT16_VALUE(0.477051), SIMDE_FLOAT16_VALUE(-0.177490) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/cvtn.c b/test/arm/neon/cvtn.c index 3852e9970..7a94cd469 100644 --- a/test/arm/neon/cvtn.c +++ b/test/arm/neon/cvtn.c @@ -95,35 +95,33 @@ test_simde_vcvtnq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; int64_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - #if !defined(SIMDE_RISCV_RVV_NATIVE) - { SIMDE_MATH_NANF, + { SIMDE_NANHF, INT64_C( 0) }, - #endif - { HEDLEY_STATIC_CAST(simde_float16, INT64_MAX), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MAX)), INT64_MAX }, - { HEDLEY_STATIC_CAST(simde_float16, INT64_MIN), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MIN)), INT64_MIN }, #endif - { SIMDE_FLOAT16_C( 12.44), + { SIMDE_FLOAT16_VALUE( 12.44), INT64_C( 12) }, - { SIMDE_FLOAT16_C( 30.46), + { SIMDE_FLOAT16_VALUE( 30.46), INT64_C( 30) }, - { SIMDE_FLOAT16_C( 16.51), + { SIMDE_FLOAT16_VALUE( 16.51), INT64_C( 17) }, - { SIMDE_FLOAT16_C( 74.89), + { SIMDE_FLOAT16_VALUE( 74.89), INT64_C( 75) }, - { SIMDE_FLOAT16_C( -24.05), + { SIMDE_FLOAT16_VALUE( -24.05), -INT64_C( 24) }, - { SIMDE_FLOAT16_C( -7.75), + { SIMDE_FLOAT16_VALUE( -7.75), -INT64_C( 8) }, - { SIMDE_FLOAT16_C( -57.31), + { SIMDE_FLOAT16_VALUE( -57.31), -INT64_C( 57) }, - { SIMDE_FLOAT16_C( -14.65), + { SIMDE_FLOAT16_VALUE( -14.65), -INT64_C( 15) }, }; @@ -138,31 +136,33 @@ test_simde_vcvtnh_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; int32_t r; } test_vec[] = { - { SIMDE_MATH_NANF, - INT32_C( 0) }, - { HEDLEY_STATIC_CAST(simde_float16, INT32_MAX), - INT32_MAX }, - { HEDLEY_STATIC_CAST(simde_float16, INT32_MIN), - INT32_MIN }, - { SIMDE_FLOAT16_C( 12.44), + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)), + INT32_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 12.44), INT32_C( 12) }, - { SIMDE_FLOAT16_C( 30.46), + { SIMDE_FLOAT16_VALUE( 30.46), INT32_C( 30) }, - { SIMDE_FLOAT16_C( 16.51), + { SIMDE_FLOAT16_VALUE( 16.51), INT32_C( 17) }, - { SIMDE_FLOAT16_C( 74.89), + { SIMDE_FLOAT16_VALUE( 74.89), INT32_C( 75) }, - { SIMDE_FLOAT16_C( -24.05), + { SIMDE_FLOAT16_VALUE( -24.05), -INT32_C( 24) }, - { SIMDE_FLOAT16_C( -7.75), + { SIMDE_FLOAT16_VALUE( -7.75), -INT32_C( 8) }, - { SIMDE_FLOAT16_C( -57.31), + { SIMDE_FLOAT16_VALUE( -57.31), -INT32_C( 57) }, - { SIMDE_FLOAT16_C( -14.65), + { SIMDE_FLOAT16_VALUE( -14.65), -INT32_C( 15) }, }; @@ -177,35 +177,33 @@ test_simde_vcvtnh_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; int16_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - #if !defined(SIMDE_RISCV_RVV_NATIVE) - { SIMDE_MATH_NANF, + { SIMDE_NANHF, INT16_C( 0) }, - #endif - { HEDLEY_STATIC_CAST(simde_float16, INT16_MAX) + SIMDE_FLOAT16_C(100.0), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX)), INT16_MAX }, - { HEDLEY_STATIC_CAST(simde_float16, INT16_MIN) + SIMDE_FLOAT16_C(-100.0), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN)), INT16_MIN }, #endif - { SIMDE_FLOAT16_C( 12.44), + { SIMDE_FLOAT16_VALUE( 12.44), INT16_C( 12) }, - { SIMDE_FLOAT16_C( 30.46), + { SIMDE_FLOAT16_VALUE( 30.46), INT16_C( 30) }, - { SIMDE_FLOAT16_C( 16.51), + { SIMDE_FLOAT16_VALUE( 16.51), INT16_C( 17) }, - { SIMDE_FLOAT16_C( 74.89), + { SIMDE_FLOAT16_VALUE( 74.89), INT16_C( 75) }, - { SIMDE_FLOAT16_C( -24.05), + { SIMDE_FLOAT16_VALUE( -24.05), -INT16_C( 24) }, - { SIMDE_FLOAT16_C( -7.75), + { SIMDE_FLOAT16_VALUE( -7.75), -INT16_C( 8) }, - { SIMDE_FLOAT16_C( -57.31), + { SIMDE_FLOAT16_VALUE( -57.31), -INT16_C( 57) }, - { SIMDE_FLOAT16_C( -14.65), + { SIMDE_FLOAT16_VALUE( -14.65), -INT16_C( 15) }, }; @@ -261,35 +259,33 @@ test_simde_vcvtns_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; uint64_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - #if !defined(SIMDE_RISCV_RVV_NATIVE) - { SIMDE_MATH_NANF, - INT64_C( 0) }, - #endif - { HEDLEY_STATIC_CAST(simde_float16, UINT64_MAX), - UINT64_MAX }, - { SIMDE_FLOAT16_C(-1000.0), - UINT64_C( 0) }, + { SIMDE_NANHF, + UINT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX)), + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT64_C( 0) }, #endif - { SIMDE_FLOAT16_C( 192.44), + { SIMDE_FLOAT16_VALUE( 192.44), UINT64_C( 192) }, - { SIMDE_FLOAT16_C( 350.46), + { SIMDE_FLOAT16_VALUE( 350.46), UINT64_C( 350) }, - { SIMDE_FLOAT16_C( 163.51), + { SIMDE_FLOAT16_VALUE( 163.51), UINT64_C( 164) }, - { SIMDE_FLOAT16_C( 974.89), + { SIMDE_FLOAT16_VALUE( 974.89), UINT64_C( 975) }, - { SIMDE_FLOAT16_C( 254.05), + { SIMDE_FLOAT16_VALUE( 254.05), UINT64_C( 254) }, - { SIMDE_FLOAT16_C( 707.75), + { SIMDE_FLOAT16_VALUE( 707.75), UINT64_C( 708) }, - { SIMDE_FLOAT16_C( 57.31), + { SIMDE_FLOAT16_VALUE( 57.31), UINT64_C( 57) }, - { SIMDE_FLOAT16_C( 144.65), + { SIMDE_FLOAT16_VALUE( 144.65), UINT64_C( 145) }, }; @@ -304,35 +300,33 @@ test_simde_vcvtnh_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; uint32_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - #if !defined(SIMDE_RISCV_RVV_NATIVE) - { SIMDE_MATH_NANF, + { SIMDE_NANHF, INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT32_C( 0) }, #endif - { HEDLEY_STATIC_CAST(simde_float16, UINT32_MAX), - UINT32_MAX }, - { SIMDE_FLOAT16_C(-1000.0), - UINT32_C( 0) }, - #endif - { SIMDE_FLOAT16_C( 192.44), + { SIMDE_FLOAT16_VALUE( 192.44), UINT32_C( 192) }, - { SIMDE_FLOAT16_C( 350.46), + { SIMDE_FLOAT16_VALUE( 350.46), UINT32_C( 350) }, - { SIMDE_FLOAT16_C( 163.51), + { SIMDE_FLOAT16_VALUE( 163.51), UINT32_C( 164) }, - { SIMDE_FLOAT16_C( 974.89), + { SIMDE_FLOAT16_VALUE( 974.89), UINT32_C( 975) }, - { SIMDE_FLOAT16_C( 254.05), + { SIMDE_FLOAT16_VALUE( 254.05), UINT32_C( 254) }, - { SIMDE_FLOAT16_C( 707.75), + { SIMDE_FLOAT16_VALUE( 707.75), UINT32_C( 708) }, - { SIMDE_FLOAT16_C( 57.31), + { SIMDE_FLOAT16_VALUE( 57.31), UINT32_C( 57) }, - { SIMDE_FLOAT16_C( 144.65), + { SIMDE_FLOAT16_VALUE( 144.65), UINT32_C( 145) }, }; @@ -347,35 +341,33 @@ test_simde_vcvtnh_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vcvtnh_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; uint16_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - #if !defined(SIMDE_RISCV_RVV_NATIVE) - { SIMDE_MATH_NANF, + { SIMDE_NANHF, INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -192.44), + UINT16_C( 0) }, #endif - { HEDLEY_STATIC_CAST(simde_float16, UINT16_MAX), - UINT16_MAX }, - { SIMDE_FLOAT16_C(-1000.0), - UINT16_C( 0) }, - #endif - { SIMDE_FLOAT16_C( 192.44), + { SIMDE_FLOAT16_VALUE( 192.44), UINT16_C( 192) }, - { SIMDE_FLOAT16_C( 350.46), + { SIMDE_FLOAT16_VALUE( 350.46), UINT16_C( 350) }, - { SIMDE_FLOAT16_C( 163.51), + { SIMDE_FLOAT16_VALUE( 163.51), UINT16_C( 164) }, - { SIMDE_FLOAT16_C( 974.89), + { SIMDE_FLOAT16_VALUE( 974.89), UINT16_C( 975) }, - { SIMDE_FLOAT16_C( 254.05), + { SIMDE_FLOAT16_VALUE( 254.05), UINT16_C( 254) }, - { SIMDE_FLOAT16_C( 707.75), + { SIMDE_FLOAT16_VALUE( 707.75), UINT16_C( 708) }, - { SIMDE_FLOAT16_C( 57.31), + { SIMDE_FLOAT16_VALUE( 57.31), UINT16_C( 57) }, - { SIMDE_FLOAT16_C( 144.65), + { SIMDE_FLOAT16_VALUE( 144.65), UINT16_C( 145) }, }; @@ -642,15 +634,15 @@ test_simde_vcvtnq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16 a[8]; int16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_C(8.5), SIMDE_FLOAT16_C(-2.4), SIMDE_FLOAT16_C(6.5), SIMDE_FLOAT16_C(3.3), SIMDE_FLOAT16_C(-8.9), SIMDE_FLOAT16_C(4.2), SIMDE_FLOAT16_C(11.5), SIMDE_FLOAT16_C(10.2) }, + { { SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(3.3), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(10.2) }, { INT16_C(8), -INT16_C(2), INT16_C(6), INT16_C(3), -INT16_C(9), INT16_C(4), INT16_C(12), INT16_C(10) } }, - { { SIMDE_FLOAT16_C(13.1), SIMDE_FLOAT16_C(-12.9), SIMDE_FLOAT16_C(-3.1), SIMDE_FLOAT16_C(8.9), SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(7.2), SIMDE_FLOAT16_C(-3.1), SIMDE_FLOAT16_C(-12.3) }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(8.9), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-12.3) }, { INT16_C(13), -INT16_C(13), -INT16_C(3), INT16_C(9), INT16_C(1), INT16_C(7), -INT16_C(3), -INT16_C(12) } }, - { { SIMDE_FLOAT16_C(-12.0), SIMDE_FLOAT16_C(9.1), SIMDE_FLOAT16_C(8.7), SIMDE_FLOAT16_C(-2.3), SIMDE_FLOAT16_C(-1.0), SIMDE_FLOAT16_C(1.9), SIMDE_FLOAT16_C(-4.8), SIMDE_FLOAT16_C(3.1) }, + { { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-2.3), SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(3.1) }, { -INT16_C(12), INT16_C(9), INT16_C(9), -INT16_C(2), -INT16_C(1), INT16_C(2), -INT16_C(5), INT16_C(3) } }, - { { SIMDE_FLOAT16_C(8.2), SIMDE_FLOAT16_C(-14.2), SIMDE_FLOAT16_C(9.1), SIMDE_FLOAT16_C(1.3), SIMDE_FLOAT16_C(-12.8), SIMDE_FLOAT16_C(13.9), SIMDE_FLOAT16_C(4.3), SIMDE_FLOAT16_C(-14.8) }, + { { SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(1.3), SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(13.9), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(-14.8) }, { INT16_C(8), -INT16_C(14), INT16_C(9), INT16_C(1), -INT16_C(13), INT16_C(14), INT16_C(4), -INT16_C(15) } }, - { { SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(14.7), SIMDE_FLOAT16_C(9.9), SIMDE_FLOAT16_C(3.4), SIMDE_FLOAT16_C(5.3), SIMDE_FLOAT16_C(9.0), SIMDE_FLOAT16_C(2.0), SIMDE_FLOAT16_C(0.4) }, + { { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.7), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(3.4), SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(0.4) }, { -INT16_C(15), INT16_C(15), INT16_C(10), INT16_C(3), INT16_C(5), INT16_C(9), INT16_C(2), INT16_C(0) } }, }; @@ -669,25 +661,25 @@ test_simde_vcvtn_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16 a[4]; int16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(8.5), SIMDE_FLOAT16_C(-2.4), SIMDE_FLOAT16_C(6.5), SIMDE_FLOAT16_C(3.3) }, + { { SIMDE_FLOAT16_VALUE(8.5), SIMDE_FLOAT16_VALUE(-2.4), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(3.3) }, { INT16_C(8), -INT16_C(2), INT16_C(6), INT16_C(3) } }, - { { SIMDE_FLOAT16_C(-8.9), SIMDE_FLOAT16_C(4.2), SIMDE_FLOAT16_C(11.5), SIMDE_FLOAT16_C(10.2) }, + { { SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(4.2), SIMDE_FLOAT16_VALUE(11.5), SIMDE_FLOAT16_VALUE(10.2) }, { -INT16_C(9), INT16_C(4), INT16_C(12), INT16_C(10) } }, - { { SIMDE_FLOAT16_C(13.1), SIMDE_FLOAT16_C(-12.9), SIMDE_FLOAT16_C(-3.1), SIMDE_FLOAT16_C(8.9) }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(8.9) }, { INT16_C(13), -INT16_C(13), -INT16_C(3), INT16_C(9) } }, - { { SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(7.2), SIMDE_FLOAT16_C(-3.1), SIMDE_FLOAT16_C(-12.3) }, + { { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(-3.1), SIMDE_FLOAT16_VALUE(-12.3) }, { INT16_C(1), INT16_C(7), -INT16_C(3), -INT16_C(12) } }, - { { SIMDE_FLOAT16_C(-12.0), SIMDE_FLOAT16_C(9.1), SIMDE_FLOAT16_C(8.7), SIMDE_FLOAT16_C(-2.3) }, + { { SIMDE_FLOAT16_VALUE(-12.0), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-2.3) }, { -INT16_C(12), INT16_C(9), INT16_C(9), -INT16_C(2) } }, - { { SIMDE_FLOAT16_C(-1.0), SIMDE_FLOAT16_C(1.9), SIMDE_FLOAT16_C(-4.8), SIMDE_FLOAT16_C(3.1) }, + { { SIMDE_FLOAT16_VALUE(-1.0), SIMDE_FLOAT16_VALUE(1.9), SIMDE_FLOAT16_VALUE(-4.8), SIMDE_FLOAT16_VALUE(3.1) }, { -INT16_C(1), INT16_C(2), -INT16_C(5), INT16_C(3) } }, - { { SIMDE_FLOAT16_C(8.2), SIMDE_FLOAT16_C(-14.2), SIMDE_FLOAT16_C(9.1), SIMDE_FLOAT16_C(1.3) }, + { { SIMDE_FLOAT16_VALUE(8.2), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(9.1), SIMDE_FLOAT16_VALUE(1.3) }, { INT16_C(8), -INT16_C(14), INT16_C(9), INT16_C(1) } }, - { { SIMDE_FLOAT16_C(-12.8), SIMDE_FLOAT16_C(13.9), SIMDE_FLOAT16_C(4.3), SIMDE_FLOAT16_C(-14.8) }, + { { SIMDE_FLOAT16_VALUE(-12.8), SIMDE_FLOAT16_VALUE(13.9), SIMDE_FLOAT16_VALUE(4.3), SIMDE_FLOAT16_VALUE(-14.8) }, { -INT16_C(13), INT16_C(14), INT16_C(4), -INT16_C(15) } }, - { { SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(14.7), SIMDE_FLOAT16_C(9.9), SIMDE_FLOAT16_C(3.4) }, + { { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.7), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(3.4) }, { -INT16_C(15), INT16_C(15), INT16_C(10), INT16_C(3) } }, - { { SIMDE_FLOAT16_C(5.3), SIMDE_FLOAT16_C(9.0), SIMDE_FLOAT16_C(2.0), SIMDE_FLOAT16_C(0.4) }, + { { SIMDE_FLOAT16_VALUE(5.3), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(0.4) }, { INT16_C(5), INT16_C(9), INT16_C(2), INT16_C(0) } }, }; @@ -706,15 +698,15 @@ test_simde_vcvtnq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16 a[8]; uint16_t r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_C(4.9), SIMDE_FLOAT16_C(0.5), SIMDE_FLOAT16_C(12.8), SIMDE_FLOAT16_C(11.3), SIMDE_FLOAT16_C(13.5), SIMDE_FLOAT16_C(10.0), SIMDE_FLOAT16_C(13.6), SIMDE_FLOAT16_C(11.1) }, + { { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(13.6), SIMDE_FLOAT16_VALUE(11.1) }, { UINT16_C(5), UINT16_C(0), UINT16_C(13), UINT16_C(11), UINT16_C(14), UINT16_C(10), UINT16_C(14), UINT16_C(11) } }, - { { SIMDE_FLOAT16_C(9.3), SIMDE_FLOAT16_C(8.3), SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(0.0), SIMDE_FLOAT16_C(4.6), SIMDE_FLOAT16_C(11.9), SIMDE_FLOAT16_C(5.0) }, + { { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(4.6), SIMDE_FLOAT16_VALUE(11.9), SIMDE_FLOAT16_VALUE(5.0) }, { UINT16_C(9), UINT16_C(8), UINT16_C(0), UINT16_C(0), UINT16_C(0), UINT16_C(5), UINT16_C(12), UINT16_C(5) } }, - { { SIMDE_FLOAT16_C(1.5), SIMDE_FLOAT16_C(2.7), SIMDE_FLOAT16_C(10.9), SIMDE_FLOAT16_C(4.1), SIMDE_FLOAT16_C(9.0), SIMDE_FLOAT16_C(6.3), SIMDE_FLOAT16_C(13.4), SIMDE_FLOAT16_C(13.1) }, + { { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(2.7), SIMDE_FLOAT16_VALUE(10.9), SIMDE_FLOAT16_VALUE(4.1), SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(13.4), SIMDE_FLOAT16_VALUE(13.1) }, { UINT16_C(2), UINT16_C(3), UINT16_C(11), UINT16_C(4), UINT16_C(9), UINT16_C(6), UINT16_C(13), UINT16_C(13) } }, - { { SIMDE_FLOAT16_C(13.1), SIMDE_FLOAT16_C(6.2), SIMDE_FLOAT16_C(6.5), SIMDE_FLOAT16_C(1.0), SIMDE_FLOAT16_C(2.5), SIMDE_FLOAT16_C(2.5), SIMDE_FLOAT16_C(6.8), SIMDE_FLOAT16_C(1.9) }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(1.9) }, { UINT16_C(13), UINT16_C(6), UINT16_C(6), UINT16_C(1), UINT16_C(2), UINT16_C(2), UINT16_C(7), UINT16_C(2) } }, - { { SIMDE_FLOAT16_C(5.1), SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(4.8), SIMDE_FLOAT16_C(7.2), SIMDE_FLOAT16_C(10.0), SIMDE_FLOAT16_C(9.9), SIMDE_FLOAT16_C(14.6), SIMDE_FLOAT16_C(4.0) }, + { { SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(7.2), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(4.0) }, { UINT16_C(5), UINT16_C(0), UINT16_C(5), UINT16_C(7), UINT16_C(10), UINT16_C(10), UINT16_C(15), UINT16_C(4) } }, }; @@ -733,25 +725,25 @@ test_simde_vcvtn_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16 a[4]; uint16_t r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(4.9), SIMDE_FLOAT16_C(0.5), SIMDE_FLOAT16_C(12.8), SIMDE_FLOAT16_C(11.3) }, + { { SIMDE_FLOAT16_VALUE(4.9), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(11.3) }, { UINT16_C(5), UINT16_C(0), UINT16_C(13), UINT16_C(11) } }, - { { SIMDE_FLOAT16_C(13.5), SIMDE_FLOAT16_C(10.0), SIMDE_FLOAT16_C(13.6), SIMDE_FLOAT16_C(11.1) }, + { { SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(13.6), SIMDE_FLOAT16_VALUE(11.1) }, { UINT16_C(14), UINT16_C(10), UINT16_C(14), UINT16_C(11) } }, - { { SIMDE_FLOAT16_C(9.3), SIMDE_FLOAT16_C(8.3), SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(0.2) }, + { { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(8.3), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(0.2) }, { UINT16_C(9), UINT16_C(8), UINT16_C(0), UINT16_C(0) } }, - { { SIMDE_FLOAT16_C(0.0), SIMDE_FLOAT16_C(4.6), SIMDE_FLOAT16_C(11.9), SIMDE_FLOAT16_C(5.0) }, + { { SIMDE_FLOAT16_VALUE(0.0), SIMDE_FLOAT16_VALUE(4.6), SIMDE_FLOAT16_VALUE(11.9), SIMDE_FLOAT16_VALUE(5.0) }, { UINT16_C(0), UINT16_C(5), UINT16_C(12), UINT16_C(5) } }, - { { SIMDE_FLOAT16_C(1.5), SIMDE_FLOAT16_C(2.7), SIMDE_FLOAT16_C(10.9), SIMDE_FLOAT16_C(4.1) }, + { { SIMDE_FLOAT16_VALUE(1.5), SIMDE_FLOAT16_VALUE(2.7), SIMDE_FLOAT16_VALUE(10.9), SIMDE_FLOAT16_VALUE(4.1) }, { UINT16_C(2), UINT16_C(3), UINT16_C(11), UINT16_C(4) } }, - { { SIMDE_FLOAT16_C(9.0), SIMDE_FLOAT16_C(6.3), SIMDE_FLOAT16_C(13.4), SIMDE_FLOAT16_C(13.1) }, + { { SIMDE_FLOAT16_VALUE(9.0), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(13.4), SIMDE_FLOAT16_VALUE(13.1) }, { UINT16_C(9), UINT16_C(6), UINT16_C(13), UINT16_C(13) } }, - { { SIMDE_FLOAT16_C(13.1), SIMDE_FLOAT16_C(6.2), SIMDE_FLOAT16_C(6.5), SIMDE_FLOAT16_C(1.0) }, + { { SIMDE_FLOAT16_VALUE(13.1), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(6.5), SIMDE_FLOAT16_VALUE(1.0) }, { UINT16_C(13), UINT16_C(6), UINT16_C(6), UINT16_C(1) } }, - { { SIMDE_FLOAT16_C(2.5), SIMDE_FLOAT16_C(2.5), SIMDE_FLOAT16_C(6.8), SIMDE_FLOAT16_C(1.9) }, + { { SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(2.5), SIMDE_FLOAT16_VALUE(6.8), SIMDE_FLOAT16_VALUE(1.9) }, { UINT16_C(2), UINT16_C(2), UINT16_C(7), UINT16_C(2) } }, - { { SIMDE_FLOAT16_C(5.1), SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(4.8), SIMDE_FLOAT16_C(7.2) }, + { { SIMDE_FLOAT16_VALUE(5.1), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(4.8), SIMDE_FLOAT16_VALUE(7.2) }, { UINT16_C(5), UINT16_C(0), UINT16_C(5), UINT16_C(7) } }, - { { SIMDE_FLOAT16_C(10.0), SIMDE_FLOAT16_C(9.9), SIMDE_FLOAT16_C(14.6), SIMDE_FLOAT16_C(4.0) }, + { { SIMDE_FLOAT16_VALUE(10.0), SIMDE_FLOAT16_VALUE(9.9), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(4.0) }, { UINT16_C(10), UINT16_C(10), UINT16_C(15), UINT16_C(4) } }, }; diff --git a/test/arm/neon/dup_lane.c b/test/arm/neon/dup_lane.c index 9bd405c07..214a91d57 100644 --- a/test/arm/neon/dup_lane.c +++ b/test/arm/neon/dup_lane.c @@ -14,36 +14,36 @@ test_simde_vdup_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { int lane; simde_float16 r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(-7.6), SIMDE_FLOAT16_C(11.4), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(-0.1) }, + { { SIMDE_FLOAT16_VALUE(-7.6), SIMDE_FLOAT16_VALUE(11.4), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(-0.1) }, INT8_C(2), - { SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7) } }, - { { SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(3.7), SIMDE_FLOAT16_C(-12.5) }, + { SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7) } }, + { { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(3.7), SIMDE_FLOAT16_VALUE(-12.5) }, INT8_C(0), - { SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(1.4) } }, - { { SIMDE_FLOAT16_C(13.3), SIMDE_FLOAT16_C(-11.6), SIMDE_FLOAT16_C(4.4), SIMDE_FLOAT16_C(-1.3) }, + { SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(1.4) } }, + { { SIMDE_FLOAT16_VALUE(13.3), SIMDE_FLOAT16_VALUE(-11.6), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(-1.3) }, INT8_C(2), - { SIMDE_FLOAT16_C(4.4), SIMDE_FLOAT16_C(4.4), SIMDE_FLOAT16_C(4.4), SIMDE_FLOAT16_C(4.4) } }, - { { SIMDE_FLOAT16_C(-11.5), SIMDE_FLOAT16_C(-10.6), SIMDE_FLOAT16_C(13.5), SIMDE_FLOAT16_C(-5.6) }, + { SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4), SIMDE_FLOAT16_VALUE(4.4) } }, + { { SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-10.6), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(-5.6) }, INT8_C(0), - { SIMDE_FLOAT16_C(-11.5), SIMDE_FLOAT16_C(-11.5), SIMDE_FLOAT16_C(-11.5), SIMDE_FLOAT16_C(-11.5) } }, - { { SIMDE_FLOAT16_C(-12.6), SIMDE_FLOAT16_C(-12.4), SIMDE_FLOAT16_C(11.3), SIMDE_FLOAT16_C(11.8) }, + { SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5), SIMDE_FLOAT16_VALUE(-11.5) } }, + { { SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.4), SIMDE_FLOAT16_VALUE(11.3), SIMDE_FLOAT16_VALUE(11.8) }, INT8_C(0), - { SIMDE_FLOAT16_C(-12.6), SIMDE_FLOAT16_C(-12.6), SIMDE_FLOAT16_C(-12.6), SIMDE_FLOAT16_C(-12.6) } }, - { { SIMDE_FLOAT16_C(1.7), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-11.1), SIMDE_FLOAT16_C(-11.3) }, + { SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6), SIMDE_FLOAT16_VALUE(-12.6) } }, + { { SIMDE_FLOAT16_VALUE(1.7), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-11.1), SIMDE_FLOAT16_VALUE(-11.3) }, INT8_C(1), - { SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-12.1) } }, - { { SIMDE_FLOAT16_C(-2.9), SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(11.1), SIMDE_FLOAT16_C(8.8) }, + { SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-12.1) } }, + { { SIMDE_FLOAT16_VALUE(-2.9), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(11.1), SIMDE_FLOAT16_VALUE(8.8) }, INT8_C(1), - { SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(0.3), SIMDE_FLOAT16_C(0.3) } }, - { { SIMDE_FLOAT16_C(8.0), SIMDE_FLOAT16_C(-13.7), SIMDE_FLOAT16_C(-3.6), SIMDE_FLOAT16_C(5.8) }, + { SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.3), SIMDE_FLOAT16_VALUE(0.3) } }, + { { SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(5.8) }, INT8_C(0), - { SIMDE_FLOAT16_C(8.0), SIMDE_FLOAT16_C(8.0), SIMDE_FLOAT16_C(8.0), SIMDE_FLOAT16_C(8.0) } }, - { { SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(10.1), SIMDE_FLOAT16_C(1.2), SIMDE_FLOAT16_C(-11.7) }, + { SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0), SIMDE_FLOAT16_VALUE(8.0) } }, + { { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(10.1), SIMDE_FLOAT16_VALUE(1.2), SIMDE_FLOAT16_VALUE(-11.7) }, INT8_C(0), - { SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(1.1) } }, - { { SIMDE_FLOAT16_C(-2.0), SIMDE_FLOAT16_C(-14.0), SIMDE_FLOAT16_C(-13.7), SIMDE_FLOAT16_C(-10.2) }, + { SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(1.1) } }, + { { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-14.0), SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-10.2) }, INT8_C(0), - { SIMDE_FLOAT16_C(-2.0), SIMDE_FLOAT16_C(-2.0), SIMDE_FLOAT16_C(-2.0), SIMDE_FLOAT16_C(-2.0) } }, + { SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0), SIMDE_FLOAT16_VALUE(-2.0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -51,7 +51,7 @@ test_simde_vdup_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16x4_t r; vec = simde_vld1_f16(test_vec[i].vec); - SIMDE_CONSTIFY_4_(simde_vdup_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_C(0.0))), test_vec[i].lane, vec); + SIMDE_CONSTIFY_4_(simde_vdup_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } @@ -66,30 +66,30 @@ test_simde_vdupq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { int lane; simde_float16 r[8]; } test_vec[] = { - { { SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(6.4), SIMDE_FLOAT16_C(-7.4), SIMDE_FLOAT16_C(0.5) }, + { { SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(6.4), SIMDE_FLOAT16_VALUE(-7.4), SIMDE_FLOAT16_VALUE(0.5) }, INT8_C(0), - { SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4), - SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4), SIMDE_FLOAT16_C(-3.4) } }, - { { SIMDE_FLOAT16_C(8.1), SIMDE_FLOAT16_C(-0.0), SIMDE_FLOAT16_C(6.2), SIMDE_FLOAT16_C(-5.5) }, + { SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), + SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4), SIMDE_FLOAT16_VALUE(-3.4) } }, + { { SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(6.2), SIMDE_FLOAT16_VALUE(-5.5) }, INT8_C(3), - { SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5), - SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5), SIMDE_FLOAT16_C(-5.5) } }, - { { SIMDE_FLOAT16_C(-9.6), SIMDE_FLOAT16_C(6.7), SIMDE_FLOAT16_C(1.4), SIMDE_FLOAT16_C(10.7) }, + { SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), + SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5), SIMDE_FLOAT16_VALUE(-5.5) } }, + { { SIMDE_FLOAT16_VALUE(-9.6), SIMDE_FLOAT16_VALUE(6.7), SIMDE_FLOAT16_VALUE(1.4), SIMDE_FLOAT16_VALUE(10.7) }, INT8_C(3), - { SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), - SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7), SIMDE_FLOAT16_C(10.7) } }, - { { SIMDE_FLOAT16_C(2.9), SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(14.8), SIMDE_FLOAT16_C(-2.2) }, + { SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), + SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7), SIMDE_FLOAT16_VALUE(10.7) } }, + { { SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(14.8), SIMDE_FLOAT16_VALUE(-2.2) }, INT8_C(1), - { SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0), - SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0), SIMDE_FLOAT16_C(-15.0) } }, - { { SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(0.2), SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(-14.7) }, + { SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), + SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0), SIMDE_FLOAT16_VALUE(-15.0) } }, + { { SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(0.2), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(-14.7) }, INT8_C(2), - { SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5), - SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5), SIMDE_FLOAT16_C(7.5) } }, - { { SIMDE_FLOAT16_C(-6.0), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(-8.9), SIMDE_FLOAT16_C(-11.3) }, + { SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), + SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5), SIMDE_FLOAT16_VALUE(7.5) } }, + { { SIMDE_FLOAT16_VALUE(-6.0), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-8.9), SIMDE_FLOAT16_VALUE(-11.3) }, INT8_C(3), - { SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3), - SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3), SIMDE_FLOAT16_C(-11.3) } }, + { SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), + SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3), SIMDE_FLOAT16_VALUE(-11.3) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -97,7 +97,7 @@ test_simde_vdupq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t r; vec = simde_vld1_f16(test_vec[i].vec); - SIMDE_CONSTIFY_4_(simde_vdupq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_C(0.0))), test_vec[i].lane, vec); + SIMDE_CONSTIFY_4_(simde_vdupq_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, vec); simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } diff --git a/test/arm/neon/ext.c b/test/arm/neon/ext.c index 2085c7102..6ea302d91 100644 --- a/test/arm/neon/ext.c +++ b/test/arm/neon/ext.c @@ -8,52 +8,52 @@ SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vext_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; int n; simde_float16 r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(-13.7), SIMDE_FLOAT16_C(-11.7), SIMDE_FLOAT16_C(-14.2), SIMDE_FLOAT16_C(-6.9) }, - { SIMDE_FLOAT16_C(-1.5), SIMDE_FLOAT16_C(-6.5), SIMDE_FLOAT16_C(-12.9), SIMDE_FLOAT16_C(7.6) }, + { { SIMDE_FLOAT16_VALUE(-13.7), SIMDE_FLOAT16_VALUE(-11.7), SIMDE_FLOAT16_VALUE(-14.2), SIMDE_FLOAT16_VALUE(-6.9) }, + { SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(7.6) }, INT32_C(3), - { SIMDE_FLOAT16_C(-6.9), SIMDE_FLOAT16_C(-1.5), SIMDE_FLOAT16_C(-6.5), SIMDE_FLOAT16_C(-12.9) } }, - { { SIMDE_FLOAT16_C(-11.0), SIMDE_FLOAT16_C(0.5), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(-9.1) }, - { SIMDE_FLOAT16_C(8.7), SIMDE_FLOAT16_C(-5.3), SIMDE_FLOAT16_C(5.4), SIMDE_FLOAT16_C(5.8) }, + { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(-12.9) } }, + { { SIMDE_FLOAT16_VALUE(-11.0), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-9.1) }, + { SIMDE_FLOAT16_VALUE(8.7), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(5.4), SIMDE_FLOAT16_VALUE(5.8) }, INT32_C(0), - { SIMDE_FLOAT16_C(-11.0), SIMDE_FLOAT16_C(0.5), SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(-9.1) } }, - { { SIMDE_FLOAT16_C(-6.9), SIMDE_FLOAT16_C(-14.8), SIMDE_FLOAT16_C(13.5), SIMDE_FLOAT16_C(-10.6) }, - { SIMDE_FLOAT16_C(11.1), SIMDE_FLOAT16_C(12.9), SIMDE_FLOAT16_C(2.9), SIMDE_FLOAT16_C(-7.8) }, + { SIMDE_FLOAT16_VALUE(-11.0), SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-9.1) } }, + { { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-14.8), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(-10.6) }, + { SIMDE_FLOAT16_VALUE(11.1), SIMDE_FLOAT16_VALUE(12.9), SIMDE_FLOAT16_VALUE(2.9), SIMDE_FLOAT16_VALUE(-7.8) }, INT32_C(0), - { SIMDE_FLOAT16_C(-6.9), SIMDE_FLOAT16_C(-14.8), SIMDE_FLOAT16_C(13.5), SIMDE_FLOAT16_C(-10.6) } }, - { { SIMDE_FLOAT16_C(0.5), SIMDE_FLOAT16_C(12.8), SIMDE_FLOAT16_C(6.3), SIMDE_FLOAT16_C(11.0) }, - { SIMDE_FLOAT16_C(11.2), SIMDE_FLOAT16_C(8.6), SIMDE_FLOAT16_C(8.6), SIMDE_FLOAT16_C(-0.5) }, + { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-14.8), SIMDE_FLOAT16_VALUE(13.5), SIMDE_FLOAT16_VALUE(-10.6) } }, + { { SIMDE_FLOAT16_VALUE(0.5), SIMDE_FLOAT16_VALUE(12.8), SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(11.0) }, + { SIMDE_FLOAT16_VALUE(11.2), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(8.6), SIMDE_FLOAT16_VALUE(-0.5) }, INT32_C(2), - { SIMDE_FLOAT16_C(6.3), SIMDE_FLOAT16_C(11.0), SIMDE_FLOAT16_C(11.2), SIMDE_FLOAT16_C(8.6) } }, - { { SIMDE_FLOAT16_C(-6.9), SIMDE_FLOAT16_C(-7.3), SIMDE_FLOAT16_C(10.8), SIMDE_FLOAT16_C(-5.3) }, - { SIMDE_FLOAT16_C(10.8), SIMDE_FLOAT16_C(-6.5), SIMDE_FLOAT16_C(11.4), SIMDE_FLOAT16_C(2.7) }, + { SIMDE_FLOAT16_VALUE(6.3), SIMDE_FLOAT16_VALUE(11.0), SIMDE_FLOAT16_VALUE(11.2), SIMDE_FLOAT16_VALUE(8.6) } }, + { { SIMDE_FLOAT16_VALUE(-6.9), SIMDE_FLOAT16_VALUE(-7.3), SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-5.3) }, + { SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-6.5), SIMDE_FLOAT16_VALUE(11.4), SIMDE_FLOAT16_VALUE(2.7) }, INT32_C(2), - { SIMDE_FLOAT16_C(10.8), SIMDE_FLOAT16_C(-5.3), SIMDE_FLOAT16_C(10.8), SIMDE_FLOAT16_C(-6.5) } }, - { { SIMDE_FLOAT16_C(-8.3), SIMDE_FLOAT16_C(-14.3), SIMDE_FLOAT16_C(14.6), SIMDE_FLOAT16_C(-0.7) }, - { SIMDE_FLOAT16_C(-1.5), SIMDE_FLOAT16_C(14.2), SIMDE_FLOAT16_C(-14.9), SIMDE_FLOAT16_C(8.1) }, + { SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-5.3), SIMDE_FLOAT16_VALUE(10.8), SIMDE_FLOAT16_VALUE(-6.5) } }, + { { SIMDE_FLOAT16_VALUE(-8.3), SIMDE_FLOAT16_VALUE(-14.3), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(-0.7) }, + { SIMDE_FLOAT16_VALUE(-1.5), SIMDE_FLOAT16_VALUE(14.2), SIMDE_FLOAT16_VALUE(-14.9), SIMDE_FLOAT16_VALUE(8.1) }, INT32_C(1), - { SIMDE_FLOAT16_C(-14.3), SIMDE_FLOAT16_C(14.6), SIMDE_FLOAT16_C(-0.7), SIMDE_FLOAT16_C(-1.5) } }, - { { SIMDE_FLOAT16_C(15.0), SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(11.7), SIMDE_FLOAT16_C(-13.5) }, - { SIMDE_FLOAT16_C(-0.2), SIMDE_FLOAT16_C(-4.4), SIMDE_FLOAT16_C(-9.7), SIMDE_FLOAT16_C(6.9) }, + { SIMDE_FLOAT16_VALUE(-14.3), SIMDE_FLOAT16_VALUE(14.6), SIMDE_FLOAT16_VALUE(-0.7), SIMDE_FLOAT16_VALUE(-1.5) } }, + { { SIMDE_FLOAT16_VALUE(15.0), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(-13.5) }, + { SIMDE_FLOAT16_VALUE(-0.2), SIMDE_FLOAT16_VALUE(-4.4), SIMDE_FLOAT16_VALUE(-9.7), SIMDE_FLOAT16_VALUE(6.9) }, INT32_C(0), - { SIMDE_FLOAT16_C(15.0), SIMDE_FLOAT16_C(1.1), SIMDE_FLOAT16_C(11.7), SIMDE_FLOAT16_C(-13.5) } }, - { { SIMDE_FLOAT16_C(1.0), SIMDE_FLOAT16_C(9.5), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-3.7) }, - { SIMDE_FLOAT16_C(8.1), SIMDE_FLOAT16_C(2.0), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(8.3) }, + { SIMDE_FLOAT16_VALUE(15.0), SIMDE_FLOAT16_VALUE(1.1), SIMDE_FLOAT16_VALUE(11.7), SIMDE_FLOAT16_VALUE(-13.5) } }, + { { SIMDE_FLOAT16_VALUE(1.0), SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-3.7) }, + { SIMDE_FLOAT16_VALUE(8.1), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(8.3) }, INT32_C(1), - { SIMDE_FLOAT16_C(9.5), SIMDE_FLOAT16_C(-12.1), SIMDE_FLOAT16_C(-3.7), SIMDE_FLOAT16_C(8.1) } }, - { { SIMDE_FLOAT16_C(-0.4), SIMDE_FLOAT16_C(-12.9), SIMDE_FLOAT16_C(-7.7), SIMDE_FLOAT16_C(0.4) }, - { SIMDE_FLOAT16_C(-3.6), SIMDE_FLOAT16_C(-0.0), SIMDE_FLOAT16_C(-0.1), SIMDE_FLOAT16_C(2.6) }, + { SIMDE_FLOAT16_VALUE(9.5), SIMDE_FLOAT16_VALUE(-12.1), SIMDE_FLOAT16_VALUE(-3.7), SIMDE_FLOAT16_VALUE(8.1) } }, + { { SIMDE_FLOAT16_VALUE(-0.4), SIMDE_FLOAT16_VALUE(-12.9), SIMDE_FLOAT16_VALUE(-7.7), SIMDE_FLOAT16_VALUE(0.4) }, + { SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(-0.1), SIMDE_FLOAT16_VALUE(2.6) }, INT32_C(3), - { SIMDE_FLOAT16_C(0.4), SIMDE_FLOAT16_C(-3.6), SIMDE_FLOAT16_C(-0.0), SIMDE_FLOAT16_C(-0.1) } }, - { { SIMDE_FLOAT16_C(-8.6), SIMDE_FLOAT16_C(9.3), SIMDE_FLOAT16_C(2.0), SIMDE_FLOAT16_C(-9.2) }, - { SIMDE_FLOAT16_C(2.4), SIMDE_FLOAT16_C(14.3), SIMDE_FLOAT16_C(-3.8), SIMDE_FLOAT16_C(-12.6) }, + { SIMDE_FLOAT16_VALUE(0.4), SIMDE_FLOAT16_VALUE(-3.6), SIMDE_FLOAT16_VALUE(-0.0), SIMDE_FLOAT16_VALUE(-0.1) } }, + { { SIMDE_FLOAT16_VALUE(-8.6), SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-9.2) }, + { SIMDE_FLOAT16_VALUE(2.4), SIMDE_FLOAT16_VALUE(14.3), SIMDE_FLOAT16_VALUE(-3.8), SIMDE_FLOAT16_VALUE(-12.6) }, INT32_C(1), - { SIMDE_FLOAT16_C(9.3), SIMDE_FLOAT16_C(2.0), SIMDE_FLOAT16_C(-9.2), SIMDE_FLOAT16_C(2.4) } }, + { SIMDE_FLOAT16_VALUE(9.3), SIMDE_FLOAT16_VALUE(2.0), SIMDE_FLOAT16_VALUE(-9.2), SIMDE_FLOAT16_VALUE(2.4) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/fma.c b/test/arm/neon/fma.c index 878063a8f..9513e5c9e 100644 --- a/test/arm/neon/fma.c +++ b/test/arm/neon/fma.c @@ -75,52 +75,52 @@ test_simde_vfma_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmah_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; simde_float16 b; simde_float16 c; simde_float16 r; } test_vec[] = { - { SIMDE_FLOAT16_C(31.59), - SIMDE_FLOAT16_C(4.50), - SIMDE_FLOAT16_C(-3.60), - SIMDE_FLOAT16_C(15.39) }, - { SIMDE_FLOAT16_C(18.30), - SIMDE_FLOAT16_C(1.00), - SIMDE_FLOAT16_C(0.40), - SIMDE_FLOAT16_C(18.70) }, - { SIMDE_FLOAT16_C(-27.00), - SIMDE_FLOAT16_C(-4.80), - SIMDE_FLOAT16_C(1.70), - SIMDE_FLOAT16_C(-35.16) }, - { SIMDE_FLOAT16_C(-24.30), - SIMDE_FLOAT16_C(1.90), - SIMDE_FLOAT16_C(-2.40), - SIMDE_FLOAT16_C(-28.86) }, - { SIMDE_FLOAT16_C(32.19), - SIMDE_FLOAT16_C(1.10), - SIMDE_FLOAT16_C(0.80), - SIMDE_FLOAT16_C(33.06) }, - { SIMDE_FLOAT16_C(-16.59), - SIMDE_FLOAT16_C(-0.20), - SIMDE_FLOAT16_C(1.60), - SIMDE_FLOAT16_C(-16.91) }, - { SIMDE_FLOAT16_C(34.59), - SIMDE_FLOAT16_C(1.60), - SIMDE_FLOAT16_C(-0.40), - SIMDE_FLOAT16_C(33.97) }, - { SIMDE_FLOAT16_C(18.09), - SIMDE_FLOAT16_C(5.00), - SIMDE_FLOAT16_C(2.80), - SIMDE_FLOAT16_C(32.09) }, - { SIMDE_FLOAT16_C(21.30), - SIMDE_FLOAT16_C(-2.50), - SIMDE_FLOAT16_C(-0.10), - SIMDE_FLOAT16_C(21.55) }, - { SIMDE_FLOAT16_C(22.20), - SIMDE_FLOAT16_C(-3.10), - SIMDE_FLOAT16_C(-4.60), - SIMDE_FLOAT16_C(36.47) }, + { SIMDE_FLOAT16_VALUE(31.59), + SIMDE_FLOAT16_VALUE(4.50), + SIMDE_FLOAT16_VALUE(-3.60), + SIMDE_FLOAT16_VALUE(15.39) }, + { SIMDE_FLOAT16_VALUE(18.30), + SIMDE_FLOAT16_VALUE(1.00), + SIMDE_FLOAT16_VALUE(0.40), + SIMDE_FLOAT16_VALUE(18.70) }, + { SIMDE_FLOAT16_VALUE(-27.00), + SIMDE_FLOAT16_VALUE(-4.80), + SIMDE_FLOAT16_VALUE(1.70), + SIMDE_FLOAT16_VALUE(-35.16) }, + { SIMDE_FLOAT16_VALUE(-24.30), + SIMDE_FLOAT16_VALUE(1.90), + SIMDE_FLOAT16_VALUE(-2.40), + SIMDE_FLOAT16_VALUE(-28.86) }, + { SIMDE_FLOAT16_VALUE(32.19), + SIMDE_FLOAT16_VALUE(1.10), + SIMDE_FLOAT16_VALUE(0.80), + SIMDE_FLOAT16_VALUE(33.06) }, + { SIMDE_FLOAT16_VALUE(-16.59), + SIMDE_FLOAT16_VALUE(-0.20), + SIMDE_FLOAT16_VALUE(1.60), + SIMDE_FLOAT16_VALUE(-16.91) }, + { SIMDE_FLOAT16_VALUE(34.59), + SIMDE_FLOAT16_VALUE(1.60), + SIMDE_FLOAT16_VALUE(-0.40), + SIMDE_FLOAT16_VALUE(33.97) }, + { SIMDE_FLOAT16_VALUE(18.09), + SIMDE_FLOAT16_VALUE(5.00), + SIMDE_FLOAT16_VALUE(2.80), + SIMDE_FLOAT16_VALUE(32.09) }, + { SIMDE_FLOAT16_VALUE(21.30), + SIMDE_FLOAT16_VALUE(-2.50), + SIMDE_FLOAT16_VALUE(-0.10), + SIMDE_FLOAT16_VALUE(21.55) }, + { SIMDE_FLOAT16_VALUE(22.20), + SIMDE_FLOAT16_VALUE(-3.10), + SIMDE_FLOAT16_VALUE(-4.60), + SIMDE_FLOAT16_VALUE(36.47) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -133,52 +133,52 @@ test_simde_vfmah_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfma_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; simde_float16 c[4]; simde_float16 r[4]; } test_vec[] = { - { { SIMDE_FLOAT16_C(31.59), SIMDE_FLOAT16_C(4.80), SIMDE_FLOAT16_C(4.80), SIMDE_FLOAT16_C(25.00) }, - { SIMDE_FLOAT16_C(4.50), SIMDE_FLOAT16_C(1.00), SIMDE_FLOAT16_C(3.00), SIMDE_FLOAT16_C(-2.20) } , - { SIMDE_FLOAT16_C(-3.60), SIMDE_FLOAT16_C(-1.80), SIMDE_FLOAT16_C(4.00), SIMDE_FLOAT16_C(-4.70) } , - { SIMDE_FLOAT16_C(15.39), SIMDE_FLOAT16_C(3.00), SIMDE_FLOAT16_C(16.80), SIMDE_FLOAT16_C(35.34) } }, - { { SIMDE_FLOAT16_C(18.30), SIMDE_FLOAT16_C(-39.00), SIMDE_FLOAT16_C(-47.69), SIMDE_FLOAT16_C(-43.50) } , - { SIMDE_FLOAT16_C(1.00), SIMDE_FLOAT16_C(-2.00), SIMDE_FLOAT16_C(-2.30), SIMDE_FLOAT16_C(-2.80) }, - { SIMDE_FLOAT16_C(0.40), SIMDE_FLOAT16_C(4.90), SIMDE_FLOAT16_C(-0.30), SIMDE_FLOAT16_C(-4.50) }, - { SIMDE_FLOAT16_C(18.70), SIMDE_FLOAT16_C(-48.81), SIMDE_FLOAT16_C(-47.00), SIMDE_FLOAT16_C(-30.91) } }, - { { SIMDE_FLOAT16_C(-27.00), SIMDE_FLOAT16_C(-35.59), SIMDE_FLOAT16_C(-37.59), SIMDE_FLOAT16_C(31.50) }, - { SIMDE_FLOAT16_C(-4.80), SIMDE_FLOAT16_C(-4.20), SIMDE_FLOAT16_C(3.10), SIMDE_FLOAT16_C(-3.10) }, - { SIMDE_FLOAT16_C(1.70), SIMDE_FLOAT16_C(-4.70), SIMDE_FLOAT16_C(2.20), SIMDE_FLOAT16_C(3.50) }, - { SIMDE_FLOAT16_C(-35.16), SIMDE_FLOAT16_C(-15.86), SIMDE_FLOAT16_C(-30.78), SIMDE_FLOAT16_C(20.66) } }, - { { SIMDE_FLOAT16_C(-24.30), SIMDE_FLOAT16_C(-8.70), SIMDE_FLOAT16_C(-2.70), SIMDE_FLOAT16_C(-7.60) }, - { SIMDE_FLOAT16_C(1.90), SIMDE_FLOAT16_C(0.40), SIMDE_FLOAT16_C(1.80), SIMDE_FLOAT16_C(1.60) }, - { SIMDE_FLOAT16_C(-2.40), SIMDE_FLOAT16_C(-0.10), SIMDE_FLOAT16_C(-1.10), SIMDE_FLOAT16_C(-2.70) }, - { SIMDE_FLOAT16_C(-28.86), SIMDE_FLOAT16_C(-8.74), SIMDE_FLOAT16_C(-4.68), SIMDE_FLOAT16_C(-11.92) } }, - { { SIMDE_FLOAT16_C(32.19), SIMDE_FLOAT16_C(0.70), SIMDE_FLOAT16_C(10.60), SIMDE_FLOAT16_C(47.50) }, - { SIMDE_FLOAT16_C(1.10), SIMDE_FLOAT16_C(2.90), SIMDE_FLOAT16_C(-2.80), SIMDE_FLOAT16_C(5.00) }, - { SIMDE_FLOAT16_C(0.80), SIMDE_FLOAT16_C(-4.30), SIMDE_FLOAT16_C(-3.30), SIMDE_FLOAT16_C(1.40) }, - { SIMDE_FLOAT16_C(33.06), SIMDE_FLOAT16_C(-11.77), SIMDE_FLOAT16_C(19.84), SIMDE_FLOAT16_C(54.50) } }, - { { SIMDE_FLOAT16_C(-16.59), SIMDE_FLOAT16_C(4.30), SIMDE_FLOAT16_C(45.59), SIMDE_FLOAT16_C(-28.00) }, - { SIMDE_FLOAT16_C(-0.20), SIMDE_FLOAT16_C(2.60), SIMDE_FLOAT16_C(4.90), SIMDE_FLOAT16_C(4.80) }, - { SIMDE_FLOAT16_C(1.60), SIMDE_FLOAT16_C(-4.90), SIMDE_FLOAT16_C(-4.10), SIMDE_FLOAT16_C(-2.80) }, - { SIMDE_FLOAT16_C(-16.91), SIMDE_FLOAT16_C(-8.44), SIMDE_FLOAT16_C(25.50), SIMDE_FLOAT16_C(-41.44) } }, - { { SIMDE_FLOAT16_C(34.59), SIMDE_FLOAT16_C(47.00), SIMDE_FLOAT16_C(-23.30), SIMDE_FLOAT16_C(-23.59) }, - { SIMDE_FLOAT16_C(1.60), SIMDE_FLOAT16_C(-4.90), SIMDE_FLOAT16_C(3.30), SIMDE_FLOAT16_C(-2.60) }, - { SIMDE_FLOAT16_C(-0.40), SIMDE_FLOAT16_C(-4.50), SIMDE_FLOAT16_C(-4.10), SIMDE_FLOAT16_C(0.30) }, - { SIMDE_FLOAT16_C(33.97), SIMDE_FLOAT16_C(69.06), SIMDE_FLOAT16_C(-36.84), SIMDE_FLOAT16_C(-24.38) } }, - { { SIMDE_FLOAT16_C(18.09), SIMDE_FLOAT16_C(2.60), SIMDE_FLOAT16_C(-42.81), SIMDE_FLOAT16_C(34.59) }, - { SIMDE_FLOAT16_C(5.00), SIMDE_FLOAT16_C(-3.50), SIMDE_FLOAT16_C(1.90), SIMDE_FLOAT16_C(-0.20) }, - { SIMDE_FLOAT16_C(2.80), SIMDE_FLOAT16_C(-4.00), SIMDE_FLOAT16_C(-3.50), SIMDE_FLOAT16_C(3.00) }, - { SIMDE_FLOAT16_C(32.09), SIMDE_FLOAT16_C(16.59), SIMDE_FLOAT16_C(-49.47), SIMDE_FLOAT16_C(34.00) } }, - { { SIMDE_FLOAT16_C(21.30), SIMDE_FLOAT16_C(31.59), SIMDE_FLOAT16_C(7.20), SIMDE_FLOAT16_C(45.00) }, - { SIMDE_FLOAT16_C(-2.50), SIMDE_FLOAT16_C(3.90), SIMDE_FLOAT16_C(-1.70), SIMDE_FLOAT16_C(-0.50) }, - { SIMDE_FLOAT16_C(-0.10), SIMDE_FLOAT16_C(-4.20), SIMDE_FLOAT16_C(-4.10), SIMDE_FLOAT16_C(3.80) }, - { SIMDE_FLOAT16_C(21.55), SIMDE_FLOAT16_C(15.22), SIMDE_FLOAT16_C(14.17), SIMDE_FLOAT16_C(43.09) } }, - { { SIMDE_FLOAT16_C(22.20), SIMDE_FLOAT16_C(-18.09), SIMDE_FLOAT16_C(12.90), SIMDE_FLOAT16_C(-21.20) }, - { SIMDE_FLOAT16_C(-3.10), SIMDE_FLOAT16_C(3.20), SIMDE_FLOAT16_C(0.30), SIMDE_FLOAT16_C(-4.20) }, - { SIMDE_FLOAT16_C(-4.60), SIMDE_FLOAT16_C(-3.00), SIMDE_FLOAT16_C(4.90), SIMDE_FLOAT16_C(3.70) }, - { SIMDE_FLOAT16_C(36.47), SIMDE_FLOAT16_C(-27.69), SIMDE_FLOAT16_C(14.37), SIMDE_FLOAT16_C(-36.75) } }, + { { SIMDE_FLOAT16_VALUE(31.59), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(4.80), SIMDE_FLOAT16_VALUE(25.00) }, + { SIMDE_FLOAT16_VALUE(4.50), SIMDE_FLOAT16_VALUE(1.00), SIMDE_FLOAT16_VALUE(3.00), SIMDE_FLOAT16_VALUE(-2.20) } , + { SIMDE_FLOAT16_VALUE(-3.60), SIMDE_FLOAT16_VALUE(-1.80), SIMDE_FLOAT16_VALUE(4.00), SIMDE_FLOAT16_VALUE(-4.70) } , + { SIMDE_FLOAT16_VALUE(15.39), SIMDE_FLOAT16_VALUE(3.00), SIMDE_FLOAT16_VALUE(16.80), SIMDE_FLOAT16_VALUE(35.34) } }, + { { SIMDE_FLOAT16_VALUE(18.30), SIMDE_FLOAT16_VALUE(-39.00), SIMDE_FLOAT16_VALUE(-47.69), SIMDE_FLOAT16_VALUE(-43.50) } , + { SIMDE_FLOAT16_VALUE(1.00), SIMDE_FLOAT16_VALUE(-2.00), SIMDE_FLOAT16_VALUE(-2.30), SIMDE_FLOAT16_VALUE(-2.80) }, + { SIMDE_FLOAT16_VALUE(0.40), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(-0.30), SIMDE_FLOAT16_VALUE(-4.50) }, + { SIMDE_FLOAT16_VALUE(18.70), SIMDE_FLOAT16_VALUE(-48.81), SIMDE_FLOAT16_VALUE(-47.00), SIMDE_FLOAT16_VALUE(-30.91) } }, + { { SIMDE_FLOAT16_VALUE(-27.00), SIMDE_FLOAT16_VALUE(-35.59), SIMDE_FLOAT16_VALUE(-37.59), SIMDE_FLOAT16_VALUE(31.50) }, + { SIMDE_FLOAT16_VALUE(-4.80), SIMDE_FLOAT16_VALUE(-4.20), SIMDE_FLOAT16_VALUE(3.10), SIMDE_FLOAT16_VALUE(-3.10) }, + { SIMDE_FLOAT16_VALUE(1.70), SIMDE_FLOAT16_VALUE(-4.70), SIMDE_FLOAT16_VALUE(2.20), SIMDE_FLOAT16_VALUE(3.50) }, + { SIMDE_FLOAT16_VALUE(-35.16), SIMDE_FLOAT16_VALUE(-15.86), SIMDE_FLOAT16_VALUE(-30.78), SIMDE_FLOAT16_VALUE(20.66) } }, + { { SIMDE_FLOAT16_VALUE(-24.30), SIMDE_FLOAT16_VALUE(-8.70), SIMDE_FLOAT16_VALUE(-2.70), SIMDE_FLOAT16_VALUE(-7.60) }, + { SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(0.40), SIMDE_FLOAT16_VALUE(1.80), SIMDE_FLOAT16_VALUE(1.60) }, + { SIMDE_FLOAT16_VALUE(-2.40), SIMDE_FLOAT16_VALUE(-0.10), SIMDE_FLOAT16_VALUE(-1.10), SIMDE_FLOAT16_VALUE(-2.70) }, + { SIMDE_FLOAT16_VALUE(-28.86), SIMDE_FLOAT16_VALUE(-8.74), SIMDE_FLOAT16_VALUE(-4.68), SIMDE_FLOAT16_VALUE(-11.92) } }, + { { SIMDE_FLOAT16_VALUE(32.19), SIMDE_FLOAT16_VALUE(0.70), SIMDE_FLOAT16_VALUE(10.60), SIMDE_FLOAT16_VALUE(47.50) }, + { SIMDE_FLOAT16_VALUE(1.10), SIMDE_FLOAT16_VALUE(2.90), SIMDE_FLOAT16_VALUE(-2.80), SIMDE_FLOAT16_VALUE(5.00) }, + { SIMDE_FLOAT16_VALUE(0.80), SIMDE_FLOAT16_VALUE(-4.30), SIMDE_FLOAT16_VALUE(-3.30), SIMDE_FLOAT16_VALUE(1.40) }, + { SIMDE_FLOAT16_VALUE(33.06), SIMDE_FLOAT16_VALUE(-11.77), SIMDE_FLOAT16_VALUE(19.84), SIMDE_FLOAT16_VALUE(54.50) } }, + { { SIMDE_FLOAT16_VALUE(-16.59), SIMDE_FLOAT16_VALUE(4.30), SIMDE_FLOAT16_VALUE(45.59), SIMDE_FLOAT16_VALUE(-28.00) }, + { SIMDE_FLOAT16_VALUE(-0.20), SIMDE_FLOAT16_VALUE(2.60), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(4.80) }, + { SIMDE_FLOAT16_VALUE(1.60), SIMDE_FLOAT16_VALUE(-4.90), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(-2.80) }, + { SIMDE_FLOAT16_VALUE(-16.91), SIMDE_FLOAT16_VALUE(-8.44), SIMDE_FLOAT16_VALUE(25.50), SIMDE_FLOAT16_VALUE(-41.44) } }, + { { SIMDE_FLOAT16_VALUE(34.59), SIMDE_FLOAT16_VALUE(47.00), SIMDE_FLOAT16_VALUE(-23.30), SIMDE_FLOAT16_VALUE(-23.59) }, + { SIMDE_FLOAT16_VALUE(1.60), SIMDE_FLOAT16_VALUE(-4.90), SIMDE_FLOAT16_VALUE(3.30), SIMDE_FLOAT16_VALUE(-2.60) }, + { SIMDE_FLOAT16_VALUE(-0.40), SIMDE_FLOAT16_VALUE(-4.50), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(0.30) }, + { SIMDE_FLOAT16_VALUE(33.97), SIMDE_FLOAT16_VALUE(69.06), SIMDE_FLOAT16_VALUE(-36.84), SIMDE_FLOAT16_VALUE(-24.38) } }, + { { SIMDE_FLOAT16_VALUE(18.09), SIMDE_FLOAT16_VALUE(2.60), SIMDE_FLOAT16_VALUE(-42.81), SIMDE_FLOAT16_VALUE(34.59) }, + { SIMDE_FLOAT16_VALUE(5.00), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(1.90), SIMDE_FLOAT16_VALUE(-0.20) }, + { SIMDE_FLOAT16_VALUE(2.80), SIMDE_FLOAT16_VALUE(-4.00), SIMDE_FLOAT16_VALUE(-3.50), SIMDE_FLOAT16_VALUE(3.00) }, + { SIMDE_FLOAT16_VALUE(32.09), SIMDE_FLOAT16_VALUE(16.59), SIMDE_FLOAT16_VALUE(-49.47), SIMDE_FLOAT16_VALUE(34.00) } }, + { { SIMDE_FLOAT16_VALUE(21.30), SIMDE_FLOAT16_VALUE(31.59), SIMDE_FLOAT16_VALUE(7.20), SIMDE_FLOAT16_VALUE(45.00) }, + { SIMDE_FLOAT16_VALUE(-2.50), SIMDE_FLOAT16_VALUE(3.90), SIMDE_FLOAT16_VALUE(-1.70), SIMDE_FLOAT16_VALUE(-0.50) }, + { SIMDE_FLOAT16_VALUE(-0.10), SIMDE_FLOAT16_VALUE(-4.20), SIMDE_FLOAT16_VALUE(-4.10), SIMDE_FLOAT16_VALUE(3.80) }, + { SIMDE_FLOAT16_VALUE(21.55), SIMDE_FLOAT16_VALUE(15.22), SIMDE_FLOAT16_VALUE(14.17), SIMDE_FLOAT16_VALUE(43.09) } }, + { { SIMDE_FLOAT16_VALUE(22.20), SIMDE_FLOAT16_VALUE(-18.09), SIMDE_FLOAT16_VALUE(12.90), SIMDE_FLOAT16_VALUE(-21.20) }, + { SIMDE_FLOAT16_VALUE(-3.10), SIMDE_FLOAT16_VALUE(3.20), SIMDE_FLOAT16_VALUE(0.30), SIMDE_FLOAT16_VALUE(-4.20) }, + { SIMDE_FLOAT16_VALUE(-4.60), SIMDE_FLOAT16_VALUE(-3.00), SIMDE_FLOAT16_VALUE(4.90), SIMDE_FLOAT16_VALUE(3.70) }, + { SIMDE_FLOAT16_VALUE(36.47), SIMDE_FLOAT16_VALUE(-27.69), SIMDE_FLOAT16_VALUE(14.37), SIMDE_FLOAT16_VALUE(-36.75) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/fma_lane.c b/test/arm/neon/fma_lane.c index be01605c6..96de9de83 100644 --- a/test/arm/neon/fma_lane.c +++ b/test/arm/neon/fma_lane.c @@ -151,7 +151,7 @@ test_simde_vfmad_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmah_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b; simde_float16_t v[4]; @@ -214,7 +214,7 @@ test_simde_vfmah_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmah_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b; simde_float16_t v[8]; @@ -502,7 +502,7 @@ test_simde_vfmas_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfma_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t v[4]; @@ -716,7 +716,7 @@ test_simde_vfma_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfma_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t v[8]; @@ -1008,7 +1008,7 @@ test_simde_vfma_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmaq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t v[4]; @@ -1254,7 +1254,7 @@ test_simde_vfmaq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmaq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t v[8]; diff --git a/test/arm/neon/fma_n.c b/test/arm/neon/fma_n.c index d4d5f91a8..8d45763bc 100644 --- a/test/arm/neon/fma_n.c +++ b/test/arm/neon/fma_n.c @@ -5,7 +5,7 @@ static int test_simde_vfma_n_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; simde_float16 c; @@ -67,7 +67,7 @@ test_simde_vfma_n_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmaq_n_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[8]; simde_float16 b[8]; simde_float16 c; diff --git a/test/arm/neon/fms.c b/test/arm/neon/fms.c index 84e004a78..28df9a475 100644 --- a/test/arm/neon/fms.c +++ b/test/arm/neon/fms.c @@ -47,7 +47,7 @@ test_simde_vfms_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsh_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[1]; simde_float16 b[1]; simde_float16 c[1]; @@ -85,7 +85,7 @@ test_simde_vfmsh_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfms_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; simde_float16 c[4]; @@ -126,7 +126,7 @@ test_simde_vfms_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[8]; simde_float16 b[8]; simde_float16 c[8]; diff --git a/test/arm/neon/fms_lane.c b/test/arm/neon/fms_lane.c index 96a3c55b1..576e63cbb 100644 --- a/test/arm/neon/fms_lane.c +++ b/test/arm/neon/fms_lane.c @@ -90,7 +90,7 @@ test_simde_vfmsd_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsh_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b; simde_float16_t v[4]; @@ -153,7 +153,7 @@ test_simde_vfmsh_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsh_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b; simde_float16_t v[8]; @@ -365,7 +365,7 @@ test_simde_vfmss_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfms_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t v[4]; @@ -518,7 +518,7 @@ test_simde_vfms_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfms_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t b[4]; simde_float16_t v[8]; @@ -734,7 +734,7 @@ test_simde_vfms_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsq_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t v[4]; @@ -919,7 +919,7 @@ test_simde_vfmsq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[8]; simde_float16_t b[8]; simde_float16_t v[8]; diff --git a/test/arm/neon/fms_n.c b/test/arm/neon/fms_n.c index 602326523..27c5301a0 100644 --- a/test/arm/neon/fms_n.c +++ b/test/arm/neon/fms_n.c @@ -5,7 +5,7 @@ static int test_simde_vfms_n_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; simde_float16 c; @@ -43,7 +43,7 @@ test_simde_vfms_n_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vfmsq_n_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[8]; simde_float16 b[8]; simde_float16 c; diff --git a/test/arm/neon/ld1_dup.c b/test/arm/neon/ld1_dup.c index fcd18427e..a39ef608a 100644 --- a/test/arm/neon/ld1_dup.c +++ b/test/arm/neon/ld1_dup.c @@ -7,7 +7,7 @@ static int test_simde_vld1_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t unused; simde_float16_t r[4]; diff --git a/test/arm/neon/ld1_lane.c b/test/arm/neon/ld1_lane.c index f38bb625d..16573ce09 100644 --- a/test/arm/neon/ld1_lane.c +++ b/test/arm/neon/ld1_lane.c @@ -385,7 +385,7 @@ test_simde_vld1_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld1_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[4]; simde_float16_t buf; simde_float16_t r[4]; @@ -1028,7 +1028,7 @@ test_simde_vld1q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld1q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[8]; simde_float16_t buf; simde_float16_t r[8]; diff --git a/test/arm/neon/ld1_x2.c b/test/arm/neon/ld1_x2.c index f5dc9a421..0f2761373 100644 --- a/test/arm/neon/ld1_x2.c +++ b/test/arm/neon/ld1_x2.c @@ -8,7 +8,7 @@ static int test_simde_vld1_f16_x2 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[8]; simde_float16 expected[2][4]; } test_vec[] = { diff --git a/test/arm/neon/ld1_x3.c b/test/arm/neon/ld1_x3.c index b6a1fc72d..092ce0445 100644 --- a/test/arm/neon/ld1_x3.c +++ b/test/arm/neon/ld1_x3.c @@ -8,7 +8,7 @@ static int test_simde_vld1_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[12]; simde_float16 expected[3][4]; } test_vec[] = { diff --git a/test/arm/neon/ld1_x4.c b/test/arm/neon/ld1_x4.c index 4e2504173..38cb63611 100644 --- a/test/arm/neon/ld1_x4.c +++ b/test/arm/neon/ld1_x4.c @@ -8,7 +8,7 @@ static int test_simde_vld1_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[16]; simde_float16 expected[4][4]; } test_vec[] = { diff --git a/test/arm/neon/ld1q_x2.c b/test/arm/neon/ld1q_x2.c index 9a9071cd9..9d2bb155c 100644 --- a/test/arm/neon/ld1q_x2.c +++ b/test/arm/neon/ld1q_x2.c @@ -8,7 +8,7 @@ static int test_simde_vld1q_f16_x2 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[16]; simde_float16 expected[2][8]; } test_vec[] = { diff --git a/test/arm/neon/ld1q_x3.c b/test/arm/neon/ld1q_x3.c index e5a6868b9..12d5c1ce7 100644 --- a/test/arm/neon/ld1q_x3.c +++ b/test/arm/neon/ld1q_x3.c @@ -8,7 +8,7 @@ static int test_simde_vld1q_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[24]; simde_float16 expected[3][8]; } test_vec[] = { diff --git a/test/arm/neon/ld1q_x4.c b/test/arm/neon/ld1q_x4.c index 5b5270af3..15becdbf1 100644 --- a/test/arm/neon/ld1q_x4.c +++ b/test/arm/neon/ld1q_x4.c @@ -8,7 +8,7 @@ static int test_simde_vld1q_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 buf[32]; simde_float16 expected[4][8]; } test_vec[] = { diff --git a/test/arm/neon/ld2.c b/test/arm/neon/ld2.c index 0b192ea31..6d5b0b15f 100644 --- a/test/arm/neon/ld2.c +++ b/test/arm/neon/ld2.c @@ -603,7 +603,7 @@ test_simde_vld2_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld2_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[8]; simde_float16_t r[2][4]; } test_vec[] = { diff --git a/test/arm/neon/ld2_dup.c b/test/arm/neon/ld2_dup.c index 8b8b3e8b7..6f2f91706 100644 --- a/test/arm/neon/ld2_dup.c +++ b/test/arm/neon/ld2_dup.c @@ -7,7 +7,7 @@ static int test_simde_vld2_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[2]; simde_float16_t unused[2]; simde_float16_t r[2][4]; @@ -622,7 +622,7 @@ test_simde_vld2_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld2q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[2]; simde_float16_t unused[2]; simde_float16_t r[2][8]; diff --git a/test/arm/neon/ld2_lane.c b/test/arm/neon/ld2_lane.c index acb1d1510..99ed72fac 100644 --- a/test/arm/neon/ld2_lane.c +++ b/test/arm/neon/ld2_lane.c @@ -430,7 +430,7 @@ test_simde_vld2_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld2_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[2][4]; simde_float16_t buf[2]; simde_float16_t r[2][4]; @@ -1241,7 +1241,7 @@ test_simde_vld2q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld2q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[2][8]; simde_float16_t buf[2]; simde_float16_t r[2][8]; diff --git a/test/arm/neon/ld3.c b/test/arm/neon/ld3.c index 2985cfda7..d6287174a 100644 --- a/test/arm/neon/ld3.c +++ b/test/arm/neon/ld3.c @@ -60,7 +60,7 @@ test_simde_vld3_s8 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld3_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[12]; simde_float16_t r[3][4]; } test_vec[] = { diff --git a/test/arm/neon/ld3_dup.c b/test/arm/neon/ld3_dup.c index 1ff583211..359bcf050 100644 --- a/test/arm/neon/ld3_dup.c +++ b/test/arm/neon/ld3_dup.c @@ -7,7 +7,7 @@ static int test_simde_vld3_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[3]; simde_float16_t unused[3]; simde_float16_t r[3][4]; @@ -737,7 +737,7 @@ test_simde_vld3_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld3q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[3]; simde_float16_t unused[3]; simde_float16_t r[3][8]; diff --git a/test/arm/neon/ld3_lane.c b/test/arm/neon/ld3_lane.c index 7b87a8b56..0fe1c7df2 100644 --- a/test/arm/neon/ld3_lane.c +++ b/test/arm/neon/ld3_lane.c @@ -545,7 +545,7 @@ test_simde_vld3_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld3_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[3][4]; simde_float16_t buf[3]; simde_float16_t r[3][4]; @@ -1516,7 +1516,7 @@ test_simde_vld3q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld3q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[3][8]; simde_float16_t buf[3]; simde_float16_t r[3][8]; diff --git a/test/arm/neon/ld4.c b/test/arm/neon/ld4.c index 9c27a9d97..b96995627 100644 --- a/test/arm/neon/ld4.c +++ b/test/arm/neon/ld4.c @@ -8,7 +8,7 @@ static int test_simde_vld4_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[16]; simde_float16_t r[4][4]; } test_vec[] = { diff --git a/test/arm/neon/ld4_dup.c b/test/arm/neon/ld4_dup.c index 4d7ffc1b6..44009354f 100644 --- a/test/arm/neon/ld4_dup.c +++ b/test/arm/neon/ld4_dup.c @@ -7,7 +7,7 @@ static int test_simde_vld4_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t unused[4]; simde_float16_t r[4][4]; @@ -863,7 +863,7 @@ test_simde_vld4_dup_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld4q_dup_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a[4]; simde_float16_t unused[4]; simde_float16_t r[4][8]; diff --git a/test/arm/neon/ld4_lane.c b/test/arm/neon/ld4_lane.c index 2262c5012..d4d85e264 100644 --- a/test/arm/neon/ld4_lane.c +++ b/test/arm/neon/ld4_lane.c @@ -702,7 +702,7 @@ test_simde_vld4_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld4_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[4][4]; simde_float16_t buf[4]; simde_float16_t r[4][4]; @@ -1781,7 +1781,7 @@ test_simde_vld4q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vld4q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t src[4][8]; simde_float16_t buf[4]; simde_float16_t r[4][8]; diff --git a/test/arm/neon/mla_lane.c b/test/arm/neon/mla_lane.c index 24940bab5..12b8bc1c5 100644 --- a/test/arm/neon/mla_lane.c +++ b/test/arm/neon/mla_lane.c @@ -64,7 +64,13 @@ test_simde_vmla_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1_f32(test_vec[i].a); b = simde_vld1_f32(test_vec[i].b); v = simde_vld1q_f32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmla_laneq_f32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_f32(0); break; + } simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } @@ -225,7 +231,17 @@ test_simde_vmla_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1_s16(test_vec[i].a); b = simde_vld1_s16(test_vec[i].b); v = simde_vld1q_s16(test_vec[i].v); - SIMDE_CONSTIFY_8_(simde_vmla_laneq_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmla_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmla_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmla_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmla_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s16(0); break; + } simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); } @@ -380,7 +396,13 @@ test_simde_vmla_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1_s32(test_vec[i].a); b = simde_vld1_s32(test_vec[i].b); v = simde_vld1q_s32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmla_laneq_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_s32(0); break; + } simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); } @@ -542,7 +564,17 @@ test_simde_vmla_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1_u16(test_vec[i].a); b = simde_vld1_u16(test_vec[i].b); v = simde_vld1q_u16(test_vec[i].v); - SIMDE_CONSTIFY_8_(simde_vmla_laneq_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmla_laneq_u16(a, b, v, 4); break; + case 5: r = simde_vmla_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmla_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmla_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u16(0); break; + } simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); } @@ -697,7 +729,13 @@ test_simde_vmla_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1_u32(test_vec[i].a); b = simde_vld1_u32(test_vec[i].b); v = simde_vld1q_u32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmla_laneq_u32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmla_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmla_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmla_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmla_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdup_n_u32(0); break; + } simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); } @@ -1298,7 +1336,13 @@ test_simde_vmlaq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1q_f32(test_vec[i].a); b = simde_vld1q_f32(test_vec[i].b); v = simde_vld1q_f32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmlaq_laneq_f32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f32(0)), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_f32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_f32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_f32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_f32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_f32(0); break; + } simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } @@ -1394,7 +1438,17 @@ test_simde_vmlaq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1q_s16(test_vec[i].a); b = simde_vld1q_s16(test_vec[i].b); v = simde_vld1q_s16(test_vec[i].v); - SIMDE_CONSTIFY_8_(simde_vmlaq_laneq_s16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s16(0)), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_s16(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_s16(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_s16(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_s16(a, b, v, 3); break; + case 4: r = simde_vmlaq_laneq_s16(a, b, v, 4); break; + case 5: r = simde_vmlaq_laneq_s16(a, b, v, 5); break; + case 6: r = simde_vmlaq_laneq_s16(a, b, v, 6); break; + case 7: r = simde_vmlaq_laneq_s16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s16(0); break; + } simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); } @@ -1458,7 +1512,13 @@ test_simde_vmlaq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1q_s32(test_vec[i].a); b = simde_vld1q_s32(test_vec[i].b); v = simde_vld1q_s32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmlaq_laneq_s32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_s32(0)), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_s32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_s32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_s32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_s32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_s32(0); break; + } simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); } @@ -1554,7 +1614,17 @@ test_simde_vmlaq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1q_u16(test_vec[i].a); b = simde_vld1q_u16(test_vec[i].b); v = simde_vld1q_u16(test_vec[i].v); - SIMDE_CONSTIFY_8_(simde_vmlaq_laneq_u16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u16(0)), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_u16(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_u16(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_u16(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_u16(a, b, v, 3); break; + case 4: r = simde_vmlaq_laneq_u16(a, b, v, 4); break; + case 5: r = simde_vmlaq_laneq_u16(a, b, v, 5); break; + case 6: r = simde_vmlaq_laneq_u16(a, b, v, 6); break; + case 7: r = simde_vmlaq_laneq_u16(a, b, v, 7); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u16(0); break; + } simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); } @@ -1618,7 +1688,13 @@ test_simde_vmlaq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { a = simde_vld1q_u32(test_vec[i].a); b = simde_vld1q_u32(test_vec[i].b); v = simde_vld1q_u32(test_vec[i].v); - SIMDE_CONSTIFY_4_(simde_vmlaq_laneq_u32, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_u32(0)), test_vec[i].lane, a, b, v); + switch(test_vec[i].lane) { + case 0: r = simde_vmlaq_laneq_u32(a, b, v, 0); break; + case 1: r = simde_vmlaq_laneq_u32(a, b, v, 1); break; + case 2: r = simde_vmlaq_laneq_u32(a, b, v, 2); break; + case 3: r = simde_vmlaq_laneq_u32(a, b, v, 3); break; + default: HEDLEY_UNREACHABLE(); r = simde_vdupq_n_u32(0); break; + } simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); } diff --git a/test/arm/neon/mul_lane.c b/test/arm/neon/mul_lane.c index ca767cf7a..e069faef1 100644 --- a/test/arm/neon/mul_lane.c +++ b/test/arm/neon/mul_lane.c @@ -9,7 +9,7 @@ SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ static int test_simde_vmul_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[4]; int lane; @@ -42,7 +42,7 @@ test_simde_vmul_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); simde_float16x4_t r; - SIMDE_CONSTIFY_4_(simde_vmul_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_C(0.0))), test_vec[i].lane, a, b); + SIMDE_CONSTIFY_4_(simde_vmul_lane_f16, r, (HEDLEY_UNREACHABLE(), simde_vdup_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); } @@ -1238,7 +1238,7 @@ test_simde_vmulq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vmulq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[8]; simde_float16 b[8]; int lane; @@ -1307,7 +1307,7 @@ test_simde_vmulq_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); simde_float16x8_t r; - SIMDE_CONSTIFY_8_(simde_vmulq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_C(0.0))), test_vec[i].lane, a, b); + SIMDE_CONSTIFY_8_(simde_vmulq_laneq_f16, r, (HEDLEY_UNREACHABLE(), simde_vdupq_n_f16(SIMDE_FLOAT16_VALUE(0.0))), test_vec[i].lane, a, b); simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } @@ -1879,7 +1879,7 @@ test_simde_vmuld_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vmulh_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b[4]; int lane; @@ -1988,7 +1988,7 @@ test_simde_vmuls_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vmulh_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16_t a; simde_float16_t b[8]; int lane; @@ -2102,7 +2102,7 @@ test_simde_vmuls_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vmul_laneq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 b[8]; int lane; diff --git a/test/arm/neon/neg.c b/test/arm/neon/neg.c index 493569a2e..0981e4092 100644 --- a/test/arm/neon/neg.c +++ b/test/arm/neon/neg.c @@ -5,7 +5,7 @@ static int test_simde_vnegh_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a; simde_float16 r; } test_vec[] = { @@ -38,7 +38,7 @@ test_simde_vnegh_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vneg_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[4]; simde_float16 r[4]; } test_vec[] = { @@ -72,7 +72,7 @@ test_simde_vneg_f16 (SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vnegq_f16 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { + struct { simde_float16 a[8]; simde_float16 r[8]; } test_vec[] = { diff --git a/test/arm/neon/qdmull_high_lane.c b/test/arm/neon/qdmull_high_lane.c index 7dbe017ac..fc0b7372a 100644 --- a/test/arm/neon/qdmull_high_lane.c +++ b/test/arm/neon/qdmull_high_lane.c @@ -197,6 +197,38 @@ test_simde_vqdmull_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { int8_t lane; int64_t r[2]; } test_vec[] = { + { { INT32_C( 170394437), INT32_C( 838440752), INT32_C( 2000223965), INT32_C( 224778862) }, + { INT32_C( 1294207306), INT32_C( 797463044), INT32_C( 1281649861), INT32_C( 125061820) }, + INT8_C( 2), + { INT64_C(5127173533422237730), INT64_C(576175594476076364) } }, + { { -INT32_C( 1594036428), INT32_C( 1222292849), INT32_C( 2015861929), -INT32_C( 82090536) }, + { INT32_C( 2123083315), -INT32_C( 204875652), -INT32_C( 937168206), -INT32_C( 508936045) }, + INT8_C( 0), + { INT64_C(8559685653607229270), -INT64_C(348570094602013680) } }, + { { INT32_C( 692200562), -INT32_C( 254752304), INT32_C( 676464785), INT32_C( 1809723204) }, + { INT32_C( 1804668600), INT32_C( 1691972474), INT32_C( 1076408359), INT32_C( 986035332) }, + INT8_C( 1), + { INT64_C(2289119591700656180), INT64_C(6124003693454173392) } }, + { { -INT32_C( 1310053903), INT32_C( 1964973717), -INT32_C( 230274986), -INT32_C( 655165622) }, + { INT32_C( 77849012), INT32_C( 523183193), INT32_C( 306349840), -INT32_C( 1459982813) }, + INT8_C( 2), + { -INT64_C(141089410234204480), -INT64_C(401419766946400960) } }, + { { -INT32_C( 167463656), -INT32_C( 1320629443), -INT32_C( 1195096316), INT32_C( 285557548) }, + { -INT32_C( 905947543), -INT32_C( 461591499), -INT32_C( 1003092768), -INT32_C( 1992120667) }, + INT8_C( 1), + { INT64_C(1103292599903635368), -INT64_C(263621873264168904) } }, + { { INT32_C( 1117388545), INT32_C( 1452962560), INT32_C( 1037242287), -INT32_C( 1218607240) }, + { INT32_C( 1611559047), -INT32_C( 218371100), -INT32_C( 469758461), -INT32_C( 2134578451) }, + INT8_C( 2), + { -INT64_C(974506680850480614), INT64_C(1144902123251715280) } }, + { { -INT32_C( 1462364218), -INT32_C( 1974598648), -INT32_C( 1177302303), INT32_C( 690147037) }, + { INT32_C( 349207586), -INT32_C( 978044017), INT32_C( 1007534029), INT32_C( 1033057756) }, + INT8_C( 1), + { INT64_C(2302906947298942302), -INT64_C(1349988360776255258) } }, + { { -INT32_C( 957553051), INT32_C( 1058397298), INT32_C( 297981031), -INT32_C( 1782179923) }, + { -INT32_C( 2100356702), INT32_C( 1207072133), INT32_C( 443917590), -INT32_C( 985658139) }, + INT8_C( 3), + { -INT64_C(587414856945522618), INT64_C(3513240292534686594) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/test-neon.h b/test/arm/neon/test-neon.h index cd20368f7..345f61fc2 100644 --- a/test/arm/neon/test-neon.h +++ b/test/arm/neon/test-neon.h @@ -162,6 +162,22 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(a1_[0]), a1_, b1_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x2(ET min, ET max) { \ @@ -194,20 +210,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x2_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -234,10 +237,12 @@ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint32x4x2_t, uint32_ SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_INT_TYPE_FUNCS_( uint64x2x2_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x2_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x2_t, simde_float64_t, simde_float64, 2, q, f64) - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x2_t, simde_float16_t, simde_float16, 4, , f16) SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x2_t, simde_float16_t, simde_float16, 8, q, f16) +#else +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x2_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X2_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x2_t, simde_float16_t, simde_float16, 8, q, f16) #endif HEDLEY_DIAGNOSTIC_POP @@ -294,6 +299,26 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(a2_[0]), a2_, b2_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x3(ET min, ET max) { \ @@ -332,24 +357,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x3_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -376,10 +384,12 @@ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_INT_TYPE_FUNCS_( uint32x4x3_t, uint32_ SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_INT_TYPE_FUNCS_( uint64x2x3_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x3_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x3_t, simde_float64_t, simde_float64, 2, q, f64) - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x3_t, simde_float16_t, simde_float16, 4, , f16) SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x3_t, simde_float16_t, simde_float16, 8, q, f16) +#else +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x3_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X3_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x3_t, simde_float16_t, simde_float16, 8, q, f16) #endif HEDLEY_DIAGNOSTIC_POP @@ -443,6 +453,30 @@ HEDLEY_DIAGNOSTIC_POP && simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(a3_[0]), a3_, b3_, filename, line, astr, bstr); \ } +#define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ + static int \ + simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, ET slop, \ + const char* filename, int line, const char* astr, const char* bstr) { \ + SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ + SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ + SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ + SET a3_[sizeof(a.val[3]) / sizeof(ET)], b3_[sizeof(b.val[3]) / sizeof(ET)]; \ + \ + simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ + simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ + simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ + simde_vst1##modifier##_##symbol_identifier(a3_, a.val[3]); \ + simde_vst1##modifier##_##symbol_identifier(b3_, b.val[3]); \ + \ + return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr) && \ + simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a3_), HEDLEY_REINTERPRET_CAST(SET*, b3_), slop, filename, line, astr, bstr); \ + } \ + #define SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(NT, ET, SET, element_count, modifier, symbol_identifier) \ static simde_##NT \ simde_test_arm_neon_random_##symbol_identifier##x##element_count##x4(ET min, ET max) { \ @@ -487,28 +521,7 @@ HEDLEY_DIAGNOSTIC_POP } \ } \ \ - static int \ - simde_test_arm_neon_assert_equal_##symbol_identifier##x##element_count##x4_(simde_##NT a, simde_##NT b, ET slop, \ - const char* filename, int line, const char* astr, const char* bstr) { \ - SET a0_[sizeof(a.val[0]) / sizeof(ET)], b0_[sizeof(b.val[0]) / sizeof(ET)]; \ - SET a1_[sizeof(a.val[1]) / sizeof(ET)], b1_[sizeof(b.val[1]) / sizeof(ET)]; \ - SET a2_[sizeof(a.val[2]) / sizeof(ET)], b2_[sizeof(b.val[2]) / sizeof(ET)]; \ - SET a3_[sizeof(a.val[3]) / sizeof(ET)], b3_[sizeof(b.val[3]) / sizeof(ET)]; \ - \ - simde_vst1##modifier##_##symbol_identifier(a0_, a.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(b0_, b.val[0]); \ - simde_vst1##modifier##_##symbol_identifier(a1_, a.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(b1_, b.val[1]); \ - simde_vst1##modifier##_##symbol_identifier(a2_, a.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(b2_, b.val[2]); \ - simde_vst1##modifier##_##symbol_identifier(a3_, a.val[3]); \ - simde_vst1##modifier##_##symbol_identifier(b3_, b.val[3]); \ - \ - return simde_assert_equal_v##symbol_identifier##_(sizeof(a0_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a0_), HEDLEY_REINTERPRET_CAST(SET*, b0_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a1_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a1_), HEDLEY_REINTERPRET_CAST(SET*, b1_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a2_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a2_), HEDLEY_REINTERPRET_CAST(SET*, b2_), slop, filename, line, astr, bstr) && \ - simde_assert_equal_v##symbol_identifier##_(sizeof(a3_) / sizeof(ET), HEDLEY_REINTERPRET_CAST(SET*, a3_), HEDLEY_REINTERPRET_CAST(SET*, b3_), slop, filename, line, astr, bstr); \ - } + SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(NT, ET, SET, element_count, modifier, symbol_identifier) \ #if !defined(SIMDE_BUG_INTEL_857088) HEDLEY_DIAGNOSTIC_PUSH @@ -535,10 +548,12 @@ SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_INT_TYPE_FUNCS_( uint32x4x4_t, uint32_ SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_INT_TYPE_FUNCS_( uint64x2x4_t, uint64_t, 2, q, u64, u64) SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float32x4x4_t, simde_float32_t, simde_float32, 4, q, f32) SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float64x2x4_t, simde_float64_t, simde_float64, 2, q, f64) - #if SIMDE_FLOAT16_API == SIMDE_FLOAT16_API_FP16 SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float16x4x4_t, simde_float16_t, simde_float16, 4, , f16) SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_FUNCS_(float16x8x4_t, simde_float16_t, simde_float16, 8, q, f16) +#else +SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x4x4_t, simde_float16_t, simde_float16, 4, , f16) +SIMDE_TEST_ARM_NEON_GENERATE_X4_VECTOR_FLOAT_TYPE_EQUAL_FUNC_(float16x8x4_t, simde_float16_t, simde_float16, 8, q, f16) #endif HEDLEY_DIAGNOSTIC_POP