Skip to content

Commit

Permalink
arm neon: avoid GCC 11 vst1_*_x4 built-in functions
Browse files Browse the repository at this point in the history
The vst1_*_x4 built-in functions in GCC 11 produce incorrect results,
as shown in the following example. This patch circumvents the issue by
avoiding these functions in GCC 11 and earlier versions.

float32x2x4_t val = {{
        {0.10f, 0.20f},
        {0.30f, 0.40f},
        {0.50f, 0.60f},
        {0.70f, 0.80f}
    }};
    float result[16];

    vst1_f32_x4(result, val);

Actual result = {0.100000 0.200000 0.500000 0.600000 0.000000 0.000000 0.000000 0.000000}
(expected the first eight elements to be {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8})

Signed-off-by: LI Qingwu <[email protected]>
  • Loading branch information
Qingwu-Li authored and mr-c committed Sep 11, 2024
1 parent 84ebde4 commit 1c75c7b
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 13 deletions.
38 changes: 25 additions & 13 deletions simde/arm/neon/st1_x4.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ simde_vst1_f16_x4(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_float16x4x4
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x2x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_f32_x4(ptr, val);
#else
simde_vst1_f32(ptr, val.val[0]);
Expand All @@ -85,7 +86,7 @@ simde_vst1_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x2x4_t
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_f64_x4(simde_float64 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t val) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_GCC_114521)
vst1_f64_x4(ptr, val);
#else
simde_vst1_f64(ptr, val.val[0]);
Expand All @@ -102,7 +103,8 @@ simde_vst1_f64_x4(simde_float64 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float64x1x4_t
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_s8_x4(int8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_int8x8x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_s8_x4(ptr, val);
#else
simde_vst1_s8(ptr, val.val[0]);
Expand All @@ -119,7 +121,8 @@ simde_vst1_s8_x4(int8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_int8x8x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_s16_x4(int16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int16x4x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_s16_x4(ptr, val);
#else
simde_vst1_s16(ptr, val.val[0]);
Expand All @@ -136,7 +139,8 @@ simde_vst1_s16_x4(int16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_int16x4x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_s32_x4(int32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int32x2x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_s32_x4(ptr, val);
#else
simde_vst1_s32(ptr, val.val[0]);
Expand All @@ -153,7 +157,8 @@ simde_vst1_s32_x4(int32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_int32x2x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_s64_x4(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_s64_x4(ptr, val);
#else
simde_vst1_s64(ptr, val.val[0]);
Expand All @@ -170,7 +175,8 @@ simde_vst1_s64_x4(int64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_int64x1x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_u8_x4(uint8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_uint8x8x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_u8_x4(ptr, val);
#else
simde_vst1_u8(ptr, val.val[0]);
Expand All @@ -187,7 +193,8 @@ simde_vst1_u8_x4(uint8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_uint8x8x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_u16_x4(uint16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint16x4x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_u16_x4(ptr, val);
#else
simde_vst1_u16(ptr, val.val[0]);
Expand All @@ -204,7 +211,8 @@ simde_vst1_u16_x4(uint16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_uint16x4x4_t val)
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_u32_x4(uint32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint32x2x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_u32_x4(ptr, val);
#else
simde_vst1_u32(ptr, val.val[0]);
Expand All @@ -221,7 +229,8 @@ simde_vst1_u32_x4(uint32_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_uint32x2x4_t val) {
SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_u64_x4(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989)
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && !defined(SIMDE_BUG_GCC_REV_260989) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_u64_x4(ptr, val);
#else
simde_vst1_u64(ptr, val.val[0]);
Expand All @@ -239,7 +248,8 @@ SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_p8_x4(simde_poly8_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_poly8x8x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)))
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_p8_x4(ptr, val);
#else
simde_poly8x8_private val_[4];
Expand All @@ -265,7 +275,8 @@ SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_p16_x4(simde_poly16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_poly16x4x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)))
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_p16_x4(ptr, val);
#else
simde_poly16x4_private val_[4];
Expand All @@ -291,7 +302,8 @@ SIMDE_FUNCTION_ATTRIBUTES
void
simde_vst1_p64_x4(simde_poly64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_poly64x1x4_t val) {
#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && \
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE)))
(!defined(HEDLEY_GCC_VERSION) || (HEDLEY_GCC_VERSION_CHECK(8,5,0) && defined(SIMDE_ARM_NEON_A64V8_NATIVE))) \
&& !defined(SIMDE_BUG_GCC_114521)
vst1_p64_x4(ptr, val);
#else
simde_poly64x1_private val_[4];
Expand Down
3 changes: 3 additions & 0 deletions simde/simde-common.h
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,9 @@ HEDLEY_DIAGNOSTIC_POP
# if (!HEDLEY_GCC_VERSION_CHECK(9,0,0) && !defined(SIMDE_ARCH_AARCH64)) || (!defined(SIMDE_ARCH_AARCH64) && defined(SIMDE_ARCH_ARM))
# define SIMDE_BUG_GCC_REV_260989
# endif
# if !HEDLEY_GCC_VERSION_CHECK(11,5,0) && (defined(SIMDE_ARCH_ARM4) || defined(SIMDE_ARCH_AARCH64))
# define SIMDE_BUG_GCC_114521
# endif
# if defined(SIMDE_ARCH_ARM) && !defined(SIMDE_ARCH_AARCH64)
# define SIMDE_BUG_GCC_95399
# define SIMDE_BUG_GCC_95471
Expand Down

0 comments on commit 1c75c7b

Please sign in to comment.