Skip to content

Commit

Permalink
arm: fix some neon2rvv intrinsic function errors
Browse files Browse the repository at this point in the history
1. For vqdmlal_s16/s32: the doubled product may overflow,
so vqaddq_s32/s64 must be used to saturate it. The same
applies to vqdmlsl_s16/s32.

2. The vqrdmulh family of functions needs to use the vqadd
saturating functions so that the doubled result does not overflow.

3. The result of the vrshl family of functions needs to keep the
sign bit of the original data. If a > 0 && b < 0, the result of
(a + (1 << (-b - 1))) may overflow into a negative value.
In gcc/clang, >> on a signed value is an arithmetic right shift,
so the result gets the wrong sign bit unless the value is first
converted to the unsigned type.

Signed-off-by: Zhijin Zeng <[email protected]>
  • Loading branch information
zengdage authored and mr-c committed Apr 20, 2024
1 parent 416091e commit 2a548e5
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 36 deletions.
17 changes: 6 additions & 11 deletions simde/arm/neon/qdmlal.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "mul.h"
#include "mul_n.h"
#include "movl.h"
#include "qadd.h"
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
Expand Down Expand Up @@ -71,7 +72,8 @@ simde_vqdmlal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vqdmlal_s16(a, b, c);
#else
return simde_vaddq_s32(simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2), a);
simde_int32x4_t temp = simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c));
return simde_vqaddq_s32(simde_vqaddq_s32(temp, temp), a);
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -85,17 +87,10 @@ simde_vqdmlal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vqdmlal_s32(a, b, c);
#else
simde_int64x2_private r_ = simde_int64x2_to_private(
simde_x_vmulq_s64(
simde_int64x2_t r = simde_x_vmulq_s64(
simde_vmovl_s32(b),
simde_vmovl_s32(c)));

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2);
}

return simde_vaddq_s64(a, simde_int64x2_from_private(r_));
simde_vmovl_s32(c));
return simde_vqaddq_s64(a, simde_vqaddq_s64(r, r));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand Down
18 changes: 7 additions & 11 deletions simde/arm/neon/qdmlsl.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include "mul.h"
#include "mul_n.h"
#include "movl.h"
#include "qadd.h"
#include "qsub.h"
#include "types.h"

HEDLEY_DIAGNOSTIC_PUSH
Expand Down Expand Up @@ -71,7 +73,8 @@ simde_vqdmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vqdmlsl_s16(a, b, c);
#else
return simde_vsubq_s32(a, simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)), 2));
simde_int32x4_t temp = simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c));
return simde_vqsubq_s32(a, simde_vqaddq_s32(temp, temp));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand All @@ -85,17 +88,10 @@ simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
return vqdmlsl_s32(a, b, c);
#else
simde_int64x2_private r_ = simde_int64x2_to_private(
simde_x_vmulq_s64(
simde_int64x2_t r = simde_x_vmulq_s64(
simde_vmovl_s32(b),
simde_vmovl_s32(c)));

SIMDE_VECTORIZE
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2);
}

return simde_vsubq_s64(a, simde_int64x2_from_private(r_));
simde_vmovl_s32(c));
return simde_vqsubq_s64(a, simde_vqaddq_s64(r, r));
#endif
}
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
Expand Down
10 changes: 8 additions & 2 deletions simde/arm/neon/qrdmulh.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,10 @@ simde_vqrdmulhh_s16(int16_t a, int16_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vqrdmulhh_s16(a, b);
#else
return HEDLEY_STATIC_CAST(int16_t, (((1 << 15) + ((HEDLEY_STATIC_CAST(int32_t, (HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b)))) << 1)) >> 16) & 0xffff);
int32_t temp = HEDLEY_STATIC_CAST(int32_t, a) * HEDLEY_STATIC_CAST(int32_t, b);
int32_t r = temp > 0 ? (temp > (INT32_MAX >> 1) ? INT32_MAX : (temp << 1)) : (temp < (INT32_MIN >> 1) ? INT32_MIN : (temp << 1));
r = (r > (INT32_MAX - (1 << 15))) ? INT32_MAX : ((1 << 15) + r);
return HEDLEY_STATIC_CAST(int16_t, ((r >> 16) & 0xffff));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand All @@ -54,7 +57,10 @@ simde_vqrdmulhs_s32(int32_t a, int32_t b) {
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
return vqrdmulhs_s32(a, b);
#else
return HEDLEY_STATIC_CAST(int32_t, (((HEDLEY_STATIC_CAST(int64_t, 1) << 31) + ((HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b)))) << 1)) >> 32) & 0xffffffff);
int64_t temp = HEDLEY_STATIC_CAST(int64_t, a) * HEDLEY_STATIC_CAST(int64_t, b);
int64_t r = temp > 0 ? (temp > (INT64_MAX >> 1) ? INT64_MAX : (temp << 1)) : (temp < (INT64_MIN >> 1) ? INT64_MIN : (temp << 1));
r = (r > (INT64_MAX - (HEDLEY_STATIC_CAST(int64_t, 1) << 31))) ? INT64_MAX : ((HEDLEY_STATIC_CAST(int64_t, 1) << 31) + r);
return HEDLEY_STATIC_CAST(int32_t, ((r >> 32) & 0xffffffff));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand Down
20 changes: 16 additions & 4 deletions simde/arm/neon/qrshl.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ simde_vqrshlb_s8(int8_t a, int8_t b) {
if (b < -8) {
r = 0;
} else if (b < 0) {
r = HEDLEY_STATIC_CAST(int8_t, ((a + (1 << (-b - 1))) >> -b));
r = HEDLEY_STATIC_CAST(int8_t, a <= 0
? ((a + (1 << (-b - 1))) >> -b)
: HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t,
(a + (1 << (-b - 1)))) >> -b) & 0x7FUL)));
} else if (b == 0) {
r = a;
} else if (b < 7) {
Expand Down Expand Up @@ -79,7 +82,10 @@ simde_vqrshlh_s16(int16_t a, int16_t b) {
if (b8 <= -16) {
r = 0;
} else if (b8 < 0) {
r = HEDLEY_STATIC_CAST(int16_t, ((a + (1 << (-b8 - 1))) >> -b8));
r = HEDLEY_STATIC_CAST(int16_t, a <= 0
? ((a + (1 << (-b8 - 1))) >> -b8)
: HEDLEY_STATIC_CAST(int16_t, ((HEDLEY_STATIC_CAST(uint16_t,
(a + (1 << (-b8 - 1)))) >> -b8) & 0x7FFFUL)));
} else if (b8 == 0) {
r = a;
} else if (b8 < 15) {
Expand Down Expand Up @@ -114,7 +120,10 @@ simde_vqrshls_s32(int32_t a, int32_t b) {
if (b8 <= -32) {
r = 0;
} else if (b8 < 0) {
r = ((a + (1 << (-b8 - 1))) >> -b8);
r = a <= 0
? ((a + (1 << (-b8 - 1))) >> -b8)
: HEDLEY_STATIC_CAST(int32_t, ((HEDLEY_STATIC_CAST(uint32_t,
(a + (1 << (-b8 - 1)))) >> -b8) & 0x7FFFFFFFUL));
} else if (b8 == 0) {
r = a;
} else if (b8 < 31) {
Expand Down Expand Up @@ -149,7 +158,10 @@ simde_vqrshld_s64(int64_t a, int64_t b) {
if (b8 <= -64) {
r = 0;
} else if (b8 < 0) {
r = ((a + (INT64_C(1) << (-b8 - 1))) >> -b8);
r = a <= 0
? ((a + (INT64_C(1) << (-b8 - 1))) >> -b8)
: HEDLEY_STATIC_CAST(int64_t, ((HEDLEY_STATIC_CAST(uint64_t,
(a + (INT64_C(1) << (-b8 - 1)))) >> -b8) & 0x7FFFFFFFFFFFFFFFUL));
} else if (b8 == 0) {
r = a;
} else if (b8 < 63) {
Expand Down
30 changes: 22 additions & 8 deletions simde/arm/neon/rshl.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,9 @@ simde_vrshld_s64(int64_t a, int64_t b) {
? 0
: (b >= 0)
? (a << b)
: ((a + (INT64_C(1) << (-b - 1))) >> -b);
: (a <= 0
? ((a + (INT64_C(1) << (-b - 1))) >> -b)
: HEDLEY_STATIC_CAST(int64_t, (HEDLEY_STATIC_CAST(uint64_t, (a + (INT64_C(1) << (-b - 1)))) >> -b)));
#endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
Expand Down Expand Up @@ -148,7 +150,9 @@ simde_vrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t,
(simde_math_abs(b_.values[i]) >= 8) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0x7FUL)))));
}
#endif

Expand Down Expand Up @@ -189,7 +193,9 @@ simde_vrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t,
(simde_math_abs(b_.values[i]) >= 16) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int16_t, ((HEDLEY_STATIC_CAST(uint16_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0x7FFFUL)))));
}
#endif

Expand Down Expand Up @@ -230,7 +236,9 @@ simde_vrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) {
r_.values[i] = HEDLEY_STATIC_CAST(int32_t,
(simde_math_abs(b_.values[i]) >= 32) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int32_t, ((HEDLEY_STATIC_CAST(uint32_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0x7FFFFFFFUL)))));
}
#endif

Expand Down Expand Up @@ -513,7 +521,9 @@ simde_vrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) {
r_.values[i] = HEDLEY_STATIC_CAST(int8_t,
(simde_math_abs(b_.values[i]) >= 8) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int8_t, ((HEDLEY_STATIC_CAST(uint8_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0x7FUL)))));
}
#endif

Expand Down Expand Up @@ -580,7 +590,9 @@ simde_vrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) {
r_.values[i] = HEDLEY_STATIC_CAST(int16_t,
(simde_math_abs(b_.values[i]) >= 16) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int16_t, ((HEDLEY_STATIC_CAST(uint16_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0x7FFFUL)))));
}
#endif

Expand Down Expand Up @@ -634,8 +646,10 @@ simde_vrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) {
b_.values[i] = HEDLEY_STATIC_CAST(int8_t, b_.values[i]);
r_.values[i] = HEDLEY_STATIC_CAST(int32_t,
(simde_math_abs(b_.values[i]) >= 32) ? 0 :
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]));
(b_.values[i] >= 0) ? (a_.values[i] << b_.values[i]) :
((a_.values[i] <= 0) ? ((a_.values[i] + (1 << (-b_.values[i] - 1))) >> -b_.values[i]) :
HEDLEY_STATIC_CAST(int32_t, ((HEDLEY_STATIC_CAST(uint32_t,
(a_.values[i] + (1 << (-b_.values[i] - 1)))) >> -b_.values[i]) & (0X7FFFFFFFUL)))));
}
#endif

Expand Down
8 changes: 8 additions & 0 deletions test/arm/neon/qdmlal.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ test_simde_vqdmlal_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ INT16_C( 8642), -INT16_C( 579), INT16_C( 2963), INT16_C( 9252) },
{ -INT16_C( 7314), -INT16_C( 5230), INT16_C( 8688), INT16_C( 5749) },
{ -INT32_C( 126337050), INT32_C( 6064180), INT32_C( 51565328), INT32_C( 106311312) } },
{ { -INT32_C( 16), -INT32_C( 15), -INT32_C( 14), -INT32_C( 13) },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT32_C(2147483631), INT32_C(2147483632), INT32_C(2147483633), INT32_C(2147483634) } },
};

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
Expand Down Expand Up @@ -199,6 +203,10 @@ test_simde_vqdmlal_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ -INT32_C( 8397045), INT32_C( 6142639) },
{ -INT32_C( 6977990), -INT32_C( 5600341) },
{ INT64_C(117189633707070), -INT64_C( 68801010159595) } },
{ { -INT64_C( 16), -INT64_C( 15) },
{ INT32_MIN, INT32_MIN },
{ INT32_MIN, INT32_MIN },
{INT64_C(9223372036854775791), INT64_C(9223372036854775792) } },
};

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
Expand Down
8 changes: 8 additions & 0 deletions test/arm/neon/qdmlsl.c
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,10 @@ test_simde_vqdmlsl_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ INT16_C( 8057), -INT16_C( 9339), INT16_C( 1806), INT16_C( 8600) },
{ -INT16_C( 751), -INT16_C( 6991), INT16_C( 1494), -INT16_C( 6795)},
{ INT32_C( 16070724), -INT32_C( 122085335), INT32_C( 18446020), INT32_C( 153174877) } },
{ { -INT32_C( 16), -INT32_C( 15), -INT32_C( 14), -INT32_C( 13) },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN } },
};

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
Expand Down Expand Up @@ -199,6 +203,10 @@ test_simde_vqdmlsl_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ -INT32_C( 305245), -INT32_C( 548274) },
{ -INT32_C( 805474), INT32_C( 431866)},
{ -INT64_C( 353429066965), -INT64_C( 166180089563) } },
{ { -INT64_C( 16), -INT64_C( 15) },
{ INT32_MIN, INT32_MIN },
{ INT32_MIN, INT32_MIN },
{ INT64_MIN, INT64_MIN } },
};

for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) {
Expand Down
14 changes: 14 additions & 0 deletions test/arm/neon/qrdmulh.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ test_simde_vqrdmulh_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ { INT16_C( 31066), INT16_C( 19881), INT16_C( 14863), INT16_C( 16264) },
{ INT16_C( 17499), INT16_C( 19391), -INT16_C( 23792), -INT16_C( 25706) },
{ INT16_C( 16590), INT16_C( 11765), -INT16_C( 10792), -INT16_C( 12759) } },
{ { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } },

};

Expand Down Expand Up @@ -94,6 +97,9 @@ test_simde_vqrdmulh_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ { -INT32_C( 1216301242), INT32_C( 231209245) },
{ INT32_C( 1833478310), -INT32_C( 429409792) },
{ -INT32_C( 1038453516), -INT32_C( 46232489) } },
{ { INT32_MIN, INT32_MIN },
{ INT32_MIN, INT32_MIN },
{ INT32_MAX, INT32_MAX } },

};

Expand Down Expand Up @@ -153,6 +159,11 @@ test_simde_vqrdmulhq_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ { INT16_C( 28579), INT16_C( 26571), INT16_C( 23618), INT16_C( 3470), INT16_C( 10594), INT16_C( 31318), -INT16_C( 24794), INT16_C( 1860) },
{ -INT16_C( 22526), -INT16_C( 12632), INT16_C( 21464), INT16_C( 8577), INT16_C( 28627), INT16_C( 27596), -INT16_C( 26895), -INT16_C( 27290) },
{ -INT16_C( 19646), -INT16_C( 10243), INT16_C( 15470), INT16_C( 908), INT16_C( 9255), INT16_C( 26375), INT16_C( 20350), -INT16_C( 1549) } },
#if !defined(SIMDE_X86_SSE_NATIVE) && !defined(SIMDE_X86_MMX_NATIVE)
{ { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
{ INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } },
#endif

};

Expand Down Expand Up @@ -212,6 +223,9 @@ test_simde_vqrdmulhq_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ { -INT32_C( 613662219), -INT32_C( 1259034176), INT32_C( 1695972338), -INT32_C( 22565202) },
{ INT32_C( 1459986413), INT32_C( 865007473), -INT32_C( 921225670), -INT32_C( 335884554) },
{ -INT32_C( 417203876), -INT32_C( 507139587), -INT32_C( 727536740), INT32_C( 3529388) } },
{ { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN },
{ INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN },
{ INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX } },

};

Expand Down
12 changes: 12 additions & 0 deletions test/arm/neon/qrdmulh_n.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@ test_simde_vqrdmulh_n_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ { -INT16_C( 14394), INT16_C( 28773), INT16_C( 30122), -INT16_C( 574) },
-INT16_C( 10708),
{ INT16_C( 4704), -INT16_C( 9403), -INT16_C( 9843), INT16_C( 188) } },
{ { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
INT16_MIN,
{ INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } },

};

Expand Down Expand Up @@ -94,6 +97,9 @@ test_simde_vqrdmulh_n_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ { -INT32_C( 1238271146), INT32_C( 1164109663) },
INT32_C( 737217376),
{ -INT32_C( 425090550), INT32_C( 399631388) } },
{ { INT32_MIN, INT32_MIN },
INT32_MIN,
{ INT32_MAX, INT32_MAX } },

};

Expand Down Expand Up @@ -153,6 +159,9 @@ test_simde_vqrdmulhq_n_s16 (SIMDE_MUNIT_TEST_ARGS) {
{ { INT16_C( 15761), INT16_C( 23849), INT16_C( 9736), INT16_C( 26802), INT16_C( 27881), -INT16_C( 7053), -INT16_C( 14710), -INT16_C( 23581) },
INT16_C( 25688),
{ INT16_C( 12356), INT16_C( 18696), INT16_C( 7632), INT16_C( 21011), INT16_C( 21857), -INT16_C( 5529), -INT16_C( 11532), -INT16_C( 18486) } },
{ { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN },
INT16_MIN,
{ INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } },

};

Expand Down Expand Up @@ -212,6 +221,9 @@ test_simde_vqrdmulhq_n_s32 (SIMDE_MUNIT_TEST_ARGS) {
{ { -INT32_C( 219487289), INT32_C( 1420994589), INT32_C( 889110344), -INT32_C( 2103115347) },
INT32_C( 1735639961),
{ -INT32_C( 177394091), INT32_C( 1148476728), INT32_C( 718597063), -INT32_C( 1699780598) } },
{ { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN },
INT32_MIN,
{ INT32_MAX, INT32_MAX, INT32_MAX, INT32_MAX } },

};

Expand Down
Loading

0 comments on commit 2a548e5

Please sign in to comment.