From b2851908ca8e48f411dc504c82d896e1f1d85d64 Mon Sep 17 00:00:00 2001 From: Ryo Date: Tue, 5 Mar 2024 10:33:10 +0000 Subject: [PATCH 1/3] Fix qdmlsl instructions The qdmlsl instructions were implemented without any saturation. This has been fixed by utilising existing saturating instructions which are implemented correctly. Unit tests have also been updated to test for saturation. Change-Id: Ia9e5a7bd850bc178920c19e390c17db5a3bfbc4f --- simde/arm/neon/qdmlsl.h | 22 +++---- simde/arm/neon/qdmlsl_high.h | 19 ++----- simde/arm/neon/qdmlsl_high_lane.h | 64 ++++----------------- simde/arm/neon/qdmlsl_high_n.h | 22 +------ test/arm/neon/qdmlsl.c | 82 ++++++++++++++++++-------- test/arm/neon/qdmlsl_high.c | 14 +++++ test/arm/neon/qdmlsl_high_lane.c | 33 +++++++++++ test/arm/neon/qdmlsl_high_n.c | 13 +++++ test/arm/neon/qdmlsl_lane.c | 95 +++++++++++++++++++++++++++++++ test/arm/neon/qdmlsl_n.c | 12 ++++ 10 files changed, 251 insertions(+), 125 deletions(-) diff --git a/simde/arm/neon/qdmlsl.h b/simde/arm/neon/qdmlsl.h index 68e17ca05..ae7ef703f 100644 --- a/simde/arm/neon/qdmlsl.h +++ b/simde/arm/neon/qdmlsl.h @@ -27,13 +27,9 @@ #if !defined(SIMDE_ARM_NEON_QDMLSL_H) #define SIMDE_ARM_NEON_QDMLSL_H -#include "sub.h" -#include "mul.h" -#include "mul_n.h" -#include "movl.h" -#include "qadd.h" -#include "qsub.h" #include "types.h" +#include "qsub.h" +#include "qdmull.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -45,7 +41,7 @@ simde_vqdmlslh_s16(int32_t a, int16_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlslh_s16(a, b, c); #else - return a - HEDLEY_STATIC_CAST(int32_t, b) * HEDLEY_STATIC_CAST(int32_t, c) * 2; + return simde_vqsubs_s32(a, simde_vqdmullh_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -59,7 +55,7 @@ simde_vqdmlsls_s32(int64_t a, int32_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsls_s32(a, b, c); #else - return a - HEDLEY_STATIC_CAST(int64_t, b) * HEDLEY_STATIC_CAST(int64_t, c) * 2; + return simde_vqsubd_s64(a, simde_vqdmulls_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -73,8 +69,7 @@ simde_vqdmlsl_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlsl_s16(a, b, c); #else - simde_int32x4_t temp = simde_vmulq_s32(simde_vmovl_s16(b), simde_vmovl_s16(c)); - return simde_vqsubq_s32(a, simde_vqaddq_s32(temp, temp)); + return simde_vqsubq_s32(a, simde_vqdmull_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -88,10 +83,7 @@ simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) return vqdmlsl_s32(a, b, c); #else - simde_int64x2_t r = simde_x_vmulq_s64( - simde_vmovl_s32(b), - simde_vmovl_s32(c)); - return simde_vqsubq_s64(a, simde_vqaddq_s64(r, r)); + return simde_vqsubq_s64(a, simde_vqdmull_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) @@ -103,4 +95,4 @@ simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */ +#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */ \ No newline at end of file diff --git a/simde/arm/neon/qdmlsl_high.h b/simde/arm/neon/qdmlsl_high.h index 18a6f47fe..01722f290 100644 --- a/simde/arm/neon/qdmlsl_high.h +++ b/simde/arm/neon/qdmlsl_high.h @@ -28,10 +28,9 @@ #define SIMDE_ARM_NEON_QDMLSL_HIGH_H #include "movl_high.h" -#include "sub.h" -#include "mul.h" -#include "mul_n.h" #include "types.h" +#include "qdmull_high.h" +#include "qsub.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -43,7 +42,7 @@ simde_vqdmlsl_high_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_s16(a, b, c); #else - return simde_vsubq_s32(a, simde_vmulq_n_s32(simde_vmulq_s32(simde_vmovl_high_s16(b), simde_vmovl_high_s16(c)), 2)); + return simde_vqsubq_s32(a, simde_vqdmull_high_s16(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -57,17 +56,7 @@ simde_vqdmlsl_high_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t c) #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_s32(a, b, c); #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(c))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); + return simde_vqsubq_s64(a, simde_vqdmull_high_s32(b, c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/simde/arm/neon/qdmlsl_high_lane.h b/simde/arm/neon/qdmlsl_high_lane.h index 877c72a2a..41962746f 100644 --- a/simde/arm/neon/qdmlsl_high_lane.h +++ b/simde/arm/neon/qdmlsl_high_lane.h @@ -27,91 +27,49 @@ #if !defined(SIMDE_ARM_NEON_QDMLSL_HIGH_LANE_H) #define SIMDE_ARM_NEON_QDMLSL_HIGH_LANE_H -#include "movl_high.h" -#include "sub.h" -#include "mul.h" -#include "mul_n.h" -#include "dup_n.h" +#include "dup_lane.h" +#include "get_high.h" #include "types.h" +#include "qdmlsl.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlsl_high_lane_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - return simde_vsubq_s32(a, - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x4_to_private(v).values[lane]))), 2)); -} #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_lane_s16(a, b, v, lane) vqdmlsl_high_lane_s16(a, b, v, lane) +#else + #define simde_vqdmlsl_high_lane_s16(a, b, v, lane) simde_vqdmlsl_s16((a), simde_vget_high_s16((b)), simde_vdup_lane_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsl_high_lane_s16 #define vqdmlsl_high_lane_s16(a, b, v, lane) simde_vqdmlsl_high_lane_s16((a), (b), (v), (lane)) #endif -SIMDE_FUNCTION_ATTRIBUTES -simde_int32x4_t -simde_vqdmlsl_high_laneq_s16(simde_int32x4_t a, simde_int16x8_t b, simde_int16x8_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { - return simde_vsubq_s32(a, - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(simde_int16x8_to_private(v).values[lane]))), 2)); -} #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_laneq_s16(a, b, v, lane) vqdmlsl_high_laneq_s16(a, b, v, lane) +#else + #define simde_vqdmlsl_high_laneq_s16(a, b, v, lane) simde_vqdmlsl_s16((a), simde_vget_high_s16((b)), simde_vdup_laneq_s16((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsl_high_laneq_s16 #define vqdmlsl_high_laneq_s16(a, b, v, lane) simde_vqdmlsl_high_laneq_s16((a), (b), (v), (lane)) #endif -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlsl_high_lane_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x2_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) { - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x2_to_private(v).values[lane])))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); -} #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_lane_s32(a, b, v, lane) vqdmlsl_high_lane_s32(a, b, v, lane) +#else + #define simde_vqdmlsl_high_lane_s32(a, b, v, lane) simde_vqdmlsl_s32((a), simde_vget_high_s32((b)), simde_vdup_lane_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsl_high_lane_s32 #define vqdmlsl_high_lane_s32(a, b, v, lane) simde_vqdmlsl_high_lane_s32((a), (b), (v), (lane)) #endif -SIMDE_FUNCTION_ATTRIBUTES -simde_int64x2_t -simde_vqdmlsl_high_laneq_s32(simde_int64x2_t a, simde_int32x4_t b, simde_int32x4_t v, const int lane) SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(simde_int32x4_to_private(v).values[lane])))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); -} #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) #define simde_vqdmlsl_high_laneq_s32(a, b, v, lane) vqdmlsl_high_laneq_s32(a, b, v, lane) +#else + #define simde_vqdmlsl_high_laneq_s32(a, b, v, lane) simde_vqdmlsl_s32((a), simde_vget_high_s32((b)), simde_vdup_laneq_s32((v), (lane))) #endif #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vqdmlsl_high_laneq_s32 diff --git a/simde/arm/neon/qdmlsl_high_n.h b/simde/arm/neon/qdmlsl_high_n.h index 9db3d7e04..901e9a1b0 100644 --- a/simde/arm/neon/qdmlsl_high_n.h +++ b/simde/arm/neon/qdmlsl_high_n.h @@ -29,10 +29,8 @@ #include "movl_high.h" #include "dup_n.h" -#include "sub.h" -#include "mul.h" -#include "mul_n.h" #include "types.h" +#include "qdmlsl_high.h" HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS @@ -44,11 +42,7 @@ simde_vqdmlsl_high_n_s16(simde_int32x4_t a, simde_int16x8_t b, int16_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_n_s16(a, b, c); #else - return simde_vsubq_s32(a, - simde_vmulq_n_s32( - simde_vmulq_s32( - simde_vmovl_high_s16(b), - simde_vmovl_high_s16(simde_vdupq_n_s16(c))), 2)); + return simde_vqdmlsl_high_s16(a, b, simde_vdupq_n_s16(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) @@ -62,17 +56,7 @@ simde_vqdmlsl_high_n_s32(simde_int64x2_t a, simde_int32x4_t b, int32_t c) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vqdmlsl_high_n_s32(a, b, c); #else - simde_int64x2_private r_ = simde_int64x2_to_private( - simde_x_vmulq_s64( - simde_vmovl_high_s32(b), - simde_vmovl_high_s32(simde_vdupq_n_s32(c)))); - - SIMDE_VECTORIZE - for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = r_.values[i] * HEDLEY_STATIC_CAST(int64_t, 2); - } - - return simde_vsubq_s64(a, simde_int64x2_from_private(r_)); + return simde_vqdmlsl_high_s32(a, b, simde_vdupq_n_s32(c)); #endif } #if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) diff --git a/test/arm/neon/qdmlsl.c b/test/arm/neon/qdmlsl.c index aa972a671..83c736302 100644 --- a/test/arm/neon/qdmlsl.c +++ b/test/arm/neon/qdmlsl.c @@ -13,36 +13,48 @@ test_simde_vqdmlslh_s16 (SIMDE_MUNIT_TEST_ARGS) { } test_vec[] = { { { -INT32_C( 7304045) }, { -INT16_C( 4745) }, - { INT16_C( 7757)}, + { INT16_C( 7757) }, { INT32_C( 66309885) } }, { { -INT32_C( 91150936) }, { INT16_C( 275) }, - { -INT16_C( 1162)}, + { -INT16_C( 1162) }, { -INT32_C( 90511836) } }, { { INT32_C( 9182566) }, { -INT16_C( 3247) }, - { -INT16_C( 1614)}, + { -INT16_C( 1614) }, { -INT32_C( 1298750) } }, { { INT32_C( 54973448) }, { INT16_C( 9255) }, - { INT16_C( 5744)}, + { INT16_C( 5744) }, { -INT32_C( 51347992) } }, { { -INT32_C( 97477178) }, { -INT16_C( 9570) }, - { INT16_C( 5135)}, + { INT16_C( 5135) }, { INT32_C( 806722) } }, { { -INT32_C( 54320777) }, { INT16_C( 1869) }, - { -INT16_C( 3076)}, + { -INT16_C( 3076) }, { -INT32_C( 42822689) } }, { { INT32_C( 27872303) }, { -INT16_C( 8110) }, - { -INT16_C( 8328)}, + { -INT16_C( 8328) }, { -INT32_C( 107207857) } }, { { INT32_C( 67057391) }, { -INT16_C( 2309) }, - { INT16_C( 1079)}, + { INT16_C( 1079) }, { INT32_C( 72040213) } }, + { { INT32_C( INT32_MAX) }, + { -INT16_C( 1) }, + { INT16_C( 1) }, + { INT32_C( INT32_MAX) } }, + { { INT32_C( INT32_MIN) }, + { INT16_C( 1) }, + { INT16_C( 1) }, + { INT32_C( INT32_MIN) } }, + { { INT32_C( 0) }, + { INT16_C( INT16_MIN) }, + { INT16_C( INT16_MIN) }, + { INT32_C(INT32_MIN + 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -64,36 +76,48 @@ test_simde_vqdmlsls_s32 (SIMDE_MUNIT_TEST_ARGS) { } test_vec[] = { { { INT64_C( 30598753824) }, { INT32_C( 472121) }, - { -INT32_C( 110072)}, + { -INT32_C( 110072) }, { INT64_C( 134533359248) } }, { { -INT64_C( 879266448960) }, { -INT32_C( 870787) }, - { INT32_C( 215247)}, + { INT32_C( 215247) }, { -INT64_C( 504397870182) } }, { { -INT64_C( 524706562706) }, { -INT32_C( 267546) }, - { -INT32_C( 367178)}, + { -INT32_C( 367178) }, { -INT64_C( 721180573082) } }, { { INT64_C( 141628624861) }, { -INT32_C( 323091) }, - { -INT32_C( 964426)}, + { -INT32_C( 964426) }, { -INT64_C( 481566096671) } }, { { INT64_C( 834712643290) }, { INT32_C( 246066) }, - { INT32_C( 479461)}, + { INT32_C( 479461) }, { INT64_C( 598754542438) } }, { { -INT64_C( 688696419359) }, { -INT32_C( 735157) }, - { -INT32_C( 39524)}, + { -INT32_C( 39524) }, { -INT64_C( 746809109895) } }, { { INT64_C( 903973493156) }, { -INT32_C( 527450) }, - { -INT32_C( 900523)}, + { -INT32_C( 900523) }, { -INT64_C( 45988219544) } }, { { INT64_C( 978260666802) }, { INT32_C( 529515) }, - { -INT32_C( 590095)}, + { -INT32_C( 590095) }, { INT64_C( 1603188974652) } }, + { { INT64_MAX }, + { -INT32_C( 1) }, + { INT32_C( 1) }, + { INT64_MAX } }, + { { INT64_MIN }, + { INT32_C( 1) }, + { INT32_C( 1) }, + { INT64_MIN } }, + { { INT64_C( 0) }, + { INT32_C( INT32_MIN) }, + { INT32_C( INT32_MIN) }, + { INT64_MIN + 1 } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -115,15 +139,15 @@ test_simde_vqdmlsl_s16 (SIMDE_MUNIT_TEST_ARGS) { } test_vec[] = { { { -INT32_C( 43220716), -INT32_C( 19739561), INT32_C( 79041776), INT32_C( 57556654) }, { INT16_C( 8044), -INT16_C( 4160), -INT16_C( 8934), INT16_C( 9184) }, - { -INT16_C( 6473), INT16_C( 5685), -INT16_C( 5752), INT16_C( 6515)}, + { -INT16_C( 6473), INT16_C( 5685), -INT16_C( 5752), INT16_C( 6515) }, { INT32_C( 60916908), INT32_C( 27559639), -INT32_C( 23734960), -INT32_C( 62110866) } }, { { -INT32_C( 17039563), INT32_C( 393668), INT32_C( 79830887), INT32_C( 97106023) }, { -INT16_C( 2620), INT16_C( 2519), -INT16_C( 9767), INT16_C( 2499) }, - { INT16_C( 1205), INT16_C( 7592), INT16_C( 5902), INT16_C( 5791)}, + { INT16_C( 1205), INT16_C( 7592), INT16_C( 5902), INT16_C( 5791) }, { -INT32_C( 10725363), -INT32_C( 37854828), INT32_C( 195120555), INT32_C( 68162605) } }, { { -INT32_C( 23439805), INT32_C( 58026650), -INT32_C( 5180845), INT32_C( 30866311) }, { -INT16_C( 8571), -INT16_C( 8928), INT16_C( 3895), -INT16_C( 9771) }, - { -INT16_C( 3876), INT16_C( 8898), -INT16_C( 9394), -INT16_C( 6781)}, + { -INT16_C( 3876), INT16_C( 8898), -INT16_C( 9394), -INT16_C( 6781) }, { -INT32_C( 89882197), INT32_C( 216909338), INT32_C( 67998415), -INT32_C( 101647991) } }, { { INT32_C( 36239852), -INT32_C( 51532048), -INT32_C( 71094540), -INT32_C( 44844918) }, { -INT16_C( 190), INT16_C( 446), -INT16_C( 752), INT16_C( 8001) }, @@ -131,24 +155,28 @@ test_simde_vqdmlsl_s16 (SIMDE_MUNIT_TEST_ARGS) { { INT32_C( 33647872), -INT32_C( 55975100), -INT32_C( 63094764), -INT32_C( 192431364) } }, { { -INT32_C( 3015366), INT32_C( 11701950), -INT32_C( 53181128), INT32_C( 35548712) }, { -INT16_C( 2890), INT16_C( 7657), INT16_C( 8388), INT16_C( 4823) }, - { -INT16_C( 1859), INT16_C( 7752), -INT16_C( 5345), INT16_C( 6927)}, + { -INT16_C( 1859), INT16_C( 7752), -INT16_C( 5345), INT16_C( 6927) }, { -INT32_C( 13760386), -INT32_C( 107012178), INT32_C( 36486592), -INT32_C( 31269130) } }, { { INT32_C( 29565525), -INT32_C( 44405017), INT32_C( 72557961), INT32_C( 76709539) }, { INT16_C( 6757), INT16_C( 8764), INT16_C( 1978), -INT16_C( 4268) }, - { INT16_C( 2772), -INT16_C( 9818), INT16_C( 8865), INT16_C( 5166)}, + { INT16_C( 2772), -INT16_C( 9818), INT16_C( 8865), INT16_C( 5166) }, { -INT32_C( 7895283), INT32_C( 127684887), INT32_C( 37488021), INT32_C( 120806515) } }, { { INT32_C( 89212585), -INT32_C( 71615372), -INT32_C( 12427788), -INT32_C( 40736248) }, { -INT16_C( 1166), INT16_C( 6690), INT16_C( 7767), -INT16_C( 5984) }, - { -INT16_C( 9261), -INT16_C( 1037), INT16_C( 6708), INT16_C( 4340)}, + { -INT16_C( 9261), -INT16_C( 1037), INT16_C( 6708), INT16_C( 4340) }, { INT32_C( 67615933), -INT32_C( 57740312), -INT32_C( 116629860), INT32_C( 11204872) } }, { { INT32_C( 3969110), INT32_C( 8492563), INT32_C( 23842348), INT32_C( 36300877) }, { INT16_C( 8057), -INT16_C( 9339), INT16_C( 1806), INT16_C( 8600) }, - { -INT16_C( 751), -INT16_C( 6991), INT16_C( 1494), -INT16_C( 6795)}, + { -INT16_C( 751), -INT16_C( 6991), INT16_C( 1494), -INT16_C( 6795) }, { INT32_C( 16070724), -INT32_C( 122085335), INT32_C( 18446020), INT32_C( 153174877) } }, { { -INT32_C( 16), -INT32_C( 15), -INT32_C( 14), -INT32_C( 13) }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }, { INT16_MIN, INT16_MIN, INT16_MIN, INT16_MIN }, { INT32_MIN, INT32_MIN, INT32_MIN, INT32_MIN } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 36300877) }, + { INT16_C( 1), INT16_C( 1), INT16_C( INT16_MIN), INT16_C( 8600) }, + { -INT16_C( 1), INT16_C( 1), INT16_C( INT16_MIN), -INT16_C( 6795) }, + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 153174877) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -207,6 +235,14 @@ test_simde_vqdmlsl_s32 (SIMDE_MUNIT_TEST_ARGS) { { INT32_MIN, INT32_MIN }, { INT32_MIN, INT32_MIN }, { INT64_MIN, INT64_MIN } }, + { { INT64_MAX, INT64_MIN }, + { -INT32_C( 1), INT32_C( 1) }, + { INT32_C( 1), INT32_C( 1) }, + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), -INT64_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0) }, + { INT64_MIN + 1, INT64_C( 0) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/qdmlsl_high.c b/test/arm/neon/qdmlsl_high.c index a3a41545e..c31292c79 100644 --- a/test/arm/neon/qdmlsl_high.c +++ b/test/arm/neon/qdmlsl_high.c @@ -59,6 +59,12 @@ test_simde_vqdmlsl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { { -INT16_C( 4202), -INT16_C( 2749), INT16_C( 5485), INT16_C( 6722), INT16_C( 2934), INT16_C( 6380), INT16_C( 6638), -INT16_C( 8437)}, { -INT32_C( 345375914), -INT32_C( 205639033), -INT32_C( 781620092), -INT32_C( 47771143) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 0) }, + { -INT16_C( 9903), -INT16_C( 7336), INT16_C( 1785), INT16_C( 0), + -INT16_C( 1), INT16_C( 1), INT16_C( INT16_MIN), -INT16_C( 0) }, + { -INT16_C( 9269), -INT16_C( 5310), INT16_C( 5746), INT16_C( 0), + INT16_C( 1), INT16_C( 1), INT16_C( INT16_MIN), -INT16_C( 0) }, + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -113,6 +119,14 @@ test_simde_vqdmlsl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 3312), INT32_C( 750732), INT32_C( 965480), -INT32_C( 996117) }, { INT32_C( 256052), INT32_C( 522416), INT32_C( 150123), -INT32_C( 381846)}, { -INT64_C( 1027616199478), -INT64_C( 502934722274) } }, + { { INT64_MAX, INT64_MIN }, + { -INT32_C( 759050), -INT32_C( 437291), -INT32_C( 1), INT32_C( 1) }, + { -INT32_C( 262650), INT32_C( 912777), INT32_C( 1), INT32_C( 1) }, + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( 0), INT32_C( 0), INT32_C( INT32_MIN), INT32_C( 0) }, + { INT32_C( 0), INT32_C( 0), INT32_C( INT32_MIN), INT32_C( 0) }, + { INT64_C( INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/qdmlsl_high_lane.c b/test/arm/neon/qdmlsl_high_lane.c index 5ce543d31..cf2375442 100644 --- a/test/arm/neon/qdmlsl_high_lane.c +++ b/test/arm/neon/qdmlsl_high_lane.c @@ -60,6 +60,12 @@ test_simde_vqdmlsl_high_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { { -INT16_C( 870), -INT16_C( 4925), -INT16_C( 9268), -INT16_C( 2864)}, INT8_C( 0), { -INT32_C( 425242208), -INT32_C( 88008484), -INT32_C( 775807954), INT32_C( 553410068) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 0) }, + { -INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), + INT16_C( 1), -INT16_C( 1), INT16_C( INT16_MIN), INT16_C( 0) }, + { INT16_C( INT16_MIN), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, + INT8_C( 0), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -140,6 +146,13 @@ test_simde_vqdmlsl_high_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { -INT16_C( 662), -INT16_C( 7109), -INT16_C( 7322), INT16_C( 4903)}, INT8_C( 5), { INT32_C( 439323270), -INT32_C( 810687717), -INT32_C( 901199062), -INT32_C( 16659039) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), -INT32_C( 0), INT32_C( 0) }, + { -INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0), + -INT16_C( 1), INT16_C( 1), INT16_C( 0), INT16_C( 0) }, + { INT16_C( 1), INT16_C( 0), INT16_C( 0), INT16_C( 0), + INT16_C( 0), INT16_C( 0), INT16_C( 0), INT16_C( 0) }, + INT8_C( 0), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -204,6 +217,16 @@ test_simde_vqdmlsl_high_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 515031), INT32_C( 410037), -INT32_C( 630721), -INT32_C( 183181)}, INT8_C( 2), { INT64_C( 487266790129), -INT64_C( 1206376454806) } }, + { { INT64_MAX, INT64_MIN }, + { INT32_C( 1), INT32_C( 1), -INT32_C( 1), INT32_C( 1) }, + { INT32_C( 1), -INT32_C( 49136) }, + INT8_C( 0), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( INT32_MIN), -INT32_C( 0), INT32_C( INT32_MIN), -INT32_C( 0) }, + { INT32_C( INT32_MIN), -INT32_C( 0) }, + INT8_C( 0), + { INT64_C(INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -268,6 +291,16 @@ test_simde_vqdmlsl_high_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { { INT32_C( 310134), INT32_C( 105922)}, INT8_C( 1), { INT64_C( 388382728614), -INT64_C( 623852161652) } }, + { { INT64_MAX, INT64_MIN }, + { INT32_C( 0), INT32_C( 0), -INT32_C( 1), INT32_C( 1) }, + { INT32_C( 1), INT32_C( 49136) }, + INT8_C( 0), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( 0), -INT32_C( 0), INT32_C( INT32_MIN), -INT32_C( 0) }, + { INT32_C( INT32_MIN), -INT32_C( 0) }, + INT8_C( 0), + { INT64_C(INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/qdmlsl_high_n.c b/test/arm/neon/qdmlsl_high_n.c index b83f2d42f..e51a62058 100644 --- a/test/arm/neon/qdmlsl_high_n.c +++ b/test/arm/neon/qdmlsl_high_n.c @@ -51,6 +51,11 @@ test_simde_vqdmlsl_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { -INT16_C( 9265), INT16_C( 3099), -INT16_C( 1349), -INT16_C( 6470) }, -INT16_C( 6407), { INT32_C( 881253899), INT32_C( 102706521), INT32_C( 245406877), INT32_C( 434182741) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), -INT32_C( 0) }, + { -INT16_C( 6426), -INT16_C( 6271), INT16_C( INT16_MIN), -INT16_C( 6015), + INT16_C( 1), -INT16_C( 1), INT16_C( INT16_MIN), -INT16_C( 0) }, + INT16_C( INT16_MIN), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -105,6 +110,14 @@ test_simde_vqdmlsl_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 664547), -INT32_C( 618870), -INT32_C( 428918), -INT32_C( 969644) }, INT32_C( 907914), { INT64_C( 907173343185), INT64_C( 1417800072682) } }, + { { INT64_MAX, INT64_MIN }, + { INT32_C( 535706), INT32_C( 97715), -INT32_C( 1), INT32_C( 1) }, + INT32_C( 1), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 97715), -INT32_C( 1), INT32_C( 0) }, + INT32_C( INT32_MIN), + { INT64_C( INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/qdmlsl_lane.c b/test/arm/neon/qdmlsl_lane.c index 0f1fb5f5f..1e3f2ecb4 100644 --- a/test/arm/neon/qdmlsl_lane.c +++ b/test/arm/neon/qdmlsl_lane.c @@ -54,6 +54,21 @@ test_simde_vqdmlslh_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { { INT16_C( 7185), -INT16_C( 7316), -INT16_C( 2257), INT16_C( 3710)}, INT8_C( 3), { INT32_C( 396191331) } }, + { { INT32_C( INT32_MAX) }, + { INT16_C( 1) }, + { -INT16_C( 1), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C( INT32_MAX) } }, + { { INT32_C( INT32_MIN) }, + { INT16_C( 1) }, + { INT16_C( 1), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C( INT32_MIN) } }, + { { INT32_C( 0) }, + { INT16_C( INT16_MIN) }, + { INT16_C( INT16_MIN), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C(INT32_MIN+1) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -122,6 +137,21 @@ test_simde_vqdmlsls_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { { INT32_C( 158891), -INT32_C( 5388)}, INT8_C( 1), { -INT64_C( 677135162821) } }, + { { INT64_MAX }, + { INT32_C( 1) }, + { -INT32_C( 1), INT32_C( 1026)}, + INT8_C( 0), + { INT64_MAX } }, + { { INT64_MIN }, + { INT32_C( 1) }, + { INT32_C( 1), INT32_C( 1026)}, + INT8_C( 0), + { INT64_MIN } }, + { { INT64_C( 0) }, + { INT32_C( INT32_MIN) }, + { INT32_C( INT32_MIN), INT32_C( 1026)}, + INT8_C( 0), + { INT64_C( INT64_MIN + 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -196,6 +226,24 @@ test_simde_vqdmlslh_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { INT16_C( 6007), -INT16_C( 748), -INT16_C( 8042), -INT16_C( 6337)}, INT8_C( 1), { -INT32_C( 781647588) } }, + { { INT32_C( INT32_MAX) }, + { INT16_C( 1) }, + { -INT16_C( 1), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523), + -INT16_C( 999), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C( INT32_MAX) } }, + { { INT32_C( INT32_MIN) }, + { INT16_C( 1) }, + { INT16_C( 1), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523), + INT16_C( 999), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C( INT32_MIN) } }, + { { INT32_C( 0) }, + { INT16_C( INT16_MIN) }, + { INT16_C( INT16_MIN), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523), + INT16_C( 999), INT16_C( 1026), -INT16_C( 4249), -INT16_C( 6523)}, + INT8_C( 0), + { INT32_C(INT32_MIN+1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -268,6 +316,21 @@ test_simde_vqdmlsls_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 467044), -INT32_C( 608804), -INT32_C( 160391), -INT32_C( 839703)}, INT8_C( 1), { -INT64_C( 1090453675400) } }, + { { INT64_MAX }, + { -INT32_C( 1) }, + { INT32_C( 1), INT32_C( 1026), -INT32_C( 4249), -INT32_C( 6523)}, + INT8_C( 0), + { INT64_MAX } }, + { { INT64_MIN }, + { INT32_C( 1) }, + { INT32_C( 1), INT32_C( 1026), -INT32_C( 4249), -INT32_C( 6523)}, + INT8_C( 0), + { INT64_MIN } }, + { { INT64_C( 0) }, + { INT32_C( INT32_MIN) }, + { INT32_C( INT32_MIN), INT32_C( 1026), -INT32_C( 4249), -INT32_C( 6523)}, + INT8_C( 0), + { INT64_C( INT64_MIN + 1) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -336,6 +399,11 @@ test_simde_vqdmlsl_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { { -INT16_C( 3308), INT16_C( 7584), INT16_C( 3613), INT16_C( 1332)}, INT8_C( 2), { -INT32_C( 436157520), INT32_C( 900459505), INT32_C( 90427411), INT32_C( 858150667) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), -INT32_C( 0), INT32_C( 0) }, + { INT16_C( 1), -INT16_C( 1), INT16_C( INT16_MIN), INT16_C( 0) }, + { INT16_C( INT16_MIN), INT16_C( 0), INT16_C( INT16_MIN), INT16_C( 0) }, + INT8_C( 0), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -406,6 +474,16 @@ test_simde_vqdmlsl_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 495740), -INT32_C( 746578)}, INT8_C( 0), { INT64_C( 1661844907379), -INT64_C( 643119055634) } }, + { { INT64_MAX, INT64_MIN }, + { -INT32_C( 1), INT32_C( 1) }, + { INT32_C( 1), -INT32_C( 49136) }, + INT8_C( 0), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0) }, + { INT32_C( INT32_MIN), -INT32_C( 0) }, + INT8_C( 0), + { INT64_C( INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -482,6 +560,13 @@ test_simde_vqdmlsl_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { INT16_C( 7940), -INT16_C( 8193), INT16_C( 2977), -INT16_C( 9125)}, INT8_C( 6), { -INT32_C( 610893810), INT32_C( 85782822), -INT32_C( 854851323), INT32_C( 295488148) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 0) }, + { INT16_C( 1), -INT16_C( 1), INT16_C( INT16_MIN), -INT16_C( 0) }, + { INT16_C( INT16_MIN), INT16_C( 0), INT16_C( INT16_MIN), INT16_C( 0), + -INT16_C( 7423), INT16_C( 3231), INT16_C( 0), INT16_C( 0)}, + INT8_C( 0), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, + }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -556,6 +641,16 @@ test_simde_vqdmlsl_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { { INT32_C( 746888), -INT32_C( 42237), -INT32_C( 62401), INT32_C( 286730)}, INT8_C( 3), { -INT64_C( 688808069615), INT64_C( 538509412058) } }, + { { INT64_MAX, INT64_MIN }, + { -INT32_C( 1), INT32_C( 1) }, + { INT32_C( 1), -INT32_C( 975727), INT32_C( 913570), INT32_C( 949988)}, + INT8_C( 0), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 913570), INT32_C( 949988)}, + INT8_C( 0), + { INT64_C(INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { diff --git a/test/arm/neon/qdmlsl_n.c b/test/arm/neon/qdmlsl_n.c index b3a49d8a3..cafc15b40 100644 --- a/test/arm/neon/qdmlsl_n.c +++ b/test/arm/neon/qdmlsl_n.c @@ -43,6 +43,10 @@ test_simde_vqdmlsl_n_s16 (SIMDE_MUNIT_TEST_ARGS) { { -INT16_C( 9606), -INT16_C( 5322), -INT16_C( 1423), -INT16_C( 2335) }, -INT16_C( 7502), { INT32_C( 174449212), INT32_C( 62770090), INT32_C( 282404556), -INT32_C( 379167265) } }, + { { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C( 0), INT32_C( 0) }, + { INT16_C( 1), -INT16_C( 1), INT16_C( INT16_MIN), INT16_C( 0) }, + INT16_C( INT16_MIN), + { INT32_C( INT32_MAX), INT32_C( INT32_MIN), INT32_C(INT32_MIN+1), INT32_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -97,6 +101,14 @@ test_simde_vqdmlsl_n_s32 (SIMDE_MUNIT_TEST_ARGS) { { -INT32_C( 331014), -INT32_C( 715967) }, INT32_C( 784785), { INT64_C( 1028566487305), INT64_C( 783287823739) } }, + { { INT64_MAX, INT64_MIN }, + { -INT32_C( 1), INT32_C( 1) }, + INT32_C( 1), + { INT64_MAX, INT64_MIN } }, + { { INT64_C( 0), INT64_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 0) }, + INT32_C( INT32_MIN), + { INT64_C( INT64_MIN + 1), INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { From facf2cbedf2d598db1cb8ac54166504b18163f10 Mon Sep 17 00:00:00 2001 From: Ryo Date: Tue, 5 Mar 2024 10:33:10 +0000 Subject: [PATCH 2/3] Fix qdmlsl instructions The qdmlsl instructions were implemented without any saturation. This has been fixed by utilising existing saturating instructions which are implemented correctly. Unit tests have also been updated to test for saturation. --- test/arm/neon/qdmlsl_high_n.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/arm/neon/qdmlsl_high_n.c b/test/arm/neon/qdmlsl_high_n.c index e51a62058..4ec4e9cb5 100644 --- a/test/arm/neon/qdmlsl_high_n.c +++ b/test/arm/neon/qdmlsl_high_n.c @@ -115,9 +115,9 @@ test_simde_vqdmlsl_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { INT32_C( 1), { INT64_MAX, INT64_MIN } }, { { INT64_C( 0), INT64_C( 0) }, - { INT32_C( INT32_MIN), INT32_C( 97715), -INT32_C( 1), INT32_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 97715), -INT32_C( INT32_MIN), INT32_C( 0) }, INT32_C( INT32_MIN), - { INT64_C( INT64_MIN + 1), INT64_C( 0) } }, + { INT64_MIN + 1, INT64_C( 0) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { From 2d07698be3cf42468b8c06f8feb3257849c064d4 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Tue, 20 Aug 2024 10:44:11 +0200 Subject: [PATCH 3/3] fix typos --- simde/arm/neon/qdmlsl.h | 2 +- test/arm/neon/qdmlsl_high_n.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/simde/arm/neon/qdmlsl.h b/simde/arm/neon/qdmlsl.h index ae7ef703f..b476572be 100644 --- a/simde/arm/neon/qdmlsl.h +++ b/simde/arm/neon/qdmlsl.h @@ -95,4 +95,4 @@ simde_vqdmlsl_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP -#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */ \ No newline at end of file +#endif /* !defined(SIMDE_ARM_NEON_QDMLSL_H) */ diff --git a/test/arm/neon/qdmlsl_high_n.c b/test/arm/neon/qdmlsl_high_n.c index 4ec4e9cb5..8992e095e 100644 --- a/test/arm/neon/qdmlsl_high_n.c +++ b/test/arm/neon/qdmlsl_high_n.c @@ -115,7 +115,7 @@ test_simde_vqdmlsl_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { INT32_C( 1), { INT64_MAX, INT64_MIN } }, { { INT64_C( 0), INT64_C( 0) }, - { INT32_C( INT32_MIN), INT32_C( 97715), -INT32_C( INT32_MIN), INT32_C( 0) }, + { INT32_C( INT32_MIN), INT32_C( 97715), INT32_C( INT32_MIN), INT32_C( 0) }, INT32_C( INT32_MIN), { INT64_MIN + 1, INT64_C( 0) } }, };