From 0d95a3e42be9c3836ba57e8e8e1831295b9685d1 Mon Sep 17 00:00:00 2001 From: Yang Hau Date: Wed, 24 Jul 2024 04:00:18 +0800 Subject: [PATCH] feat: Add vqshl[b|h|s|d]_n_[s8|s16|s32|s64|u8|u16|u32|u64] --- neon2rvv.h | 33 +++++++--- tests/impl.cpp | 168 ++++++++++++++++++++++++++++++++++++++++++++++--- tests/impl.h | 16 ++--- 3 files changed, 193 insertions(+), 24 deletions(-) diff --git a/neon2rvv.h b/neon2rvv.h index 55e7ef2b..177630f9 100644 --- a/neon2rvv.h +++ b/neon2rvv.h @@ -6270,21 +6270,38 @@ FORCE_INLINE uint64x2_t vqshlq_n_u64(uint64x2_t a, const int b) { return __riscv_vmerge_vxm_u64m1(shl, UINT64_MAX, mask_sat_positive, 2); } -// FORCE_INLINE int8_t vqshlb_n_s8(int8_t a, const int n); +FORCE_INLINE int8_t vqshlb_n_s8(int8_t a, const int n) { return neon2rvv_saturate_int8(a << n); } -// FORCE_INLINE int16_t vqshlh_n_s16(int16_t a, const int n); +FORCE_INLINE int16_t vqshlh_n_s16(int16_t a, const int n) { return neon2rvv_saturate_int16(a << n); } -// FORCE_INLINE int32_t vqshls_n_s32(int32_t a, const int n); +FORCE_INLINE int32_t vqshls_n_s32(int32_t a, const int n) { return neon2rvv_saturate_int32((int64_t)a << n); } -// FORCE_INLINE int64_t vqshld_n_s64(int64_t a, const int n); +FORCE_INLINE int64_t vqshld_n_s64(int64_t a, const int n) { + if (a > 0) { + if (a > (INT64_MAX >> n)) { + return INT64_MAX; + } else { + return a << n; + } + } + if (a < (INT64_MIN >> n)) { + return INT64_MIN; + } + return a << n; +} -// FORCE_INLINE uint8_t vqshlb_n_u8(uint8_t a, const int n); +FORCE_INLINE uint8_t vqshlb_n_u8(uint8_t a, const int n) { return neon2rvv_saturate_uint8(a << n); } -// FORCE_INLINE uint16_t vqshlh_n_u16(uint16_t a, const int n); +FORCE_INLINE uint16_t vqshlh_n_u16(uint16_t a, const int n) { return neon2rvv_saturate_uint16(a << n); } -// FORCE_INLINE uint32_t vqshls_n_u32(uint32_t a, const int n); +FORCE_INLINE uint32_t vqshls_n_u32(uint32_t a, const int n) { return neon2rvv_saturate_uint32((uint64_t)a << n); } -// FORCE_INLINE uint64_t vqshld_n_u64(uint64_t a, const int n); +FORCE_INLINE uint64_t vqshld_n_u64(uint64_t a, const int n) { + if (a > (UINT64_MAX >> n)) { + return UINT64_MAX; + } + return a << n; +} FORCE_INLINE uint8x8_t vqshlu_n_s8(int8x8_t a, const int b) { vint8m1_t a_non_neg = __riscv_vmax_vx_i8m1(a, 0, 8); diff --git a/tests/impl.cpp b/tests/impl.cpp index a6b4f97a..4efea465 100644 --- a/tests/impl.cpp +++ b/tests/impl.cpp @@ -21890,21 +21890,173 @@ result_t test_vqshlq_n_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { #endif // ENABLE_TEST_ALL } -result_t test_vqshlb_n_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } +result_t test_vqshlb_n_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const int8_t *_a = (const int8_t *)impl.test_cases_int_pointer1; + int8_t _c, c; + +#define TEST_IMPL(IDX) \ + _c = saturate_int8(_a[0] << IDX); \ + c = vqshlb_n_s8(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_8_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif +} + +result_t test_vqshlh_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1; + int16_t _c, c; + +#define TEST_IMPL(IDX) \ + _c = saturate_int16(_a[0] << IDX); \ + c = vqshlh_n_s16(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_16_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} + +result_t test_vqshls_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1; + int32_t _c, c; + +#define TEST_IMPL(IDX) \ + _c = saturate_int32((int64_t)_a[0] << IDX); \ + c = vqshls_n_s32(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_32_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} + +result_t test_vqshld_n_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const int64_t *_a = (const int64_t *)impl.test_cases_int_pointer1; + int64_t _c, c; + +#define TEST_IMPL(IDX) \ + if (_a[0] > 0) { \ + if (_a[0] > (INT64_MAX >> IDX)) { \ + _c = INT64_MAX; \ + } else { \ + _c = _a[0] << IDX; \ + } \ + } else { \ + if (_a[0] < (INT64_MIN >> IDX)) { \ + _c = INT64_MIN; \ + } else { \ + _c = _a[0] << IDX; \ + } \ + } \ + c = vqshld_n_s64(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_64_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} + +result_t test_vqshlb_n_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const uint8_t *_a = (const uint8_t *)impl.test_cases_int_pointer1; + uint8_t _c, c; + +#define TEST_IMPL(IDX) \ + _c = saturate_uint8(_a[0] << IDX); \ + c = vqshlb_n_u8(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) -result_t test_vqshlh_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } + IMM_8_ITER +#undef TEST_IMPL -result_t test_vqshls_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif +} -result_t test_vqshld_n_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } +result_t test_vqshlh_n_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const uint16_t *_a = (const uint16_t *)impl.test_cases_int_pointer1; + uint16_t _c, c; -result_t test_vqshlb_n_u8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } +#define TEST_IMPL(IDX) \ + _c = saturate_uint16(_a[0] << IDX); \ + c = vqshlh_n_u16(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) -result_t test_vqshlh_n_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } + IMM_16_ITER +#undef TEST_IMPL -result_t test_vqshls_n_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} -result_t test_vqshld_n_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; } +result_t test_vqshls_n_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const uint32_t *_a = (const uint32_t *)impl.test_cases_int_pointer1; + uint32_t _c, c; + +#define TEST_IMPL(IDX) \ + _c = saturate_uint32((int64_t)_a[0] << IDX); \ + c = vqshls_n_u32(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_32_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} + +result_t test_vqshld_n_u64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { +#ifdef ENABLE_TEST_ALL + const uint64_t *_a = (const uint64_t *)impl.test_cases_int_pointer1; + uint64_t _c, c; + +#define TEST_IMPL(IDX) \ + if (_a[0] > (UINT64_MAX >> IDX)) { \ + _c = UINT64_MAX; \ + } else { \ + _c = _a[0] << IDX; \ + } \ + c = vqshld_n_u64(_a[0], IDX); \ + CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL) + + IMM_64_ITER +#undef TEST_IMPL + + return TEST_SUCCESS; +#else + return TEST_UNIMPL; +#endif // ENABLE_TEST_ALL +} result_t test_vqshlu_n_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { #ifdef ENABLE_TEST_ALL diff --git a/tests/impl.h b/tests/impl.h index 384039aa..98e22579 100644 --- a/tests/impl.h +++ b/tests/impl.h @@ -1203,14 +1203,14 @@ _(vqshlq_n_u16) \ _(vqshlq_n_u32) \ _(vqshlq_n_u64) \ - /*_(vqshlb_n_s8) */ \ - /*_(vqshlh_n_s16) */ \ - /*_(vqshls_n_s32) */ \ - /*_(vqshld_n_s64) */ \ - /*_(vqshlb_n_u8) */ \ - /*_(vqshlh_n_u16) */ \ - /*_(vqshls_n_u32) */ \ - /*_(vqshld_n_u64) */ \ + _(vqshlb_n_s8) \ + _(vqshlh_n_s16) \ + _(vqshls_n_s32) \ + _(vqshld_n_s64) \ + _(vqshlb_n_u8) \ + _(vqshlh_n_u16) \ + _(vqshls_n_u32) \ + _(vqshld_n_u64) \ _(vqshlu_n_s8) \ _(vqshlu_n_s16) \ _(vqshlu_n_s32) \