Skip to content

Commit

Permalink
Merge pull request #445 from howjmay/vqrshrun
Browse files Browse the repository at this point in the history
feat: Add vqrshrun[h|s|d]_[s16|s32|s64]
  • Loading branch information
howjmay authored Jul 24, 2024
2 parents b498bea + e2cb013 commit 0c55d3e
Show file tree
Hide file tree
Showing 3 changed files with 96 additions and 9 deletions.
20 changes: 17 additions & 3 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -6110,11 +6110,25 @@ FORCE_INLINE uint32x2_t vqrshrun_n_s64(int64x2_t a, const int b) {
return __riscv_vnclipu_wx_u32m1(__riscv_vreinterpret_v_i64m2_u64m2(a_eliminate_neg), b, __RISCV_VXRM_RNU, 2);
}

// FORCE_INLINE uint8_t vqrshrunh_n_s16(int16_t a, const int n);
// Scalar rounding shift right of a signed 16-bit value with unsigned 8-bit
// saturation: adds 1 << (n - 1), shifts right by n, clamps negative results to
// zero and hands the value to neon2rvv_saturate_uint8 for the final narrowing.
// n is used as a shift count and in n - 1; it is not range-checked here.
FORCE_INLINE uint8_t vqrshrunh_n_s16(int16_t a, const int n) {
  // Half of the discarded range; adding it makes the shift round to nearest.
  const int16_t half = 1 << (n - 1);
  // The sum is evaluated in int (integer promotion) before being shifted.
  const int16_t rounded = (a + half) >> n;
  // Negative results become 0 before the unsigned saturation step.
  const int16_t non_negative = rounded < 0 ? 0 : rounded;
  return neon2rvv_saturate_uint8(non_negative);
}

// FORCE_INLINE uint16_t vqrshruns_n_s32(int32_t a, const int n);
// Scalar rounding shift right of a signed 32-bit value with unsigned 16-bit
// saturation: adds 1 << (n - 1), shifts right by n, clamps negative results to
// zero and hands the value to neon2rvv_saturate_uint16 for the final narrowing.
// n is used as a shift count and in n - 1, so it must be at least 1; it is not
// range-checked here.
FORCE_INLINE uint16_t vqrshruns_n_s32(int32_t a, const int n) {
  // Do the rounding add in 64 bits: evaluating `a + round_const` in int32_t is
  // signed overflow (undefined behaviour) when `a` is close to INT32_MAX, and a
  // wrapped sum would produce 0 instead of the saturated maximum.
  const int64_t round_const = (int64_t)1 << (n - 1);
  // After shifting right by n >= 1 the value fits in int32_t again.
  int32_t tmp = (int32_t)(((int64_t)a + round_const) >> n);
  // tmp >> 31 is all ones for negative tmp, so this zeroes negative results.
  tmp = ~(tmp >> 31) & tmp;
  return neon2rvv_saturate_uint16(tmp);
}

// FORCE_INLINE uint32_t vqrshrund_n_s64(int64_t a, const int n);
// Scalar rounding shift right of a signed 64-bit value with unsigned 32-bit
// saturation. No wider type is used for the rounding add, so the shift is
// split: first by n - 1, then a final rounding halving step. Negative
// intermediate values are clamped to zero, and the result is handed to
// neon2rvv_saturate_uint32 for the final narrowing.
// n is used in n - 1 as a shift count, so it must be at least 1; it is not
// range-checked here.
FORCE_INLINE uint32_t vqrshrund_n_s64(int64_t a, const int n) {
  int64_t tmp = a >> (n - 1);
  // tmp >> 63 is all ones for negative tmp, so this zeroes negative values.
  tmp = ~(tmp >> 63) & tmp;
  // Equivalent to (tmp + 1) >> 1 for non-negative tmp, but cannot overflow:
  // with n == 1 and a == INT64_MAX, `tmp + 1` would be signed overflow.
  return neon2rvv_saturate_uint32((tmp >> 1) + (tmp & 1));
}

// FORCE_INLINE uint8x16_t vqrshrun_high_n_s16(uint8x8_t r, int16x8_t a, const int n);

Expand Down
79 changes: 76 additions & 3 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21068,11 +21068,84 @@ result_t test_vqrshrun_n_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

result_t test_vqrshrunh_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Test for vqrshrunh_n_s16: compares the intrinsic's result for _a[0] against
// a scalar reference computed inline, once per TEST_IMPL expansion.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vqrshrunh_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const int16_t *_a = (const int16_t *)impl.test_cases_int_pointer1;
uint8_t _c, c;
uint16_t round_const;

// Reference for a shift amount of IDX + 1: add 1 << IDX, shift right by
// IDX + 1, clamp negative values to 0, then narrow with saturate_uint8.
// Only _a[0] is read. The loop index i is never used, so each of the 8
// iterations recomputes the same _c.
#define TEST_IMPL(IDX) \
for (int i = 0; i < 8; i++) { \
round_const = 1 << ((IDX + 1) - 1); \
int16_t tmp = ((int16_t)_a[0] + round_const) >> (IDX + 1); \
if (tmp < 0) { \
tmp = 0; \
} \
_c = saturate_uint8(tmp); \
} \
c = vqrshrunh_n_s16(_a[0], (IDX + 1)); \
CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL)

// NOTE(review): the one-line TEST_UNIMPL stubs for test_vqrshruns_n_s32 and
// test_vqrshrund_n_s64 that appear inside this body look like removed-side
// lines left over from the diff rendering; confirm against the real file.
result_t test_vqrshruns_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Presumably expands TEST_IMPL once per shift amount (IDX 0..7); the macro is
// defined elsewhere. TODO confirm.
IMM_8_ITER
#undef TEST_IMPL

result_t test_vqrshrund_n_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
return TEST_SUCCESS;
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

// Test for vqrshruns_n_s32: compares the intrinsic's result for _a[0] against
// a scalar reference computed inline, once per TEST_IMPL expansion.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vqrshruns_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int32_t *_a = (const int32_t *)impl.test_cases_int_pointer1;
  uint16_t _c, c;

// Reference for a shift amount of IDX + 1: add 1 << IDX, shift right by
// IDX + 1, clamp negative values to 0, then narrow with saturate_uint16.
// The rounding add is done in int64_t because `_a[0] + round_const` in int32_t
// is signed overflow when _a[0] is close to INT32_MAX; after the shift the
// value fits in int32_t again. Only _a[0] is read, so the reference is
// computed once inside a plain scope (the scope keeps `tmp` local to each
// expansion).
#define TEST_IMPL(IDX) \
  { \
    const int64_t round_const = (int64_t)1 << ((IDX + 1) - 1); \
    int32_t tmp = (int32_t)(((int64_t)_a[0] + round_const) >> (IDX + 1)); \
    if (tmp < 0) { \
      tmp = 0; \
    } \
    _c = saturate_uint16(tmp); \
  } \
  c = vqrshruns_n_s32(_a[0], (IDX + 1)); \
  CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL)

  IMM_16_ITER
#undef TEST_IMPL

  return TEST_SUCCESS;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Test for vqrshrund_n_s64: compares the intrinsic's result for _a[0] against
// a scalar reference computed inline, once per TEST_IMPL expansion.
// Returns TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vqrshrund_n_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int64_t *_a = (const int64_t *)impl.test_cases_int_pointer1;
  uint32_t _c, c;

// Reference for a shift amount of IDX + 1: shift right by IDX, clamp negative
// values to 0, then apply a rounding halving step and narrow with
// saturate_uint32. The halving is written as (tmp >> 1) + (tmp & 1), which
// equals (tmp + 1) >> 1 for non-negative tmp but cannot overflow when tmp is
// INT64_MAX. Only _a[0] is read, so the reference is computed once inside a
// plain scope (the scope keeps `tmp` local to each expansion).
#define TEST_IMPL(IDX) \
  { \
    int64_t tmp = _a[0] >> ((IDX + 1) - 1); \
    if (tmp < 0) { \
      tmp = 0; \
    } \
    _c = saturate_uint32((tmp >> 1) + (tmp & 1)); \
  } \
  c = vqrshrund_n_s64(_a[0], (IDX + 1)); \
  CHECK_RESULT(c == _c ? TEST_SUCCESS : TEST_FAIL)

  IMM_32_ITER
#undef TEST_IMPL

  return TEST_SUCCESS;
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

// Placeholder: always reports TEST_UNIMPL; neither argument is used.
result_t test_vqrshrun_high_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
  return TEST_UNIMPL;
}

Expand Down
6 changes: 3 additions & 3 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1163,9 +1163,9 @@
_(vqrshrun_n_s16) \
_(vqrshrun_n_s32) \
_(vqrshrun_n_s64) \
/*_(vqrshrunh_n_s16) */ \
/*_(vqrshruns_n_s32) */ \
/*_(vqrshrund_n_s64) */ \
_(vqrshrunh_n_s16) \
_(vqrshruns_n_s32) \
_(vqrshrund_n_s64) \
/*_(vqrshrun_high_n_s16) */ \
/*_(vqrshrun_high_n_s32) */ \
/*_(vqrshrun_high_n_s64) */ \
Expand Down

0 comments on commit 0c55d3e

Please sign in to comment.