Skip to content

Commit

Permalink
Merge pull request #448 from howjmay/vqmovun_high
Browse files Browse the repository at this point in the history
feat: Add vqmovun_high_[s16|s32|s64]
  • Loading branch information
howjmay authored Jul 24, 2024
2 parents e4be9a7 + fe209e5 commit 8c6804a
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 9 deletions.
18 changes: 15 additions & 3 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -8278,11 +8278,23 @@ FORCE_INLINE uint32x2_t vqmovun_s64(int64x2_t a) {

// FORCE_INLINE uint32_t vqmovund_s64(int64_t a);

// FORCE_INLINE uint8x16_t vqmovun_high_s16(uint8x8_t r, int16x8_t a);
FORCE_INLINE uint8x16_t vqmovun_high_s16(uint8x8_t r, int16x8_t a) {
vuint16m1_t a_non_neg = __riscv_vreinterpret_v_i16m1_u16m1(__riscv_vmax_vx_i16m1(a, 0, 8));
vuint8m1_t vqmovun = __riscv_vlmul_ext_v_u8mf2_u8m1(__riscv_vnclipu_wx_u8mf2(a_non_neg, 0, __RISCV_VXRM_RDN, 8));
return __riscv_vslideup_vx_u8m1(r, vqmovun, 8, 16);
}

// FORCE_INLINE uint16x8_t vqmovun_high_s32(uint16x4_t r, int32x4_t a);
FORCE_INLINE uint16x8_t vqmovun_high_s32(uint16x4_t r, int32x4_t a) {
vuint32m1_t a_non_neg = __riscv_vreinterpret_v_i32m1_u32m1(__riscv_vmax_vx_i32m1(a, 0, 4));
vuint16m1_t vqmovun = __riscv_vlmul_ext_v_u16mf2_u16m1(__riscv_vnclipu_wx_u16mf2(a_non_neg, 0, __RISCV_VXRM_RDN, 4));
return __riscv_vslideup_vx_u16m1(r, vqmovun, 4, 8);
}

// FORCE_INLINE uint32x4_t vqmovun_high_s64(uint32x2_t r, int64x2_t a);
FORCE_INLINE uint32x4_t vqmovun_high_s64(uint32x2_t r, int64x2_t a) {
vuint64m1_t a_non_neg = __riscv_vreinterpret_v_i64m1_u64m1(__riscv_vmax_vx_i64m1(a, 0, 2));
vuint32m1_t vqmovun = __riscv_vlmul_ext_v_u32mf2_u32m1(__riscv_vnclipu_wx_u32mf2(a_non_neg, 0, __RISCV_VXRM_RDN, 2));
return __riscv_vslideup_vx_u32m1(r, vqmovun, 2, 4);
}

FORCE_INLINE int16x8_t vmovl_s8(int8x8_t a) { return __riscv_vlmul_trunc_v_i16m2_i16m1(__riscv_vsext_vf2_i16m2(a, 8)); }

Expand Down
73 changes: 70 additions & 3 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29752,11 +29752,78 @@ result_t test_vqmovuns_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { retu

result_t test_vqmovund_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }

result_t test_vqmovun_high_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
result_t test_vqmovun_high_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
const uint8_t *_r = (uint8_t *)impl.test_cases_int_pointer2;
uint8_t _c[16];
for (int i = 0; i < 8; i++) {
if (_a[i] < 0) {
_c[i + 8] = 0;
} else if (_a[i] > UINT8_MAX) {
_c[i + 8] = UINT8_MAX;
} else {
_c[i + 8] = _a[i];
}
_c[i] = _r[i];
}
int16x8_t a = vld1q_s16(_a);
uint8x8_t r = vld1_u8(_r);
uint8x16_t c = vqmovun_high_s16(r, a);
return validate_uint8(c, _c[0], _c[1], _c[2], _c[3], _c[4], _c[5], _c[6], _c[7], _c[8], _c[9], _c[10], _c[11], _c[12],
_c[13], _c[14], _c[15]);
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

result_t test_vqmovun_high_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
result_t test_vqmovun_high_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
const uint16_t *_r = (uint16_t *)impl.test_cases_int_pointer2;
uint16_t _c[8];
for (int i = 0; i < 4; i++) {
if (_a[i] < 0) {
_c[i + 4] = 0;
} else if (_a[i] > UINT16_MAX) {
_c[i + 4] = UINT16_MAX;
} else {
_c[i + 4] = _a[i];
}
_c[i] = _r[i];
}
int32x4_t a = vld1q_s32(_a);
uint16x4_t r = vld1_u16(_r);
uint16x8_t c = vqmovun_high_s32(r, a);
return validate_uint16(c, _c[0], _c[1], _c[2], _c[3], _c[4], _c[5], _c[6], _c[7]);
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

result_t test_vqmovun_high_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
result_t test_vqmovun_high_s64(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
const int64_t *_a = (int64_t *)impl.test_cases_int_pointer1;
const uint32_t *_r = (uint32_t *)impl.test_cases_int_pointer2;
uint32_t _c[4];
for (int i = 0; i < 2; i++) {
if (_a[i] < 0) {
_c[i + 2] = 0;
} else if (_a[i] > UINT32_MAX) {
_c[i + 2] = UINT32_MAX;
} else {
_c[i + 2] = _a[i];
}
_c[i] = _r[i];
}
int64x2_t a = vld1q_s64(_a);
uint32x2_t r = vld1_u32(_r);
uint32x4_t c = vqmovun_high_s64(r, a);
return validate_uint32(c, _c[0], _c[1], _c[2], _c[3]);
#else
return TEST_UNIMPL;
#endif // ENABLE_TEST_ALL
}

result_t test_vmovl_s8(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
Expand Down
6 changes: 3 additions & 3 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -1795,9 +1795,9 @@
/*_(vqmovunh_s16) */ \
/*_(vqmovuns_s32) */ \
/*_(vqmovund_s64) */ \
/*_(vqmovun_high_s16) */ \
/*_(vqmovun_high_s32) */ \
/*_(vqmovun_high_s64) */ \
_(vqmovun_high_s16) \
_(vqmovun_high_s32) \
_(vqmovun_high_s64) \
_(vmovl_s8) \
_(vmovl_s16) \
_(vmovl_s32) \
Expand Down

0 comments on commit 8c6804a

Please sign in to comment.