Skip to content

Commit

Permalink
Merge pull request #438 from howjmay/vqdmull_high_n
Browse files Browse the repository at this point in the history
feat: Add vqdmull_high_n_[s16|s32]
  • Loading branch information
howjmay authored Jul 23, 2024
2 parents d10196b + 3d5c529 commit 05bb5f5
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 6 deletions.
12 changes: 10 additions & 2 deletions neon2rvv.h
Original file line number Diff line number Diff line change
Expand Up @@ -10617,9 +10617,17 @@ FORCE_INLINE int64x2_t vqdmull_n_s32(int32x2_t a, int32_t b) {
return __riscv_vlmul_trunc_v_i64m2_i64m1(__riscv_vsll_vx_i64m2(ab_mul, 1, 2));
}

// FORCE_INLINE int32x4_t vqdmull_high_n_s16(int16x8_t a, int16_t b);
// Signed saturating doubling multiply long, upper half, by scalar.
// For each of the upper four 16-bit lanes of `a`, produces
// saturate_int32(2 * a[i + 4] * b).
//   a: eight int16 lanes; only lanes 4..7 contribute to the result.
//   b: scalar multiplier applied to every selected lane.
//   returns: four int32 lanes.
FORCE_INLINE int32x4_t vqdmull_high_n_s16(int16x8_t a, int16_t b) {
  // Bring lanes 4..7 down to positions 0..3.
  vint16m1_t a_high = __riscv_vslidedown_vx_i16m1(a, 4, 8);
  // Widening multiply: an int16 x int16 product always fits in int32.
  vint32m2_t ab_mul = __riscv_vwmul_vx_i32m2(a_high, b, 4);
  // Double with a saturating add instead of a left shift: when both operands
  // are INT16_MIN the doubled product is 2^31, which must clamp to INT32_MAX
  // rather than wrap around to INT32_MIN.
  return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vsadd_vv_i32m2(ab_mul, ab_mul, 4));
}

// FORCE_INLINE int64x2_t vqdmull_high_n_s32(int32x4_t a, int32_t b);
// Signed saturating doubling multiply long, upper half, by scalar.
// For each of the upper two 32-bit lanes of `a`, produces
// saturate_int64(2 * a[i + 2] * b).
//   a: four int32 lanes; only lanes 2..3 contribute to the result.
//   b: scalar multiplier applied to every selected lane.
//   returns: two int64 lanes.
FORCE_INLINE int64x2_t vqdmull_high_n_s32(int32x4_t a, int32_t b) {
  // Bring lanes 2..3 down to positions 0..1.
  vint32m1_t a_high = __riscv_vslidedown_vx_i32m1(a, 2, 4);
  // Widening multiply: an int32 x int32 product always fits in int64.
  vint64m2_t ab_mul = __riscv_vwmul_vx_i64m2(a_high, b, 2);
  // Double with a saturating add instead of a left shift: when both operands
  // are INT32_MIN the doubled product is 2^63, which must clamp to INT64_MAX
  // rather than wrap around to INT64_MIN.
  return __riscv_vlmul_trunc_v_i64m2_i64m1(__riscv_vsadd_vv_i64m2(ab_mul, ab_mul, 2));
}

FORCE_INLINE int16x8_t vqdmulhq_n_s16(int16x8_t a, int16_t b) {
vint16m1_t b_dup = vdupq_n_s16(b);
Expand Down
34 changes: 32 additions & 2 deletions tests/impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33960,9 +33960,39 @@ result_t test_vqdmull_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#endif // ENABLE_TEST_ALL
}

result_t test_vqdmull_high_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Compares vqdmull_high_n_s16 with a scalar reference: sat_dmull applied to
// each of the upper four input lanes and the first element of the second
// test buffer. Reports TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vqdmull_high_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int16_t *src = (int16_t *)impl.test_cases_int_pointer1;
  const int16_t *scalar_src = (int16_t *)impl.test_cases_int_pointer2;

  // Scalar reference for lanes 4..7 of the input vector.
  int32_t expected[4];
  int lane = 0;
  while (lane < 4) {
    expected[lane] = sat_dmull(src[lane + 4], scalar_src[0]);
    ++lane;
  }

  int16x8_t vec = vld1q_s16(src);
  int32x4_t actual = vqdmull_high_n_s16(vec, scalar_src[0]);
  return validate_int32(actual, expected[0], expected[1], expected[2], expected[3]);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_vqdmull_high_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
// Compares vqdmull_high_n_s32 with a scalar reference: sat_dmull applied to
// each of the upper two input lanes and the first element of the second
// test buffer. Reports TEST_UNIMPL when ENABLE_TEST_ALL is not defined.
result_t test_vqdmull_high_n_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
  const int32_t *src = (int32_t *)impl.test_cases_int_pointer1;
  const int32_t *scalar_src = (int32_t *)impl.test_cases_int_pointer2;

  // Scalar reference for lanes 2..3 of the input vector.
  int64_t expected[2];
  int lane = 0;
  while (lane < 2) {
    expected[lane] = sat_dmull(src[lane + 2], scalar_src[0]);
    ++lane;
  }

  int32x4_t vec = vld1q_s32(src);
  int64x2_t actual = vqdmull_high_n_s32(vec, scalar_src[0]);
  return validate_int64(actual, expected[0], expected[1]);
#else
  return TEST_UNIMPL;
#endif  // ENABLE_TEST_ALL
}

result_t test_vqdmulhq_n_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
#ifdef ENABLE_TEST_ALL
Expand Down
4 changes: 2 additions & 2 deletions tests/impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -2197,8 +2197,8 @@
_(vmull_high_n_u32) \
_(vqdmull_n_s16) \
_(vqdmull_n_s32) \
/*_(vqdmull_high_n_s16) */ \
/*_(vqdmull_high_n_s32) */ \
_(vqdmull_high_n_s16) \
_(vqdmull_high_n_s32) \
_(vqdmulhq_n_s16) \
_(vqdmulhq_n_s32) \
_(vqdmulh_n_s16) \
Expand Down

0 comments on commit 05bb5f5

Please sign in to comment.