diff --git a/neon2rvv.h b/neon2rvv.h
index 9b499084..a2b4de53 100644
--- a/neon2rvv.h
+++ b/neon2rvv.h
@@ -9025,13 +9025,29 @@ FORCE_INLINE uint64x2_t vmull_laneq_u32(uint32x2_t a, uint32x4_t b, const int la
   return __riscv_vlmul_trunc_v_u64m2_u64m1(__riscv_vwmulu_vv_u64m2(a, b_dup, 2));
 }
 
-// FORCE_INLINE int32x4_t vmull_high_laneq_s16(int16x8_t a, int16x8_t v, const int lane);
+FORCE_INLINE int32x4_t vmull_high_laneq_s16(int16x8_t a, int16x8_t b, const int lane) {
+  vint16m1_t a_high = __riscv_vslidedown_vx_i16m1(a, 4, 8);
+  vint16m1_t b_dup = __riscv_vrgather_vx_i16m1(b, lane, 8);
+  return __riscv_vlmul_trunc_v_i32m2_i32m1(__riscv_vwmul_vv_i32m2(a_high, b_dup, 4));
+}
 
-// FORCE_INLINE int64x2_t vmull_high_laneq_s32(int32x4_t a, int32x4_t v, const int lane);
+FORCE_INLINE int64x2_t vmull_high_laneq_s32(int32x4_t a, int32x4_t b, const int lane) {
+  vint32m1_t a_high = __riscv_vslidedown_vx_i32m1(a, 2, 4);
+  vint32m1_t b_dup = __riscv_vrgather_vx_i32m1(b, lane, 4);
+  return __riscv_vlmul_trunc_v_i64m2_i64m1(__riscv_vwmul_vv_i64m2(a_high, b_dup, 2));
+}
 
-// FORCE_INLINE uint32x4_t vmull_high_laneq_u16(uint16x8_t a, uint16x8_t v, const int lane);
+FORCE_INLINE uint32x4_t vmull_high_laneq_u16(uint16x8_t a, uint16x8_t b, const int lane) {
+  vuint16m1_t a_high = __riscv_vslidedown_vx_u16m1(a, 4, 8);
+  vuint16m1_t b_dup = __riscv_vrgather_vx_u16m1(b, lane, 8);
+  return __riscv_vlmul_trunc_v_u32m2_u32m1(__riscv_vwmulu_vv_u32m2(a_high, b_dup, 4));
+}
 
-// FORCE_INLINE uint64x2_t vmull_high_laneq_u32(uint32x4_t a, uint32x4_t v, const int lane);
+FORCE_INLINE uint64x2_t vmull_high_laneq_u32(uint32x4_t a, uint32x4_t b, const int lane) {
+  vuint32m1_t a_high = __riscv_vslidedown_vx_u32m1(a, 2, 4);
+  vuint32m1_t b_dup = __riscv_vrgather_vx_u32m1(b, lane, 4);
+  return __riscv_vlmul_trunc_v_u64m2_u64m1(__riscv_vwmulu_vv_u64m2(a_high, b_dup, 2));
+}
 
 FORCE_INLINE int32x4_t vqdmull_lane_s16(int16x4_t a, int16x4_t b, const int c) {
   vint16m1_t b_dup_lane = __riscv_vrgather_vx_i16m1(b, c, 4);
diff --git a/tests/impl.cpp b/tests/impl.cpp
index a833e8b4..0cb299e8 100644
--- a/tests/impl.cpp
+++ b/tests/impl.cpp
@@ -31530,13 +31530,105 @@ result_t test_vmull_laneq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
 #endif  // ENABLE_TEST_ALL
 }
 
-result_t test_vmull_high_laneq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vmull_high_laneq_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const int16_t *_a = (int16_t *)impl.test_cases_int_pointer1;
+  const int16_t *_b = (int16_t *)impl.test_cases_int_pointer2;
+  int16x8_t a = vld1q_s16(_a);
+  int16x8_t b = vld1q_s16(_b);
+  int32x4_t c;
+  int32_t _c[4];
+
+#define TEST_IMPL(IDX)                               \
+  for (int i = 0; i < 4; i++) {                      \
+    _c[i] = (int32_t)_a[i + 4] * (int32_t)_b[IDX];   \
+  }                                                  \
+  c = vmull_high_laneq_s16(a, b, IDX);               \
+  CHECK_RESULT(validate_int32(c, _c[0], _c[1], _c[2], _c[3]))
+
+  IMM_8_ITER
+#undef TEST_IMPL
+
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
+
+result_t test_vmull_high_laneq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const int32_t *_a = (int32_t *)impl.test_cases_int_pointer1;
+  const int32_t *_b = (int32_t *)impl.test_cases_int_pointer2;
+  int32x4_t a = vld1q_s32(_a);
+  int32x4_t b = vld1q_s32(_b);
+  int64x2_t c;
+  int64_t _c[2];
+
+#define TEST_IMPL(IDX)                               \
+  for (int i = 0; i < 2; i++) {                      \
+    _c[i] = (int64_t)_a[i + 2] * (int64_t)_b[IDX];   \
+  }                                                  \
+  c = vmull_high_laneq_s32(a, b, IDX);               \
+  CHECK_RESULT(validate_int64(c, _c[0], _c[1]))
+
+  IMM_4_ITER
+#undef TEST_IMPL
+
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
+
+result_t test_vmull_high_laneq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const uint16_t *_a = (uint16_t *)impl.test_cases_int_pointer1;
+  const uint16_t *_b = (uint16_t *)impl.test_cases_int_pointer2;
+  uint16x8_t a = vld1q_u16(_a);
+  uint16x8_t b = vld1q_u16(_b);
+  uint32x4_t c;
+  uint32_t _c[4];
+
+#define TEST_IMPL(IDX)                               \
+  for (int i = 0; i < 4; i++) {                      \
+    _c[i] = (uint32_t)_a[i + 4] * (uint32_t)_b[IDX]; \
+  }                                                  \
+  c = vmull_high_laneq_u16(a, b, IDX);               \
+  CHECK_RESULT(validate_uint32(c, _c[0], _c[1], _c[2], _c[3]))
+
+  IMM_8_ITER
+#undef TEST_IMPL
+
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
 
-result_t test_vmull_high_laneq_s32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+result_t test_vmull_high_laneq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
+#ifdef ENABLE_TEST_ALL
+  const uint32_t *_a = (uint32_t *)impl.test_cases_int_pointer1;
+  const uint32_t *_b = (uint32_t *)impl.test_cases_int_pointer2;
+  uint32x4_t a = vld1q_u32(_a);
+  uint32x4_t b = vld1q_u32(_b);
+  uint64x2_t c;
+  uint64_t _c[2];
+
+#define TEST_IMPL(IDX)                               \
+  for (int i = 0; i < 2; i++) {                      \
+    _c[i] = (uint64_t)_a[i + 2] * (uint64_t)_b[IDX]; \
+  }                                                  \
+  c = vmull_high_laneq_u32(a, b, IDX);               \
+  CHECK_RESULT(validate_uint64(c, _c[0], _c[1]))
 
-result_t test_vmull_high_laneq_u16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+  IMM_4_ITER
+#undef TEST_IMPL
 
-result_t test_vmull_high_laneq_u32(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) { return TEST_UNIMPL; }
+  return TEST_SUCCESS;
+#else
+  return TEST_UNIMPL;
+#endif  // ENABLE_TEST_ALL
+}
 
 result_t test_vqdmull_lane_s16(const NEON2RVV_TEST_IMPL &impl, uint32_t iter) {
 #ifdef ENABLE_TEST_ALL
diff --git a/tests/impl.h b/tests/impl.h
index 50def390..c350f375 100644
--- a/tests/impl.h
+++ b/tests/impl.h
@@ -2018,10 +2018,10 @@
   _(vmull_laneq_s32)                                   \
   _(vmull_laneq_u16)                                   \
   _(vmull_laneq_u32)                                   \
-  /*_(vmull_high_laneq_s16) */                         \
-  /*_(vmull_high_laneq_s32) */                         \
-  /*_(vmull_high_laneq_u16) */                         \
-  /*_(vmull_high_laneq_u32) */                         \
+  _(vmull_high_laneq_s16)                              \
+  _(vmull_high_laneq_s32)                              \
+  _(vmull_high_laneq_u16)                              \
+  _(vmull_high_laneq_u32)                              \
   _(vqdmull_lane_s16)                                  \
   _(vqdmull_lane_s32)                                  \
   /*_(vqdmullh_lane_s16) */                            \
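Note on the semantics under test: each vmull_high_laneq_* intrinsic widens the high half of `a` and multiplies it by a single lane of the full 128-bit vector `b`; the RVV mapping above extracts the high half with vslidedown, splats the lane with vrgather, and widens through vwmul/vwmulu. A minimal scalar sketch of the s16 variant, mirroring the expected-value loop in the new tests (the helper name `mull_high_laneq_s16_ref` is illustrative only, not part of neon2rvv):

  #include <stdint.h>

  /* Scalar sketch: widen the high four int16 lanes of a, multiply each by
   * lane `lane` of the eight-lane vector b, yielding four int32 products. */
  static void mull_high_laneq_s16_ref(const int16_t a[8], const int16_t b[8],
                                      int lane, int32_t out[4]) {
    for (int i = 0; i < 4; i++) {
      out[i] = (int32_t)a[i + 4] * (int32_t)b[lane];
    }
  }

The other three variants follow the same pattern with the widths swapped (2 lanes of 32->64 bits for the s32/u32 forms, unsigned arithmetic for u16/u32), which is why the tests iterate IMM_8_ITER over the 8-lane b vectors and IMM_4_ITER over the 4-lane ones.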