Skip to content

Commit

Permalink
[NEON] Add vqdmullh_lane{q}_s16 and vqdmulls_lane{q}_s32 related intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
yyctw committed Oct 16, 2023
1 parent 79cda85 commit cd400ad
Show file tree
Hide file tree
Showing 2 changed files with 411 additions and 0 deletions.
68 changes: 68 additions & 0 deletions simde/arm/neon/qdmull_lane.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,74 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Lane-indexed scalar form of simde_vqdmullh_s16.
 *
 * Selects element `lane` of the simde_int16x4_t vector `v` (lane is
 * annotated with the constant range 0..3) and returns
 * simde_vqdmullh_s16(a, <selected element>). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vqdmullh_lane_s16(int16_t a, simde_int16x4_t v, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  /* The private view gives element access through `.values`. */
  const simde_int16x4_private lanes = simde_int16x4_to_private(v);

  return simde_vqdmullh_s16(a, lanes.values[lane]);
}
/* When SIMDE_ARM_NEON_A64V8_NATIVE is defined, the name expands directly to
 * vqdmullh_lane_s16 instead of the portable function above. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vqdmullh_lane_s16(a, v, lane) vqdmullh_lane_s16(a, v, lane)
#endif
/* Optionally expose the implementation under the unprefixed name. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqdmullh_lane_s16
  #define vqdmullh_lane_s16(a, v, lane) simde_vqdmullh_lane_s16((a), (v), (lane))
#endif

/* Lane-indexed scalar form of simde_vqdmullh_s16 for a simde_int16x8_t
 * source vector.
 *
 * Selects element `lane` of `v` (lane is annotated with the constant range
 * 0..7) and returns simde_vqdmullh_s16(a, <selected element>). */
SIMDE_FUNCTION_ATTRIBUTES
int32_t
simde_vqdmullh_laneq_s16(int16_t a, simde_int16x8_t v, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) {
  /* The private view gives element access through `.values`. */
  const simde_int16x8_private lanes = simde_int16x8_to_private(v);

  return simde_vqdmullh_s16(a, lanes.values[lane]);
}
/* When SIMDE_ARM_NEON_A64V8_NATIVE is defined, the name expands directly to
 * vqdmullh_laneq_s16 instead of the portable function above. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vqdmullh_laneq_s16(a, v, lane) vqdmullh_laneq_s16(a, v, lane)
#endif
/* Optionally expose the implementation under the unprefixed name. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqdmullh_laneq_s16
  #define vqdmullh_laneq_s16(a, v, lane) simde_vqdmullh_laneq_s16((a), (v), (lane))
#endif

/* Lane-indexed scalar form of simde_vqdmulls_s32.
 *
 * Selects element `lane` of the simde_int32x2_t vector `v` (lane is
 * annotated with the constant range 0..1) and returns
 * simde_vqdmulls_s32(a, <selected element>). */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vqdmulls_lane_s32(int32_t a, simde_int32x2_t v, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 1) {
  /* The private view gives element access through `.values`. */
  const simde_int32x2_private lanes = simde_int32x2_to_private(v);

  return simde_vqdmulls_s32(a, lanes.values[lane]);
}
/* When SIMDE_ARM_NEON_A64V8_NATIVE is defined, the name expands directly to
 * vqdmulls_lane_s32 instead of the portable function above. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vqdmulls_lane_s32(a, v, lane) vqdmulls_lane_s32(a, v, lane)
#endif
/* Optionally expose the implementation under the unprefixed name. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqdmulls_lane_s32
  #define vqdmulls_lane_s32(a, v, lane) simde_vqdmulls_lane_s32((a), (v), (lane))
#endif

/* Lane-indexed scalar form of simde_vqdmulls_s32 for a simde_int32x4_t
 * source vector.
 *
 * Selects element `lane` of `v` (lane is annotated with the constant range
 * 0..3) and returns simde_vqdmulls_s32(a, <selected element>). */
SIMDE_FUNCTION_ATTRIBUTES
int64_t
simde_vqdmulls_laneq_s32(int32_t a, simde_int32x4_t v, const int lane)
    SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) {
  /* The private view gives element access through `.values`. */
  const simde_int32x4_private lanes = simde_int32x4_to_private(v);

  return simde_vqdmulls_s32(a, lanes.values[lane]);
}
/* When SIMDE_ARM_NEON_A64V8_NATIVE is defined, the name expands directly to
 * vqdmulls_laneq_s32 instead of the portable function above. */
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
  #define simde_vqdmulls_laneq_s32(a, v, lane) vqdmulls_laneq_s32(a, v, lane)
#endif
/* Optionally expose the implementation under the unprefixed name. */
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vqdmulls_laneq_s32
  #define vqdmulls_laneq_s32(a, v, lane) simde_vqdmulls_laneq_s32((a), (v), (lane))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vqdmull_lane_s16(simde_int16x4_t a, simde_int16x4_t b, const int lane)
Expand Down
Loading

0 comments on commit cd400ad

Please sign in to comment.