-
Notifications
You must be signed in to change notification settings - Fork 259
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
arm/neon riscv64: additional RVV implementations - part 2. (#1189)
Contains RVV implementations for the following Neon instructions: `abal`, `abdl_high`, `addw`, `addw_high`, `bcax`, `bic`, `cadd_rot270`, `cadd_rot90`, `cmla_lane`, `cmla_rot180_lane`, `cmla_rot270_lane`, `cmla_rot90_lane`, `combine`, `cvt`, `dot`, `dot_lane`, `dup_n`, `eor`, `ext`, `maxnmv`, `minnmv`, `movl`, `movn`, `qdmull`, `qshlu_n`, `rnda`, `rsubhn`, `shl`, `shl_n`, `shll_n`, `shr_n`, `shrn_n`, `sqadd`, `sqrt`
- Loading branch information
Showing
34 changed files
with
3,092 additions
and
1,231 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
* | ||
* Copyright: | ||
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology) | ||
* 2023 Chi-Wei Chu <[email protected]> (Copyright owned by NTHU pllab) | ||
*/ | ||
|
||
#if !defined(SIMDE_ARM_NEON_ABAL_H) | ||
|
@@ -39,6 +40,14 @@ simde_int16x8_t | |
simde_vabal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_s8(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int16x8_private r_, a_ = simde_int16x8_to_private(a); | ||
simde_int8x8_private b_ = simde_int8x8_to_private(b); | ||
simde_int8x8_private c_ = simde_int8x8_to_private(c); | ||
vint16m1_t rst = __riscv_vwsub_vv_i16m1(__riscv_vlmul_trunc_v_i8m1_i8mf2(b_.sv64) , \ | ||
__riscv_vlmul_trunc_v_i8m1_i8mf2(c_.sv64) , 8); | ||
r_.sv128 = __riscv_vadd_vv_i16m1(__riscv_vmax_vv_i16m1(rst , __riscv_vneg_v_i16m1(rst , 8) , 8), a_.sv128, 8); | ||
return simde_int16x8_from_private(r_); | ||
#else | ||
return simde_vaddq_s16(simde_vabdl_s8(b, c), a); | ||
#endif | ||
|
@@ -53,6 +62,13 @@ simde_int32x4_t | |
simde_vabal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_s16(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int32x4_private r_, a_ = simde_int32x4_to_private(a); | ||
simde_int16x4_private b_ = simde_int16x4_to_private(b); | ||
simde_int16x4_private c_ = simde_int16x4_to_private(c); | ||
vint32m1_t rst = __riscv_vwsub_vv_i32m1(__riscv_vlmul_trunc_v_i16m1_i16mf2(b_.sv64) , __riscv_vlmul_trunc_v_i16m1_i16mf2(c_.sv64) , 4); | ||
r_.sv128 = __riscv_vadd_vv_i32m1(__riscv_vmax_vv_i32m1(rst , __riscv_vneg_v_i32m1(rst , 4) , 4), a_.sv128, 4); | ||
return simde_int32x4_from_private(r_); | ||
#else | ||
return simde_vaddq_s32(simde_vabdl_s16(b, c), a); | ||
#endif | ||
|
@@ -67,6 +83,13 @@ simde_int64x2_t | |
simde_vabal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_s32(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int64x2_private r_, a_ = simde_int64x2_to_private(a); | ||
simde_int32x2_private b_ = simde_int32x2_to_private(b); | ||
simde_int32x2_private c_ = simde_int32x2_to_private(c); | ||
vint64m1_t rst = __riscv_vwsub_vv_i64m1(__riscv_vlmul_trunc_v_i32m1_i32mf2(b_.sv64) , __riscv_vlmul_trunc_v_i32m1_i32mf2(c_.sv64) , 2); | ||
r_.sv128 = __riscv_vadd_vv_i64m1(__riscv_vmax_vv_i64m1(rst , __riscv_vneg_v_i64m1(rst , 2) , 2), a_.sv128, 2); | ||
return simde_int64x2_from_private(r_); | ||
#else | ||
return simde_vaddq_s64(simde_vabdl_s32(b, c), a); | ||
#endif | ||
|
@@ -81,6 +104,16 @@ simde_uint16x8_t | |
simde_vabal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_u8(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint16x8_private r_, a_ = simde_uint16x8_to_private(a); | ||
simde_uint8x8_private b_ = simde_uint8x8_to_private(b); | ||
simde_uint8x8_private c_ = simde_uint8x8_to_private(c); | ||
vint16m1_t a_tmp = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vwcvtu_x_x_v_u16m1(__riscv_vlmul_trunc_v_u8m1_u8mf2(b_.sv64), 8)); | ||
vint16m1_t b_tmp = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vwcvtu_x_x_v_u16m1(__riscv_vlmul_trunc_v_u8m1_u8mf2(c_.sv64), 8)); | ||
vint16m1_t rst = __riscv_vsub_vv_i16m1(a_tmp, b_tmp, 8); | ||
r_.sv128 = __riscv_vadd_vv_u16m1(__riscv_vreinterpret_v_i16m1_u16m1(__riscv_vmax_vv_i16m1(rst , __riscv_vneg_v_i16m1(rst , 8) , 8)), \ | ||
a_.sv128, 8); | ||
return simde_uint16x8_from_private(r_); | ||
#else | ||
return simde_vaddq_u16(simde_vabdl_u8(b, c), a); | ||
#endif | ||
|
@@ -95,6 +128,16 @@ simde_uint32x4_t | |
simde_vabal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_u16(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint32x4_private r_, a_ = simde_uint32x4_to_private(a); | ||
simde_uint16x4_private b_ = simde_uint16x4_to_private(b); | ||
simde_uint16x4_private c_ = simde_uint16x4_to_private(c); | ||
vint32m1_t a_tmp = __riscv_vreinterpret_v_u32m1_i32m1(__riscv_vwcvtu_x_x_v_u32m1(__riscv_vlmul_trunc_v_u16m1_u16mf2(b_.sv64), 4)); | ||
vint32m1_t b_tmp = __riscv_vreinterpret_v_u32m1_i32m1(__riscv_vwcvtu_x_x_v_u32m1(__riscv_vlmul_trunc_v_u16m1_u16mf2(c_.sv64), 4)); | ||
vint32m1_t rst = __riscv_vsub_vv_i32m1(a_tmp, b_tmp, 4); | ||
r_.sv128 = __riscv_vadd_vv_u32m1(__riscv_vreinterpret_v_i32m1_u32m1(__riscv_vmax_vv_i32m1(rst , __riscv_vneg_v_i32m1(rst , 4) , 4)), \ | ||
a_.sv128, 4); | ||
return simde_uint32x4_from_private(r_); | ||
#else | ||
return simde_vaddq_u32(simde_vabdl_u16(b, c), a); | ||
#endif | ||
|
@@ -109,6 +152,16 @@ simde_uint64x2_t | |
simde_vabal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vabal_u32(a, b, c); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint64x2_private r_, a_ = simde_uint64x2_to_private(a); | ||
simde_uint32x2_private b_ = simde_uint32x2_to_private(b); | ||
simde_uint32x2_private c_ = simde_uint32x2_to_private(c); | ||
vint64m1_t a_tmp = __riscv_vreinterpret_v_u64m1_i64m1(__riscv_vwcvtu_x_x_v_u64m1(__riscv_vlmul_trunc_v_u32m1_u32mf2(b_.sv64), 2)); | ||
vint64m1_t b_tmp = __riscv_vreinterpret_v_u64m1_i64m1(__riscv_vwcvtu_x_x_v_u64m1(__riscv_vlmul_trunc_v_u32m1_u32mf2(c_.sv64), 2)); | ||
vint64m1_t rst = __riscv_vsub_vv_i64m1(a_tmp, b_tmp, 2); | ||
r_.sv128 = __riscv_vadd_vv_u64m1(__riscv_vreinterpret_v_i64m1_u64m1(__riscv_vmax_vv_i64m1(rst , __riscv_vneg_v_i64m1(rst , 2) , 2)), \ | ||
a_.sv128, 2); | ||
return simde_uint64x2_from_private(r_); | ||
#else | ||
return simde_vaddq_u64(simde_vabdl_u32(b, c), a); | ||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,7 @@ | |
* | ||
* Copyright: | ||
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology) | ||
* 2023 Chi-Wei Chu <[email protected]> (Copyright owned by NTHU pllab) | ||
*/ | ||
|
||
#if !defined(SIMDE_ARM_NEON_ABDL_HIGH_H) | ||
|
@@ -38,6 +39,14 @@ simde_int16x8_t | |
simde_vabdl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_s8(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int16x8_private r_; | ||
simde_int8x16_private a_ = simde_int8x16_to_private(a); | ||
simde_int8x16_private b_ = simde_int8x16_to_private(b); | ||
vint16m1_t rst = __riscv_vwsub_vv_i16m1(__riscv_vlmul_trunc_v_i8m1_i8mf2(__riscv_vslidedown_vx_i8m1(a_.sv128 , 8 , 16)), | ||
__riscv_vlmul_trunc_v_i8m1_i8mf2(__riscv_vslidedown_vx_i8m1(b_.sv128 , 8 , 16)) , 8); | ||
r_.sv128 = __riscv_vmax_vv_i16m1(rst , __riscv_vneg_v_i16m1(rst , 8) , 8); | ||
return simde_int16x8_from_private(r_); | ||
#else | ||
return simde_vabdl_s8(simde_vget_high_s8(a), simde_vget_high_s8(b)); | ||
#endif | ||
|
@@ -52,6 +61,14 @@ simde_int32x4_t | |
simde_vabdl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_s16(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int32x4_private r_; | ||
simde_int16x8_private a_ = simde_int16x8_to_private(a); | ||
simde_int16x8_private b_ = simde_int16x8_to_private(b); | ||
vint32m1_t rst = __riscv_vwsub_vv_i32m1(__riscv_vlmul_trunc_v_i16m1_i16mf2(__riscv_vslidedown_vx_i16m1(a_.sv128 , 4 , 8)) , \ | ||
__riscv_vlmul_trunc_v_i16m1_i16mf2(__riscv_vslidedown_vx_i16m1(b_.sv128 , 4 , 8)) , 4); | ||
r_.sv128 = __riscv_vmax_vv_i32m1(rst , __riscv_vneg_v_i32m1(rst , 4) , 4); | ||
return simde_int32x4_from_private(r_); | ||
#else | ||
return simde_vabdl_s16(simde_vget_high_s16(a), simde_vget_high_s16(b)); | ||
#endif | ||
|
@@ -66,6 +83,14 @@ simde_int64x2_t | |
simde_vabdl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_s32(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_int64x2_private r_; | ||
simde_int32x4_private a_ = simde_int32x4_to_private(a); | ||
simde_int32x4_private b_ = simde_int32x4_to_private(b); | ||
vint64m1_t rst = __riscv_vwsub_vv_i64m1(__riscv_vlmul_trunc_v_i32m1_i32mf2(__riscv_vslidedown_vx_i32m1(a_.sv128 , 2 , 4)) , \ | ||
__riscv_vlmul_trunc_v_i32m1_i32mf2(__riscv_vslidedown_vx_i32m1(b_.sv128 , 2 , 4)) , 2); | ||
r_.sv128 = __riscv_vmax_vv_i64m1(rst , __riscv_vneg_v_i64m1(rst , 2) , 2); | ||
return simde_int64x2_from_private(r_); | ||
#else | ||
return simde_vabdl_s32(simde_vget_high_s32(a), simde_vget_high_s32(b)); | ||
#endif | ||
|
@@ -80,6 +105,17 @@ simde_uint16x8_t | |
simde_vabdl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_u8(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint16x8_private r_; | ||
simde_uint8x16_private a_ = simde_uint8x16_to_private(a); | ||
simde_uint8x16_private b_ = simde_uint8x16_to_private(b); | ||
vint16m1_t a_tmp = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vwcvtu_x_x_v_u16m1( \ | ||
__riscv_vlmul_trunc_v_u8m1_u8mf2(__riscv_vslidedown_vx_u8m1(a_.sv128 , 8 , 16)), 8)); | ||
vint16m1_t b_tmp = __riscv_vreinterpret_v_u16m1_i16m1(__riscv_vwcvtu_x_x_v_u16m1( \ | ||
__riscv_vlmul_trunc_v_u8m1_u8mf2(__riscv_vslidedown_vx_u8m1(b_.sv128 , 8 , 16)), 8)); | ||
vint16m1_t rst = __riscv_vsub_vv_i16m1(a_tmp, b_tmp, 8); | ||
r_.sv128 = __riscv_vreinterpret_v_i16m1_u16m1(__riscv_vmax_vv_i16m1(rst , __riscv_vneg_v_i16m1(rst , 8) , 8)); | ||
return simde_uint16x8_from_private(r_); | ||
#else | ||
return simde_vabdl_u8(simde_vget_high_u8(a), simde_vget_high_u8(b)); | ||
#endif | ||
|
@@ -94,6 +130,17 @@ simde_uint32x4_t | |
simde_vabdl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_u16(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint32x4_private r_; | ||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a); | ||
simde_uint16x8_private b_ = simde_uint16x8_to_private(b); | ||
vint32m1_t a_tmp = __riscv_vreinterpret_v_u32m1_i32m1(__riscv_vwcvtu_x_x_v_u32m1( \ | ||
__riscv_vlmul_trunc_v_u16m1_u16mf2(__riscv_vslidedown_vx_u16m1(a_.sv128 , 4 , 8)), 4)); | ||
vint32m1_t b_tmp = __riscv_vreinterpret_v_u32m1_i32m1(__riscv_vwcvtu_x_x_v_u32m1( \ | ||
__riscv_vlmul_trunc_v_u16m1_u16mf2(__riscv_vslidedown_vx_u16m1(b_.sv128 , 4 , 8)), 4)); | ||
vint32m1_t rst = __riscv_vsub_vv_i32m1(a_tmp, b_tmp, 4); | ||
r_.sv128 = __riscv_vreinterpret_v_i32m1_u32m1(__riscv_vmax_vv_i32m1(rst , __riscv_vneg_v_i32m1(rst , 4) , 4)); | ||
return simde_uint32x4_from_private(r_); | ||
#else | ||
return simde_vabdl_u16(simde_vget_high_u16(a), simde_vget_high_u16(b)); | ||
#endif | ||
|
@@ -108,6 +155,17 @@ simde_uint64x2_t | |
simde_vabdl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { | ||
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) | ||
return vabdl_high_u32(a, b); | ||
#elif defined(SIMDE_RISCV_V_NATIVE) | ||
simde_uint64x2_private r_; | ||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a); | ||
simde_uint32x4_private b_ = simde_uint32x4_to_private(b); | ||
vint64m1_t a_tmp = __riscv_vreinterpret_v_u64m1_i64m1(__riscv_vwcvtu_x_x_v_u64m1( \ | ||
__riscv_vlmul_trunc_v_u32m1_u32mf2(__riscv_vslidedown_vx_u32m1(a_.sv128 , 2 , 4)), 2)); | ||
vint64m1_t b_tmp = __riscv_vreinterpret_v_u64m1_i64m1(__riscv_vwcvtu_x_x_v_u64m1( \ | ||
__riscv_vlmul_trunc_v_u32m1_u32mf2(__riscv_vslidedown_vx_u32m1(b_.sv128 , 2 , 4)), 2)); | ||
vint64m1_t rst = __riscv_vsub_vv_i64m1(a_tmp, b_tmp, 2); | ||
r_.sv128 = __riscv_vreinterpret_v_i64m1_u64m1(__riscv_vmax_vv_i64m1(rst , __riscv_vneg_v_i64m1(rst , 2) , 2)); | ||
return simde_uint64x2_from_private(r_); | ||
#else | ||
return simde_vabdl_u32(simde_vget_high_u32(a), simde_vget_high_u32(b)); | ||
#endif | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -23,6 +23,7 @@ | |
* Copyright: | ||
* 2020 Evan Nemerson <[email protected]> | ||
* 2020 Sean Maher <[email protected]> (Copyright owned by Google, LLC) | ||
* 2023 Chi-Wei Chu <[email protected]> (Copyright owned by NTHU pllab) | ||
*/ | ||
|
||
#if !defined(SIMDE_ARM_NEON_ADDW_H) | ||
|
@@ -41,14 +42,17 @@ simde_int16x8_t | |
simde_vaddw_s8(simde_int16x8_t a, simde_int8x8_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_s8(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_s16(a, simde_vmovl_s8(b)); | ||
#else | ||
simde_int16x8_private r_; | ||
simde_int16x8_private a_ = simde_int16x8_to_private(a); | ||
simde_int8x8_private b_ = simde_int8x8_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vint8mf2_t vb = __riscv_vlmul_trunc_v_i8m1_i8mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwadd_wv_i16m1(a_.sv128, vb, 8); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
@@ -71,14 +75,17 @@ simde_int32x4_t | |
simde_vaddw_s16(simde_int32x4_t a, simde_int16x4_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_s16(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_s32(a, simde_vmovl_s16(b)); | ||
#else | ||
simde_int32x4_private r_; | ||
simde_int32x4_private a_ = simde_int32x4_to_private(a); | ||
simde_int16x4_private b_ = simde_int16x4_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vint16mf2_t vb = __riscv_vlmul_trunc_v_i16m1_i16mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwadd_wv_i32m1(a_.sv128, vb, 4); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
@@ -101,14 +108,17 @@ simde_int64x2_t | |
simde_vaddw_s32(simde_int64x2_t a, simde_int32x2_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_s32(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_s64(a, simde_vmovl_s32(b)); | ||
#else | ||
simde_int64x2_private r_; | ||
simde_int64x2_private a_ = simde_int64x2_to_private(a); | ||
simde_int32x2_private b_ = simde_int32x2_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vint32mf2_t vb = __riscv_vlmul_trunc_v_i32m1_i32mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwadd_wv_i64m1(a_.sv128, vb, 2); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
@@ -131,14 +141,17 @@ simde_uint16x8_t | |
simde_vaddw_u8(simde_uint16x8_t a, simde_uint8x8_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_u8(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_u16(a, simde_vmovl_u8(b)); | ||
#else | ||
simde_uint16x8_private r_; | ||
simde_uint16x8_private a_ = simde_uint16x8_to_private(a); | ||
simde_uint8x8_private b_ = simde_uint8x8_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vuint8mf2_t vb = __riscv_vlmul_trunc_v_u8m1_u8mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwaddu_wv_u16m1(a_.sv128, vb, 8); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
@@ -161,14 +174,17 @@ simde_uint32x4_t | |
simde_vaddw_u16(simde_uint32x4_t a, simde_uint16x4_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_u16(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_u32(a, simde_vmovl_u16(b)); | ||
#else | ||
simde_uint32x4_private r_; | ||
simde_uint32x4_private a_ = simde_uint32x4_to_private(a); | ||
simde_uint16x4_private b_ = simde_uint16x4_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vuint16mf2_t vb = __riscv_vlmul_trunc_v_u16m1_u16mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwaddu_wv_u32m1(a_.sv128, vb, 4); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
@@ -191,14 +207,17 @@ simde_uint64x2_t | |
simde_vaddw_u32(simde_uint64x2_t a, simde_uint32x2_t b) { | ||
#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) | ||
return vaddw_u32(a, b); | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) | ||
#elif SIMDE_NATURAL_VECTOR_SIZE_GE(128) && !defined(SIMDE_RISCV_V_NATIVE) | ||
return simde_vaddq_u64(a, simde_vmovl_u32(b)); | ||
#else | ||
simde_uint64x2_private r_; | ||
simde_uint64x2_private a_ = simde_uint64x2_to_private(a); | ||
simde_uint32x2_private b_ = simde_uint32x2_to_private(b); | ||
|
||
#if (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
#if defined(SIMDE_RISCV_V_NATIVE) | ||
vuint32mf2_t vb = __riscv_vlmul_trunc_v_u32m1_u32mf2 (b_.sv64); | ||
r_.sv128 = __riscv_vwaddu_wv_u64m1(a_.sv128, vb, 2); | ||
#elif (SIMDE_NATURAL_VECTOR_SIZE > 0) && defined(SIMDE_VECTOR_SUBSCRIPT_OPS) && defined(SIMDE_CONVERT_VECTOR_) | ||
SIMDE_CONVERT_VECTOR_(r_.values, b_.values); | ||
r_.values += a_.values; | ||
#else | ||
|
Oops, something went wrong.