Skip to content

Commit

Permalink
NEON: part 1 of implement all intrinsics supported by architecture A64 (
Browse files Browse the repository at this point in the history
#1090)

Add 368 initial implementations and their corresponding test cases across 88 families, listed below:
- `abd`, `abdl_high`, `add`, `addhn_high`, `bsl`, `ceq`, `ceqz`, `cgez`, `cgtz`, `cle`,
- `cltz`, `cmla`, `cmla_rot180`, `cmla_rot270`, `cmla_rot90`, `cnt`, `copy_lane`, `cvt`, `cvt_n`, `cvtm`,
-  `cvtp`, `dot`, `dot_lane`, `dup_n`, `eor`, `fms_n`, `ld1`, `ld3`, `ld4`, `maxnm`,
-  `maxv`, `minnm`, `minv`, `mull`, `mull_high`, `mvn`, `pmin`, `qrdmulh_lane`, `qrshl`, `qrshrn_high_n`,
-  `qrshrun_high_n`, `qshl_n`, `qshlu_n`, `qshrn_high_n`, `qshrn_n`, `qshrun_n`, `qtbl`, `qtbx`, `raddhn`, `raddhn_high`,
-  `rbit`, `reinterpret`, `rev16`, `rev32`, `rev64`, `rnd`, `rndi`, `rndm`, `rndp`, `rshrn_high_n`,
-  `rsubhn`, `rsubhn_high`, `shr_n`, `shrn_n`, `sli_n`, `sri_n`, `st1`, `st1_lane`, `st1_x2`, `st1_x3`,
-  `st1_x4`, `st1q_x2`, `st1q_x3`, `st1q_x4`, `st2_lane`, `st3`, `st3_lane`, `st4`, `st4_lane`, `tbl`,
-  `tbx`, `trn`, `trn1`, `trn2`, `tst`, `uzp`, `uzp1`, `uzp2`
  • Loading branch information
yyctw authored Oct 24, 2023
1 parent 029d749 commit 2eedece
Show file tree
Hide file tree
Showing 100 changed files with 34,035 additions and 1,929 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ jobs:
- name: Configure and Build
run: |
meson --backend=ninja build --cross-file test/arm64cl.txt
meson test -C build --print-errorlogs $(meson test -C build --list | grep -v emul)
ninja -C build test
linux-gcc-loongarch64:
runs-on: ubuntu-22.04
Expand Down
16 changes: 16 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@ simde_neon_families = [
'abal_high',
'abd',
'abdl',
'abdl_high',
'abs',
'add',
'addhn',
'addhn_high',
'addl',
'addlv',
'addl_high',
Expand Down Expand Up @@ -56,8 +58,11 @@ simde_neon_families = [
'cnt',
'cvt',
'cvt_n',
'cvtm',
'cvtn',
'cvtp',
'combine',
'copy_lane',
'create',
'div',
'dot',
Expand Down Expand Up @@ -166,19 +171,26 @@ simde_neon_families = [
'qrdmulh',
'qrdmulh_lane',
'qrdmulh_n',
'qrshl',
'qrshrn_high_n',
'qrshrn_n',
'qrshrun_high_n',
'qrshrun_n',
'qmovn',
'qmovn_high',
'qmovun',
'qneg',
'qshl',
'qshl_n',
'qshlu_n',
'qshrn_high_n',
'qshrn_n',
'qshrun_n',
'qsub',
'qtbl',
'qtbx',
'raddhn',
'raddhn_high',
'rbit',
'recpe',
'recps',
Expand All @@ -194,16 +206,20 @@ simde_neon_families = [
'rndp',
'rshl',
'rshr_n',
'rshrn_high_n',
'rshrn_n',
'rsqrte',
'rsqrts',
'rsra_n',
'rsubhn',
'rsubhn_high',
'set_lane',
'shl',
'shl_n',
'shll_n',
'shr_n',
'shrn_n',
'sli_n',
'sqadd',
'sqrt',
'sra_n',
Expand Down
16 changes: 16 additions & 0 deletions simde/arm/neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,11 @@
#include "neon/abal_high.h"
#include "neon/abd.h"
#include "neon/abdl.h"
#include "neon/abdl_high.h"
#include "neon/abs.h"
#include "neon/add.h"
#include "neon/addhn.h"
#include "neon/addhn_high.h"
#include "neon/addl.h"
#include "neon/addlv.h"
#include "neon/addl_high.h"
Expand Down Expand Up @@ -77,8 +79,11 @@
#include "neon/cnt.h"
#include "neon/cvt.h"
#include "neon/cvt_n.h"
#include "neon/cvtm.h"
#include "neon/cvtn.h"
#include "neon/cvtp.h"
#include "neon/combine.h"
#include "neon/copy_lane.h"
#include "neon/create.h"
#include "neon/div.h"
#include "neon/dot.h"
Expand Down Expand Up @@ -187,19 +192,26 @@
#include "neon/qrdmulh.h"
#include "neon/qrdmulh_lane.h"
#include "neon/qrdmulh_n.h"
#include "neon/qrshl.h"
#include "neon/qrshrn_high_n.h"
#include "neon/qrshrn_n.h"
#include "neon/qrshrun_high_n.h"
#include "neon/qrshrun_n.h"
#include "neon/qmovn.h"
#include "neon/qmovun.h"
#include "neon/qmovn_high.h"
#include "neon/qneg.h"
#include "neon/qsub.h"
#include "neon/qshl.h"
#include "neon/qshl_n.h"
#include "neon/qshlu_n.h"
#include "neon/qshrn_high_n.h"
#include "neon/qshrn_n.h"
#include "neon/qshrun_n.h"
#include "neon/qtbl.h"
#include "neon/qtbx.h"
#include "neon/raddhn.h"
#include "neon/raddhn_high.h"
#include "neon/rbit.h"
#include "neon/recpe.h"
#include "neon/recps.h"
Expand All @@ -215,16 +227,20 @@
#include "neon/rndp.h"
#include "neon/rshl.h"
#include "neon/rshr_n.h"
#include "neon/rshrn_high_n.h"
#include "neon/rshrn_n.h"
#include "neon/rsqrte.h"
#include "neon/rsqrts.h"
#include "neon/rsra_n.h"
#include "neon/rsubhn.h"
#include "neon/rsubhn_high.h"
#include "neon/set_lane.h"
#include "neon/shl.h"
#include "neon/shl_n.h"
#include "neon/shll_n.h"
#include "neon/shr_n.h"
#include "neon/shrn_n.h"
#include "neon/sli_n.h"
#include "neon/sqadd.h"
#include "neon/sqrt.h"
#include "neon/sra_n.h"
Expand Down
46 changes: 46 additions & 0 deletions simde/arm/neon/abd.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
*
* Copyright:
* 2020 Evan Nemerson <[email protected]>
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
*/

#if !defined(SIMDE_ARM_NEON_ABD_H)
Expand All @@ -37,6 +38,23 @@ HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Scalar half-precision absolute difference: returns |a - b|.
 *
 * With native A64 NEON and FP16 support this forwards to vabdh_f16.
 * Otherwise both operands are widened to simde_float32_t, subtracted,
 * the sign is flipped when the difference compares below zero, and the
 * result is narrowed back to simde_float16_t. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16_t
simde_vabdh_f16(simde_float16_t a, simde_float16_t b) {
  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vabdh_f16(a, b);
  #else
    const simde_float32_t lhs = simde_float16_to_float32(a);
    const simde_float32_t rhs = simde_float16_to_float32(b);
    simde_float32_t diff = lhs - rhs;

    /* Only a strictly negative difference is negated; anything else
     * (including a NaN, which never compares below zero) passes through. */
    if (diff < 0) {
      diff = -diff;
    }

    return simde_float16_from_float32(diff);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdh_f16
  #define vabdh_f16(a, b) simde_vabdh_f16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32_t
simde_vabds_f32(simde_float32_t a, simde_float32_t b) {
Expand Down Expand Up @@ -67,6 +85,20 @@ simde_vabdd_f64(simde_float64_t a, simde_float64_t b) {
#define vabdd_f64(a, b) simde_vabdd_f64((a), (b))
#endif

/* Lane-wise half-precision absolute difference on a 4-lane vector:
 * each result lane is |a[i] - b[i]|.
 *
 * The native path is guarded by the A32v8 macro (plus FP16 support) so
 * that it matches both the A32V8 native-alias guard below and the guard
 * used by the 8-lane simde_vabdq_f16 variant; vabd_f16 is not an
 * A64-only intrinsic. The portable path composes simde_vsub_f16 with
 * simde_vabs_f16. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x4_t
simde_vabd_f16(simde_float16x4_t a, simde_float16x4_t b) {
  #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
    return vabd_f16(a, b);
  #else
    return simde_vabs_f16(simde_vsub_f16(a, b));
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vabd_f16
  #define vabd_f16(a, b) simde_vabd_f16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x2_t
simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) {
Expand Down Expand Up @@ -220,6 +252,20 @@ simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) {
#define vabd_u32(a, b) simde_vabd_u32((a), (b))
#endif

/* Lane-wise half-precision absolute difference on an 8-lane vector.
 *
 * Without native A32v8 NEON + FP16 support the result is built from
 * simde_vsubq_f16 followed by simde_vabsq_f16; otherwise the call is
 * forwarded to the native vabdq_f16 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_float16x8_t
simde_vabdq_f16(simde_float16x8_t a, simde_float16x8_t b) {
  #if !(defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16))
    /* Portable path: subtract, then take the absolute value. */
    return simde_vabsq_f16(simde_vsubq_f16(a, b));
  #else
    return vabdq_f16(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES)
  #undef vabdq_f16
  #define vabdq_f16(a, b) simde_vabdq_f16((a), (b))
#endif

SIMDE_FUNCTION_ATTRIBUTES
simde_float32x4_t
simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) {
Expand Down
123 changes: 123 additions & 0 deletions simde/arm/neon/abdl_high.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
*/

#if !defined(SIMDE_ARM_NEON_ABDL_HIGH_H)
#define SIMDE_ARM_NEON_ABDL_HIGH_H

#include "abdl.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* Widening absolute difference of the high halves of two int8x16
 * vectors, producing an int16x8 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_s8 and passed to simde_vabdl_s8; otherwise the
 * native vabdl_high_s8 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabdl_high_s8(simde_int8x16_t a, simde_int8x16_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_s8(simde_vget_high_s8(a), simde_vget_high_s8(b));
  #else
    return vabdl_high_s8(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_s8
  #define vabdl_high_s8(a, b) simde_vabdl_high_s8((a), (b))
#endif

/* Widening absolute difference of the high halves of two int16x8
 * vectors, producing an int32x4 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_s16 and passed to simde_vabdl_s16; otherwise the
 * native vabdl_high_s16 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabdl_high_s16(simde_int16x8_t a, simde_int16x8_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_s16(simde_vget_high_s16(a), simde_vget_high_s16(b));
  #else
    return vabdl_high_s16(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_s16
  #define vabdl_high_s16(a, b) simde_vabdl_high_s16((a), (b))
#endif

/* Widening absolute difference of the high halves of two int32x4
 * vectors, producing an int64x2 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_s32 and passed to simde_vabdl_s32; otherwise the
 * native vabdl_high_s32 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabdl_high_s32(simde_int32x4_t a, simde_int32x4_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_s32(simde_vget_high_s32(a), simde_vget_high_s32(b));
  #else
    return vabdl_high_s32(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_s32
  #define vabdl_high_s32(a, b) simde_vabdl_high_s32((a), (b))
#endif

/* Widening absolute difference of the high halves of two uint8x16
 * vectors, producing a uint16x8 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_u8 and passed to simde_vabdl_u8; otherwise the
 * native vabdl_high_u8 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabdl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_u8(simde_vget_high_u8(a), simde_vget_high_u8(b));
  #else
    return vabdl_high_u8(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_u8
  #define vabdl_high_u8(a, b) simde_vabdl_high_u8((a), (b))
#endif

/* Widening absolute difference of the high halves of two uint16x8
 * vectors, producing a uint32x4 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_u16 and passed to simde_vabdl_u16; otherwise the
 * native vabdl_high_u16 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabdl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_u16(simde_vget_high_u16(a), simde_vget_high_u16(b));
  #else
    return vabdl_high_u16(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_u16
  #define vabdl_high_u16(a, b) simde_vabdl_high_u16((a), (b))
#endif

/* Widening absolute difference of the high halves of two uint32x4
 * vectors, producing a uint64x2 vector.
 *
 * Without native A64 NEON the high half of each operand is extracted
 * with simde_vget_high_u32 and passed to simde_vabdl_u32; otherwise the
 * native vabdl_high_u32 intrinsic is used. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vabdl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
  #if !defined(SIMDE_ARM_NEON_A64V8_NATIVE)
    /* Portable path: reuse the non-"high" widening implementation. */
    return simde_vabdl_u32(simde_vget_high_u32(a), simde_vget_high_u32(b));
  #else
    return vabdl_high_u32(a, b);
  #endif
}
#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
  #undef vabdl_high_u32
  #define vabdl_high_u32(a, b) simde_vabdl_high_u32((a), (b))
#endif

SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_ABDL_HIGH_H) */
Loading

0 comments on commit 2eedece

Please sign in to comment.