Skip to content

Commit

Permalink
Merge branch 'simde_master' into qrdmlah
Browse files Browse the repository at this point in the history
  • Loading branch information
eric900115 committed Oct 18, 2023
2 parents c04eb9e + 5e7c4d4 commit 2a241e6
Show file tree
Hide file tree
Showing 141 changed files with 38,806 additions and 190 deletions.
43 changes: 43 additions & 0 deletions meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ cxx = meson.get_compiler('cpp')

simde_neon_families = [
'aba',
'abal',
'abal_high',
'abd',
'abdl',
'abs',
Expand All @@ -25,8 +27,12 @@ simde_neon_families = [
'bcax',
'bic',
'bsl',
'cadd_rot270',
'cadd_rot90',
'cage',
'cagt',
'cale',
'calt',
'ceq',
'ceqz',
'cge',
Expand All @@ -40,11 +46,16 @@ simde_neon_families = [
'cltz',
'clz',
'cmla',
'cmla_lane',
'cmla_rot180_lane',
'cmla_rot270_lane',
'cmla_rot90_lane',
'cmla_rot90',
'cmla_rot180',
'cmla_rot270',
'cnt',
'cvt',
'cvt_n',
'cvtn',
'combine',
'create',
Expand All @@ -58,6 +69,9 @@ simde_neon_families = [
'fma',
'fma_lane',
'fma_n',
'fms',
'fms_lane',
'fms_n',
'get_high',
'get_lane',
'get_low',
Expand All @@ -73,8 +87,13 @@ simde_neon_families = [
'ld1q_x4',
'ld1',
'ld2',
'ld2_dup',
'ld2_lane',
'ld3',
'ld3_dup',
'ld3_lane',
'ld4',
'ld4_dup',
'ld4_lane',
'max',
'maxnm',
Expand All @@ -87,16 +106,20 @@ simde_neon_families = [
'mla_n',
'mlal',
'mlal_high',
'mlal_high_lane',
'mlal_high_n',
'mlal_lane',
'mlal_n',
'mls',
'mls_lane',
'mls_n',
'mlsl',
'mlsl_high',
'mlsl_high_lane',
'mlsl_high_n',
'mlsl_lane',
'mlsl_n',
#'mmlaq',
'movl',
'movl_high',
'movn',
Expand All @@ -119,12 +142,32 @@ simde_neon_families = [
'pmin',
'qadd',
'qabs',
'qdmlal',
'qdmlal_high',
'qdmlal_high_lane',
'qdmlal_high_n',
'qdmlal_lane',
'qdmlal_n',
'qdmlsl',
'qdmlsl_high',
'qdmlsl_high_lane',
'qdmlsl_high_n',
'qdmlsl_lane',
'qdmlsl_n',
'qdmulh',
'qdmulh_lane',
'qdmulh_n',
'qdmull',
'qdmull_high',
'qdmull_high_lane',
'qdmull_high_n',
'qdmull_lane',
'qdmull_n',
'qrdmlah',
'qrdmlsh',
'qrdmulh',
'qrdmulh_lane',
'qrdmulh_n',
Expand Down
40 changes: 40 additions & 0 deletions simde/arm/neon.h
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
#include "neon/types.h"

#include "neon/aba.h"
#include "neon/abal.h"
#include "neon/abal_high.h"
#include "neon/abd.h"
#include "neon/abdl.h"
#include "neon/abs.h"
Expand All @@ -46,8 +48,12 @@
#include "neon/bcax.h"
#include "neon/bic.h"
#include "neon/bsl.h"
#include "neon/cadd_rot270.h"
#include "neon/cadd_rot90.h"
#include "neon/cage.h"
#include "neon/cagt.h"
#include "neon/cale.h"
#include "neon/calt.h"
#include "neon/ceq.h"
#include "neon/ceqz.h"
#include "neon/cge.h"
Expand All @@ -61,11 +67,16 @@
#include "neon/cltz.h"
#include "neon/clz.h"
#include "neon/cmla.h"
#include "neon/cmla_lane.h"
#include "neon/cmla_rot180_lane.h"
#include "neon/cmla_rot270_lane.h"
#include "neon/cmla_rot90_lane.h"
#include "neon/cmla_rot90.h"
#include "neon/cmla_rot180.h"
#include "neon/cmla_rot270.h"
#include "neon/cnt.h"
#include "neon/cvt.h"
#include "neon/cvt_n.h"
#include "neon/cvtn.h"
#include "neon/combine.h"
#include "neon/create.h"
Expand All @@ -79,6 +90,9 @@
#include "neon/fma.h"
#include "neon/fma_lane.h"
#include "neon/fma_n.h"
#include "neon/fms.h"
#include "neon/fms_lane.h"
#include "neon/fms_n.h"
#include "neon/get_high.h"
#include "neon/get_lane.h"
#include "neon/get_low.h"
Expand All @@ -94,8 +108,13 @@
#include "neon/ld1q_x3.h"
#include "neon/ld1q_x4.h"
#include "neon/ld2.h"
#include "neon/ld2_dup.h"
#include "neon/ld2_lane.h"
#include "neon/ld3.h"
#include "neon/ld3_dup.h"
#include "neon/ld3_lane.h"
#include "neon/ld4.h"
#include "neon/ld4_dup.h"
#include "neon/ld4_lane.h"
#include "neon/max.h"
#include "neon/maxnm.h"
Expand All @@ -108,16 +127,20 @@
#include "neon/mla_n.h"
#include "neon/mlal.h"
#include "neon/mlal_high.h"
#include "neon/mlal_high_lane.h"
#include "neon/mlal_high_n.h"
#include "neon/mlal_lane.h"
#include "neon/mlal_n.h"
#include "neon/mls.h"
#include "neon/mls_lane.h"
#include "neon/mls_n.h"
#include "neon/mlsl.h"
#include "neon/mlsl_high.h"
#include "neon/mlsl_high_lane.h"
#include "neon/mlsl_high_n.h"
#include "neon/mlsl_lane.h"
#include "neon/mlsl_n.h"
//#include "neon/mmlaq.h"
#include "neon/movl.h"
#include "neon/movl_high.h"
#include "neon/movn.h"
Expand All @@ -140,10 +163,27 @@
#include "neon/pmin.h"
#include "neon/qabs.h"
#include "neon/qadd.h"
#include "neon/qdmlal.h"
#include "neon/qdmlal_high.h"
#include "neon/qdmlal_high_lane.h"
#include "neon/qdmlal_high_n.h"
#include "neon/qdmlal_lane.h"
#include "neon/qdmlal_n.h"
#include "neon/qdmlsl.h"
#include "neon/qdmlsl_high.h"
#include "neon/qdmlsl_high_lane.h"
#include "neon/qdmlsl_high_n.h"
#include "neon/qdmlsl_lane.h"
#include "neon/qdmlsl_n.h"
#include "neon/qdmulh.h"
#include "neon/qdmulh_lane.h"
#include "neon/qdmulh_n.h"
#include "neon/qdmull.h"
#include "neon/qdmull_high.h"
#include "neon/qdmull_high_lane.h"
#include "neon/qdmull_high_n.h"
#include "neon/qdmull_lane.h"
#include "neon/qdmull_n.h"
#include "neon/qrdmlah.h"
#include "neon/qrdmlsh.h"
#include "neon/qrdmulh.h"
Expand Down
125 changes: 125 additions & 0 deletions simde/arm/neon/abal.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use, copy,
* modify, merge, publish, distribute, sublicense, and/or sell copies
* of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Copyright:
* 2023 Yi-Yen Chung <[email protected]> (Copyright owned by Andes Technology)
*/

#if !defined(SIMDE_ARM_NEON_ABAL_H)
#define SIMDE_ARM_NEON_ABAL_H

#include "abdl.h"
#include "add.h"

HEDLEY_DIAGNOSTIC_PUSH
SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
SIMDE_BEGIN_DECLS_

/* vabal_s8: absolute difference and accumulate, long form.
 *
 * a    - simde_int16x8_t accumulator.
 * b, c - simde_int8x8_t operands handed to simde_vabdl_s8.
 *
 * Returns the simde_int16x8_t produced by adding simde_vabdl_s8(b, c)
 * to a with simde_vaddq_s16. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_s8 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int16x8_t
simde_vabal_s8(simde_int16x8_t a, simde_int8x8_t b, simde_int8x8_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_int16x8_t abs_diff = simde_vabdl_s8(b, c);
  return simde_vaddq_s16(abs_diff, a);
#else
  return vabal_s8(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_s8. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_s8
  #define vabal_s8(a, b, c) simde_vabal_s8((a), (b), (c))
#endif

/* vabal_s16: absolute difference and accumulate, long form.
 *
 * a    - simde_int32x4_t accumulator.
 * b, c - simde_int16x4_t operands handed to simde_vabdl_s16.
 *
 * Returns the simde_int32x4_t produced by adding simde_vabdl_s16(b, c)
 * to a with simde_vaddq_s32. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_s16 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int32x4_t
simde_vabal_s16(simde_int32x4_t a, simde_int16x4_t b, simde_int16x4_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_int32x4_t abs_diff = simde_vabdl_s16(b, c);
  return simde_vaddq_s32(abs_diff, a);
#else
  return vabal_s16(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_s16. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_s16
  #define vabal_s16(a, b, c) simde_vabal_s16((a), (b), (c))
#endif

/* vabal_s32: absolute difference and accumulate, long form.
 *
 * a    - simde_int64x2_t accumulator.
 * b, c - simde_int32x2_t operands handed to simde_vabdl_s32.
 *
 * Returns the simde_int64x2_t produced by adding simde_vabdl_s32(b, c)
 * to a with simde_vaddq_s64. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_s32 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_int64x2_t
simde_vabal_s32(simde_int64x2_t a, simde_int32x2_t b, simde_int32x2_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_int64x2_t abs_diff = simde_vabdl_s32(b, c);
  return simde_vaddq_s64(abs_diff, a);
#else
  return vabal_s32(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_s32. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_s32
  #define vabal_s32(a, b, c) simde_vabal_s32((a), (b), (c))
#endif

/* vabal_u8: absolute difference and accumulate, long form.
 *
 * a    - simde_uint16x8_t accumulator.
 * b, c - simde_uint8x8_t operands handed to simde_vabdl_u8.
 *
 * Returns the simde_uint16x8_t produced by adding simde_vabdl_u8(b, c)
 * to a with simde_vaddq_u16. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_u8 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint16x8_t
simde_vabal_u8(simde_uint16x8_t a, simde_uint8x8_t b, simde_uint8x8_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_uint16x8_t abs_diff = simde_vabdl_u8(b, c);
  return simde_vaddq_u16(abs_diff, a);
#else
  return vabal_u8(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_u8. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_u8
  #define vabal_u8(a, b, c) simde_vabal_u8((a), (b), (c))
#endif

/* vabal_u16: absolute difference and accumulate, long form.
 *
 * a    - simde_uint32x4_t accumulator.
 * b, c - simde_uint16x4_t operands handed to simde_vabdl_u16.
 *
 * Returns the simde_uint32x4_t produced by adding simde_vabdl_u16(b, c)
 * to a with simde_vaddq_u32. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_u16 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint32x4_t
simde_vabal_u16(simde_uint32x4_t a, simde_uint16x4_t b, simde_uint16x4_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_uint32x4_t abs_diff = simde_vabdl_u16(b, c);
  return simde_vaddq_u32(abs_diff, a);
#else
  return vabal_u16(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_u16. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_u16
  #define vabal_u16(a, b, c) simde_vabal_u16((a), (b), (c))
#endif

/* vabal_u32: absolute difference and accumulate, long form.
 *
 * a    - simde_uint64x2_t accumulator.
 * b, c - simde_uint32x2_t operands handed to simde_vabdl_u32.
 *
 * Returns the simde_uint64x2_t produced by adding simde_vabdl_u32(b, c)
 * to a with simde_vaddq_u64. When SIMDE_ARM_NEON_A32V7_NATIVE is
 * defined the call is forwarded to the native vabal_u32 intrinsic. */
SIMDE_FUNCTION_ATTRIBUTES
simde_uint64x2_t
simde_vabal_u32(simde_uint64x2_t a, simde_uint32x2_t b, simde_uint32x2_t c) {
#if !defined(SIMDE_ARM_NEON_A32V7_NATIVE)
  /* Portable path: compose the operation from two existing SIMDe calls. */
  const simde_uint64x2_t abs_diff = simde_vabdl_u32(b, c);
  return simde_vaddq_u64(abs_diff, a);
#else
  return vabal_u32(a, b, c);
#endif
}
/* When native aliases are requested, expose this function as vabal_u32. */
#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
  #undef vabal_u32
  #define vabal_u32(a, b, c) simde_vabal_u32((a), (b), (c))
#endif


SIMDE_END_DECLS_
HEDLEY_DIAGNOSTIC_POP

#endif /* !defined(SIMDE_ARM_NEON_ABAL_H) */
Loading

0 comments on commit 2a241e6

Please sign in to comment.