From 2eedece7a182b9220bee60da5e2f241b91bb05c1 Mon Sep 17 00:00:00 2001 From: Yi-Yen Chung <45251297+yyctw@users.noreply.github.com> Date: Tue, 24 Oct 2023 18:21:08 +0800 Subject: [PATCH] NEON: part 1 of implement all intrinsics supported by architecture A64 (#1090) Add 368 initial implementations and corresponding test cases in 88 families which are listed below: - `abd`, `abdl_high`, `add`, `addhn_high`, `bsl`, `ceq`, `ceqz`, `cgez`, `cgtz`, `cle`, - `cltz`, `cmla`, `cmla_rot180`, `cmla_rot270`, `cmla_rot90`, `cnt`, `copy_lane`, `cvt`, `cvt_n`, `cvtm`, - `cvtp`, `dot`, `dot_lane`, `dup_n`, `eor`, `fms_n`, `ld1`, `ld3`, `ld4`, `maxnm`, - `maxv`, `minnm`, `minv`, `mull`, `mull_high`, `mvn`, `pmin`, `qrdmulh_lane`, `qrshl`, `qrshrn_high_n`, - `qrshrun_high_n`, `qshl_n`, `qshlu_n`, `qshrn_high_n`, `qshrn_n`, `qshrun_n`, `qtbl`, `qtbx`, `raddhn`, `raddhn_high`, - `rbit`, `reinterpret`, `rev16`, `rev32`, `rev64`, `rnd`, `rndi`, `rndm`, `rndp`, `rshrn_high_n`, - `rsubhn`, `rsubhn_high`, `shr_n`, `shrn_n`, `sli_n`, `sri_n`, `st1`, `st1_lane`, `st1_x2`, `st1_x3`, - `st1_x4`, `st1q_x2`, `st1q_x3`, `st1q_x4`, `st2_lane`, `st3`, `st3_lane`, `st4`, `st4_lane`, `tbl`, - `tbx`, `trn`, `trn1`, `trn2`, `tst`, `uzp`, `uzp1`, `uzp2` --- .github/workflows/ci.yml | 2 +- meson.build | 16 + simde/arm/neon.h | 16 + simde/arm/neon/abd.h | 46 + simde/arm/neon/abdl_high.h | 123 + simde/arm/neon/addhn_high.h | 124 + simde/arm/neon/cgez.h | 59 + simde/arm/neon/cgtz.h | 59 + simde/arm/neon/cle.h | 63 + simde/arm/neon/cltz.h | 59 + simde/arm/neon/copy_lane.h | 860 +++++++ simde/arm/neon/cvt.h | 780 +++++- simde/arm/neon/cvt_n.h | 346 ++- simde/arm/neon/cvtm.h | 588 +++++ simde/arm/neon/cvtp.h | 588 +++++ simde/arm/neon/fms_n.h | 8 +- simde/arm/neon/ld3.h | 28 + simde/arm/neon/ld4.h | 20 + simde/arm/neon/qrshl.h | 732 ++++++ simde/arm/neon/qrshrn_high_n.h | 189 ++ simde/arm/neon/qrshrun_high_n.h | 113 + simde/arm/neon/qshl_n.h | 513 ++++ simde/arm/neon/qshrn_high_n.h | 101 + 
simde/arm/neon/qshrn_n.h | 21 + simde/arm/neon/raddhn.h | 182 ++ simde/arm/neon/raddhn_high.h | 102 + simde/arm/neon/reinterpret.h | 324 +++ simde/arm/neon/rev64.h | 32 +- simde/arm/neon/rshrn_high_n.h | 101 + simde/arm/neon/rsubhn.h | 182 ++ simde/arm/neon/rsubhn_high.h | 102 + simde/arm/neon/shr_n.h | 15 + simde/arm/neon/sli_n.h | 271 ++ simde/arm/neon/st1_lane.h | 33 + simde/arm/neon/st1_x2.h | 18 + simde/arm/neon/st1_x3.h | 18 + simde/arm/neon/st1_x4.h | 18 + simde/arm/neon/st1q_x2.h | 20 +- simde/arm/neon/st1q_x3.h | 20 +- simde/arm/neon/st1q_x4.h | 18 + simde/arm/neon/st2_lane.h | 39 + simde/arm/neon/st3.h | 43 + simde/arm/neon/st3_lane.h | 39 + simde/arm/neon/st4.h | 41 + simde/arm/neon/st4_lane.h | 38 + simde/arm/neon/trn.h | 31 + simde/arm/neon/trn1.h | 55 + simde/arm/neon/trn2.h | 55 + simde/arm/neon/uzp.h | 31 + simde/arm/neon/uzp1.h | 30 + simde/arm/neon/uzp2.h | 30 + test/arm/neon/abd.c | 212 ++ test/arm/neon/abdl_high.c | 513 ++++ test/arm/neon/abs.c | 229 ++ test/arm/neon/add_testgen.py | 114 - test/arm/neon/addhn_high.c | 554 ++++ test/arm/neon/cgez.c | 158 ++ test/arm/neon/cgtz.c | 158 ++ test/arm/neon/cle.c | 201 ++ test/arm/neon/cltz.c | 313 +++ test/arm/neon/copy_lane.c | 4179 +++++++++++++++++++++++++++++++ test/arm/neon/cvt.c | 2185 ++++++++++++++-- test/arm/neon/cvt_n.c | 2292 +++++++++++++++++ test/arm/neon/cvtm.c | 1158 +++++++++ test/arm/neon/cvtp.c | 1147 +++++++++ test/arm/neon/ld3.c | 1651 +----------- test/arm/neon/ld4.c | 179 ++ test/arm/neon/qrshl.c | 1708 +++++++++++++ test/arm/neon/qrshrn_high_n.c | 550 ++++ test/arm/neon/qrshrun_high_n.c | 279 +++ test/arm/neon/qshl_n.c | 1269 ++++++++++ test/arm/neon/qshrn_high_n.c | 549 ++++ test/arm/neon/qshrn_n.c | 156 ++ test/arm/neon/raddhn.c | 370 +++ test/arm/neon/raddhn_high.c | 443 ++++ test/arm/neon/reinterpret.c | 908 ++++++- test/arm/neon/rev64.c | 116 +- test/arm/neon/rshrn_high_n.c | 645 +++++ test/arm/neon/rshrn_n.c | 1 - test/arm/neon/rsubhn.c | 370 +++ test/arm/neon/rsubhn_high.c 
| 443 ++++ test/arm/neon/sli_n.c | 1561 ++++++++++++ test/arm/neon/st1_lane.c | 98 + test/arm/neon/st1_x2.c | 62 +- test/arm/neon/st1_x3.c | 87 +- test/arm/neon/st1_x4.c | 104 +- test/arm/neon/st1q_x2.c | 100 +- test/arm/neon/st1q_x3.c | 135 +- test/arm/neon/st1q_x4.c | 168 +- test/arm/neon/st2_lane.c | 126 + test/arm/neon/st3.c | 237 ++ test/arm/neon/st3_lane.c | 156 ++ test/arm/neon/st4.c | 294 +++ test/arm/neon/st4_lane.c | 184 ++ test/arm/neon/trn.c | 379 +++ test/arm/neon/trn1.c | 398 +++ test/arm/neon/trn2.c | 398 +++ test/arm/neon/uzp.c | 379 +++ test/arm/neon/uzp1.c | 353 +++ test/arm/neon/uzp2.c | 353 +++ 100 files changed, 34035 insertions(+), 1929 deletions(-) create mode 100644 simde/arm/neon/abdl_high.h create mode 100644 simde/arm/neon/addhn_high.h create mode 100644 simde/arm/neon/copy_lane.h create mode 100644 simde/arm/neon/cvtm.h create mode 100644 simde/arm/neon/cvtp.h create mode 100644 simde/arm/neon/qrshl.h create mode 100644 simde/arm/neon/qrshrn_high_n.h create mode 100644 simde/arm/neon/qrshrun_high_n.h create mode 100644 simde/arm/neon/qshl_n.h create mode 100644 simde/arm/neon/qshrn_high_n.h create mode 100644 simde/arm/neon/raddhn.h create mode 100644 simde/arm/neon/raddhn_high.h create mode 100644 simde/arm/neon/rshrn_high_n.h create mode 100644 simde/arm/neon/rsubhn.h create mode 100644 simde/arm/neon/rsubhn_high.h create mode 100644 simde/arm/neon/sli_n.h create mode 100644 test/arm/neon/abdl_high.c delete mode 100644 test/arm/neon/add_testgen.py create mode 100644 test/arm/neon/addhn_high.c create mode 100644 test/arm/neon/copy_lane.c create mode 100644 test/arm/neon/cvtm.c create mode 100644 test/arm/neon/cvtp.c create mode 100644 test/arm/neon/qrshl.c create mode 100644 test/arm/neon/qrshrn_high_n.c create mode 100644 test/arm/neon/qrshrun_high_n.c create mode 100644 test/arm/neon/qshl_n.c create mode 100644 test/arm/neon/qshrn_high_n.c create mode 100644 test/arm/neon/raddhn.c create mode 100644 test/arm/neon/raddhn_high.c create 
mode 100644 test/arm/neon/rshrn_high_n.c create mode 100644 test/arm/neon/rsubhn.c create mode 100644 test/arm/neon/rsubhn_high.c create mode 100644 test/arm/neon/sli_n.c diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 71613ada3..acbe405c8 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -659,7 +659,7 @@ jobs: - name: Configure and Build run: | meson --backend=ninja build --cross-file test/arm64cl.txt - meson test -C build --print-errorlogs $(meson test -C build --list | grep -v emul) + ninja -C build test linux-gcc-loongarch64: runs-on: ubuntu-22.04 diff --git a/meson.build b/meson.build index 98306dbfc..4aa8f07dc 100644 --- a/meson.build +++ b/meson.build @@ -14,9 +14,11 @@ simde_neon_families = [ 'abal_high', 'abd', 'abdl', + 'abdl_high', 'abs', 'add', 'addhn', + 'addhn_high', 'addl', 'addlv', 'addl_high', @@ -56,8 +58,11 @@ simde_neon_families = [ 'cnt', 'cvt', 'cvt_n', + 'cvtm', 'cvtn', + 'cvtp', 'combine', + 'copy_lane', 'create', 'div', 'dot', @@ -166,19 +171,26 @@ simde_neon_families = [ 'qrdmulh', 'qrdmulh_lane', 'qrdmulh_n', + 'qrshl', + 'qrshrn_high_n', 'qrshrn_n', + 'qrshrun_high_n', 'qrshrun_n', 'qmovn', 'qmovn_high', 'qmovun', 'qneg', 'qshl', + 'qshl_n', 'qshlu_n', + 'qshrn_high_n', 'qshrn_n', 'qshrun_n', 'qsub', 'qtbl', 'qtbx', + 'raddhn', + 'raddhn_high', 'rbit', 'recpe', 'recps', @@ -194,16 +206,20 @@ simde_neon_families = [ 'rndp', 'rshl', 'rshr_n', + 'rshrn_high_n', 'rshrn_n', 'rsqrte', 'rsqrts', 'rsra_n', + 'rsubhn', + 'rsubhn_high', 'set_lane', 'shl', 'shl_n', 'shll_n', 'shr_n', 'shrn_n', + 'sli_n', 'sqadd', 'sqrt', 'sra_n', diff --git a/simde/arm/neon.h b/simde/arm/neon.h index 88d13b2ed..880cfb4bc 100644 --- a/simde/arm/neon.h +++ b/simde/arm/neon.h @@ -35,9 +35,11 @@ #include "neon/abal_high.h" #include "neon/abd.h" #include "neon/abdl.h" +#include "neon/abdl_high.h" #include "neon/abs.h" #include "neon/add.h" #include "neon/addhn.h" +#include "neon/addhn_high.h" #include "neon/addl.h" #include 
"neon/addlv.h" #include "neon/addl_high.h" @@ -77,8 +79,11 @@ #include "neon/cnt.h" #include "neon/cvt.h" #include "neon/cvt_n.h" +#include "neon/cvtm.h" #include "neon/cvtn.h" +#include "neon/cvtp.h" #include "neon/combine.h" +#include "neon/copy_lane.h" #include "neon/create.h" #include "neon/div.h" #include "neon/dot.h" @@ -187,7 +192,10 @@ #include "neon/qrdmulh.h" #include "neon/qrdmulh_lane.h" #include "neon/qrdmulh_n.h" +#include "neon/qrshl.h" +#include "neon/qrshrn_high_n.h" #include "neon/qrshrn_n.h" +#include "neon/qrshrun_high_n.h" #include "neon/qrshrun_n.h" #include "neon/qmovn.h" #include "neon/qmovun.h" @@ -195,11 +203,15 @@ #include "neon/qneg.h" #include "neon/qsub.h" #include "neon/qshl.h" +#include "neon/qshl_n.h" #include "neon/qshlu_n.h" +#include "neon/qshrn_high_n.h" #include "neon/qshrn_n.h" #include "neon/qshrun_n.h" #include "neon/qtbl.h" #include "neon/qtbx.h" +#include "neon/raddhn.h" +#include "neon/raddhn_high.h" #include "neon/rbit.h" #include "neon/recpe.h" #include "neon/recps.h" @@ -215,16 +227,20 @@ #include "neon/rndp.h" #include "neon/rshl.h" #include "neon/rshr_n.h" +#include "neon/rshrn_high_n.h" #include "neon/rshrn_n.h" #include "neon/rsqrte.h" #include "neon/rsqrts.h" #include "neon/rsra_n.h" +#include "neon/rsubhn.h" +#include "neon/rsubhn_high.h" #include "neon/set_lane.h" #include "neon/shl.h" #include "neon/shl_n.h" #include "neon/shll_n.h" #include "neon/shr_n.h" #include "neon/shrn_n.h" +#include "neon/sli_n.h" #include "neon/sqadd.h" #include "neon/sqrt.h" #include "neon/sra_n.h" diff --git a/simde/arm/neon/abd.h b/simde/arm/neon/abd.h index c25153430..6880634cc 100644 --- a/simde/arm/neon/abd.h +++ b/simde/arm/neon/abd.h @@ -22,6 +22,7 @@ * * Copyright: * 2020 Evan Nemerson + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ABD_H) @@ -37,6 +38,23 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t 
+simde_vabdh_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vabdh_f16(a, b); + #else + simde_float32_t a_ = simde_float16_to_float32(a); + simde_float32_t b_ = simde_float16_to_float32(b); + simde_float32_t r_ = a_ - b_; + return r_ < 0 ? simde_float16_from_float32(-r_) : simde_float16_from_float32(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdh_f16 + #define vabdh_f16(a, b) simde_vabdh_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32_t simde_vabds_f32(simde_float32_t a, simde_float32_t b) { @@ -67,6 +85,20 @@ simde_vabdd_f64(simde_float64_t a, simde_float64_t b) { #define vabdd_f64(a, b) simde_vabdd_f64((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vabd_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) /* same ISA gate as the alias guard below and as simde_vabdq_f16 */ + return vabd_f16(a, b); + #else + return simde_vabs_f16(simde_vsub_f16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vabd_f16 + #define vabd_f16(a, b) simde_vabd_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vabd_f32(simde_float32x2_t a, simde_float32x2_t b) { @@ -220,6 +252,20 @@ simde_vabd_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vabd_u32(a, b) simde_vabd_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vabdq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vabdq_f16(a, b); + #else + return simde_vabsq_f16(simde_vsubq_f16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vabdq_f16 + #define vabdq_f16(a, b) simde_vabdq_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vabdq_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/abdl_high.h b/simde/arm/neon/abdl_high.h 
new file mode 100644 index 000000000..826b1ba33 --- /dev/null +++ b/simde/arm/neon/abdl_high.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_ABDL_HIGH_H) +#define SIMDE_ARM_NEON_ABDL_HIGH_H + +#include "abdl.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vabdl_high_s8(simde_int8x16_t a, simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_s8(a, b); + #else + return simde_vabdl_s8(simde_vget_high_s8(a), simde_vget_high_s8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_s8 + #define vabdl_high_s8(a, b) simde_vabdl_high_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vabdl_high_s16(simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_s16(a, b); + #else + return simde_vabdl_s16(simde_vget_high_s16(a), simde_vget_high_s16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_s16 + #define vabdl_high_s16(a, b) simde_vabdl_high_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vabdl_high_s32(simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_s32(a, b); + #else + return simde_vabdl_s32(simde_vget_high_s32(a), simde_vget_high_s32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_s32 + #define vabdl_high_s32(a, b) simde_vabdl_high_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vabdl_high_u8(simde_uint8x16_t a, simde_uint8x16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_u8(a, b); + #else + return simde_vabdl_u8(simde_vget_high_u8(a), simde_vget_high_u8(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_u8 + #define vabdl_high_u8(a, b) simde_vabdl_high_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES 
+simde_uint32x4_t +simde_vabdl_high_u16(simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_u16(a, b); + #else + return simde_vabdl_u16(simde_vget_high_u16(a), simde_vget_high_u16(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_u16 + #define vabdl_high_u16(a, b) simde_vabdl_high_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vabdl_high_u32(simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vabdl_high_u32(a, b); + #else + return simde_vabdl_u32(simde_vget_high_u32(a), simde_vget_high_u32(b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vabdl_high_u32 + #define vabdl_high_u32(a, b) simde_vabdl_high_u32((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ABDL_HIGH_H) */ diff --git a/simde/arm/neon/addhn_high.h b/simde/arm/neon/addhn_high.h new file mode 100644 index 000000000..0c96a24d4 --- /dev/null +++ b/simde/arm/neon/addhn_high.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_ADDHN_HIGH_H) +#define SIMDE_ARM_NEON_ADDHN_HIGH_H + +#include "addhn.h" +#include "combine.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vaddhn_high_s16(simde_int8x8_t r, simde_int16x8_t a, simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_s16(r, a, b); + #else + return simde_vcombine_s8(r, simde_vaddhn_s16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_s16 + #define vaddhn_high_s16(r, a, b) simde_vaddhn_high_s16((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vaddhn_high_s32(simde_int16x4_t r, simde_int32x4_t a, simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_s32(r, a, b); + #else + return simde_vcombine_s16(r, simde_vaddhn_s32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_s32 + #define vaddhn_high_s32(r, a, b) simde_vaddhn_high_s32((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vaddhn_high_s64(simde_int32x2_t r, simde_int64x2_t a, simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_s64(r, a, b); + #else + return simde_vcombine_s32(r, simde_vaddhn_s64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_s64 + #define vaddhn_high_s64(r, a, b) simde_vaddhn_high_s64((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vaddhn_high_u16(simde_uint8x8_t r, 
simde_uint16x8_t a, simde_uint16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_u16(r, a, b); + #else + return simde_vcombine_u8(r, simde_vaddhn_u16(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_u16 + #define vaddhn_high_u16(r, a, b) simde_vaddhn_high_u16((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vaddhn_high_u32(simde_uint16x4_t r, simde_uint32x4_t a, simde_uint32x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_u32(r, a, b); + #else + return simde_vcombine_u16(r, simde_vaddhn_u32(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_u32 + #define vaddhn_high_u32(r, a, b) simde_vaddhn_high_u32((r), (a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vaddhn_high_u64(simde_uint32x2_t r, simde_uint64x2_t a, simde_uint64x2_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vaddhn_high_u64(r, a, b); + #else + return simde_vcombine_u32(r, simde_vaddhn_u64(a, b)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vaddhn_high_u64 + #define vaddhn_high_u64(r, a, b) simde_vaddhn_high_u64((r), (a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_ADDHN_HIGH_H) */ diff --git a/simde/arm/neon/cgez.h b/simde/arm/neon/cgez.h index b84408361..04024c48e 100644 --- a/simde/arm/neon/cgez.h +++ b/simde/arm/neon/cgez.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_CGEZ_H) @@ -78,6 +79,42 @@ simde_vcgezs_f32(simde_float32_t a) { #define vcgezs_f32(a) simde_vcgezs_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcgezh_f16(simde_float16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(uint16_t, vcgezh_f16(a)); + #else + 
return (simde_float16_to_float32(a) >= SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgezh_f16 + #define vcgezh_f16(a) simde_vcgezh_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgezq_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcgezq_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezh_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcgezq_f16 + #define vcgezq_f16(a) simde_vcgezq_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcgezq_f32(simde_float32x4_t a) { @@ -246,6 +283,28 @@ simde_vcgezq_s64(simde_int64x2_t a) { #define vcgezq_s64(a) simde_vcgezq_s64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgez_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcgez_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgezh_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcgez_f16 + #define vcgez_f16(a) simde_vcgez_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgez_f32(simde_float32x2_t a) { diff --git a/simde/arm/neon/cgtz.h b/simde/arm/neon/cgtz.h index 125e009b2..30c6e5dd0 100644 --- a/simde/arm/neon/cgtz.h +++ b/simde/arm/neon/cgtz.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung (Copyright owned 
by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_CGTZ_H) @@ -66,6 +67,42 @@ simde_vcgtzd_f64(simde_float64_t a) { #define vcgtzd_f64(a) simde_vcgtzd_f64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcgtzh_f16(simde_float16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(uint16_t, vcgtzh_f16(a)); + #else + return (simde_float16_to_float32(a) > SIMDE_FLOAT32_C(0.0)) ? UINT16_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzh_f16 + #define vcgtzh_f16(a) simde_vcgtzh_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcgtzq_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcgtzq_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzh_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcgtzq_f16 + #define vcgtzq_f16(a) simde_vcgtzq_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES uint32_t simde_vcgtzs_f32(simde_float32_t a) { @@ -248,6 +285,28 @@ simde_vcgtzq_s64(simde_int64x2_t a) { #define vcgtzq_s64(a) simde_vcgtzq_s64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcgtz_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcgtz_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcgtzh_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcgtz_f16 + #define vcgtz_f16(a) 
simde_vcgtz_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcgtz_f32(simde_float32x2_t a) { diff --git a/simde/arm/neon/cle.h b/simde/arm/neon/cle.h index 5a1591b30..fedfcc522 100644 --- a/simde/arm/neon/cle.h +++ b/simde/arm/neon/cle.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_CLE_H) @@ -90,6 +91,44 @@ simde_vcles_f32(simde_float32_t a, simde_float32_t b) { #define vcles_f32(a, b) simde_vcles_f32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcleh_f16(simde_float16_t a, simde_float16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(uint16_t, vcleh_f16(a, b)); + #else + return (simde_float16_to_float32(a) <= simde_float16_to_float32(b)) ? UINT16_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcleh_f16 + #define vcleh_f16(a, b) simde_vcleh_f16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcleq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcleq_f16(a, b); + #else + simde_float16x8_private + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcleh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcleq_f16 + #define vcleq_f16(a, b) simde_vcleq_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcleq_f32(simde_float32x4_t a, simde_float32x4_t b) { @@ -475,6 +514,30 @@ simde_vcleq_u64(simde_uint64x2_t a, simde_uint64x2_t b) { #define vcleq_u64(a, b) simde_vcleq_u64((a), (b)) #endif 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcle_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcle_f16(a, b); + #else + simde_float16x4_private + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcleh_f16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcle_f16 + #define vcle_f16(a, b) simde_vcle_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcle_f32(simde_float32x2_t a, simde_float32x2_t b) { diff --git a/simde/arm/neon/cltz.h b/simde/arm/neon/cltz.h index a9c94984e..2c61d1a16 100644 --- a/simde/arm/neon/cltz.h +++ b/simde/arm/neon/cltz.h @@ -22,6 +22,7 @@ * * Copyright: * 2020 Evan Nemerson + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ /* TODO: float fallbacks should use vclt(a, vdup_n(0.0)) */ @@ -81,6 +82,42 @@ simde_vcltzs_f32(simde_float32_t a) { #define vcltzs_f32(a) simde_vcltzs_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcltzh_f16(simde_float16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return HEDLEY_STATIC_CAST(uint16_t, vcltzh_f16(a)); + #else + return (simde_float16_to_float32(a) < SIMDE_FLOAT32_C(0.0)) ? 
UINT16_MAX : 0; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcltzh_f16 + #define vcltzh_f16(a) simde_vcltzh_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcltz_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcltz_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltzh_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) /* alias gate mirrors the A32V8 native gate used in the function above */ + #undef vcltz_f16 + #define vcltz_f16(a) simde_vcltz_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcltz_f32(simde_float32x2_t a) { @@ -201,6 +238,28 @@ simde_vcltz_s64(simde_int64x1_t a) { #define vcltz_s64(a) simde_vcltz_s64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcltzq_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcltzq_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcltzh_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcltzq_f16 + #define vcltzq_f16(a) simde_vcltzq_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x4_t simde_vcltzq_f32(simde_float32x4_t a) { diff --git a/simde/arm/neon/copy_lane.h b/simde/arm/neon/copy_lane.h new file mode 100644 index 000000000..eb3c1e047 --- /dev/null +++ b/simde/arm/neon/copy_lane.h @@ -0,0 +1,860 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and 
associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_COPY_LANE_H) +#define SIMDE_ARM_NEON_COPY_LANE_H + +#include "types.h" +#include "cvt.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcopy_lane_s8(simde_int8x8_t a, const int lane1, simde_int8x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_int8x8_private + b_ = simde_int8x8_to_private(b), + r_ = simde_int8x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_s8(a, lane1, b, lane2) vcopy_lane_s8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_s8 + #define vcopy_lane_s8(a, lane1, b, lane2) simde_vcopy_lane_s8((a), (lane1), (b), (lane2)) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcopy_lane_s16(simde_int16x4_t a, const int lane1, simde_int16x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_int16x4_private + b_ = simde_int16x4_to_private(b), + r_ = simde_int16x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_s16(a, lane1, b, lane2) vcopy_lane_s16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_s16 + #define vcopy_lane_s16(a, lane1, b, lane2) simde_vcopy_lane_s16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcopy_lane_s32(simde_int32x2_t a, const int lane1, simde_int32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_int32x2_private + b_ = simde_int32x2_to_private(b), + r_ = simde_int32x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_s32(a, lane1, b, lane2) vcopy_lane_s32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_s32 + #define vcopy_lane_s32(a, lane1, b, lane2) simde_vcopy_lane_s32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcopy_lane_s64(simde_int64x1_t a, const int lane1, simde_int64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_int64x1_private + b_ = simde_int64x1_to_private(b), + r_ = simde_int64x1_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_s64(a, lane1, b, lane2) 
vcopy_lane_s64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_s64 + #define vcopy_lane_s64(a, lane1, b, lane2) simde_vcopy_lane_s64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcopy_lane_u8(simde_uint8x8_t a, const int lane1, simde_uint8x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_uint8x8_private + b_ = simde_uint8x8_to_private(b), + r_ = simde_uint8x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_u8(a, lane1, b, lane2) vcopy_lane_u8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_u8 + #define vcopy_lane_u8(a, lane1, b, lane2) simde_vcopy_lane_u8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcopy_lane_u16(simde_uint16x4_t a, const int lane1, simde_uint16x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_uint16x4_private + b_ = simde_uint16x4_to_private(b), + r_ = simde_uint16x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_u16(a, lane1, b, lane2) vcopy_lane_u16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_u16 + #define vcopy_lane_u16(a, lane1, b, lane2) simde_vcopy_lane_u16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcopy_lane_u32(simde_uint32x2_t a, const int lane1, simde_uint32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_uint32x2_private + b_ = 
simde_uint32x2_to_private(b), + r_ = simde_uint32x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_u32(a, lane1, b, lane2) vcopy_lane_u32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_u32 + #define vcopy_lane_u32(a, lane1, b, lane2) simde_vcopy_lane_u32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcopy_lane_u64(simde_uint64x1_t a, const int lane1, simde_uint64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_uint64x1_private + b_ = simde_uint64x1_to_private(b), + r_ = simde_uint64x1_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_u64(a, lane1, b, lane2) vcopy_lane_u64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_u64 + #define vcopy_lane_u64(a, lane1, b, lane2) simde_vcopy_lane_u64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcopy_lane_f32(simde_float32x2_t a, const int lane1, simde_float32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_float32x2_private + b_ = simde_float32x2_to_private(b), + r_ = simde_float32x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_f32(a, lane1, b, lane2) vcopy_lane_f32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_f32 + #define vcopy_lane_f32(a, lane1, b, lane2) simde_vcopy_lane_f32((a), (lane1), (b), (lane2)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcopy_lane_f64(simde_float64x1_t a, const int lane1, simde_float64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_float64x1_private + b_ = simde_float64x1_to_private(b), + r_ = simde_float64x1_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_lane_f64(a, lane1, b, lane2) vcopy_lane_f64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_lane_f64 + #define vcopy_lane_f64(a, lane1, b, lane2) simde_vcopy_lane_f64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vcopy_laneq_s8(simde_int8x8_t a, const int lane1, simde_int8x16_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 15) { + simde_int8x8_private + r_ = simde_int8x8_to_private(a); + simde_int8x16_private + b_ = simde_int8x16_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_s8(a, lane1, b, lane2) vcopy_laneq_s8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_s8 + #define vcopy_laneq_s8(a, lane1, b, lane2) simde_vcopy_laneq_s8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcopy_laneq_s16(simde_int16x4_t a, const int lane1, simde_int16x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_int16x4_private + r_ = simde_int16x4_to_private(a); + simde_int16x8_private + b_ = simde_int16x8_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define 
simde_vcopy_laneq_s16(a, lane1, b, lane2) vcopy_laneq_s16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_s16 + #define vcopy_laneq_s16(a, lane1, b, lane2) simde_vcopy_laneq_s16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcopy_laneq_s32(simde_int32x2_t a, const int lane1, simde_int32x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_int32x2_private + r_ = simde_int32x2_to_private(a); + simde_int32x4_private + b_ = simde_int32x4_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_s32(a, lane1, b, lane2) vcopy_laneq_s32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_s32 + #define vcopy_laneq_s32(a, lane1, b, lane2) simde_vcopy_laneq_s32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcopy_laneq_s64(simde_int64x1_t a, const int lane1, simde_int64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_int64x1_private + r_ = simde_int64x1_to_private(a); + simde_int64x2_private + b_ = simde_int64x2_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_s64(a, lane1, b, lane2) vcopy_laneq_s64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_s64 + #define vcopy_laneq_s64(a, lane1, b, lane2) simde_vcopy_laneq_s64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vcopy_laneq_u8(simde_uint8x8_t a, const int lane1, simde_uint8x16_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 
7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 15) { + simde_uint8x8_private + r_ = simde_uint8x8_to_private(a); + simde_uint8x16_private + b_ = simde_uint8x16_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_u8(a, lane1, b, lane2) vcopy_laneq_u8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_u8 + #define vcopy_laneq_u8(a, lane1, b, lane2) simde_vcopy_laneq_u8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcopy_laneq_u16(simde_uint16x4_t a, const int lane1, simde_uint16x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_uint16x4_private + r_ = simde_uint16x4_to_private(a); + simde_uint16x8_private + b_ = simde_uint16x8_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_u16(a, lane1, b, lane2) vcopy_laneq_u16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_u16 + #define vcopy_laneq_u16(a, lane1, b, lane2) simde_vcopy_laneq_u16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcopy_laneq_u32(simde_uint32x2_t a, const int lane1, simde_uint32x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_uint32x2_private + r_ = simde_uint32x2_to_private(a); + simde_uint32x4_private + b_ = simde_uint32x4_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_u32(a, lane1, b, lane2) vcopy_laneq_u32((a), (lane1), (b), (lane2)) +#endif +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_u32 + #define vcopy_laneq_u32(a, lane1, b, lane2) simde_vcopy_laneq_u32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcopy_laneq_u64(simde_uint64x1_t a, const int lane1, simde_uint64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_uint64x1_private + r_ = simde_uint64x1_to_private(a); + simde_uint64x2_private + b_ = simde_uint64x2_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_u64(a, lane1, b, lane2) vcopy_laneq_u64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_u64 + #define vcopy_laneq_u64(a, lane1, b, lane2) simde_vcopy_laneq_u64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcopy_laneq_f32(simde_float32x2_t a, const int lane1, simde_float32x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_float32x2_private + r_ = simde_float32x2_to_private(a); + simde_float32x4_private + b_ = simde_float32x4_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_float32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_f32(a, lane1, b, lane2) vcopy_laneq_f32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_f32 + #define vcopy_laneq_f32(a, lane1, b, lane2) simde_vcopy_laneq_f32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x1_t +simde_vcopy_laneq_f64(simde_float64x1_t a, const int lane1, simde_float64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 0) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + 
simde_float64x1_private + r_ = simde_float64x1_to_private(a); + simde_float64x2_private + b_ = simde_float64x2_to_private(b); + + r_.values[lane1] = b_.values[lane2]; + return simde_float64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopy_laneq_f64(a, lane1, b, lane2) vcopy_laneq_f64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopy_laneq_f64 + #define vcopy_laneq_f64(a, lane1, b, lane2) simde_vcopy_laneq_f64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vcopyq_lane_s8(simde_int8x16_t a, const int lane1, simde_int8x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_int8x8_private + b_ = simde_int8x8_to_private(b); + simde_int8x16_private + r_ = simde_int8x16_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_s8(a, lane1, b, lane2) vcopyq_lane_s8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_s8 + #define vcopyq_lane_s8(a, lane1, b, lane2) simde_vcopyq_lane_s8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcopyq_lane_s16(simde_int16x8_t a, const int lane1, simde_int16x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_int16x4_private + b_ = simde_int16x4_to_private(b); + simde_int16x8_private + r_ = simde_int16x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_s16(a, lane1, b, lane2) vcopyq_lane_s16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_s16 + #define vcopyq_lane_s16(a, 
lane1, b, lane2) simde_vcopyq_lane_s16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcopyq_lane_s32(simde_int32x4_t a, const int lane1, simde_int32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_int32x2_private + b_ = simde_int32x2_to_private(b); + simde_int32x4_private + r_ = simde_int32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_s32(a, lane1, b, lane2) vcopyq_lane_s32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_s32 + #define vcopyq_lane_s32(a, lane1, b, lane2) simde_vcopyq_lane_s32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcopyq_lane_s64(simde_int64x2_t a, const int lane1, simde_int64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_int64x1_private + b_ = simde_int64x1_to_private(b); + simde_int64x2_private + r_ = simde_int64x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_s64(a, lane1, b, lane2) vcopyq_lane_s64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_s64 + #define vcopyq_lane_s64(a, lane1, b, lane2) simde_vcopyq_lane_s64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcopyq_lane_u8(simde_uint8x16_t a, const int lane1, simde_uint8x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_uint8x8_private + b_ = simde_uint8x8_to_private(b); + simde_uint8x16_private + r_ = simde_uint8x16_to_private(a); + + r_.values[lane1] = 
b_.values[lane2]; + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_u8(a, lane1, b, lane2) vcopyq_lane_u8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_u8 + #define vcopyq_lane_u8(a, lane1, b, lane2) simde_vcopyq_lane_u8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcopyq_lane_u16(simde_uint16x8_t a, const int lane1, simde_uint16x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_uint16x4_private + b_ = simde_uint16x4_to_private(b); + simde_uint16x8_private + r_ = simde_uint16x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_u16(a, lane1, b, lane2) vcopyq_lane_u16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_u16 + #define vcopyq_lane_u16(a, lane1, b, lane2) simde_vcopyq_lane_u16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcopyq_lane_u32(simde_uint32x4_t a, const int lane1, simde_uint32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_uint32x2_private + b_ = simde_uint32x2_to_private(b); + simde_uint32x4_private + r_ = simde_uint32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_u32(a, lane1, b, lane2) vcopyq_lane_u32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_u32 + #define vcopyq_lane_u32(a, lane1, b, lane2) simde_vcopyq_lane_u32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t 
+simde_vcopyq_lane_u64(simde_uint64x2_t a, const int lane1, simde_uint64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_uint64x1_private + b_ = simde_uint64x1_to_private(b); + simde_uint64x2_private + r_ = simde_uint64x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_u64(a, lane1, b, lane2) vcopyq_lane_u64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_u64 + #define vcopyq_lane_u64(a, lane1, b, lane2) simde_vcopyq_lane_u64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcopyq_lane_f32(simde_float32x4_t a, const int lane1, simde_float32x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_float32x2_private + b_ = simde_float32x2_to_private(b); + simde_float32x4_private + r_ = simde_float32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_f32(a, lane1, b, lane2) vcopyq_lane_f32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_f32 + #define vcopyq_lane_f32(a, lane1, b, lane2) simde_vcopyq_lane_f32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcopyq_lane_f64(simde_float64x2_t a, const int lane1, simde_float64x1_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 0) { + simde_float64x1_private + b_ = simde_float64x1_to_private(b); + simde_float64x2_private + r_ = simde_float64x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float64x2_from_private(r_); +} +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_lane_f64(a, lane1, b, lane2) vcopyq_lane_f64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_lane_f64 + #define vcopyq_lane_f64(a, lane1, b, lane2) simde_vcopyq_lane_f64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vcopyq_laneq_s8(simde_int8x16_t a, const int lane1, simde_int8x16_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 15) { + simde_int8x16_private + b_ = simde_int8x16_to_private(b), + r_ = simde_int8x16_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_s8(a, lane1, b, lane2) vcopyq_laneq_s8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_s8 + #define vcopyq_laneq_s8(a, lane1, b, lane2) simde_vcopyq_laneq_s8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcopyq_laneq_s16(simde_int16x8_t a, const int lane1, simde_int16x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_int16x8_private + b_ = simde_int16x8_to_private(b), + r_ = simde_int16x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_s16(a, lane1, b, lane2) vcopyq_laneq_s16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_s16 + #define vcopyq_laneq_s16(a, lane1, b, lane2) simde_vcopyq_laneq_s16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcopyq_laneq_s32(simde_int32x4_t a, const int lane1, simde_int32x4_t b, const int lane2) + 
SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_int32x4_private + b_ = simde_int32x4_to_private(b), + r_ = simde_int32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_s32(a, lane1, b, lane2) vcopyq_laneq_s32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_s32 + #define vcopyq_laneq_s32(a, lane1, b, lane2) simde_vcopyq_laneq_s32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcopyq_laneq_s64(simde_int64x2_t a, const int lane1, simde_int64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_int64x2_private + b_ = simde_int64x2_to_private(b), + r_ = simde_int64x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_int64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_s64(a, lane1, b, lane2) vcopyq_laneq_s64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_s64 + #define vcopyq_laneq_s64(a, lane1, b, lane2) simde_vcopyq_laneq_s64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vcopyq_laneq_u8(simde_uint8x16_t a, const int lane1, simde_uint8x16_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 15) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 15) { + simde_uint8x16_private + b_ = simde_uint8x16_to_private(b), + r_ = simde_uint8x16_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_u8(a, lane1, b, lane2) vcopyq_laneq_u8((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vcopyq_laneq_u8 + #define vcopyq_laneq_u8(a, lane1, b, lane2) simde_vcopyq_laneq_u8((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcopyq_laneq_u16(simde_uint16x8_t a, const int lane1, simde_uint16x8_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 7) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 7) { + simde_uint16x8_private + b_ = simde_uint16x8_to_private(b), + r_ = simde_uint16x8_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_u16(a, lane1, b, lane2) vcopyq_laneq_u16((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_u16 + #define vcopyq_laneq_u16(a, lane1, b, lane2) simde_vcopyq_laneq_u16((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcopyq_laneq_u32(simde_uint32x4_t a, const int lane1, simde_uint32x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_uint32x4_private + b_ = simde_uint32x4_to_private(b), + r_ = simde_uint32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_uint32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_u32(a, lane1, b, lane2) vcopyq_laneq_u32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_u32 + #define vcopyq_laneq_u32(a, lane1, b, lane2) simde_vcopyq_laneq_u32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcopyq_laneq_u64(simde_uint64x2_t a, const int lane1, simde_uint64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_uint64x2_private + b_ = simde_uint64x2_to_private(b), + r_ = simde_uint64x2_to_private(a); + + r_.values[lane1] = 
b_.values[lane2]; + return simde_uint64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_u64(a, lane1, b, lane2) vcopyq_laneq_u64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_u64 + #define vcopyq_laneq_u64(a, lane1, b, lane2) simde_vcopyq_laneq_u64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcopyq_laneq_f32(simde_float32x4_t a, const int lane1, simde_float32x4_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 3) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 3) { + simde_float32x4_private + b_ = simde_float32x4_to_private(b), + r_ = simde_float32x4_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_f32(a, lane1, b, lane2) vcopyq_laneq_f32((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_f32 + #define vcopyq_laneq_f32(a, lane1, b, lane2) simde_vcopyq_laneq_f32((a), (lane1), (b), (lane2)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcopyq_laneq_f64(simde_float64x2_t a, const int lane1, simde_float64x2_t b, const int lane2) + SIMDE_REQUIRE_CONSTANT_RANGE(lane1, 0, 1) + SIMDE_REQUIRE_CONSTANT_RANGE(lane2, 0, 1) { + simde_float64x2_private + b_ = simde_float64x2_to_private(b), + r_ = simde_float64x2_to_private(a); + + r_.values[lane1] = b_.values[lane2]; + return simde_float64x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcopyq_laneq_f64(a, lane1, b, lane2) vcopyq_laneq_f64((a), (lane1), (b), (lane2)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcopyq_laneq_f64 + #define vcopyq_laneq_f64(a, lane1, b, lane2) simde_vcopyq_laneq_f64((a), (lane1), (b), (lane2)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* 
SIMDE_ARM_NEON_COPY_LANE_H */ diff --git a/simde/arm/neon/cvt.h b/simde/arm/neon/cvt.h index 9dec4a1ed..e1b0232d6 100644 --- a/simde/arm/neon/cvt.h +++ b/simde/arm/neon/cvt.h @@ -144,13 +144,16 @@ int16_t simde_vcvth_s16_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvth_s16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int16_t, + simde_float16_to_float32(a)); #else simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af < HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { return INT16_MIN; - } else if (HEDLEY_UNLIKELY(af > HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { return INT16_MAX; - } else if (HEDLEY_UNLIKELY(simde_math_isnanf(af))) { + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { return 0; } else { return HEDLEY_STATIC_CAST(int16_t, af); @@ -167,13 +170,16 @@ uint16_t simde_vcvth_u16_f16(simde_float16 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) return vcvth_u16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint16_t, + simde_float16_to_float32(a)); #else simde_float32 af = simde_float16_to_float32(a); - if (HEDLEY_UNLIKELY(af < SIMDE_FLOAT32_C(0.0))) { + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { return 0; - } else if (HEDLEY_UNLIKELY(af > HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { return UINT16_MAX; - } else if (simde_math_isnanf(af)) { + } else if (simde_isnanhf(a)) { return 0; } else { return HEDLEY_STATIC_CAST(uint16_t, af); @@ -185,6 +191,110 @@ simde_vcvth_u16_f16(simde_float16 a) { #define vcvth_u16_f16(a) simde_vcvth_u16_f16(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvth_s32_f16(simde_float16 a) { + #if 
defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_s32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, + simde_float16_to_float32(a)); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, af); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_s32_f16 + #define vcvth_s32_f16(a) simde_vcvth_s32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvth_u32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_u32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, + simde_float16_to_float32(a)); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, af); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_u32_f16 + #define vcvth_u32_f16(a) simde_vcvth_u32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvth_s64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_s64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, + simde_float16_to_float32(a)); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { + return INT64_MIN; + } else if 
(HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { + return INT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, af); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_s64_f16 + #define vcvth_s64_f16(a) simde_vcvth_s64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvth_u64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_u64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, + simde_float16_to_float32(a)); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { + return UINT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, af); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_u64_f16 + #define vcvth_u64_f16(a) simde_vcvth_u64_f16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vcvts_s32_f32(simde_float32 a) { @@ -338,6 +448,102 @@ simde_vcvtd_f64_u64(uint64_t a) { #define vcvtd_f64_u64(a) simde_vcvtd_f64_u64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_u32(uint32_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_u32(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_u32 + #define vcvth_f16_u32(a) simde_vcvth_f16_u32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_u64(uint64_t a) { + 
#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_u64(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_u64 + #define vcvth_f16_u64(a) simde_vcvth_f16_u64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_s32(int32_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_s32(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_s32 + #define vcvth_f16_s32(a) simde_vcvth_f16_s32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_s64(int64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_s64(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_s64 + #define vcvth_f16_s64(a) simde_vcvth_f16_s64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_s16(int16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_s16(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + 
return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_s16 + #define vcvth_f16_s16(a) simde_vcvth_f16_s16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_f16_u16(uint16_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvth_f16_u16(a); + #elif SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_PORTABLE && SIMDE_FLOAT16_API != SIMDE_FLOAT16_API_FP16_NO_ABI + return HEDLEY_STATIC_CAST(simde_float16_t, a); + #else + return simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, a)); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_f16_u16 + #define vcvth_f16_u16(a) simde_vcvth_f16_u16(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vcvt_s16_f16(simde_float16x4_t a) { @@ -1177,15 +1383,221 @@ simde_vcvtq_f64_u64(simde_uint64x2_t a) { #define vcvtq_f64_u64(a) simde_vcvtq_f64_u64(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vcvtah_s16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_s16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int16_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + return INT16_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + return INT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int16_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_s16_f16 + #define vcvtah_s16_f16(a) simde_vcvtah_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcvtah_u16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 
!defined(SIMDE_BUG_CLANG_46844) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_u16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint16_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + return UINT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint16_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_u16_f16 + #define vcvtah_u16_f16(a) simde_vcvtah_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtah_s32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_s32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_s32_f16 + #define vcvtah_s32_f16(a) simde_vcvtah_s32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtah_u32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_u32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = 
simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_u32_f16 + #define vcvtah_u32_f16(a) simde_vcvtah_u32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtah_s64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_s64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { + return INT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_s64_f16 + #define vcvtah_s64_f16(a) simde_vcvtah_s64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtah_u64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) && defined(SIMDE_ARM_NEON_FP16) + return vcvtah_u64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, + simde_math_roundf(simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { + return UINT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; 
+ } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_roundf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtah_u64_f16 + #define vcvtah_u64_f16(a) simde_vcvtah_u64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtad_s64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtad_s64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, simde_math_round(a)); + #else + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { + return INT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnan(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_round(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtad_s64_f64 + #define vcvtad_s64_f64(a) simde_vcvtad_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtad_u64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtad_u64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, simde_math_round(a)); + #else + if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT64_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_round(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtad_u64_f64 + #define vcvtad_u64_f64(a) simde_vcvtad_u64_f64(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES int32_t simde_vcvtas_s32_f32(simde_float32 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtas_s32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, simde_math_roundf(a)); #else - if 
(HEDLEY_UNLIKELY(a < HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { return INT32_MIN; - } else if (HEDLEY_UNLIKELY(a > HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { return INT32_MAX; } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { return 0; @@ -1204,6 +1616,8 @@ uint32_t simde_vcvtas_u32_f32(simde_float32 a) { #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) return vcvtas_u32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, simde_math_roundf(a)); #else if (HEDLEY_UNLIKELY(a < SIMDE_FLOAT32_C(0.0))) { return 0; @@ -1222,9 +1636,100 @@ simde_vcvtas_u32_f32(simde_float32 a) { #define vcvtas_u32_f32(a) simde_vcvtas_u32_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvta_s16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvta_s16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtah_s16_f16(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvta_s16_f16 + #define vcvta_s16_f16(a) simde_vcvta_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvta_u16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvta_u16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtah_u16_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvta_u16_f16 + #define vcvta_u16_f16(a) simde_vcvta_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvta_s64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvta_s64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtad_s64_f64(a_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvta_s64_f64 + #define vcvta_s64_f64(a) simde_vcvta_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvta_u64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvta_u64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtad_u64_f64(a_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvta_u64_f64 + #define vcvta_u64_f64(a) simde_vcvta_u64_f64(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_int32x2_t simde_vcvta_s32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvta_s32_f32(a); + #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_int32x2_private r_; @@ -1234,12 +1739,57 @@ simde_vcvta_s32_f32(simde_float32x2_t a) { } return simde_int32x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvta_s32_f32 #define vcvta_s32_f32(a) simde_vcvta_s32_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtaq_s16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && 
defined(SIMDE_ARM_NEON_FP16) + return vcvtaq_s16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtah_s16_f16(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtaq_s16_f16 + #define vcvtaq_s16_f16(a) simde_vcvtaq_s16_f16(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtaq_u16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtaq_u16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtah_u16_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtaq_u16_f16 + #define vcvtaq_u16_f16(a) simde_vcvtaq_u16_f16(a) +#endif SIMDE_FUNCTION_ATTRIBUTES simde_int32x4_t @@ -1263,9 +1813,57 @@ simde_vcvtaq_s32_f32(simde_float32x4_t a) { #define vcvtaq_s32_f32(a) simde_vcvtaq_s32_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtaq_s64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtaq_s64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtad_s64_f64(a_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtaq_s64_f64 + #define vcvtaq_s64_f64(a) simde_vcvtaq_s64_f64(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t 
+simde_vcvtaq_u64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtaq_u64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtad_u64_f64(a_.values[i]); + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtaq_u64_f64 + #define vcvtaq_u64_f64(a) simde_vcvtaq_u64_f64(a) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_uint32x2_t simde_vcvta_u32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvta_u32_f32(a); + #else simde_float32x2_private a_ = simde_float32x2_to_private(a); simde_uint32x2_private r_; @@ -1275,6 +1873,7 @@ simde_vcvta_u32_f32(simde_float32x2_t a) { } return simde_uint32x2_from_private(r_); + #endif } #if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) #undef vcvta_u32_f32 @@ -1303,6 +1902,171 @@ simde_vcvtaq_u32_f32(simde_float32x4_t a) { #define vcvtaq_u32_f32(a) simde_vcvtaq_u32_f32(a) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vcvt_high_f16_f32(simde_float16x4_t r, simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_high_f16_f32(r, a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_float16x4_private b_ = simde_float16x4_to_private(r); + simde_float16x8_private r_; + + size_t half_pos = (sizeof(r_.values) / sizeof(r_.values[0]) / 2); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < half_pos; i++) { + r_.values[i] = b_.values[i]; + } + SIMDE_VECTORIZE + for (size_t i = half_pos; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_float16_from_float32(a_.values[i-half_pos]); + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef 
vcvt_high_f16_f32 + #define vcvt_high_f16_f32(r, a) simde_vcvt_high_f16_f32((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvt_high_f32_f64(simde_float32x2_t r, simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_high_f32_f64(r, a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_float32x2_private b_ = simde_float32x2_to_private(r); + simde_float32x4_private r_; + + size_t half_pos = (sizeof(r_.values) / sizeof(r_.values[0]) / 2); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < half_pos; i++) { + r_.values[i] = b_.values[i]; + } + SIMDE_VECTORIZE + for (size_t i = half_pos; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, a_.values[i-half_pos]); + } + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_high_f32_f64 + #define vcvt_high_f32_f64(r, a) simde_vcvt_high_f32_f64((r), (a)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvt_high_f32_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvt_high_f32_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_float32x4_private r_; + + size_t rsize = (sizeof(r_.values) / sizeof(r_.values[0])); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < rsize; i++) { + r_.values[i] = simde_float16_to_float32(a_.values[i+rsize]); + } + + return simde_float32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_high_f32_f16 + #define vcvt_high_f32_f16(a) simde_vcvt_high_f32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64x2_t +simde_vcvt_high_f64_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvt_high_f64_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + 
simde_float64x2_private r_; + + size_t rsize = (sizeof(r_.values) / sizeof(r_.values[0])); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, a_.values[i+rsize]); + } + + return simde_float64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvt_high_f64_f32 + #define vcvt_high_f64_f32(a) simde_vcvt_high_f64_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vcvtxd_f32_f64(simde_float64_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtxd_f32_f64(a); + #else + return HEDLEY_STATIC_CAST(simde_float32_t, a); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtxd_f32_f64 + #define vcvtxd_f32_f64(a) simde_vcvtxd_f32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x2_t +simde_vcvtx_f32_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtx_f32_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_float32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtxd_f32_f64(a_.values[i]); + } + + return simde_float32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtx_f32_f64 + #define vcvtx_f32_f64(a) simde_vcvtx_f32_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32x4_t +simde_vcvtx_high_f32_f64(simde_float32x2_t r, simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtx_high_f32_f64(r, a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_float32x2_private r_ = simde_float32x2_to_private(r); + simde_float32x4_private ret; + + size_t half_pos = (sizeof(ret.values) / sizeof(ret.values[0]) / 2); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < half_pos; i++) { + ret.values[i] = r_.values[i]; + } + SIMDE_VECTORIZE + 
for (size_t i = half_pos; i < (sizeof(ret.values) / sizeof(ret.values[0])) ; i++) { + ret.values[i] = simde_vcvtxd_f32_f64(a_.values[i-half_pos]); + } + + return simde_float32x4_from_private(ret); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtx_high_f32_f64 + #define vcvtx_high_f32_f64(r, a) simde_vcvtx_high_f32_f64((r), (a)) +#endif SIMDE_END_DECLS_ HEDLEY_DIAGNOSTIC_POP diff --git a/simde/arm/neon/cvt_n.h b/simde/arm/neon/cvt_n.h index 99b5cb4b1..23b7f3dfd 100644 --- a/simde/arm/neon/cvt_n.h +++ b/simde/arm/neon/cvt_n.h @@ -34,6 +34,312 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vcvth_n_s16_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_s16_f16( + simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_s16_f16(a, n) vcvth_n_s16_f16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_s16_f16 + #define vcvth_n_s16_f16(a, n) simde_vcvth_n_s16_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvth_n_s32_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_s32_f16( + simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_s32_f16(a, n) vcvth_n_s32_f16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_s32_f16 + #define vcvth_n_s32_f16(a, n) simde_vcvth_n_s32_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvth_n_s64_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_s64_f16( + 
simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_s64_f16(a, n) vcvth_n_s64_f16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_s64_f16 + #define vcvth_n_s64_f16(a, n) simde_vcvth_n_s64_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcvth_n_u16_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_u16_f16( + simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_u16_f16(a, n) vcvth_n_u16_f16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_u16_f16 + #define vcvth_n_u16_f16(a, n) simde_vcvth_n_u16_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvth_n_u32_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_u32_f16( + simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_u32_f16(a, n) vcvth_n_u32_f16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_u32_f16 + #define vcvth_n_u32_f16(a, n) simde_vcvth_n_u32_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvth_n_u64_f16(simde_float16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_vcvth_u64_f16( + simde_float16_from_float32( + simde_float16_to_float32(a) * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_u64_f16(a, n) vcvth_n_u64_f16(a, n) +#endif +#if 
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_u64_f16 + #define vcvth_n_u64_f16(a, n) simde_vcvth_n_u64_f16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_s16(int16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_s16(a, n) vcvth_n_f16_s16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_s16 + #define vcvth_n_f16_s16(a, n) simde_vcvth_n_f16_s16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_s32(a, n) vcvth_n_f16_s32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_s32 + #define vcvth_n_f16_s32(a, n) simde_vcvth_n_f16_s32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_s64(a, n) vcvth_n_f16_s64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_s64 + #define vcvth_n_f16_s64(a, n) simde_vcvth_n_f16_s64(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_u16(uint16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return 
simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_u16(a, n) vcvth_n_f16_u16(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_u16 + #define vcvth_n_f16_u16(a, n) simde_vcvth_n_f16_u16(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_u32(uint32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_u32(a, n) vcvth_n_f16_u32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_u32 + #define vcvth_n_f16_u32(a, n) simde_vcvth_n_f16_u32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float16_t +simde_vcvth_n_f16_u64(uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return simde_float16_from_float32( + HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vcvth_n_f16_u64(a, n) vcvth_n_f16_u64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvth_n_f16_u64 + #define vcvth_n_f16_u64(a, n) simde_vcvth_n_f16_u64(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvts_n_s32_f32(simde_float32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return simde_vcvts_s32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvts_n_s32_f32(a, n) vcvts_n_s32_f32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_n_s32_f32 + #define vcvts_n_s32_f32(a, n) 
simde_vcvts_n_s32_f32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvts_n_u32_f32(simde_float32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return simde_vcvts_u32_f32(a * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvts_n_u32_f32(a, n) vcvts_n_u32_f32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_n_u32_f32 + #define vcvts_n_u32_f32(a, n) simde_vcvts_n_u32_f32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vcvts_n_f32_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvts_n_f32_s32(a, n) vcvts_n_f32_s32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_n_f32_s32 + #define vcvts_n_f32_s32(a, n) simde_vcvts_n_f32_s32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float32_t +simde_vcvts_n_f32_u32(uint32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + return HEDLEY_STATIC_CAST(simde_float32_t, + HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvts_n_f32_u32(a, n) vcvts_n_f32_u32(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvts_n_f32_u32 + #define vcvts_n_f32_u32(a, n) simde_vcvts_n_f32_u32(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtd_n_s64_f64(simde_float64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return simde_vcvtd_s64_f64(a * pow(2, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtd_n_s64_f64(a, n) vcvtd_n_s64_f64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_n_s64_f64 + #define vcvtd_n_s64_f64(a, n) simde_vcvtd_n_s64_f64(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t 
+simde_vcvtd_n_u64_f64(simde_float64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return simde_vcvtd_u64_f64(a * pow(2, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtd_n_u64_f64(a, n) vcvtd_n_u64_f64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_n_u64_f64 + #define vcvtd_n_u64_f64(a, n) simde_vcvtd_n_u64_f64(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vcvtd_n_f64_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtd_n_f64_s64(a, n) vcvtd_n_f64_s64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_n_f64_s64 + #define vcvtd_n_f64_s64(a, n) simde_vcvtd_n_f64_s64(a, n) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_float64_t +simde_vcvtd_n_f64_u64(uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 64) { + return HEDLEY_STATIC_CAST(simde_float64_t, a) / pow(2, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vcvtd_n_f64_u64(a, n) vcvtd_n_f64_u64(a, n) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtd_n_f64_u64 + #define vcvtd_n_f64_u64(a, n) simde_vcvtd_n_f64_u64(a, n) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_int16x4_t simde_vcvt_n_s16_f16(simde_float16x4_t a, const int n) @@ -45,7 +351,7 @@ simde_vcvt_n_s16_f16(simde_float16x4_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(float, pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); } return simde_int16x4_from_private(r_); @@ -67,7 +373,7 @@ simde_vcvt_n_s32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = 
simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); } return simde_int32x2_from_private(r_); @@ -113,7 +419,7 @@ simde_vcvt_n_u16_f16(simde_float16x4_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvth_u16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(float, pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); } return simde_uint16x4_from_private(r_); @@ -135,7 +441,7 @@ simde_vcvt_n_u32_f32(simde_float32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); } return simde_uint32x2_from_private(r_); @@ -181,7 +487,7 @@ simde_vcvtq_n_s16_f16(simde_float16x8_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = simde_vcvth_s16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(float, pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); } return simde_int16x8_from_private(r_); @@ -203,7 +509,7 @@ simde_vcvtq_n_s32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + r_.values[i] = simde_vcvts_s32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); } return simde_int32x4_from_private(r_); @@ -249,7 +555,7 @@ simde_vcvtq_n_u16_f16(simde_float16x8_t a, const int n) for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { r_.values[i] = 
simde_vcvth_u16_f16(simde_float16_from_float32( simde_float16_to_float32(a_.values[i]) * - HEDLEY_STATIC_CAST(float, pow(2, n)))); + HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n)))); } return simde_uint16x8_from_private(r_); @@ -271,7 +577,7 @@ simde_vcvtq_n_u32_f32(simde_float32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(float, pow(2, n))); + r_.values[i] = simde_vcvts_u32_f32(a_.values[i] * HEDLEY_STATIC_CAST(simde_float32_t, pow(2, n))); } return simde_uint32x4_from_private(r_); @@ -315,7 +621,7 @@ simde_vcvt_n_f16_u16(simde_uint16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n))); /* divide by 2^n in double, then narrow to f32 and f16 */ } return simde_float16x4_from_private(r_); @@ -337,7 +643,7 @@ simde_vcvt_n_f16_s16(simde_int16x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n))); } return simde_float16x4_from_private(r_); @@ -359,7 +665,7 @@ simde_vcvtq_n_f16_u16(simde_uint16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n))); + r_.values[i] =
simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n))); } return simde_float16x8_from_private(r_); @@ -381,7 +687,7 @@ simde_vcvtq_n_f16_s16(simde_int16x8_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, (a_.values[i] / pow(2, n)))); + r_.values[i] = simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, (a_.values[i] / pow(2, n)))); } return simde_float16x8_from_private(r_); @@ -403,7 +709,7 @@ simde_vcvt_n_f32_u32(simde_uint32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float32x2_from_private(r_); @@ -425,7 +731,7 @@ simde_vcvt_n_f32_s32(simde_int32x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float32x2_from_private(r_); @@ -447,7 +753,7 @@ simde_vcvt_n_f64_u64(simde_uint64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float64x1_from_private(r_); @@ -470,7 +776,7 @@ simde_vcvtq_n_f64_u64(simde_uint64x2_t a, const int n) 
SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float64x2_from_private(r_); @@ -493,7 +799,7 @@ simde_vcvt_n_f64_s64(simde_int64x1_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float64x1_from_private(r_); @@ -515,7 +821,7 @@ simde_vcvtq_n_f64_s64(simde_int64x2_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float64, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float64_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float64x2_from_private(r_); @@ -537,7 +843,7 @@ simde_vcvtq_n_f32_s32(simde_int32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float32x4_from_private(r_); @@ -559,7 +865,7 @@ simde_vcvtq_n_f32_u32(simde_uint32x4_t a, const int n) SIMDE_VECTORIZE for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { - r_.values[i] = HEDLEY_STATIC_CAST(simde_float32, HEDLEY_STATIC_CAST(simde_float64, a_.values[i]) / pow(2, n)); + r_.values[i] = 
HEDLEY_STATIC_CAST(simde_float32_t, HEDLEY_STATIC_CAST(simde_float64_t, a_.values[i]) / pow(2, n)); } return simde_float32x4_from_private(r_); diff --git a/simde/arm/neon/cvtm.h b/simde/arm/neon/cvtm.h new file mode 100644 index 000000000..6029c5e7a --- /dev/null +++ b/simde/arm/neon/cvtm.h @@ -0,0 +1,588 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_CVTM_H) +#define SIMDE_ARM_NEON_CVTM_H + +#include "types.h" +#include "cvt.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtmh_s64_f16(simde_float16 a) { /* Floor-convert f16 to int64_t; the checked path saturates to INT64_MIN/INT64_MAX and returns 0 for NaN. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_s64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(int64_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { + return INT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { /* NaN fails both comparisons above, so it is caught here */ + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_s64_f16 + #define vcvtmh_s64_f16(a) simde_vcvtmh_s64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtmh_s32_f16(simde_float16 a) { /* Floor-convert f16 to int32_t; the checked path saturates to INT32_MIN/INT32_MAX and returns 0 for NaN. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_s32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(int32_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_s32_f16 + #define vcvtmh_s32_f16(a)
simde_vcvtmh_s32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vcvtmh_s16_f16(simde_float16 a) { /* Floor-convert f16 to int16_t; the checked path saturates to INT16_MIN/INT16_MAX and returns 0 for NaN. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_s16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(int16_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + return INT16_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + return INT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int16_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_s16_f16 + #define vcvtmh_s16_f16(a) simde_vcvtmh_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtmh_u64_f16(simde_float16 a) { /* Floor-convert f16 to uint64_t; the checked path returns 0 for inputs <= 0 or NaN and saturates to UINT64_MAX. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_u64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(uint64_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { /* zero and every negative input map to 0 */ + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { + return UINT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_u64_f16 + #define vcvtmh_u64_f16(a) simde_vcvtmh_u64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtmh_u32_f16(simde_float16 a) { /* Floor-convert f16 to uint32_t; the checked path returns 0 for inputs <= 0 or NaN and saturates to UINT32_MAX. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_u32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return
HEDLEY_STATIC_CAST(uint32_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_u32_f16 + #define vcvtmh_u32_f16(a) simde_vcvtmh_u32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcvtmh_u16_f16(simde_float16 a) { /* Floor-convert f16 to uint16_t; the checked path returns 0 for inputs <= 0 or NaN and saturates to UINT16_MAX. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmh_u16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(uint16_t, + simde_math_floorf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { /* zero and every negative input map to 0 */ + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + return UINT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint16_t, simde_math_floorf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmh_u16_f16 + #define vcvtmh_u16_f16(a) simde_vcvtmh_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtms_s32_f32(simde_float32 a) { /* Floor-convert f32 to int32_t; the checked path saturates to INT32_MIN/INT32_MAX and returns 0 for NaN. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtms_s32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(int32_t, simde_math_floorf(a)); + #else + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return
HEDLEY_STATIC_CAST(int32_t, simde_math_floorf(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtms_s32_f32 + #define vcvtms_s32_f32(a) simde_vcvtms_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtms_u32_f32(simde_float32 a) { /* Floor-convert f32 to uint32_t; the checked path returns 0 for inputs <= 0 or NaN and saturates to UINT32_MAX. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtms_u32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(uint32_t, simde_math_floorf(a)); + #else + if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT32_C(0.0))) { /* zero and every negative input map to 0 */ + return 0; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_floorf(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtms_u32_f32 + #define vcvtms_u32_f32(a) simde_vcvtms_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtmd_s64_f64(simde_float64 a) { /* Floor-convert f64 to int64_t; the checked path saturates to INT64_MIN/INT64_MAX and returns 0 for NaN. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtmd_s64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(int64_t, simde_math_floor(a)); + #else + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { + return INT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_floor(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmd_s64_f64 + #define vcvtmd_s64_f64(a) simde_vcvtmd_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtmd_u64_f64(simde_float64 a) { /* Floor-convert f64 to uint64_t; the checked path returns 0 for inputs <= 0 or NaN and saturates to UINT64_MAX. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtmd_u64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) /* no range or NaN checks on this path */ + return HEDLEY_STATIC_CAST(uint64_t, simde_math_floor(a)); + #else + if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT64_C(0.0))) { + return
0; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_floor(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmd_u64_f64 + #define vcvtmd_u64_f64(a) simde_vcvtmd_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtmq_s16_f16(simde_float16x8_t a) { /* Lane-wise floor conversion: native vcvtmq_s16_f16 when available, otherwise simde_vcvtmh_s16_f16 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmq_s16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmh_s16_f16(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_s16_f16 + #define vcvtmq_s16_f16(a) simde_vcvtmq_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcvtmq_s32_f32(simde_float32x4_t a) { /* Lane-wise floor conversion: native vcvtmq_s32_f32 when available, otherwise simde_vcvtms_s32_f32 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtmq_s32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_int32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtms_s32_f32(a_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_s32_f32 + #define vcvtmq_s32_f32(a) simde_vcvtmq_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtmq_s64_f64(simde_float64x2_t a) { /* Lane-wise floor conversion: native vcvtmq_s64_f64 when available, otherwise simde_vcvtmd_s64_f64 per element. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtmq_s64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { +
r_.values[i] = simde_vcvtmd_s64_f64(a_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_s64_f64 + #define vcvtmq_s64_f64(a) simde_vcvtmq_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtmq_u16_f16(simde_float16x8_t a) { /* Lane-wise floor conversion: native vcvtmq_u16_f16 when available, otherwise simde_vcvtmh_u16_f16 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtmq_u16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmh_u16_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_u16_f16 + #define vcvtmq_u16_f16(a) simde_vcvtmq_u16_f16(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcvtmq_u32_f32(simde_float32x4_t a) { /* Lane-wise floor conversion: native vcvtmq_u32_f32 when available (and not hit by SIMDE_BUG_CLANG_46844), otherwise simde_vcvtms_u32_f32 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtmq_u32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if 0 && defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm..
this doesn't work, unlike the signed versions -- NOTE(review): this disabled path also forces round-to-nearest, whereas the portable loop below floors + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtps_epu32(a_.m128); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtps_epu32(a_.m128); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtms_u32_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_u32_f32 + #define vcvtmq_u32_f32(a) simde_vcvtmq_u32_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcvtmq_u64_f64(simde_float64x2_t a) { /* Lane-wise floor conversion: native vcvtmq_u64_f64 when available, otherwise simde_vcvtmd_u64_f64 per element. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtmq_u64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if 0 && defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm..
this doesn't work, unlike the signed versions -- NOTE(review): this disabled path also forces round-to-nearest, whereas the portable loop below floors + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmd_u64_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtmq_u64_f64 + #define vcvtmq_u64_f64(a) simde_vcvtmq_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvtm_s16_f16(simde_float16x4_t a) { /* Lane-wise floor conversion: native vcvtm_s16_f16 when available, otherwise simde_vcvtmh_s16_f16 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtm_s16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmh_s16_f16(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_s16_f16 + #define vcvtm_s16_f16(a) simde_vcvtm_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvtm_u16_f16(simde_float16x4_t a) { /* Lane-wise floor conversion: native vcvtm_u16_f16 when available, otherwise simde_vcvtmh_u16_f16 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtm_u16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmh_u16_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_u16_f16 + #define vcvtm_u16_f16(a) simde_vcvtm_u16_f16(a) +#endif +
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcvtm_u32_f32(simde_float32x2_t a) { /* Lane-wise floor conversion: native vcvtm_u32_f32 when available, otherwise simde_vcvtms_u32_f32 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtm_u32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtms_u32_f32(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_u32_f32 + #define vcvtm_u32_f32(a) simde_vcvtm_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcvtm_s32_f32(simde_float32x2_t a) { /* Lane-wise floor conversion: native vcvtm_s32_f32 when available, otherwise simde_vcvtms_s32_f32 per element. */ + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtm_s32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_int32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtms_s32_f32(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_s32_f32 + #define vcvtm_s32_f32(a) simde_vcvtm_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvtm_s64_f64(simde_float64x1_t a) { /* Lane-wise floor conversion: native vcvtm_s64_f64 when available, otherwise simde_vcvtmd_s64_f64 per element. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtm_s64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmd_s64_f64(a_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_s64_f64 + #define vcvtm_s64_f64(a) simde_vcvtm_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvtm_u64_f64(simde_float64x1_t a) { /* Lane-wise floor conversion: native vcvtm_u64_f64 when available; the fallback starts on the following line. */ + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtm_u64_f64(a); + #else +
simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtmd_u64_f64(a_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtm_u64_f64 + #define vcvtm_u64_f64(a) simde_vcvtm_u64_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_CVTM_H */ diff --git a/simde/arm/neon/cvtp.h b/simde/arm/neon/cvtp.h new file mode 100644 index 000000000..48c51136b --- /dev/null +++ b/simde/arm/neon/cvtp.h @@ -0,0 +1,588 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_CVTP_H) +#define SIMDE_ARM_NEON_CVTP_H + +#include "types.h" +#include "cvt.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtph_s64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_s64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, + simde_math_ceilf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT64_MAX))) { + return INT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_s64_f16 + #define vcvtph_s64_f16(a) simde_vcvtph_s64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtph_s32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_s32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, + simde_math_ceilf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_s32_f16 + #define vcvtph_s32_f16(a) simde_vcvtph_s32_f16(a) 
+#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vcvtph_s16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_s16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int16_t, + simde_math_ceilf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= HEDLEY_STATIC_CAST(simde_float32, INT16_MIN))) { + return INT16_MIN; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, INT16_MAX))) { + return INT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int16_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_s16_f16 + #define vcvtph_s16_f16(a) simde_vcvtph_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtph_u64_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_u64_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, + simde_math_ceilf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX))) { + return UINT64_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_u64_f16 + #define vcvtph_u64_f16(a) simde_vcvtph_u64_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtph_u32_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_u32_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, + simde_math_ceilf( 
+ simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_u32_f16 + #define vcvtph_u32_f16(a) simde_vcvtph_u32_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vcvtph_u16_f16(simde_float16 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtph_u16_f16(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint16_t, + simde_math_ceilf( + simde_float16_to_float32(a))); + #else + simde_float32 af = simde_float16_to_float32(a); + if (HEDLEY_UNLIKELY(af <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(af >= HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX))) { + return UINT16_MAX; + } else if (HEDLEY_UNLIKELY(simde_isnanhf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint16_t, simde_math_ceilf(af)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtph_u16_f16 + #define vcvtph_u16_f16(a) simde_vcvtph_u16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vcvtps_s32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtps_s32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(a)); + #else + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float32, INT32_MIN))) { + return INT32_MIN; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, INT32_MAX))) { + return INT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int32_t, simde_math_ceilf(a)); + } + 
#endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtps_s32_f32 + #define vcvtps_s32_f32(a) simde_vcvtps_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vcvtps_u32_f32(simde_float32 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtps_u32_f32(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(a)); + #else + if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT32_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX))) { + return UINT32_MAX; + } else if (HEDLEY_UNLIKELY(simde_math_isnanf(a))) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint32_t, simde_math_ceilf(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtps_u32_f32 + #define vcvtps_u32_f32(a) simde_vcvtps_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vcvtpd_s64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtpd_s64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(int64_t, simde_math_ceil(a)); + #else + if (HEDLEY_UNLIKELY(a <= HEDLEY_STATIC_CAST(simde_float64, INT64_MIN))) { + return INT64_MIN; + } else if (HEDLEY_UNLIKELY(a >= HEDLEY_STATIC_CAST(simde_float64, INT64_MAX))) { + return INT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(int64_t, simde_math_ceil(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpd_s64_f64 + #define vcvtpd_s64_f64(a) simde_vcvtpd_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vcvtpd_u64_f64(simde_float64 a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtpd_u64_f64(a); + #elif defined(SIMDE_FAST_CONVERSION_RANGE) + return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceil(a)); + #else + if (HEDLEY_UNLIKELY(a <= SIMDE_FLOAT64_C(0.0))) { + return 0; + } else if (HEDLEY_UNLIKELY(a >= 
HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX))) { + return UINT64_MAX; + } else if (simde_math_isnan(a)) { + return 0; + } else { + return HEDLEY_STATIC_CAST(uint64_t, simde_math_ceil(a)); + } + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpd_u64_f64 + #define vcvtpd_u64_f64(a) simde_vcvtpd_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vcvtpq_s16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtpq_s16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtph_s16_f16(a_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_s16_f16 + #define vcvtpq_s16_f16(a) simde_vcvtpq_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vcvtpq_s32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtpq_s32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_int32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtps_s32_f32(a_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_s32_f32 + #define vcvtpq_s32_f32(a) simde_vcvtpq_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vcvtpq_s64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtpq_s64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_int64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = 
simde_vcvtpd_s64_f64(a_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_s64_f64 + #define vcvtpq_s64_f64(a) simde_vcvtpq_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vcvtpq_u16_f16(simde_float16x8_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtpq_u16_f16(a); + #else + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtph_u16_f16(a_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_u16_f16 + #define vcvtpq_u16_f16(a) simde_vcvtpq_u16_f16(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vcvtpq_u32_f32(simde_float32x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && !defined(SIMDE_BUG_CLANG_46844) + return vcvtpq_u32_f32(a); + #else + simde_float32x4_private a_ = simde_float32x4_to_private(a); + simde_uint32x4_private r_; + + #if 0 && defined(SIMDE_X86_AVX512F_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm.. 
this doesn't work, unlike the signed versions + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtps_epu32(a_.m128); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtps_epu32(a_.m128); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtps_u32_f32(a_.values[i]); + } + #endif + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_u32_f32 + #define vcvtpq_u32_f32(a) simde_vcvtpq_u32_f32(a) +#endif + + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vcvtpq_u64_f64(simde_float64x2_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtpq_u64_f64(a); + #else + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_uint64x2_private r_; + + #if 0 && defined(SIMDE_X86_AVX512DQ_NATIVE) && defined(SIMDE_X86_AVX512VL_NATIVE) + // Hmm.. 
this doesn't work, unlike the signed versions + if (HEDLEY_UNLIKELY(_MM_GET_ROUNDING_MODE() != _MM_ROUND_NEAREST)) { + unsigned int rounding_mode = _MM_GET_ROUNDING_MODE(); + _MM_SET_ROUNDING_MODE(_MM_ROUND_NEAREST); + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + _MM_SET_ROUNDING_MODE(rounding_mode); + } else { + r_.m128i = _mm_cvtpd_epu64(a_.m128d); + } + #else + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtpd_u64_f64(a_.values[i]); + } + #endif + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtpq_u64_f64 + #define vcvtpq_u64_f64(a) simde_vcvtpq_u64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vcvtp_s16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtp_s16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtph_s16_f16(a_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_s16_f16 + #define vcvtp_s16_f16(a) simde_vcvtp_s16_f16(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vcvtp_u16_f16(simde_float16x4_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vcvtp_u16_f16(a); + #else + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint16x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtph_u16_f16(a_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_u16_f16 + #define vcvtp_u16_f16(a) simde_vcvtp_u16_f16(a) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vcvtp_u32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtp_u32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_uint32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtps_u32_f32(a_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_u32_f32 + #define vcvtp_u32_f32(a) simde_vcvtp_u32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vcvtp_s32_f32(simde_float32x2_t a) { + #if defined(SIMDE_ARM_NEON_A32V8_NATIVE) + return vcvtp_s32_f32(a); + #else + simde_float32x2_private a_ = simde_float32x2_to_private(a); + simde_int32x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtps_s32_f32(a_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_s32_f32 + #define vcvtp_s32_f32(a) simde_vcvtp_s32_f32(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vcvtp_s64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtp_s64_f64(a); + #else + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_int64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtpd_s64_f64(a_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_s64_f64 + #define vcvtp_s64_f64(a) simde_vcvtp_s64_f64(a) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vcvtp_u64_f64(simde_float64x1_t a) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + return vcvtp_u64_f64(a); + #else + 
simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_uint64x1_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vcvtpd_u64_f64(a_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vcvtp_u64_f64 + #define vcvtp_u64_f64(a) simde_vcvtp_u64_f64(a) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* SIMDE_ARM_NEON_CVTP_H */ diff --git a/simde/arm/neon/fms_n.h b/simde/arm/neon/fms_n.h index eb95818f8..6011ae415 100644 --- a/simde/arm/neon/fms_n.h +++ b/simde/arm/neon/fms_n.h @@ -66,13 +66,13 @@ simde_vfmsq_n_f16(simde_float16x8_t a, simde_float16x8_t b, simde_float16_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vfms_n_f32(simde_float32x2_t a, simde_float32x2_t b, simde_float32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) return vfms_n_f32(a, b, c); #else return simde_vfms_f32(a, b, simde_vdup_n_f32(c)); #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vfms_n_f32 #define vfms_n_f32(a, b, c) simde_vfms_n_f32(a, b, c) #endif @@ -94,13 +94,13 @@ simde_vfms_n_f64(simde_float64x1_t a, simde_float64x1_t b, simde_float64_t c) { SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vfmsq_n_f32(simde_float32x4_t a, simde_float32x4_t b, simde_float32_t c) { - #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && 
defined(SIMDE_ARCH_ARM_FMA) && (!defined(__clang__) || SIMDE_DETECT_CLANG_VERSION_CHECK(7,0,0)) && !defined(SIMDE_BUG_GCC_95399) return vfmsq_n_f32(a, b, c); #else return simde_vfmsq_f32(a, b, simde_vdupq_n_f32(c)); #endif } -#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) #undef vfmsq_n_f32 #define vfmsq_n_f32(a, b, c) simde_vfmsq_n_f32(a, b, c) #endif diff --git a/simde/arm/neon/ld3.h b/simde/arm/neon/ld3.h index 6ab02f321..c5fdc762c 100644 --- a/simde/arm/neon/ld3.h +++ b/simde/arm/neon/ld3.h @@ -349,6 +349,34 @@ simde_vld3_u64(uint64_t const *ptr) { #define vld3_u64(a) simde_vld3_u64((a)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x3_t +simde_vld3q_f16(simde_float16 const *ptr) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vld3q_f16(ptr); + #else + simde_float16x8_private r_[3]; + + for (size_t i = 0; i < (sizeof(r_) / sizeof(r_[0])); i++) { + for (size_t j = 0 ; j < (sizeof(r_[0].values) / sizeof(r_[0].values[0])) ; j++) { + r_[i].values[j] = ptr[i + (j * (sizeof(r_) / sizeof(r_[0])))]; + } + } + + simde_float16x8x3_t r = { { + simde_float16x8_from_private(r_[0]), + simde_float16x8_from_private(r_[1]), + simde_float16x8_from_private(r_[2]) + } }; + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld3q_f16 + #define vld3q_f16(a) simde_vld3q_f16((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x3_t simde_vld3q_f32(simde_float32 const *ptr) { diff --git a/simde/arm/neon/ld4.h b/simde/arm/neon/ld4.h index 4eb2f3a47..676cd2712 100644 --- a/simde/arm/neon/ld4.h +++ b/simde/arm/neon/ld4.h @@ -260,6 +260,26 @@ simde_vld4_u64(uint64_t const ptr[HEDLEY_ARRAY_PARAM(4)]) { #define vld4_u64(a) simde_vld4_u64((a)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x4_t +simde_vld4q_f16(simde_float16 const ptr[HEDLEY_ARRAY_PARAM(32)]) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + 
return vld4q_f16(ptr); + #else + simde_float16x8_private a_[4]; + for (size_t i = 0; i < (sizeof(simde_float16x8_t) / sizeof(*ptr)) * 4 ; i++) { + a_[i % 4].values[i / 4] = ptr[i]; + } + simde_float16x8x4_t s_ = { { simde_float16x8_from_private(a_[0]), simde_float16x8_from_private(a_[1]), + simde_float16x8_from_private(a_[2]), simde_float16x8_from_private(a_[3]) } }; + return s_; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vld4q_f16 + #define vld4q_f16(a) simde_vld4q_f16((a)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x4_t simde_vld4q_f32(simde_float32 const ptr[HEDLEY_ARRAY_PARAM(16)]) { diff --git a/simde/arm/neon/qrshl.h b/simde/arm/neon/qrshl.h new file mode 100644 index 000000000..cd30b6ff5 --- /dev/null +++ b/simde/arm/neon/qrshl.h @@ -0,0 +1,732 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QRSHL_H) +#define SIMDE_ARM_NEON_QRSHL_H +#include "../../x86/avx.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqrshlb_s8(int8_t a, int8_t b) { + int8_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqrshlb_s8(a, b); + #else + if (b < -8) { + r = 0; + } else if (b < 0) { + r = HEDLEY_STATIC_CAST(int8_t, ((a + (1 << (-b - 1))) >> -b)); + } else if (b == 0) { + r = a; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(int8_t, a << b); + if ((r >> b) != a) { + r = (a < 0) ? INT8_MIN : INT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT8_MIN : INT8_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshlb_s8 + #define vqrshlb_s8(a, b) simde_vqrshlb_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqrshlh_s16(int16_t a, int16_t b) { + int16_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqrshlh_s16(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 <= -16) { + r = 0; + } else if (b8 < 0) { + r = HEDLEY_STATIC_CAST(int16_t, ((a + (1 << (-b8 - 1))) >> -b8)); + } else if (b8 == 0) { + r = a; + } else if (b8 < 15) { + r = HEDLEY_STATIC_CAST(int16_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT16_MIN : INT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? 
INT16_MIN : INT16_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshlh_s16 + #define vqrshlh_s16(a, b) simde_vqrshlh_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqrshls_s32(int32_t a, int32_t b) { + int32_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqrshls_s32(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 <= -32) { + r = 0; + } else if (b8 < 0) { + r = ((a + (1 << (-b8 - 1))) >> -b8); + } else if (b8 == 0) { + r = a; + } else if (b8 < 31) { + r = HEDLEY_STATIC_CAST(int32_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? INT32_MIN : INT32_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshls_s32 + #define vqrshls_s32(a, b) simde_vqrshls_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqrshld_s64(int64_t a, int64_t b) { + int64_t r; + + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + r = vqrshld_s64(a, b); + #else + int8_t b8 = HEDLEY_STATIC_CAST(int8_t, b); + + if (b8 <= -64) { + r = 0; + } else if (b8 < 0) { + r = ((a + (INT64_C(1) << (-b8 - 1))) >> -b8); + } else if (b8 == 0) { + r = a; + } else if (b8 < 63) { + r = HEDLEY_STATIC_CAST(int64_t, a << b8); + if ((r >> b8) != a) { + r = (a < 0) ? INT64_MIN : INT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = (a < 0) ? 
INT64_MIN : INT64_MAX; + } + #endif + + return r; +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshld_s64 + #define vqrshld_s64(a, b) simde_vqrshld_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqrshlb_u8(uint8_t a, int8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqrshlb_u8(a, HEDLEY_STATIC_CAST(uint8_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + /* https://github.com/llvm/llvm-project/commit/f0a78bdfdc6d56b25e0081884580b3960a3c2429 */ + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqrshlb_u8(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqrshlb_u8(a, b); + #endif + #else + uint8_t r; + + if (b < -8) { + r = 0; + } else if (b < 0) { + r = (a >> -b) + ((a >> (-b - 1)) & 1); + } else if (b == 0) { + r = a; + } else if (b < 7) { + r = HEDLEY_STATIC_CAST(uint8_t, a << b); + if ((r >> b) != a) { + r = UINT8_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT8_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshlb_u8 + #define vqrshlb_u8(a, b) simde_vqrshlb_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqrshlh_u16(uint16_t a, int16_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqrshlh_u16(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqrshlh_u16(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqrshlh_u16(a, b); + #endif + #else + b = HEDLEY_STATIC_CAST(int8_t, b); + uint16_t r; + + if (b < -16) { + r = 0; + } else if (b < 0) { + r = (a >> -b) + ((a >> (-b - 1)) & 1); + } else if (b == 0) { + r = a; + } else if (b < 15) { + r = HEDLEY_STATIC_CAST(uint16_t, a << b); + if ((r >> b) != a) { + r = 
UINT16_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT16_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshlh_u16 + #define vqrshlh_u16(a, b) simde_vqrshlh_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqrshls_u32(uint32_t a, int32_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqrshls_u32(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqrshls_u32(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqrshls_u32(a, b); + #endif + #else + b = HEDLEY_STATIC_CAST(int8_t, b); + uint32_t r; + + if (b < -32) { + r = 0; + } else if (b < 0) { + r = (a >> -b) + ((a >> (-b - 1)) & 1); + } else if (b == 0) { + r = a; + } else if (b < 31) { + r = HEDLEY_STATIC_CAST(uint32_t, a << b); + if ((r >> b) != a) { + r = UINT32_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT32_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshls_u32 + #define vqrshls_u32(a, b) simde_vqrshls_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqrshld_u64(uint64_t a, int64_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #if defined(HEDLEY_GCC_VERSION) && !HEDLEY_GCC_VERSION_CHECK(11,0,0) + return vqrshld_u64(a, HEDLEY_STATIC_CAST(uint16_t, b)); + #elif HEDLEY_HAS_WARNING("-Wsign-conversion") + HEDLEY_DIAGNOSTIC_PUSH + #pragma clang diagnostic ignored "-Wsign-conversion" + return vqrshld_u64(a, b); + HEDLEY_DIAGNOSTIC_POP + #else + return vqrshld_u64(a, b); + #endif + #else + b = HEDLEY_STATIC_CAST(int8_t, b); + uint64_t r; + + if (b < -64) { + r = 0; + } else if (b < 0) { + r = (a >> -b) + ((a >> (-b - 1)) & 1); + } else if (b == 0) { + r = a; + } else if (b < 63) { + r = HEDLEY_STATIC_CAST(uint64_t, a << b); + if ((r >> b) 
!= a) { + r = UINT64_MAX; + } + } else if (a == 0) { + r = 0; + } else { + r = UINT64_MAX; + } + + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshld_u64 + #define vqrshld_u64(a, b) simde_vqrshld_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqrshl_s8 (const simde_int8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_s8(a, b); + #else + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a), + b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_s8 + #define vqrshl_s8(a, b) simde_vqrshl_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqrshl_s16 (const simde_int16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_s16(a, b); + #else + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a), + b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_s16 + #define vqrshl_s16(a, b) simde_vqrshl_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqrshl_s32 (const simde_int32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_s32(a, b); + #else + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a), + b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = 
simde_vqrshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_s32 + #define vqrshl_s32(a, b) simde_vqrshl_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqrshl_s64 (const simde_int64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_s64(a, b); + #else + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a), + b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_s64 + #define vqrshl_s64(a, b) simde_vqrshl_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqrshl_u8 (const simde_uint8x8_t a, const simde_int8x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_u8(a, b); + #else + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + simde_int8x8_private b_ = simde_int8x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_u8 + #define vqrshl_u8(a, b) simde_vqrshl_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqrshl_u16 (const simde_uint16x4_t a, const simde_int16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_u16(a, b); + #else + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + simde_int16x4_private b_ = simde_int16x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = simde_vqrshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_u16 + #define vqrshl_u16(a, b) simde_vqrshl_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqrshl_u32 (const simde_uint32x2_t a, const simde_int32x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_u32(a, b); + #else + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + simde_int32x2_private b_ = simde_int32x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_u32 + #define vqrshl_u32(a, b) simde_vqrshl_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqrshl_u64 (const simde_uint64x1_t a, const simde_int64x1_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshl_u64(a, b); + #else + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + simde_int64x1_private b_ = simde_int64x1_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x1_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshl_u64 + #define vqrshl_u64(a, b) simde_vqrshl_u64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqrshlq_s8 (const simde_int8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_s8(a, b); + #else + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a), + b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlb_s8(a_.values[i], b_.values[i]); + } + + return simde_int8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_s8 + #define vqrshlq_s8(a, b) simde_vqrshlq_s8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqrshlq_s16 (const simde_int16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_s16(a, b); + #else + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a), + b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlh_s16(a_.values[i], b_.values[i]); + } + + return simde_int16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_s16 + #define vqrshlq_s16(a, b) simde_vqrshlq_s16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqrshlq_s32 (const simde_int32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_s32(a, b); + #else + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a), + b_ = simde_int32x4_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshls_s32(a_.values[i], b_.values[i]); + } + + return simde_int32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_s32 + #define vqrshlq_s32(a, b) simde_vqrshlq_s32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqrshlq_s64 (const simde_int64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_s64(a, b); + #else + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a), + b_ = simde_int64x2_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / 
sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshld_s64(a_.values[i], b_.values[i]); + } + + return simde_int64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_s64 + #define vqrshlq_s64(a, b) simde_vqrshlq_s64((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqrshlq_u8 (const simde_uint8x16_t a, const simde_int8x16_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_u8(a, b); + #else + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + simde_int8x16_private b_ = simde_int8x16_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlb_u8(a_.values[i], b_.values[i]); + } + + return simde_uint8x16_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_u8 + #define vqrshlq_u8(a, b) simde_vqrshlq_u8((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqrshlq_u16 (const simde_uint16x8_t a, const simde_int16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_u16(a, b); + #else + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + simde_int16x8_private b_ = simde_int16x8_to_private(b); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshlh_u16(a_.values[i], b_.values[i]); + } + + return simde_uint16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_u16 + #define vqrshlq_u16(a, b) simde_vqrshlq_u16((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqrshlq_u32 (const simde_uint32x4_t a, const simde_int32x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_u32(a, b); + #else + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + simde_int32x4_private b_ = simde_int32x4_to_private(b); + + 
SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshls_u32(a_.values[i], b_.values[i]); + } + + return simde_uint32x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_u32 + #define vqrshlq_u32(a, b) simde_vqrshlq_u32((a), (b)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x2_t +simde_vqrshlq_u64 (const simde_uint64x2_t a, const simde_int64x2_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + return vqrshlq_u64(a, b); + #else + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + simde_int64x2_private b_ = simde_int64x2_to_private(b); + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqrshld_u64(a_.values[i], b_.values[i]); + } + + return simde_uint64x2_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqrshlq_u64 + #define vqrshlq_u64(a, b) simde_vqrshlq_u64((a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRSHL_H) */ diff --git a/simde/arm/neon/qrshrn_high_n.h b/simde/arm/neon/qrshrn_high_n.h new file mode 100644 index 000000000..0080e739a --- /dev/null +++ b/simde/arm/neon/qrshrn_high_n.h @@ -0,0 +1,189 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QRSHRN_HIGH_N_H) +#define SIMDE_ARM_NEON_QRSHRN_HIGH_N_H + +#include "combine.h" +#include "qmovn.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqrshrn_high_n_s16(simde_int8x8_t r, simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int16_t tmp = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); + if (tmp > INT8_MAX) tmp = INT8_MAX; + else if (tmp < INT8_MIN) tmp = INT8_MIN; + r_.values[i] = HEDLEY_STATIC_CAST(int8_t, tmp); + } + return simde_vcombine_s8(r, simde_vqmovn_s16(simde_int16x8_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_s16(r, a, n) vqrshrn_high_n_s16((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_s16 + #define vqrshrn_high_n_s16(r, a, n) simde_vqrshrn_high_n_s16((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqrshrn_high_n_s32(simde_int16x4_t r, simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + 
for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int32_t tmp = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); + if (tmp > INT16_MAX) tmp = INT16_MAX; + else if (tmp < INT16_MIN) tmp = INT16_MIN; + r_.values[i] = HEDLEY_STATIC_CAST(int16_t, tmp); + } + return simde_vcombine_s16(r, simde_vqmovn_s32(simde_int32x4_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_s32(r, a, n) vqrshrn_high_n_s32((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_s32 + #define vqrshrn_high_n_s32(r, a, n) simde_vqrshrn_high_n_s32((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqrshrn_high_n_s64(simde_int32x2_t r, simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int64_t tmp = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); + if (tmp > INT32_MAX) tmp = INT32_MAX; + else if (tmp < INT32_MIN) tmp = INT32_MIN; + r_.values[i] = HEDLEY_STATIC_CAST(int32_t, tmp); + } + return simde_vcombine_s32(r, simde_vqmovn_s64(simde_int64x2_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_s64(r, a, n) vqrshrn_high_n_s64((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_s64 + #define vqrshrn_high_n_s64(r, a, n) simde_vqrshrn_high_n_s64((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqrshrn_high_n_u16(simde_uint8x8_t r, simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + uint16_t tmp = HEDLEY_STATIC_CAST(uint16_t, (a_.values[i] + (1 << (n - 1))) >> n); + if (tmp > UINT8_MAX) tmp = UINT8_MAX; + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp); + } + return simde_vcombine_u8(r, simde_vqmovn_u16(simde_uint16x8_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_u16(r, a, n) vqrshrn_high_n_u16((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_u16 + #define vqrshrn_high_n_u16(r, a, n) simde_vqrshrn_high_n_u16((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqrshrn_high_n_u32(simde_uint16x4_t r, simde_uint32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + uint32_t tmp = (a_.values[i] >> ((n == 32) ? 
31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(uint32_t, UINT32_C(1) << (n - 1))) != 0); + if (tmp > UINT16_MAX) tmp = UINT16_MAX; + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp); + } + return simde_vcombine_u16(r, simde_vqmovn_u32(simde_uint32x4_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_u32(r, a, n) vqrshrn_high_n_u32((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_u32 + #define vqrshrn_high_n_u32(r, a, n) simde_vqrshrn_high_n_u32((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqrshrn_high_n_u64(simde_uint32x2_t r, simde_uint64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_uint64x2_private + r_, + a_ = simde_uint64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + uint64_t tmp = (a_.values[i] >> ((n == 64) ? 63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(uint64_t, UINT64_C(1) << (n - 1))) != 0); + if (tmp > UINT32_MAX) tmp = UINT32_MAX; + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp); + } + return simde_vcombine_u32(r, simde_vqmovn_u64(simde_uint64x2_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqrshrn_high_n_u64(r, a, n) vqrshrn_high_n_u64((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrn_high_n_u64 + #define vqrshrn_high_n_u64(r, a, n) simde_vqrshrn_high_n_u64((r), (a), (n)) +#endif + + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRSHRN_HIGH_N_H) */ diff --git a/simde/arm/neon/qrshrun_high_n.h b/simde/arm/neon/qrshrun_high_n.h new file mode 100644 index 000000000..b035681c3 --- /dev/null +++ b/simde/arm/neon/qrshrun_high_n.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + *
files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H) +#define SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H + +#include "combine.h" +#include "qmovn.h" +#include "types.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqrshrun_high_n_s16(simde_uint8x8_t r, simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 8) { + simde_int16x8_private a_ = simde_int16x8_to_private(a); + simde_uint16x8_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int16_t tmp = HEDLEY_STATIC_CAST(int16_t, (a_.values[i] + (1 << (n - 1))) >> n); + if (tmp > UINT8_MAX) tmp = UINT8_MAX; + else if (tmp < 0) tmp = 0; + r_.values[i] = HEDLEY_STATIC_CAST(uint8_t, tmp); + } + return simde_vcombine_u8(r, simde_vqmovn_u16(simde_uint16x8_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) + 
#define simde_vqrshrun_high_n_s16(r, a, n) vqrshrun_high_n_s16((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_high_n_s16 + #define vqrshrun_high_n_s16(r, a, n) simde_vqrshrun_high_n_s16((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqrshrun_high_n_s32(simde_uint16x4_t r, simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + simde_int32x4_private a_ = simde_int32x4_to_private(a); + simde_uint32x4_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int32_t tmp = (a_.values[i] >> ((n == 32) ? 31 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int32_t, UINT32_C(1) << (n - 1))) != 0); + if (tmp > UINT16_MAX) tmp = UINT16_MAX; + else if (tmp < 0) tmp = 0; + r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, tmp); + } + return simde_vcombine_u16(r, simde_vqmovn_u32(simde_uint32x4_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) + #define simde_vqrshrun_high_n_s32(r, a, n) vqrshrun_high_n_s32((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_high_n_s32 + #define vqrshrun_high_n_s32(r, a, n) simde_vqrshrun_high_n_s32((r), (a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqrshrun_high_n_s64(simde_uint32x2_t r, simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 32) { + simde_int64x2_private a_ = simde_int64x2_to_private(a); + simde_uint64x2_private r_; + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + int64_t tmp = (a_.values[i] >> ((n == 64) ? 
63 : n)) + ((a_.values[i] & HEDLEY_STATIC_CAST(int64_t, UINT64_C(1) << (n - 1))) != 0); + if (tmp > UINT32_MAX) tmp = UINT32_MAX; + else if (tmp < 0) tmp = 0; + r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, tmp); + } + return simde_vcombine_u32(r, simde_vqmovn_u64(simde_uint64x2_from_private(r_))); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && !defined(__clang__) + #define simde_vqrshrun_high_n_s64(r, a, n) vqrshrun_high_n_s64((r), (a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqrshrun_high_n_s64 + #define vqrshrun_high_n_s64(r, a, n) simde_vqrshrun_high_n_s64((r), (a), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_QRSHRUN_HIGH_N_H) */ diff --git a/simde/arm/neon/qshl_n.h b/simde/arm/neon/qshl_n.h new file mode 100644 index 000000000..e3d4c924d --- /dev/null +++ b/simde/arm/neon/qshl_n.h @@ -0,0 +1,513 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_QSHL_N_H) +#define SIMDE_ARM_NEON_QSHL_N_H + +#include "types.h" +#include "cls.h" +#include "qshl.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +SIMDE_FUNCTION_ATTRIBUTES +int8_t +simde_vqshlb_n_s8(int8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + return simde_vqshlb_s8(a, HEDLEY_STATIC_CAST(int8_t, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlb_n_s8(a, n) vqshlb_n_s8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_n_s8 + #define vqshlb_n_s8(a, n) simde_vqshlb_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_vqshlh_n_s16(int16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + return simde_vqshlh_s16(a, HEDLEY_STATIC_CAST(int16_t, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlh_n_s16(a, n) vqshlh_n_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_n_s16 + #define vqshlh_n_s16(a, n) simde_vqshlh_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int32_t +simde_vqshls_n_s32(int32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + return simde_vqshls_s32(a, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshls_n_s32(a, n) vqshls_n_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_n_s32 + #define vqshls_n_s32(a, n) simde_vqshls_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +int64_t +simde_vqshld_n_s64(int64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + return 
simde_vqshld_s64(a, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshld_n_s64(a, n) vqshld_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshld_n_s64 + #define vqshld_n_s64(a, n) simde_vqshld_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint8_t +simde_vqshlb_n_u8(uint8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + return simde_vqshlb_u8(a, HEDLEY_STATIC_CAST(int8_t, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlb_n_u8(a, n) vqshlb_n_u8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlb_n_u8 + #define vqshlb_n_u8(a, n) simde_vqshlb_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_vqshlh_n_u16(uint16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + return simde_vqshlh_u16(a, HEDLEY_STATIC_CAST(int16_t, n)); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshlh_n_u16(a, n) vqshlh_n_u16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshlh_n_u16 + #define vqshlh_n_u16(a, n) simde_vqshlh_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint32_t +simde_vqshls_n_u32(uint32_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + return simde_vqshls_u32(a, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshls_n_u32(a, n) vqshls_n_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshls_n_u32 + #define vqshls_n_u32(a, n) simde_vqshls_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +uint64_t +simde_vqshld_n_u64(uint64_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + return simde_vqshld_u64(a, n); +} +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vqshld_n_u64(a, n) vqshld_n_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vqshld_n_u64 + #define vqshld_n_u64(a, n) simde_vqshld_n_u64((a), (n)) +#endif + 
+SIMDE_FUNCTION_ATTRIBUTES +simde_int8x8_t +simde_vqshl_n_s8 (const simde_int8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_int8x8_private + r_, + a_ = simde_int8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_8_(simde_vqshlb_n_s8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + return simde_int8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_s8(a, n) vqshl_n_s8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_s8 + #define vqshl_n_s8(a, n) simde_vqshl_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x4_t +simde_vqshl_n_s16 (const simde_int16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_int16x4_private + r_, + a_ = simde_int16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_16_(simde_vqshlh_n_s16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + return simde_int16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_s16(a, n) vqshl_n_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_s16 + #define vqshl_n_s16(a, n) simde_vqshl_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x2_t +simde_vqshl_n_s32 (const simde_int32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_int32x2_private + r_, + a_ = simde_int32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], n); + } + return simde_int32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_s32(a, n) vqshl_n_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_s32 + 
#define vqshl_n_s32(a, n) simde_vqshl_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x1_t +simde_vqshl_n_s64 (const simde_int64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_int64x1_private + r_, + a_ = simde_int64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], n); + } + return simde_int64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_s64(a, n) vqshl_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_s64 + #define vqshl_n_s64(a, n) simde_vqshl_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x8_t +simde_vqshl_n_u8 (const simde_uint8x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_uint8x8_private + r_, + a_ = simde_uint8x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_8_(simde_vqshlb_n_u8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + return simde_uint8x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_u8(a, n) vqshl_n_u8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_u8 + #define vqshl_n_u8(a, n) simde_vqshl_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x4_t +simde_vqshl_n_u16 (const simde_uint16x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_uint16x4_private + r_, + a_ = simde_uint16x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_16_(simde_vqshlh_n_u16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + return simde_uint16x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_u16(a, n) vqshl_n_u16((a), (n)) +#endif +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_u16 + #define vqshl_n_u16(a, n) simde_vqshl_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x2_t +simde_vqshl_n_u32 (const simde_uint32x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_uint32x2_private + r_, + a_ = simde_uint32x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_u32(a_.values[i], n); + } + return simde_uint32x2_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_u32(a, n) vqshl_n_u32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_u32 + #define vqshl_n_u32(a, n) simde_vqshl_n_u32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint64x1_t +simde_vqshl_n_u64 (const simde_uint64x1_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_uint64x1_private + r_, + a_ = simde_uint64x1_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_u64(a_.values[i], n); + } + return simde_uint64x1_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshl_n_u64(a, n) vqshl_n_u64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshl_n_u64 + #define vqshl_n_u64(a, n) simde_vqshl_n_u64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int8x16_t +simde_vqshlq_n_s8 (const simde_int8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_int8x16_private + r_, + a_ = simde_int8x16_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_8_(simde_vqshlb_n_s8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + + return simde_int8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_s8(a, n) 
vqshlq_n_s8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_s8 + #define vqshlq_n_s8(a, n) simde_vqshlq_n_s8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int16x8_t +simde_vqshlq_n_s16 (const simde_int16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_int16x8_private + r_, + a_ = simde_int16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_16_(simde_vqshlh_n_s16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + + return simde_int16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_s16(a, n) vqshlq_n_s16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_s16 + #define vqshlq_n_s16(a, n) simde_vqshlq_n_s16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int32x4_t +simde_vqshlq_n_s32 (const simde_int32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_int32x4_private + r_, + a_ = simde_int32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshls_s32(a_.values[i], n); + } + + return simde_int32x4_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_s32(a, n) vqshlq_n_s32((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_s32 + #define vqshlq_n_s32(a, n) simde_vqshlq_n_s32((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_int64x2_t +simde_vqshlq_n_s64 (const simde_int64x2_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { + simde_int64x2_private + r_, + a_ = simde_int64x2_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + r_.values[i] = simde_vqshld_s64(a_.values[i], n); + } + + return simde_int64x2_from_private(r_); +} +#if 
defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_s64(a, n) vqshlq_n_s64((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_s64 + #define vqshlq_n_s64(a, n) simde_vqshlq_n_s64((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint8x16_t +simde_vqshlq_n_u8 (const simde_uint8x16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 7) { + simde_uint8x16_private + r_, + a_ = simde_uint8x16_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_8_(simde_vqshlb_n_u8, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + + return simde_uint8x16_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_u8(a, n) vqshlq_n_u8((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_u8 + #define vqshlq_n_u8(a, n) simde_vqshlq_n_u8((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint16x8_t +simde_vqshlq_n_u16 (const simde_uint16x8_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 15) { + simde_uint16x8_private + r_, + a_ = simde_uint16x8_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + SIMDE_CONSTIFY_16_(simde_vqshlh_n_u16, r_.values[i], (HEDLEY_UNREACHABLE(), 0), n, a_.values[i]); + } + + return simde_uint16x8_from_private(r_); +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vqshlq_n_u16(a, n) vqshlq_n_u16((a), (n)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vqshlq_n_u16 + #define vqshlq_n_u16(a, n) simde_vqshlq_n_u16((a), (n)) +#endif + +SIMDE_FUNCTION_ATTRIBUTES +simde_uint32x4_t +simde_vqshlq_n_u32 (const simde_uint32x4_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 31) { + simde_uint32x4_private + r_, + a_ = simde_uint32x4_to_private(a); + + SIMDE_VECTORIZE + for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { + 
r_.values[i] = simde_vqshls_u32(a_.values[i], n);
+  }
+
+  return simde_uint32x4_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vqshlq_n_u32(a, n) vqshlq_n_u32((a), (n))
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqshlq_n_u32
+  #define vqshlq_n_u32(a, n) simde_vqshlq_n_u32((a), (n))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vqshlq_n_u64 (const simde_uint64x2_t a, const int n)
+    SIMDE_REQUIRE_CONSTANT_RANGE(n, 0, 63) { /* n is required to be a constant in [0, 63] */
+  simde_uint64x2_private
+    r_,
+    a_ = simde_uint64x2_to_private(a);
+
+  SIMDE_VECTORIZE
+  for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+    r_.values[i] = simde_vqshld_u64(a_.values[i], n); /* each lane goes through the scalar helper */
+  }
+
+  return simde_uint64x2_from_private(r_);
+}
+#if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+  #define simde_vqshlq_n_u64(a, n) vqshlq_n_u64((a), (n))
+#endif
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vqshlq_n_u64
+  #define vqshlq_n_u64(a, n) simde_vqshlq_n_u64((a), (n))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QSHL_N_H) */
diff --git a/simde/arm/neon/qshrn_high_n.h b/simde/arm/neon/qshrn_high_n.h
new file mode 100644
index 000000000..59e6d8d93
--- /dev/null
+++ b/simde/arm/neon/qshrn_high_n.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_QSHRN_HIGH_N_H)
+#define SIMDE_ARM_NEON_QSHRN_HIGH_N_H
+
+#include "types.h"
+#include "shr_n.h"
+#include "qmovn.h"
+#include "combine.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* simde_vqshrn_high_n_<t>(r, a, n): shift every lane of `a` right by the
+ * immediate `n`, narrow the lanes with simde_vqmovn_<t>, and hand `r`
+ * plus the narrowed lanes to simde_vcombine_<t> (`r` is its first
+ * operand).  With A64 NEON the native intrinsic is used instead. */
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_s16(r, a, n) vqshrn_high_n_s16((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_s16(r, a, n) simde_vcombine_s8((r), simde_vqmovn_s16(simde_vshrq_n_s16((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_s16
+  #define vqshrn_high_n_s16(r, a, n) simde_vqshrn_high_n_s16((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_s32(r, a, n) vqshrn_high_n_s32((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_s32(r, a, n) simde_vcombine_s16((r), simde_vqmovn_s32(simde_vshrq_n_s32((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_s32
+  #define vqshrn_high_n_s32(r, a, n) simde_vqshrn_high_n_s32((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_s64(r, a, n) vqshrn_high_n_s64((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_s64(r, a, n) simde_vcombine_s32((r), simde_vqmovn_s64(simde_vshrq_n_s64((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_s64
+  #define vqshrn_high_n_s64(r, a, n) simde_vqshrn_high_n_s64((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_u16(r, a, n) vqshrn_high_n_u16((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_u16(r, a, n) simde_vcombine_u8((r), simde_vqmovn_u16(simde_vshrq_n_u16((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_u16
+  #define vqshrn_high_n_u16(r, a, n) simde_vqshrn_high_n_u16((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_u32(r, a, n) vqshrn_high_n_u32((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_u32(r, a, n) simde_vcombine_u16((r), simde_vqmovn_u32(simde_vshrq_n_u32((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_u32
+  #define vqshrn_high_n_u32(r, a, n) simde_vqshrn_high_n_u32((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrn_high_n_u64(r, a, n) vqshrn_high_n_u64((r), (a), (n))
+#else
+  #define simde_vqshrn_high_n_u64(r, a, n) simde_vcombine_u32((r), simde_vqmovn_u64(simde_vshrq_n_u64((a), (n))))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrn_high_n_u64
+  #define vqshrn_high_n_u64(r, a, n) simde_vqshrn_high_n_u64((r), (a), (n))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_QSHRN_HIGH_N_H) */
diff --git a/simde/arm/neon/qshrn_n.h b/simde/arm/neon/qshrn_n.h
index 93ab96c1f..abd47dcf7 100644
--- a/simde/arm/neon/qshrn_n.h
+++ b/simde/arm/neon/qshrn_n.h
@@ -23,6 +23,7 @@
  * Copyright:
  *   2021 Zhi An Ng (Copyright owned by Google, LLC)
  *   2021 Evan Nemerson
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
  */
 
 #if !defined(SIMDE_ARM_NEON_QSHRN_N_H)
@@ -36,6 +37,26 @@ HEDLEY_DIAGNOSTIC_PUSH
 SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
 SIMDE_BEGIN_DECLS_
 
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrnh_n_s16(a, n) vqshrnh_n_s16(a, n)
+#else
+  #define simde_vqshrnh_n_s16(a, n)
simde_vqmovnh_s16(simde_x_vshrh_n_s16(a, n))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrnh_n_s16
+  #define vqshrnh_n_s16(a, n) simde_vqshrnh_n_s16(a, n)
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vqshrnh_n_u16(a, n) vqshrnh_n_u16(a, n)
+#else
+  #define simde_vqshrnh_n_u16(a, n) simde_vqmovnh_u16(simde_x_vshrh_n_u16(a, n))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vqshrnh_n_u16
+  #define vqshrnh_n_u16(a, n) simde_vqshrnh_n_u16(a, n)
+#endif
+
 #if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
   #define simde_vqshrns_n_s32(a, n) vqshrns_n_s32(a, n)
 #else
diff --git a/simde/arm/neon/raddhn.h b/simde/arm/neon/raddhn.h
new file mode 100644
index 000000000..0f16e446e
--- /dev/null
+++ b/simde/arm/neon/raddhn.h
@@ -0,0 +1,199 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_RADDHN_H)
+#define SIMDE_ARM_NEON_RADDHN_H
+
+#include "add.h"
+#include "shr_n.h"
+#include "movn.h"
+
+#include "reinterpret.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* simde_vraddhn_<t>(a, b): per lane, add `a`, `b` and a rounding bias
+ * equal to half of the discarded low part, then keep only the upper
+ * half of the sum as one lane of the half-width result.  The sum is
+ * taken modulo the lane width, matching the native intrinsic. */
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8_t
+simde_vraddhn_s16(simde_int16x8_t a, simde_int16x8_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_s16(a, b);
+  #else
+    simde_int16x8_private
+      r_,
+      a_ = simde_int16x8_to_private(a),
+      b_ = simde_int16x8_to_private(b);
+    int16_t round_cast = 1 << 7;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      /* The operands are promoted to int before the addition; the cast
+       * makes the narrowing back to 16 bits explicit. */
+      r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] + b_.values[i] + round_cast);
+    }
+    return simde_vmovn_s16(simde_vshrq_n_s16(simde_int16x8_from_private(r_), 8));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_s16
+  #define vraddhn_s16(a, b) simde_vraddhn_s16((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4_t
+simde_vraddhn_s32(simde_int32x4_t a, simde_int32x4_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_s32(a, b);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a),
+      b_ = simde_int32x4_to_private(b);
+    int round_cast = 1 << 15;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      /* Add as uint32_t: the sum has to wrap modulo 2^32, whereas
+       * overflowing a signed int is undefined behaviour in C. */
+      r_.values[i] = HEDLEY_STATIC_CAST(int32_t,
+        HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) +
+        HEDLEY_STATIC_CAST(uint32_t, b_.values[i]) +
+        HEDLEY_STATIC_CAST(uint32_t, round_cast));
+    }
+    return simde_vmovn_s32(simde_vshrq_n_s32(simde_int32x4_from_private(r_), 16));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_s32
+  #define vraddhn_s32(a, b) simde_vraddhn_s32((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2_t
+simde_vraddhn_s64(simde_int64x2_t a, simde_int64x2_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_s64(a, b);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a),
+      b_ = simde_int64x2_to_private(b);
+    int64_t round_cast = 1ll << 31;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      /* Add as uint64_t so the sum wraps instead of overflowing, then go
+       * back to int64_t so the shift stays the signed one used before. */
+      r_.values[i] = HEDLEY_STATIC_CAST(int64_t,
+        HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) +
+        HEDLEY_STATIC_CAST(uint64_t, b_.values[i]) +
+        HEDLEY_STATIC_CAST(uint64_t, round_cast)) >> 32;
+    }
+    return simde_vmovn_s64(simde_int64x2_from_private(r_));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_s64
+  #define vraddhn_s64(a, b) simde_vraddhn_s64((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8_t
+simde_vraddhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_u16(a, b);
+  #else
+    simde_uint16x8_private
+      r_,
+      a_ = simde_uint16x8_to_private(a),
+      b_ = simde_uint16x8_to_private(b);
+    uint16_t round_cast = 1 << 7;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] + b_.values[i] + round_cast);
+    }
+    return simde_vmovn_u16(simde_vshrq_n_u16(simde_uint16x8_from_private(r_), 8));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_u16
+  #define vraddhn_u16(a, b) simde_vraddhn_u16((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x4_t
+simde_vraddhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_u32(a, b);
+  #else
+    simde_uint32x4_private
+      r_,
+      a_ = simde_uint32x4_to_private(a),
+      b_ = simde_uint32x4_to_private(b);
+    uint32_t round_cast = 1 << 15;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = HEDLEY_STATIC_CAST(uint32_t, a_.values[i] + b_.values[i] + round_cast);
+    }
+    return simde_vmovn_u32(simde_vshrq_n_u32(simde_uint32x4_from_private(r_), 16));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_u32
+  #define vraddhn_u32(a, b) simde_vraddhn_u32((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2_t
+simde_vraddhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vraddhn_u64(a, b);
+  #else
+    simde_uint64x2_private
+      r_,
+      a_ = simde_uint64x2_to_private(a),
+      b_ = simde_uint64x2_to_private(b);
+    uint64_t round_cast = 1ull << 31;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = ((a_.values[i] + b_.values[i] + round_cast) >> 32);
+    }
+    return simde_vmovn_u64(simde_uint64x2_from_private(r_));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_u64
+  #define vraddhn_u64(a, b) simde_vraddhn_u64((a), (b))
+#endif
+
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_RADDHN_H) */
diff --git a/simde/arm/neon/raddhn_high.h b/simde/arm/neon/raddhn_high.h
new file mode 100644
index 000000000..dc911698c
--- /dev/null
+++ b/simde/arm/neon/raddhn_high.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_RADDHN_HIGH_H)
+#define SIMDE_ARM_NEON_RADDHN_HIGH_H
+
+#include "raddhn.h"
+#include "combine.h"
+
+#include "reinterpret.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_s16(r, a, b) vraddhn_high_s16((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_s16(a, b) */
+  #define simde_vraddhn_high_s16(r, a, b) simde_vcombine_s8((r), simde_vraddhn_s16((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_s16
+  #define vraddhn_high_s16(r, a, b) simde_vraddhn_high_s16((r), (a), (b))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_s32(r, a, b) vraddhn_high_s32((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_s32(a, b) */
+  #define simde_vraddhn_high_s32(r, a, b) simde_vcombine_s16((r), simde_vraddhn_s32((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_s32
+  #define vraddhn_high_s32(r, a, b) simde_vraddhn_high_s32((r), (a), (b))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_s64(r, a, b) vraddhn_high_s64((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_s64(a, b) */
+  #define simde_vraddhn_high_s64(r, a, b) simde_vcombine_s32((r), simde_vraddhn_s64((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_s64
+  #define vraddhn_high_s64(r, a, b) simde_vraddhn_high_s64((r), (a), (b))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_u16(r, a, b) vraddhn_high_u16((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_u16(a, b) */
+  #define simde_vraddhn_high_u16(r, a, b) simde_vcombine_u8((r), simde_vraddhn_u16((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_u16
+  #define vraddhn_high_u16(r, a, b) simde_vraddhn_high_u16((r), (a), (b))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_u32(r, a, b) vraddhn_high_u32((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_u32(a, b) */
+  #define simde_vraddhn_high_u32(r, a, b) simde_vcombine_u16((r), simde_vraddhn_u32((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_u32
+  #define vraddhn_high_u32(r, a, b) simde_vraddhn_high_u32((r), (a), (b))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vraddhn_high_u64(r, a, b) vraddhn_high_u64((r), (a), (b))
+#else /* portable: combine r with simde_vraddhn_u64(a, b) */
+  #define simde_vraddhn_high_u64(r, a, b) simde_vcombine_u32((r), simde_vraddhn_u64((a), (b)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vraddhn_high_u64
+  #define vraddhn_high_u64(r, a, b) simde_vraddhn_high_u64((r), (a), (b))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_RADDHN_HIGH_H) */
diff --git a/simde/arm/neon/reinterpret.h b/simde/arm/neon/reinterpret.h
index d7efddfbb..f43bdfb53 100644
--- a/simde/arm/neon/reinterpret.h
+++ b/simde/arm/neon/reinterpret.h
@@ -2688,6 +2688,7 @@ simde_vreinterpret_f32_u64(simde_uint64x1_t a) {
   #define vreinterpret_f32_u64 simde_vreinterpret_f32_u64
 #endif
 
+
 SIMDE_FUNCTION_ATTRIBUTES
 simde_float32x2_t
 simde_vreinterpret_f32_f64(simde_float64x1_t a) {
@@ -3453,6 +3454,329 @@ simde_vreinterpretq_f16_u8(simde_uint8x16_t a) {
   #define vreinterpretq_f16_u8(a) simde_vreinterpretq_f16_u8(a)
 #endif
 
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vreinterpret_f16_f64(simde_float64x1_t a) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_f16_f64(a);
+  #else
+    simde_float64x1_private a_ = simde_float64x1_to_private(a);
+    simde_float16x4_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float16x4_from_private(r_);
+  #endif
+}
+#if
defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_f16_f64
+  #define vreinterpret_f16_f64 simde_vreinterpret_f16_f64
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vreinterpretq_f16_f64(simde_float64x2_t a) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_f16_f64(a);
+  #else
+    simde_float64x2_private a_ = simde_float64x2_to_private(a);
+    simde_float16x8_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float16x8_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_f16_f64
+  #define vreinterpretq_f16_f64(a) simde_vreinterpretq_f16_f64(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x2_t
+simde_vreinterpret_f32_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_f32_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_float32x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float32x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_f32_f16
+  #define vreinterpret_f32_f16 simde_vreinterpret_f32_f16
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float32x4_t
+simde_vreinterpretq_f32_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_f32_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_float32x4_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_f32_f16
+  #define vreinterpretq_f32_f16 simde_vreinterpretq_f32_f16
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x1_t
+simde_vreinterpret_f64_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_f64_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_float64x1_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float64x1_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_f64_f16
+  #define vreinterpret_f64_f16 simde_vreinterpret_f64_f16
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float64x2_t
+simde_vreinterpretq_f64_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_f64_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_float64x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_float64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_f64_f16
+  #define vreinterpretq_f64_f16 simde_vreinterpretq_f64_f16
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8_t
+simde_vreinterpret_u8_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_u8_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_uint8x8_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_uint8x8_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_u8_f16
+  #define vreinterpret_u8_f16(a) simde_vreinterpret_u8_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x16_t
+simde_vreinterpretq_u8_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_u8_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_uint8x16_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_uint8x16_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_u8_f16
+  #define vreinterpretq_u8_f16(a) simde_vreinterpretq_u8_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8_t
+simde_vreinterpret_s8_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_s8_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_int8x8_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int8x8_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_s8_f16
+  #define vreinterpret_s8_f16(a) simde_vreinterpret_s8_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x16_t
+simde_vreinterpretq_s8_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_s8_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_int8x16_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int8x16_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_s8_f16
+  #define vreinterpretq_s8_f16(a) simde_vreinterpretq_s8_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4_t
+simde_vreinterpret_s16_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_s16_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_int16x4_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int16x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_s16_f16
+  #define vreinterpret_s16_f16(a) simde_vreinterpret_s16_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x8_t
+simde_vreinterpretq_s16_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_s16_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_int16x8_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int16x8_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_s16_f16
+  #define vreinterpretq_s16_f16(a) simde_vreinterpretq_s16_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2_t
+simde_vreinterpret_s32_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_s32_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_int32x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int32x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_s32_f16
+  #define vreinterpret_s32_f16(a) simde_vreinterpret_s32_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x4_t
+simde_vreinterpretq_s32_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_s32_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_int32x4_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_s32_f16
+  #define vreinterpretq_s32_f16(a) simde_vreinterpretq_s32_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x1_t
+simde_vreinterpret_s64_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_s64_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_int64x1_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int64x1_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_s64_f16
+  #define vreinterpret_s64_f16(a) simde_vreinterpret_s64_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int64x2_t
+simde_vreinterpretq_s64_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_s64_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_int64x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_int64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_s64_f16
+  #define vreinterpretq_s64_f16(a) simde_vreinterpretq_s64_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2_t
+simde_vreinterpret_u32_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpret_u32_f16(a);
+  #else
+    simde_float16x4_private a_ = simde_float16x4_to_private(a);
+    simde_uint32x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_uint32x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpret_u32_f16
+  #define vreinterpret_u32_f16(a) simde_vreinterpret_u32_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x4_t
+simde_vreinterpretq_u32_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_u32_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_uint32x4_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_uint32x4_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_u32_f16
+  #define vreinterpretq_u32_f16(a) simde_vreinterpretq_u32_f16(a)
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint64x2_t
+simde_vreinterpretq_u64_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vreinterpretq_u64_f16(a);
+  #else
+    simde_float16x8_private a_ = simde_float16x8_to_private(a);
+    simde_uint64x2_private r_;
+    simde_memcpy(&r_, &a_, sizeof(r_)); /* copy the raw bits; no lane is converted */
+    return simde_uint64x2_from_private(r_);
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vreinterpretq_u64_f16
+  #define vreinterpretq_u64_f16 simde_vreinterpretq_u64_f16
+#endif
+
 SIMDE_END_DECLS_
 HEDLEY_DIAGNOSTIC_POP
 
diff --git a/simde/arm/neon/rev64.h b/simde/arm/neon/rev64.h
index 274f08126..31645a8ef 100644
--- a/simde/arm/neon/rev64.h
+++ b/simde/arm/neon/rev64.h
@@ -23,11 +23,9 @@
  * Copyright:
  *   2020 Evan Nemerson
  *   2020 Christopher Moore
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
  */
 
-/* N.B. CM: vrev64_f16 and vrev64q_f16 are omitted as
- * SIMDe has no 16-bit floating point support. */
-
 #if !defined(SIMDE_ARM_NEON_REV64_H)
 #define SIMDE_ARM_NEON_REV64_H
 
@@ -167,6 +165,20 @@ simde_vrev64_u32(simde_uint32x2_t a) {
   #define vrev64_u32(a) simde_vrev64_u32(a)
 #endif
 
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x4_t
+simde_vrev64_f16(simde_float16x4_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vrev64_f16(a);
+  #else /* reuse the s16 reversal on the reinterpreted 16-bit lanes */
+    return simde_vreinterpret_f16_s16(simde_vrev64_s16(simde_vreinterpret_s16_f16(a)));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrev64_f16
+  #define vrev64_f16(a) simde_vrev64_f16(a)
+#endif
+
 SIMDE_FUNCTION_ATTRIBUTES
 simde_float32x2_t
 simde_vrev64_f32(simde_float32x2_t a) {
@@ -334,6 +346,20 @@ simde_vrev64q_u32(simde_uint32x4_t a) {
   #define vrev64q_u32(a) simde_vrev64q_u32(a)
 #endif
 
+SIMDE_FUNCTION_ATTRIBUTES
+simde_float16x8_t
+simde_vrev64q_f16(simde_float16x8_t a) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16)
+    return vrev64q_f16(a);
+  #else /* reuse the s16 reversal on the reinterpreted 16-bit lanes */
+    return simde_vreinterpretq_f16_s16(simde_vrev64q_s16(simde_vreinterpretq_s16_f16(a)));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrev64q_f16
+  #define vrev64q_f16(a) simde_vrev64q_f16(a)
+#endif
+
 SIMDE_FUNCTION_ATTRIBUTES
 simde_float32x4_t
 simde_vrev64q_f32(simde_float32x4_t a) {
diff --git a/simde/arm/neon/rshrn_high_n.h
b/simde/arm/neon/rshrn_high_n.h
new file mode 100644
index 000000000..7897581a5
--- /dev/null
+++ b/simde/arm/neon/rshrn_high_n.h
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_RSHRN_HIGH_N_H)
+#define SIMDE_ARM_NEON_RSHRN_HIGH_N_H
+
+#include "rshrn_n.h"
+#include "combine.h"
+#include "types.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_s16(r, a, n) vrshrn_high_n_s16((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_s16(a, n) */
+  #define simde_vrshrn_high_n_s16(r, a, n) simde_vcombine_s8((r), simde_vrshrn_n_s16((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_s16
+  #define vrshrn_high_n_s16(r, a, n) simde_vrshrn_high_n_s16((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_s32(r, a, n) vrshrn_high_n_s32((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_s32(a, n) */
+  #define simde_vrshrn_high_n_s32(r, a, n) simde_vcombine_s16((r), simde_vrshrn_n_s32((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_s32
+  #define vrshrn_high_n_s32(r, a, n) simde_vrshrn_high_n_s32((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_s64(r, a, n) vrshrn_high_n_s64((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_s64(a, n) */
+  #define simde_vrshrn_high_n_s64(r, a, n) simde_vcombine_s32((r), simde_vrshrn_n_s64((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_s64
+  #define vrshrn_high_n_s64(r, a, n) simde_vrshrn_high_n_s64((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_u16(r, a, n) vrshrn_high_n_u16((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_u16(a, n) */
+  #define simde_vrshrn_high_n_u16(r, a, n) simde_vcombine_u8((r), simde_vrshrn_n_u16((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_u16
+  #define vrshrn_high_n_u16(r, a, n) simde_vrshrn_high_n_u16((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_u32(r, a, n) vrshrn_high_n_u32((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_u32(a, n) */
+  #define simde_vrshrn_high_n_u32(r, a, n) simde_vcombine_u16((r), simde_vrshrn_n_u32((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_u32
+  #define vrshrn_high_n_u32(r, a, n) simde_vrshrn_high_n_u32((r), (a), (n))
+#endif
+
+#if defined(SIMDE_ARM_NEON_A64V8_NATIVE)
+  #define simde_vrshrn_high_n_u64(r, a, n) vrshrn_high_n_u64((r), (a), (n))
+#else /* portable: combine r with simde_vrshrn_n_u64(a, n) */
+  #define simde_vrshrn_high_n_u64(r, a, n) simde_vcombine_u32((r), simde_vrshrn_n_u64((a), (n)))
+#endif
+#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES)
+  #undef vrshrn_high_n_u64
+  #define vrshrn_high_n_u64(r, a, n) simde_vrshrn_high_n_u64((r), (a), (n))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_RSHRN_HIGH_N_H) */
diff --git a/simde/arm/neon/rsubhn.h b/simde/arm/neon/rsubhn.h
new file mode 100644
index 000000000..2d6a15da4
--- /dev/null
+++ b/simde/arm/neon/rsubhn.h
@@ -0,0 +1,182 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Copyright:
+ *   2023 Yi-Yen Chung (Copyright owned by Andes Technology)
+ */
+
+#if !defined(SIMDE_ARM_NEON_RSUBHN_H)
+#define SIMDE_ARM_NEON_RSUBHN_H
+
+#include "sub.h"
+#include "shr_n.h"
+#include "movn.h"
+
+#include "reinterpret.h"
+
+HEDLEY_DIAGNOSTIC_PUSH
+SIMDE_DISABLE_UNWANTED_DIAGNOSTICS
+SIMDE_BEGIN_DECLS_
+
+/* simde_vrsubhn_<t>(a, b): per lane, (a - b + rounding bias) modulo the lane width; the upper half of each lane is returned. */
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int8x8_t
+simde_vrsubhn_s16(simde_int16x8_t a, simde_int16x8_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_s16(a, b);
+  #else
+    simde_int16x8_private
+      r_,
+      a_ = simde_int16x8_to_private(a),
+      b_ = simde_int16x8_to_private(b);
+    int16_t round_cast = 1 << 7; /* rounding bias: half of the 8 low bits that are dropped */
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = HEDLEY_STATIC_CAST(int16_t, a_.values[i] - b_.values[i] + round_cast);
+    }
+    return simde_vmovn_s16(simde_vshrq_n_s16(simde_int16x8_from_private(r_), 8));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_s16
+  #define vrsubhn_s16(a, b) simde_vrsubhn_s16((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int16x4_t
+simde_vrsubhn_s32(simde_int32x4_t a, simde_int32x4_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_s32(a, b);
+  #else
+    simde_int32x4_private
+      r_,
+      a_ = simde_int32x4_to_private(a),
+      b_ = simde_int32x4_to_private(b);
+    int round_cast = 1 << 15; /* rounding bias: half of the 16 low bits that are dropped */
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { /* uint32_t math wraps; signed overflow would be UB */
+      r_.values[i] = HEDLEY_STATIC_CAST(int32_t, HEDLEY_STATIC_CAST(uint32_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint32_t, b_.values[i]) + HEDLEY_STATIC_CAST(uint32_t, round_cast));
+    }
+    return simde_vmovn_s32(simde_vshrq_n_s32(simde_int32x4_from_private(r_), 16));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_s32
+  #define vrsubhn_s32(a, b) simde_vrsubhn_s32((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_int32x2_t
+simde_vrsubhn_s64(simde_int64x2_t a, simde_int64x2_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_s64(a, b);
+  #else
+    simde_int64x2_private
+      r_,
+      a_ = simde_int64x2_to_private(a),
+      b_ = simde_int64x2_to_private(b);
+    int64_t round_cast = 1ll << 31; /* rounding bias: half of the 32 low bits that are dropped */
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) { /* uint64_t math wraps; cast back so the shift stays signed */
+      r_.values[i] = HEDLEY_STATIC_CAST(int64_t, HEDLEY_STATIC_CAST(uint64_t, a_.values[i]) - HEDLEY_STATIC_CAST(uint64_t, b_.values[i]) + HEDLEY_STATIC_CAST(uint64_t, round_cast)) >> 32;
+    }
+    return simde_vmovn_s64(simde_int64x2_from_private(r_));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_s64
+  #define vrsubhn_s64(a, b) simde_vrsubhn_s64((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint8x8_t
+simde_vrsubhn_u16(simde_uint16x8_t a, simde_uint16x8_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_u16(a, b);
+  #else
+    simde_uint16x8_private
+      r_,
+      a_ = simde_uint16x8_to_private(a),
+      b_ = simde_uint16x8_to_private(b);
+    uint16_t round_cast = 1 << 7;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = HEDLEY_STATIC_CAST(uint16_t, a_.values[i] - b_.values[i] + round_cast);
+    }
+    return simde_vmovn_u16(simde_vshrq_n_u16(simde_uint16x8_from_private(r_), 8));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_u16
+  #define vrsubhn_u16(a, b) simde_vrsubhn_u16((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint16x4_t
+simde_vrsubhn_u32(simde_uint32x4_t a, simde_uint32x4_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_u32(a, b);
+  #else
+    simde_uint32x4_private
+      r_,
+      a_ = simde_uint32x4_to_private(a),
+      b_ = simde_uint32x4_to_private(b);
+    uint32_t round_cast = 1 << 15;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = a_.values[i] - b_.values[i] + round_cast;
+    }
+    return simde_vmovn_u32(simde_vshrq_n_u32(simde_uint32x4_from_private(r_), 16));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_u32
+  #define vrsubhn_u32(a, b) simde_vrsubhn_u32((a), (b))
+#endif
+
+SIMDE_FUNCTION_ATTRIBUTES
+simde_uint32x2_t
+simde_vrsubhn_u64(simde_uint64x2_t a, simde_uint64x2_t b) {
+  #if defined(SIMDE_ARM_NEON_A32V7_NATIVE)
+    return vrsubhn_u64(a, b);
+  #else
+    simde_uint64x2_private
+      r_,
+      a_ = simde_uint64x2_to_private(a),
+      b_ = simde_uint64x2_to_private(b);
+    uint64_t round_cast = 1ull << 31;
+    SIMDE_VECTORIZE
+    for (size_t i = 0 ; i < (sizeof(r_.values) / sizeof(r_.values[0])) ; i++) {
+      r_.values[i] = ((a_.values[i] - b_.values[i] + round_cast) >> 32);
+    }
+    return simde_vmovn_u64(simde_uint64x2_from_private(r_));
+  #endif
+}
+#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES)
+  #undef vrsubhn_u64
+  #define vrsubhn_u64(a, b) simde_vrsubhn_u64((a), (b))
+#endif
+
+SIMDE_END_DECLS_
+HEDLEY_DIAGNOSTIC_POP
+
+#endif /* !defined(SIMDE_ARM_NEON_RSUBHN_H) */
diff --git a/simde/arm/neon/rsubhn_high.h b/simde/arm/neon/rsubhn_high.h
new file mode 100644
index 000000000..d7b19849e
--- /dev/null
+++ b/simde/arm/neon/rsubhn_high.h
@@ -0,0 +1,102 @@
+/* SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_RSUBHN_HIGH_H) +#define SIMDE_ARM_NEON_RSUBHN_HIGH_H + +#include "rsubhn.h" +#include "combine.h" + +#include "reinterpret.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_s16(r, a, b) vrsubhn_high_s16((r), (a), (b)) +#else + #define simde_vrsubhn_high_s16(r, a, b) simde_vcombine_s8(r, simde_vrsubhn_s16(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_s16 + #define vrsubhn_high_s16(r, a, b) simde_vrsubhn_high_s16((r), (a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_s32(r, a, b) vrsubhn_high_s32((r), (a), (b)) +#else + #define simde_vrsubhn_high_s32(r, a, b) simde_vcombine_s16(r, simde_vrsubhn_s32(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_s32 + #define vrsubhn_high_s32(r, a, b) simde_vrsubhn_high_s32((r), (a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_s64(r, a, b) vrsubhn_high_s64((r), (a), (b)) +#else + #define simde_vrsubhn_high_s64(r, a, b) simde_vcombine_s32(r, simde_vrsubhn_s64(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_s64 + #define vrsubhn_high_s64(r, a, b) simde_vrsubhn_high_s64((r), (a), (b)) +#endif + +#if 
defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_u16(r, a, b) vrsubhn_high_u16((r), (a), (b)) +#else + #define simde_vrsubhn_high_u16(r, a, b) simde_vcombine_u8(r, simde_vrsubhn_u16(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_u16 + #define vrsubhn_high_u16(r, a, b) simde_vrsubhn_high_u16((r), (a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_u32(r, a, b) vrsubhn_high_u32((r), (a), (b)) +#else + #define simde_vrsubhn_high_u32(r, a, b) simde_vcombine_u16(r, simde_vrsubhn_u32(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_u32 + #define vrsubhn_high_u32(r, a, b) simde_vrsubhn_high_u32((r), (a), (b)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vrsubhn_high_u64(r, a, b) vrsubhn_high_u64((r), (a), (b)) +#else + #define simde_vrsubhn_high_u64(r, a, b) simde_vcombine_u32(r, simde_vrsubhn_u64(a, b)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vrsubhn_high_u64 + #define vrsubhn_high_u64(r, a, b) simde_vrsubhn_high_u64((r), (a), (b)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_RSUBHN_HIGH_H) */ diff --git a/simde/arm/neon/shr_n.h b/simde/arm/neon/shr_n.h index 5c912571e..10f77d786 100644 --- a/simde/arm/neon/shr_n.h +++ b/simde/arm/neon/shr_n.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Christopher Moore + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_SHR_N_H) @@ -34,6 +35,20 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +int16_t +simde_x_vshrh_n_s16(int16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return a >> ((n == 16) ? 15 : n); +} + +SIMDE_FUNCTION_ATTRIBUTES +uint16_t +simde_x_vshrh_n_u16(uint16_t a, const int n) + SIMDE_REQUIRE_CONSTANT_RANGE(n, 1, 16) { + return (n == 16) ? 
0 : a >> n; +} + SIMDE_FUNCTION_ATTRIBUTES int32_t simde_x_vshrs_n_s32(int32_t a, const int n) diff --git a/simde/arm/neon/sli_n.h b/simde/arm/neon/sli_n.h new file mode 100644 index 000000000..b3d9b4308 --- /dev/null +++ b/simde/arm/neon/sli_n.h @@ -0,0 +1,271 @@ +/* SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Copyright: + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) + */ + +#if !defined(SIMDE_ARM_NEON_SLI_N_H) +#define SIMDE_ARM_NEON_SLI_N_H + +#include "types.h" +#include "shl_n.h" +#include "dup_n.h" +#include "and.h" +#include "orr.h" +#include "reinterpret.h" + +HEDLEY_DIAGNOSTIC_PUSH +SIMDE_DISABLE_UNWANTED_DIAGNOSTICS +SIMDE_BEGIN_DECLS_ + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vslid_n_s64(a, b, n) vslid_n_s64(a, b, n) +#else + #define simde_vslid_n_s64(a, b, n) \ + HEDLEY_STATIC_CAST(int64_t, \ + simde_vslid_n_u64(HEDLEY_STATIC_CAST(uint64_t, a), HEDLEY_STATIC_CAST(uint64_t, b), n)) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vslid_n_s64 + #define vslid_n_s64(a, b, n) simde_vslid_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A64V8_NATIVE) + #define simde_vslid_n_u64(a, b, n) vslid_n_u64(a, b, n) +#else +#define simde_vslid_n_u64(a, b, n) \ + (((a & (UINT64_C(0xffffffffffffffff) >> (64 - n))) | simde_vshld_n_u64((b), (n)))) +#endif +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vslid_n_u64 + #define vslid_n_u64(a, b, n) simde_vslid_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_s8(a, b, n) vsli_n_s8((a), (b), (n)) +#else + #define simde_vsli_n_s8(a, b, n) \ + simde_vreinterpret_s8_u8(simde_vsli_n_u8( \ + simde_vreinterpret_u8_s8((a)), simde_vreinterpret_u8_s8((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_s8 + #define vsli_n_s8(a, b, n) simde_vsli_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_u8(a, b, n) vsli_n_u8((a), (b), (n)) +#else + #define simde_vsli_n_u8(a, b, n) \ + simde_vorr_u8( \ + simde_vand_u8((a), simde_vdup_n_u8((UINT8_C(0xff) >> (8 - n)))), \ + simde_vshl_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_u8 + #define vsli_n_u8(a, b, n) simde_vsli_n_u8((a), (b), 
(n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_s16(a, b, n) vsli_n_s16((a), (b), (n)) +#else + #define simde_vsli_n_s16(a, b, n) \ + simde_vreinterpret_s16_u16(simde_vsli_n_u16( \ + simde_vreinterpret_u16_s16((a)), simde_vreinterpret_u16_s16((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_s16 + #define vsli_n_s16(a, b, n) simde_vsli_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_u16(a, b, n) vsli_n_u16((a), (b), (n)) +#else + #define simde_vsli_n_u16(a, b, n) \ + simde_vorr_u16( \ + simde_vand_u16((a), simde_vdup_n_u16((UINT16_C(0xffff) >> (16 - n)))), \ + simde_vshl_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_u16 + #define vsli_n_u16(a, b, n) simde_vsli_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_s32(a, b, n) vsli_n_s32((a), (b), (n)) +#else + #define simde_vsli_n_s32(a, b, n) \ + simde_vreinterpret_s32_u32(simde_vsli_n_u32( \ + simde_vreinterpret_u32_s32((a)), simde_vreinterpret_u32_s32((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_s32 + #define vsli_n_s32(a, b, n) simde_vsli_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_u32(a, b, n) vsli_n_u32((a), (b), (n)) +#else + #define simde_vsli_n_u32(a, b, n) \ + simde_vorr_u32( \ + simde_vand_u32((a), \ + simde_vdup_n_u32((UINT32_C(0xffffffff) >> (32 - n)))), \ + simde_vshl_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_u32 + #define vsli_n_u32(a, b, n) simde_vsli_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_s64(a, b, n) vsli_n_s64((a), (b), (n)) +#else + #define simde_vsli_n_s64(a, b, n) \ + simde_vreinterpret_s64_u64(simde_vsli_n_u64( \ + simde_vreinterpret_u64_s64((a)), 
simde_vreinterpret_u64_s64((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_s64 + #define vsli_n_s64(a, b, n) simde_vsli_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsli_n_u64(a, b, n) vsli_n_u64((a), (b), (n)) +#else +#define simde_vsli_n_u64(a, b, n) \ + simde_vorr_u64( \ + simde_vand_u64((a), simde_vdup_n_u64( \ + (UINT64_C(0xffffffffffffffff) >> (64 - n)))), \ + simde_vshl_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsli_n_u64 + #define vsli_n_u64(a, b, n) simde_vsli_n_u64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_s8(a, b, n) vsliq_n_s8((a), (b), (n)) +#else + #define simde_vsliq_n_s8(a, b, n) \ + simde_vreinterpretq_s8_u8(simde_vsliq_n_u8( \ + simde_vreinterpretq_u8_s8((a)), simde_vreinterpretq_u8_s8((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_s8 + #define vsliq_n_s8(a, b, n) simde_vsliq_n_s8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_u8(a, b, n) vsliq_n_u8((a), (b), (n)) +#else + #define simde_vsliq_n_u8(a, b, n) \ + simde_vorrq_u8( \ + simde_vandq_u8((a), simde_vdupq_n_u8((UINT8_C(0xff) >> (8 - n)))), \ + simde_vshlq_n_u8((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_u8 + #define vsliq_n_u8(a, b, n) simde_vsliq_n_u8((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_s16(a, b, n) vsliq_n_s16((a), (b), (n)) +#else + #define simde_vsliq_n_s16(a, b, n) \ + simde_vreinterpretq_s16_u16(simde_vsliq_n_u16( \ + simde_vreinterpretq_u16_s16((a)), simde_vreinterpretq_u16_s16((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_s16 + #define vsliq_n_s16(a, b, n) simde_vsliq_n_s16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define 
simde_vsliq_n_u16(a, b, n) vsliq_n_u16((a), (b), (n)) +#else + #define simde_vsliq_n_u16(a, b, n) \ + simde_vorrq_u16( \ + simde_vandq_u16((a), simde_vdupq_n_u16((UINT16_C(0xffff) >> (16 - n)))), \ + simde_vshlq_n_u16((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_u16 + #define vsliq_n_u16(a, b, n) simde_vsliq_n_u16((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_s32(a, b, n) vsliq_n_s32((a), (b), (n)) +#else + #define simde_vsliq_n_s32(a, b, n) \ + simde_vreinterpretq_s32_u32(simde_vsliq_n_u32( \ + simde_vreinterpretq_u32_s32((a)), simde_vreinterpretq_u32_s32((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_s32 + #define vsliq_n_s32(a, b, n) simde_vsliq_n_s32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_u32(a, b, n) vsliq_n_u32((a), (b), (n)) +#else + #define simde_vsliq_n_u32(a, b, n) \ + simde_vorrq_u32( \ + simde_vandq_u32((a), \ + simde_vdupq_n_u32((UINT32_C(0xffffffff) >> (32 - n)))), \ + simde_vshlq_n_u32((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_u32 + #define vsliq_n_u32(a, b, n) simde_vsliq_n_u32((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_s64(a, b, n) vsliq_n_s64((a), (b), (n)) +#else + #define simde_vsliq_n_s64(a, b, n) \ + simde_vreinterpretq_s64_u64(simde_vsliq_n_u64( \ + simde_vreinterpretq_u64_s64((a)), simde_vreinterpretq_u64_s64((b)), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_s64 + #define vsliq_n_s64(a, b, n) simde_vsliq_n_s64((a), (b), (n)) +#endif + +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) + #define simde_vsliq_n_u64(a, b, n) vsliq_n_u64((a), (b), (n)) +#else +#define simde_vsliq_n_u64(a, b, n) \ + simde_vorrq_u64( \ + simde_vandq_u64((a), simde_vdupq_n_u64( \ + (UINT64_C(0xffffffffffffffff) >> (64 - n)))), \ + 
simde_vshlq_n_u64((b), (n))) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vsliq_n_u64 + #define vsliq_n_u64(a, b, n) simde_vsliq_n_u64((a), (b), (n)) +#endif + +SIMDE_END_DECLS_ +HEDLEY_DIAGNOSTIC_POP + +#endif /* !defined(SIMDE_ARM_NEON_SLI_N_H) */ diff --git a/simde/arm/neon/st1_lane.h b/simde/arm/neon/st1_lane.h index f0e78365c..8e5a88dfc 100644 --- a/simde/arm/neon/st1_lane.h +++ b/simde/arm/neon/st1_lane.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST1_LANE_H) @@ -33,6 +34,22 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_lane_f16(simde_float16_t *ptr, simde_float16x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_4_NO_RESULT_(vst1_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x4_private val_ = simde_float16x4_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_lane_f16 + #define vst1_lane_f16(a, b, c) simde_vst1_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_lane_f32(simde_float32_t *ptr, simde_float32x2_t val, const int lane) @@ -196,6 +213,22 @@ simde_vst1_lane_u64(uint64_t *ptr, simde_uint64x1_t val, const int lane) #define vst1_lane_u64(a, b, c) simde_vst1_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_lane_f16(simde_float16_t *ptr, simde_float16x8_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_8_NO_RESULT_(vst1q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x8_private val_ = 
simde_float16x8_to_private(val); + *ptr = val_.values[lane]; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_lane_f16 + #define vst1q_lane_f16(a, b, c) simde_vst1q_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_lane_f32(simde_float32_t *ptr, simde_float32x4_t val, const int lane) diff --git a/simde/arm/neon/st1_x2.h b/simde/arm/neon/st1_x2.h index 53e7107be..f50630864 100644 --- a/simde/arm/neon/st1_x2.h +++ b/simde/arm/neon/st1_x2.h @@ -37,6 +37,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f16_x2(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(8)], simde_float16x4x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) && !defined(SIMDE_BUG_GCC_REV_260989) + vst1_f16_x2(ptr, val); + #else + simde_float16x4_private val_[2]; + for (size_t i = 0; i < 2; i++) { + val_[i] = simde_float16x4_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_f16_x2 + #define vst1_f16_x2(ptr, val) simde_vst1_f16_x2((ptr), (val)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_f32_x2(simde_float32 ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x2_t val) { diff --git a/simde/arm/neon/st1_x3.h b/simde/arm/neon/st1_x3.h index 7b85cfab1..97d14116c 100644 --- a/simde/arm/neon/st1_x3.h +++ b/simde/arm/neon/st1_x3.h @@ -37,6 +37,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f16_x3(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float16x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1_f16_x3(ptr, val); + #else + simde_float16x4_private val_[3]; + for (size_t i = 0; i < 3; i++) { + val_[i] = simde_float16x4_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) 
+ #undef vst1_f16_x3 + #define vst1_f16_x3(a, b) simde_vst1_f16_x3((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_f32_x3(simde_float32 ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t val) { diff --git a/simde/arm/neon/st1_x4.h b/simde/arm/neon/st1_x4.h index b9b3497d1..1d4a2234d 100644 --- a/simde/arm/neon/st1_x4.h +++ b/simde/arm/neon/st1_x4.h @@ -37,6 +37,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1_f16_x4(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_float16x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1_f16_x4(ptr, val); + #else + simde_float16x4_private val_[4]; + for (size_t i = 0; i < 4; i++) { + val_[i] = simde_float16x4_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1_f16_x4 + #define vst1_f16_x4(a, b) simde_vst1_f16_x4((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x2x4_t val) { diff --git a/simde/arm/neon/st1q_x2.h b/simde/arm/neon/st1q_x2.h index a429fde9b..b5e12a75a 100644 --- a/simde/arm/neon/st1q_x2.h +++ b/simde/arm/neon/st1q_x2.h @@ -21,8 +21,6 @@ * SOFTWARE. 
* * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ @@ -37,6 +35,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_f16_x2(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(16)], simde_float16x8x2_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1q_f16_x2(ptr, val); + #else + simde_float16x8_private val_[2]; + for (size_t i = 0; i < 2; i++) { + val_[i] = simde_float16x8_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_f16_x2 + #define vst1q_f16_x2(a, b) simde_vst1q_f16_x2((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_f32_x2(simde_float32 ptr[HEDLEY_ARRAY_PARAM(8)], simde_float32x4x2_t val) { diff --git a/simde/arm/neon/st1q_x3.h b/simde/arm/neon/st1q_x3.h index 72ccfba6d..b53270674 100644 --- a/simde/arm/neon/st1q_x3.h +++ b/simde/arm/neon/st1q_x3.h @@ -21,8 +21,6 @@ * SOFTWARE. 
* * Copyright: - * 2020 Evan Nemerson - * 2021 Décio Luiz Gazzoni Filho * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ @@ -37,6 +35,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_f16_x3(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_float16x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1q_f16_x3(ptr, val); + #else + simde_float16x8_private val_[3]; + for (size_t i = 0; i < 3; i++) { + val_[i] = simde_float16x8_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_f16_x3 + #define vst1q_f16_x3(a, b) simde_vst1q_f16_x3((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_f32_x3(simde_float32 ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t val) { diff --git a/simde/arm/neon/st1q_x4.h b/simde/arm/neon/st1q_x4.h index c52140444..8ac268282 100644 --- a/simde/arm/neon/st1q_x4.h +++ b/simde/arm/neon/st1q_x4.h @@ -37,6 +37,24 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst1q_f16_x4(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(32)], simde_float16x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst1q_f16_x4(ptr, val); + #else + simde_float16x8_private val_[4]; + for (size_t i = 0; i < 4; i++) { + val_[i] = simde_float16x8_to_private(val.val[i]); + } + simde_memcpy(ptr, &val_, sizeof(val_)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst1q_f16_x4 + #define vst1q_f16_x4(a, b) simde_vst1q_f16_x4((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst1q_f32_x4(simde_float32 ptr[HEDLEY_ARRAY_PARAM(16)], simde_float32x4x4_t val) { diff --git a/simde/arm/neon/st2_lane.h b/simde/arm/neon/st2_lane.h index 0eee6a8a4..3f168bac0 100644 --- a/simde/arm/neon/st2_lane.h +++ b/simde/arm/neon/st2_lane.h @@ -22,6 +22,7 @@ * * 
Copyright: * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST2_LANE_H) @@ -189,6 +190,25 @@ simde_vst2_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x1x2_t val, #define vst2_lane_u64(a, b, c) simde_vst2_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x4x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_4_NO_RESULT_(vst2_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x4_private r; + for (size_t i = 0 ; i < 2 ; i ++) { + r = simde_float16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2_lane_f16 + #define vst2_lane_f16(a, b, c) simde_vst2_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst2_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x2x2_t val, const int lane) @@ -380,6 +400,25 @@ simde_vst2q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_uint64x2x2_t val #define vst2q_lane_u64(a, b, c) simde_vst2q_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst2q_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float16x8x2_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_8_NO_RESULT_(vst2q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x8_private r; + for (size_t i = 0 ; i < 2 ; i++) { + r = simde_float16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst2q_lane_f16 + #define vst2q_lane_f16(a, b, c) simde_vst2q_lane_f16((a), (b), (c)) +#endif + 
SIMDE_FUNCTION_ATTRIBUTES void simde_vst2q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(2)], simde_float32x4x2_t val, const int lane) diff --git a/simde/arm/neon/st3.h b/simde/arm/neon/st3.h index 2a3616d42..4d159ca34 100644 --- a/simde/arm/neon/st3.h +++ b/simde/arm/neon/st3.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST3_H) @@ -37,6 +38,27 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float16x4x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst3_f16(ptr, val); + #else + simde_float16x4_private a[3] = { simde_float16x4_to_private(val.val[0]), + simde_float16x4_to_private(val.val[1]), + simde_float16x4_to_private(val.val[2]) }; + simde_float16_t buf[12]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { + buf[i] = a[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_f16 + #define vst3_f16(a, b) simde_vst3_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(6)], simde_float32x2x3_t val) { @@ -348,6 +370,27 @@ simde_vst3_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val) { #define vst3_u64(a, b) simde_vst3_u64((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(24)], simde_float16x8x3_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst3q_f16(ptr, val); + #else + simde_float16x8_private a_[3] = { simde_float16x8_to_private(val.val[0]), + simde_float16x8_to_private(val.val[1]), + simde_float16x8_to_private(val.val[2]) }; + simde_float16_t buf[24]; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 3 ; i++) { 
+ buf[i] = a_[i % 3].values[i / 3]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_f16 + #define vst3q_f16(a, b) simde_vst3q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(12)], simde_float32x4x3_t val) { diff --git a/simde/arm/neon/st3_lane.h b/simde/arm/neon/st3_lane.h index ba3283b24..2419441c1 100644 --- a/simde/arm/neon/st3_lane.h +++ b/simde/arm/neon/st3_lane.h @@ -22,6 +22,7 @@ * * Copyright: * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST3_LANE_H) @@ -189,6 +190,25 @@ simde_vst3_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x1x3_t val, #define vst3_lane_u64(a, b, c) simde_vst3_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x4x3_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_4_NO_RESULT_(vst3_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x4_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3_lane_f16 + #define vst3_lane_f16(a, b, c) simde_vst3_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst3_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x2x3_t val, const int lane) @@ -380,6 +400,25 @@ simde_vst3q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_uint64x2x3_t val #define vst3q_lane_u64(a, b, c) simde_vst3q_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst3q_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float16x8x3_t val, const int lane) + 
SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_8_NO_RESULT_(vst3q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x8_private r; + for (size_t i = 0 ; i < 3 ; i++) { + r = simde_float16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst3q_lane_f16 + #define vst3q_lane_f16(a, b, c) simde_vst3q_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst3q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(3)], simde_float32x4x3_t val, const int lane) diff --git a/simde/arm/neon/st4.h b/simde/arm/neon/st4.h index 2ccb1c3dd..cd8d25f44 100644 --- a/simde/arm/neon/st4.h +++ b/simde/arm/neon/st4.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST4_H) @@ -36,6 +37,26 @@ SIMDE_BEGIN_DECLS_ #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_f16(simde_float16_t *ptr, simde_float16x4x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst4_f16(ptr, val); + #else + simde_float16_t buf[16]; + simde_float16x4_private a_[4] = { simde_float16x4_to_private(val.val[0]), simde_float16x4_to_private(val.val[1]), + simde_float16x4_to_private(val.val[2]), simde_float16x4_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_f16 + #define vst4_f16(a, b) simde_vst4_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_f32(simde_float32_t *ptr, simde_float32x2x4_t val) { @@ -236,6 +257,26 @@ simde_vst4_u64(uint64_t *ptr, simde_uint64x1x4_t val) { #define vst4_u64(a, b) simde_vst4_u64((a), 
(b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_f16(simde_float16_t *ptr, simde_float16x8x4_t val) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + vst4q_f16(ptr, val); + #else + simde_float16_t buf[32]; + simde_float16x8_private a_[4] = { simde_float16x8_to_private(val.val[0]), simde_float16x8_to_private(val.val[1]), + simde_float16x8_to_private(val.val[2]), simde_float16x8_to_private(val.val[3]) }; + for (size_t i = 0; i < (sizeof(val.val[0]) / sizeof(*ptr)) * 4 ; i++) { + buf[i] = a_[i % 4].values[i / 4]; + } + simde_memcpy(ptr, buf, sizeof(buf)); + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_f16 + #define vst4q_f16(a, b) simde_vst4q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_f32(simde_float32_t *ptr, simde_float32x4x4_t val) { diff --git a/simde/arm/neon/st4_lane.h b/simde/arm/neon/st4_lane.h index e5101e46d..f4a97353e 100644 --- a/simde/arm/neon/st4_lane.h +++ b/simde/arm/neon/st4_lane.h @@ -23,6 +23,7 @@ * Copyright: * 2021 Evan Nemerson * 2021 Zhi An Ng (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_ST4_LANE_H) @@ -190,6 +191,24 @@ simde_vst4_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x1x4_t val, #define vst4_lane_u64(a, b, c) simde_vst4_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x4x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 3) { + simde_float16x4_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float16x4_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } +} +#if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + #define simde_vst4_lane_f16(a, b, c) vst4_lane_f16((a), (b), (c)) +#endif +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4_lane_f16 + #define vst4_lane_f16(a, b, c) 
simde_vst4_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst4_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x2x4_t val, const int lane) @@ -381,6 +400,25 @@ simde_vst4q_lane_u64(uint64_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_uint64x2x4_t val #define vst4q_lane_u64(a, b, c) simde_vst4q_lane_u64((a), (b), (c)) #endif +SIMDE_FUNCTION_ATTRIBUTES +void +simde_vst4q_lane_f16(simde_float16_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float16x8x4_t val, const int lane) + SIMDE_REQUIRE_CONSTANT_RANGE(lane, 0, 7) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + SIMDE_CONSTIFY_8_NO_RESULT_(vst4q_lane_f16, HEDLEY_UNREACHABLE(), lane, ptr, val); + #else + simde_float16x8_private r; + for (size_t i = 0 ; i < 4 ; i++) { + r = simde_float16x8_to_private(val.val[i]); + ptr[i] = r.values[lane]; + } + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vst4q_lane_f16 + #define vst4q_lane_f16(a, b, c) simde_vst4q_lane_f16((a), (b), (c)) +#endif + SIMDE_FUNCTION_ATTRIBUTES void simde_vst4q_lane_f32(simde_float32_t ptr[HEDLEY_ARRAY_PARAM(4)], simde_float32x4x4_t val, const int lane) diff --git a/simde/arm/neon/trn.h b/simde/arm/neon/trn.h index 9f9184849..e485340fd 100644 --- a/simde/arm/neon/trn.h +++ b/simde/arm/neon/trn.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_TRN_H) && !defined(SIMDE_BUG_INTEL_857088) @@ -36,6 +37,21 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x2_t +simde_vtrn_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrn_f16(a, b); + #else + simde_float16x4x2_t r = { { simde_vtrn1_f16(a, b), simde_vtrn2_f16(a, b) } }; + return r; + #endif +} +#if 
defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrn_f16 + #define vtrn_f16(a, b) simde_vtrn_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vtrn_f32(simde_float32x2_t a, simde_float32x2_t b) { @@ -141,6 +157,21 @@ simde_vtrn_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vtrn_u32(a, b) simde_vtrn_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x2_t +simde_vtrnq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrnq_f16(a, b); + #else + simde_float16x8x2_t r = { { simde_vtrn1q_f16(a, b), simde_vtrn2q_f16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vtrnq_f16 + #define vtrnq_f16(a, b) simde_vtrnq_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vtrnq_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/trn1.h b/simde/arm/neon/trn1.h index f3b1521aa..990f92583 100644 --- a/simde/arm/neon/trn1.h +++ b/simde/arm/neon/trn1.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_TRN1_H) @@ -34,6 +35,33 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vtrn1_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrn1_f16(a, b); + #else + simde_float16x4_private + r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return 
simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1_f16 + #define vtrn1_f16(a, b) simde_vtrn1_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vtrn1_f32(simde_float32x2_t a, simde_float32x2_t b) { @@ -223,6 +251,33 @@ simde_vtrn1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vtrn1_u32(a, b) simde_vtrn1_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vtrn1q_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrn1q_f16(a, b); + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx]; + r_.values[idx | 1] = b_.values[idx]; + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn1q_f16 + #define vtrn1q_f16(a, b) simde_vtrn1q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vtrn1q_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/trn2.h b/simde/arm/neon/trn2.h index 31bd7dc4e..ea64e6732 100644 --- a/simde/arm/neon/trn2.h +++ b/simde/arm/neon/trn2.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if !defined(SIMDE_ARM_NEON_TRN2_H) @@ -34,6 +35,33 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4_t +simde_vtrn2_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrn2_f16(a, b); + #else + simde_float16x4_private + 
r_, + a_ = simde_float16x4_to_private(a), + b_ = simde_float16x4_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_float16x4_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2_f16 + #define vtrn2_f16(a, b) simde_vtrn2_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2_t simde_vtrn2_f32(simde_float32x2_t a, simde_float32x2_t b) { @@ -223,6 +251,33 @@ simde_vtrn2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vtrn2_u32(a, b) simde_vtrn2_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vtrn2q_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vtrn2q_f16(a, b); + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[idx] = a_.values[idx | 1]; + r_.values[idx | 1] = b_.values[idx | 1]; + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vtrn2q_f16 + #define vtrn2q_f16(a, b) simde_vtrn2q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vtrn2q_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/uzp.h b/simde/arm/neon/uzp.h index b44db4477..859ba7749 100644 --- a/simde/arm/neon/uzp.h +++ b/simde/arm/neon/uzp.h @@ -23,6 +23,7 @@ * Copyright: * 2020 Evan Nemerson * 2020 Sean Maher (Copyright owned by Google, LLC) + * 2023 Yi-Yen Chung (Copyright owned by Andes Technology) */ #if 
!defined(SIMDE_ARM_NEON_UZP_H) && !defined(SIMDE_BUG_INTEL_857088) @@ -36,6 +37,21 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DISABLE_UNWANTED_DIAGNOSTICS SIMDE_BEGIN_DECLS_ +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x4x2_t +simde_vuzp_f16(simde_float16x4_t a, simde_float16x4_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vuzp_f16(a, b); + #else + simde_float16x4x2_t r = { { simde_vuzp1_f16(a, b), simde_vuzp2_f16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzp_f16 + #define vuzp_f16(a, b) simde_vuzp_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x2x2_t simde_vuzp_f32(simde_float32x2_t a, simde_float32x2_t b) { @@ -141,6 +157,21 @@ simde_vuzp_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vuzp_u32(a, b) simde_vuzp_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8x2_t +simde_vuzpq_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vuzpq_f16(a, b); + #else + simde_float16x8x2_t r = { { simde_vuzp1q_f16(a, b), simde_vuzp2q_f16(a, b) } }; + return r; + #endif +} +#if defined(SIMDE_ARM_NEON_A32V7_ENABLE_NATIVE_ALIASES) + #undef vuzpq_f16 + #define vuzpq_f16(a, b) simde_vuzpq_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4x2_t simde_vuzpq_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/uzp1.h b/simde/arm/neon/uzp1.h index 02ab056f8..cb776cd97 100644 --- a/simde/arm/neon/uzp1.h +++ b/simde/arm/neon/uzp1.h @@ -300,6 +300,36 @@ simde_vuzp1_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vuzp1_u32(a, b) simde_vuzp1_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vuzp1q_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vuzp1q_f16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + 
float16x8x2_t t = vuzpq_f16(a, b); + return t.val[0]; + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx]; + r_.values[i + halfway_point] = b_.values[idx]; + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp1q_f16 + #define vuzp1q_f16(a, b) simde_vuzp1q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t simde_vuzp1q_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/simde/arm/neon/uzp2.h b/simde/arm/neon/uzp2.h index 830b3e254..a36a1292c 100644 --- a/simde/arm/neon/uzp2.h +++ b/simde/arm/neon/uzp2.h @@ -300,6 +300,36 @@ simde_vuzp2_u32(simde_uint32x2_t a, simde_uint32x2_t b) { #define vuzp2_u32(a, b) simde_vuzp2_u32((a), (b)) #endif +SIMDE_FUNCTION_ATTRIBUTES +simde_float16x8_t +simde_vuzp2q_f16(simde_float16x8_t a, simde_float16x8_t b) { + #if defined(SIMDE_ARM_NEON_A64V8_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + return vuzp2q_f16(a, b); + #elif defined(SIMDE_ARM_NEON_A32V7_NATIVE) && defined(SIMDE_ARM_NEON_FP16) + float16x8x2_t t = vuzpq_f16(a, b); + return t.val[1]; + #else + simde_float16x8_private + r_, + a_ = simde_float16x8_to_private(a), + b_ = simde_float16x8_to_private(b); + + const size_t halfway_point = sizeof(r_.values) / sizeof(r_.values[0]) / 2; + SIMDE_VECTORIZE + for (size_t i = 0 ; i < halfway_point ; i++) { + const size_t idx = i << 1; + r_.values[ i ] = a_.values[idx | 1]; + r_.values[i + halfway_point] = b_.values[idx | 1]; + } + + return simde_float16x8_from_private(r_); + #endif +} +#if defined(SIMDE_ARM_NEON_A64V8_ENABLE_NATIVE_ALIASES) + #undef vuzp2q_f16 + #define vuzp2q_f16(a, b) simde_vuzp2q_f16((a), (b)) +#endif + SIMDE_FUNCTION_ATTRIBUTES simde_float32x4_t 
simde_vuzp2q_f32(simde_float32x4_t a, simde_float32x4_t b) { diff --git a/test/arm/neon/abd.c b/test/arm/neon/abd.c index c214e65ae..98c1de390 100644 --- a/test/arm/neon/abd.c +++ b/test/arm/neon/abd.c @@ -8,6 +8,74 @@ # define TEST_SIMDE_VABD_NO_TEST_32 #endif +static int +test_simde_vabdh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + simde_float16 b; + simde_float16 r; + } test_vec[] = { + #if !defined(SIMDE_FAST_NANS) + { SIMDE_NANHF, + SIMDE_FLOAT16_VALUE( 0.52), + SIMDE_NANHF }, + { SIMDE_FLOAT16_VALUE( 705.02), + SIMDE_NANHF, + SIMDE_NANHF }, + { SIMDE_NANHF, + SIMDE_NANHF, + SIMDE_NANHF }, + #endif + { SIMDE_FLOAT16_VALUE( - 0.300), + SIMDE_FLOAT16_VALUE( - 99.250), + SIMDE_FLOAT16_VALUE( 98.950) }, + { SIMDE_FLOAT16_VALUE( - 47.131), + SIMDE_FLOAT16_VALUE( 97.405), + SIMDE_FLOAT16_VALUE( 144.535) }, + { SIMDE_FLOAT16_VALUE( - 77.318), + SIMDE_FLOAT16_VALUE( - 84.590), + SIMDE_FLOAT16_VALUE( 7.272) }, + { SIMDE_FLOAT16_VALUE( - 24.059), + SIMDE_FLOAT16_VALUE( 37.447), + SIMDE_FLOAT16_VALUE( 61.506) }, + { SIMDE_FLOAT16_VALUE( - 52.274), + SIMDE_FLOAT16_VALUE( 37.809), + SIMDE_FLOAT16_VALUE( 90.084) }, + { SIMDE_FLOAT16_VALUE( - 85.927), + SIMDE_FLOAT16_VALUE( - 59.935), + SIMDE_FLOAT16_VALUE( 25.992) }, + { SIMDE_FLOAT16_VALUE( -81.790), + SIMDE_FLOAT16_VALUE( -18.384), + SIMDE_FLOAT16_VALUE( 63.407) }, + { SIMDE_FLOAT16_VALUE( -52.199), + SIMDE_FLOAT16_VALUE( -87.447), + SIMDE_FLOAT16_VALUE( 35.248) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t r = simde_vabdh_f16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_float16_t b = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_float16_t r = simde_vabdh_f16(a, b); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_f16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vabds_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -144,6 +212,65 @@ test_simde_vabdd_f64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vabd_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 12.132), SIMDE_FLOAT16_VALUE( - 13.736), SIMDE_FLOAT16_VALUE( - 15.777), SIMDE_FLOAT16_VALUE( 27.640) }, + { SIMDE_FLOAT16_VALUE( - 15.705), SIMDE_FLOAT16_VALUE( 17.382), SIMDE_FLOAT16_VALUE( - 21.155), SIMDE_FLOAT16_VALUE( - 29.967) }, + { SIMDE_FLOAT16_VALUE( 27.837), SIMDE_FLOAT16_VALUE( 31.118), SIMDE_FLOAT16_VALUE( 5.377), SIMDE_FLOAT16_VALUE( 57.606) } }, + { { SIMDE_FLOAT16_VALUE( - 12.901), SIMDE_FLOAT16_VALUE( 9.796), SIMDE_FLOAT16_VALUE( - 18.982), SIMDE_FLOAT16_VALUE( - 0.347) }, + { SIMDE_FLOAT16_VALUE( - 20.401), SIMDE_FLOAT16_VALUE( 6.071), SIMDE_FLOAT16_VALUE( - 4.926), SIMDE_FLOAT16_VALUE( - 26.124) }, + { SIMDE_FLOAT16_VALUE( 7.500), SIMDE_FLOAT16_VALUE( 3.725), SIMDE_FLOAT16_VALUE( 14.056), SIMDE_FLOAT16_VALUE( 25.778) } }, + { { SIMDE_FLOAT16_VALUE( 27.929), SIMDE_FLOAT16_VALUE( 27.206), SIMDE_FLOAT16_VALUE( 22.101), SIMDE_FLOAT16_VALUE( - 22.873) }, + { SIMDE_FLOAT16_VALUE( 16.997), SIMDE_FLOAT16_VALUE( - 9.517), SIMDE_FLOAT16_VALUE( - 8.882), SIMDE_FLOAT16_VALUE( - 2.052) }, + { SIMDE_FLOAT16_VALUE( 10.932), SIMDE_FLOAT16_VALUE( 36.722), SIMDE_FLOAT16_VALUE( 30.983), SIMDE_FLOAT16_VALUE( 20.821) } }, + { { SIMDE_FLOAT16_VALUE( 22.885), SIMDE_FLOAT16_VALUE( 6.304), SIMDE_FLOAT16_VALUE( - 21.768), SIMDE_FLOAT16_VALUE( - 21.264) }, + { SIMDE_FLOAT16_VALUE( 24.212), SIMDE_FLOAT16_VALUE( - 5.067), SIMDE_FLOAT16_VALUE( - 5.485), SIMDE_FLOAT16_VALUE( 27.821) }, + { SIMDE_FLOAT16_VALUE( 1.327), SIMDE_FLOAT16_VALUE( 11.372), SIMDE_FLOAT16_VALUE( 16.283), SIMDE_FLOAT16_VALUE( 
49.085) } }, + { { SIMDE_FLOAT16_VALUE( 10.035), SIMDE_FLOAT16_VALUE( - 23.960), SIMDE_FLOAT16_VALUE( 26.853), SIMDE_FLOAT16_VALUE( - 14.856) }, + { SIMDE_FLOAT16_VALUE( - 22.040), SIMDE_FLOAT16_VALUE( 5.943), SIMDE_FLOAT16_VALUE( 8.366), SIMDE_FLOAT16_VALUE( - 14.752) }, + { SIMDE_FLOAT16_VALUE( 32.074), SIMDE_FLOAT16_VALUE( 29.903), SIMDE_FLOAT16_VALUE( 18.486), SIMDE_FLOAT16_VALUE( 0.104) } }, + { { SIMDE_FLOAT16_VALUE( - 29.884), SIMDE_FLOAT16_VALUE( - 8.797), SIMDE_FLOAT16_VALUE( - 6.539), SIMDE_FLOAT16_VALUE( - 23.907) }, + { SIMDE_FLOAT16_VALUE( - 13.521), SIMDE_FLOAT16_VALUE( 17.627), SIMDE_FLOAT16_VALUE( 1.295), SIMDE_FLOAT16_VALUE( - 26.102) }, + { SIMDE_FLOAT16_VALUE( 16.363), SIMDE_FLOAT16_VALUE( 26.424), SIMDE_FLOAT16_VALUE( 7.834), SIMDE_FLOAT16_VALUE( 2.195) } }, + { { SIMDE_FLOAT16_VALUE( 16.252), SIMDE_FLOAT16_VALUE( 6.172), SIMDE_FLOAT16_VALUE( 24.020), SIMDE_FLOAT16_VALUE( 25.796) }, + { SIMDE_FLOAT16_VALUE( 24.965), SIMDE_FLOAT16_VALUE( - 1.593), SIMDE_FLOAT16_VALUE( - 4.069), SIMDE_FLOAT16_VALUE( 29.576) }, + { SIMDE_FLOAT16_VALUE( 8.712), SIMDE_FLOAT16_VALUE( 7.765), SIMDE_FLOAT16_VALUE( 28.089), SIMDE_FLOAT16_VALUE( 3.780) } }, + { { SIMDE_FLOAT16_VALUE( - 7.863), SIMDE_FLOAT16_VALUE( - 12.557), SIMDE_FLOAT16_VALUE( 23.059), SIMDE_FLOAT16_VALUE( - 2.263) }, + { SIMDE_FLOAT16_VALUE( 21.537), SIMDE_FLOAT16_VALUE( - 26.467), SIMDE_FLOAT16_VALUE( - 13.547), SIMDE_FLOAT16_VALUE( - 13.547) }, + { SIMDE_FLOAT16_VALUE( 29.400), SIMDE_FLOAT16_VALUE( 13.910), SIMDE_FLOAT16_VALUE( 36.606), SIMDE_FLOAT16_VALUE( 11.284) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r = simde_vabd_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = 
simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vabd_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vabd_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -613,6 +740,88 @@ test_simde_vabd_u32 (SIMDE_MUNIT_TEST_ARGS) { } #endif +static int +test_simde_vabdq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 8.295), SIMDE_FLOAT16_VALUE( 4.995), SIMDE_FLOAT16_VALUE( 22.222), SIMDE_FLOAT16_VALUE( 0.992), + SIMDE_FLOAT16_VALUE( - 2.210), SIMDE_FLOAT16_VALUE( 12.462), SIMDE_FLOAT16_VALUE( 20.310), SIMDE_FLOAT16_VALUE( 23.937) }, + { SIMDE_FLOAT16_VALUE( - 7.930), SIMDE_FLOAT16_VALUE( - 24.358), SIMDE_FLOAT16_VALUE( 0.338), SIMDE_FLOAT16_VALUE( 8.720), + SIMDE_FLOAT16_VALUE( - 23.127), SIMDE_FLOAT16_VALUE( 16.025), SIMDE_FLOAT16_VALUE( - 21.022), SIMDE_FLOAT16_VALUE( - 22.508) }, + { SIMDE_FLOAT16_VALUE( 0.365), SIMDE_FLOAT16_VALUE( 29.353), SIMDE_FLOAT16_VALUE( 21.884), SIMDE_FLOAT16_VALUE( 7.728), + SIMDE_FLOAT16_VALUE( 20.917), SIMDE_FLOAT16_VALUE( 3.564), SIMDE_FLOAT16_VALUE( 41.333), SIMDE_FLOAT16_VALUE( 46.446) } }, + { { SIMDE_FLOAT16_VALUE( - 22.970), SIMDE_FLOAT16_VALUE( 8.664), SIMDE_FLOAT16_VALUE( - 23.457), SIMDE_FLOAT16_VALUE( - 28.825), + SIMDE_FLOAT16_VALUE( 7.204), SIMDE_FLOAT16_VALUE( - 13.343), SIMDE_FLOAT16_VALUE( - 15.886), SIMDE_FLOAT16_VALUE( 21.571) }, + { SIMDE_FLOAT16_VALUE( 23.596), SIMDE_FLOAT16_VALUE( - 11.564), SIMDE_FLOAT16_VALUE( 3.180), SIMDE_FLOAT16_VALUE( - 21.809), + SIMDE_FLOAT16_VALUE( - 3.536), SIMDE_FLOAT16_VALUE( - 8.427), SIMDE_FLOAT16_VALUE( - 2.409), SIMDE_FLOAT16_VALUE( 17.366) }, + { SIMDE_FLOAT16_VALUE( 
46.566), SIMDE_FLOAT16_VALUE( 20.228), SIMDE_FLOAT16_VALUE( 26.637), SIMDE_FLOAT16_VALUE( 7.015), + SIMDE_FLOAT16_VALUE( 10.740), SIMDE_FLOAT16_VALUE( 4.915), SIMDE_FLOAT16_VALUE( 13.476), SIMDE_FLOAT16_VALUE( 4.205) } }, + { { SIMDE_FLOAT16_VALUE( - 20.105), SIMDE_FLOAT16_VALUE( - 23.724), SIMDE_FLOAT16_VALUE( - 29.947), SIMDE_FLOAT16_VALUE( 7.383), + SIMDE_FLOAT16_VALUE( 14.751), SIMDE_FLOAT16_VALUE( 6.109), SIMDE_FLOAT16_VALUE( 4.821), SIMDE_FLOAT16_VALUE( - 12.984) }, + { SIMDE_FLOAT16_VALUE( - 10.612), SIMDE_FLOAT16_VALUE( - 13.998), SIMDE_FLOAT16_VALUE( 5.695), SIMDE_FLOAT16_VALUE( - 20.897), + SIMDE_FLOAT16_VALUE( - 2.946), SIMDE_FLOAT16_VALUE( - 7.980), SIMDE_FLOAT16_VALUE( 21.397), SIMDE_FLOAT16_VALUE( - 18.629) }, + { SIMDE_FLOAT16_VALUE( 9.493), SIMDE_FLOAT16_VALUE( 9.727), SIMDE_FLOAT16_VALUE( 35.642), SIMDE_FLOAT16_VALUE( 28.280), + SIMDE_FLOAT16_VALUE( 17.697), SIMDE_FLOAT16_VALUE( 14.090), SIMDE_FLOAT16_VALUE( 16.576), SIMDE_FLOAT16_VALUE( 5.646) } }, + { { SIMDE_FLOAT16_VALUE( - 7.193), SIMDE_FLOAT16_VALUE( 21.992), SIMDE_FLOAT16_VALUE( - 29.519), SIMDE_FLOAT16_VALUE( - 25.565), + SIMDE_FLOAT16_VALUE( - 13.073), SIMDE_FLOAT16_VALUE( - 11.804), SIMDE_FLOAT16_VALUE( 7.752), SIMDE_FLOAT16_VALUE( - 4.463) }, + { SIMDE_FLOAT16_VALUE( - 28.742), SIMDE_FLOAT16_VALUE( 26.026), SIMDE_FLOAT16_VALUE( - 5.720), SIMDE_FLOAT16_VALUE( 25.719), + SIMDE_FLOAT16_VALUE( 11.675), SIMDE_FLOAT16_VALUE( 12.609), SIMDE_FLOAT16_VALUE( - 14.604), SIMDE_FLOAT16_VALUE( - 25.271) }, + { SIMDE_FLOAT16_VALUE( 21.549), SIMDE_FLOAT16_VALUE( 4.034), SIMDE_FLOAT16_VALUE( 23.799), SIMDE_FLOAT16_VALUE( 51.284), + SIMDE_FLOAT16_VALUE( 24.748), SIMDE_FLOAT16_VALUE( 24.413), SIMDE_FLOAT16_VALUE( 22.356), SIMDE_FLOAT16_VALUE( 20.808) } }, + { { SIMDE_FLOAT16_VALUE( 8.905), SIMDE_FLOAT16_VALUE( - 14.001), SIMDE_FLOAT16_VALUE( - 8.390), SIMDE_FLOAT16_VALUE( - 15.984), + SIMDE_FLOAT16_VALUE( - 19.624), SIMDE_FLOAT16_VALUE( - 0.882), SIMDE_FLOAT16_VALUE( - 2.926), SIMDE_FLOAT16_VALUE( 18.326) 
}, + { SIMDE_FLOAT16_VALUE( - 29.855), SIMDE_FLOAT16_VALUE( - 19.437), SIMDE_FLOAT16_VALUE( - 3.480), SIMDE_FLOAT16_VALUE( 20.717), + SIMDE_FLOAT16_VALUE( 15.087), SIMDE_FLOAT16_VALUE( 29.414), SIMDE_FLOAT16_VALUE( - 18.136), SIMDE_FLOAT16_VALUE( 23.088) }, + { SIMDE_FLOAT16_VALUE( 38.760), SIMDE_FLOAT16_VALUE( 5.436), SIMDE_FLOAT16_VALUE( 4.909), SIMDE_FLOAT16_VALUE( 36.701), + SIMDE_FLOAT16_VALUE( 34.711), SIMDE_FLOAT16_VALUE( 30.296), SIMDE_FLOAT16_VALUE( 15.211), SIMDE_FLOAT16_VALUE( 4.761) } }, + { { SIMDE_FLOAT16_VALUE( - 11.174), SIMDE_FLOAT16_VALUE( 17.793), SIMDE_FLOAT16_VALUE( - 15.861), SIMDE_FLOAT16_VALUE( 4.535), + SIMDE_FLOAT16_VALUE( 3.586), SIMDE_FLOAT16_VALUE( - 6.093), SIMDE_FLOAT16_VALUE( 16.589), SIMDE_FLOAT16_VALUE( - 7.298) }, + { SIMDE_FLOAT16_VALUE( - 0.016), SIMDE_FLOAT16_VALUE( 11.946), SIMDE_FLOAT16_VALUE( - 7.115), SIMDE_FLOAT16_VALUE( - 2.201), + SIMDE_FLOAT16_VALUE( 25.600), SIMDE_FLOAT16_VALUE( 5.034), SIMDE_FLOAT16_VALUE( 4.084), SIMDE_FLOAT16_VALUE( - 12.814) }, + { SIMDE_FLOAT16_VALUE( 11.157), SIMDE_FLOAT16_VALUE( 5.847), SIMDE_FLOAT16_VALUE( 8.747), SIMDE_FLOAT16_VALUE( 6.736), + SIMDE_FLOAT16_VALUE( 22.014), SIMDE_FLOAT16_VALUE( 11.127), SIMDE_FLOAT16_VALUE( 12.505), SIMDE_FLOAT16_VALUE( 5.515) } }, + { { SIMDE_FLOAT16_VALUE( - 6.550), SIMDE_FLOAT16_VALUE( 21.553), SIMDE_FLOAT16_VALUE( - 24.361), SIMDE_FLOAT16_VALUE( - 5.605), + SIMDE_FLOAT16_VALUE( - 3.650), SIMDE_FLOAT16_VALUE( 1.541), SIMDE_FLOAT16_VALUE( - 24.603), SIMDE_FLOAT16_VALUE( - 14.867) }, + { SIMDE_FLOAT16_VALUE( 13.611), SIMDE_FLOAT16_VALUE( - 17.264), SIMDE_FLOAT16_VALUE( - 29.209), SIMDE_FLOAT16_VALUE( 25.166), + SIMDE_FLOAT16_VALUE( 27.878), SIMDE_FLOAT16_VALUE( 4.965), SIMDE_FLOAT16_VALUE( 2.760), SIMDE_FLOAT16_VALUE( 7.916) }, + { SIMDE_FLOAT16_VALUE( 20.161), SIMDE_FLOAT16_VALUE( 38.817), SIMDE_FLOAT16_VALUE( 4.847), SIMDE_FLOAT16_VALUE( 30.772), + SIMDE_FLOAT16_VALUE( 31.527), SIMDE_FLOAT16_VALUE( 3.424), SIMDE_FLOAT16_VALUE( 27.363), SIMDE_FLOAT16_VALUE( 
22.784) } }, + { { SIMDE_FLOAT16_VALUE( - 12.183), SIMDE_FLOAT16_VALUE( 28.248), SIMDE_FLOAT16_VALUE( 25.999), SIMDE_FLOAT16_VALUE( - 20.310), + SIMDE_FLOAT16_VALUE( 28.887), SIMDE_FLOAT16_VALUE( 8.731), SIMDE_FLOAT16_VALUE( 13.501), SIMDE_FLOAT16_VALUE( 19.172) }, + { SIMDE_FLOAT16_VALUE( - 0.224), SIMDE_FLOAT16_VALUE( 9.143), SIMDE_FLOAT16_VALUE( 28.172), SIMDE_FLOAT16_VALUE( - 7.432), + SIMDE_FLOAT16_VALUE( 14.048), SIMDE_FLOAT16_VALUE( 19.218), SIMDE_FLOAT16_VALUE( - 15.964), SIMDE_FLOAT16_VALUE( 18.074) }, + { SIMDE_FLOAT16_VALUE( 11.959), SIMDE_FLOAT16_VALUE( 19.105), SIMDE_FLOAT16_VALUE( 2.173), SIMDE_FLOAT16_VALUE( 12.878), + SIMDE_FLOAT16_VALUE( 14.838), SIMDE_FLOAT16_VALUE( 10.487), SIMDE_FLOAT16_VALUE( 29.465), SIMDE_FLOAT16_VALUE( 1.098) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vabdq_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vabdq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vabdq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1127,9 +1336,11 @@ test_simde_vabdq_u32 (SIMDE_MUNIT_TEST_ARGS) { #endif SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vabdh_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vabds_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdd_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vabd_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_f64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vabd_s8) @@ -1137,6 +1348,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vabd_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vabd_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vabdq_s8) diff --git a/test/arm/neon/abdl_high.c b/test/arm/neon/abdl_high.c new file mode 100644 index 000000000..53865a35a --- /dev/null +++ b/test/arm/neon/abdl_high.c @@ -0,0 +1,513 @@ +#define SIMDE_TEST_ARM_NEON_INSN abdl_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/abdl_high.h" + +static int +test_simde_vabdl_high_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t a[16]; + int8_t b[16]; + int16_t r[8]; + } test_vec[] = { + { { INT8_C( 5), INT8_C( 25), -INT8_C( 13), INT8_C( 124), + -INT8_C( 100), INT8_C( 4), -INT8_C( 109), -INT8_C( 86), + INT8_C( 3), INT8_C( 17), INT8_C( 57), -INT8_C( 9), + INT8_C( 75), INT8_C( 92), -INT8_C( 54), -INT8_C( 126) }, + { INT8_C( 39), -INT8_C( 9), -INT8_C( 71), -INT8_C( 47), + -INT8_C( 12), -INT8_C( 17), -INT8_C( 70), -INT8_C( 122), + INT8_C( 117), INT8_C( 86), INT8_C( 28), -INT8_C( 69), + INT8_C( 124), INT8_C( 81), INT8_C( 3), INT8_C( 94) }, + { INT16_C( 114), INT16_C( 69), INT16_C( 29), INT16_C( 60), + INT16_C( 49), INT16_C( 11), INT16_C( 57), INT16_C( 220) } }, + { { INT8_C( 28), -INT8_C( 68), -INT8_C( 50), INT8_C( 88), + -INT8_C( 106), INT8_C( 12), INT8_C( 60), INT8_C( 30), + INT8_C( 112), INT8_C( 78), INT8_C( 120), INT8_C( 45), + INT8_C( 94), -INT8_C( 32), -INT8_C( 77), -INT8_C( 8) }, + { -INT8_C( 98), INT8_C( 81), INT8_C( 62), INT8_C( 32), + INT8_C( 104), INT8_C( 18), -INT8_C( 52), INT8_C( 30), + INT8_C( 9), -INT8_C( 2), -INT8_C( 123), -INT8_C( 4), + INT8_C( 5), INT8_C( 104), INT8_C( 13), -INT8_C( 31) }, + { INT16_C( 103), INT16_C( 80), INT16_C( 243), INT16_C( 49), + INT16_C( 89), INT16_C( 136), INT16_C( 90), INT16_C( 23) } }, + { { INT8_C( 119), -INT8_C( 43), -INT8_C( 14), -INT8_C( 
9), + -INT8_C( 69), -INT8_C( 5), -INT8_C( 69), -INT8_C( 41), + INT8_C( 38), INT8_C( 13), INT8_C( 82), INT8_C( 106), + -INT8_C( 118), -INT8_C( 26), -INT8_C( 23), -INT8_C( 23) }, + { INT8_C( 30), INT8_C( 91), -INT8_C( 100), INT8_C( 88), + INT8_C( 37), INT8_C( 122), INT8_C( 96), INT8_C( 24), + -INT8_C( 64), -INT8_C( 19), -INT8_C( 33), -INT8_C( 74), + -INT8_C( 93), -INT8_C( 64), INT8_C( 75), INT8_C( 81) }, + { INT16_C( 102), INT16_C( 32), INT16_C( 115), INT16_C( 180), + INT16_C( 25), INT16_C( 38), INT16_C( 98), INT16_C( 104) } }, + { { INT8_C( 35), INT8_C( 125), INT8_C( 50), -INT8_C( 42), + -INT8_C( 90), -INT8_C( 50), -INT8_C( 42), INT8_C( 124), + INT8_C( 125), INT8_C( 79), INT8_C( 108), -INT8_C( 102), + -INT8_C( 67), INT8_C( 111), -INT8_C( 128), -INT8_C( 86) }, + { INT8_C( 52), -INT8_C( 104), -INT8_C( 112), -INT8_C( 112), + -INT8_C( 12), INT8_C( 42), INT8_C( 69), -INT8_C( 87), + INT8_C( 6), INT8_C( 23), -INT8_C( 54), -INT8_C( 112), + INT8_C( 96), -INT8_C( 59), INT8_C( 44), -INT8_C( 77) }, + { INT16_C( 119), INT16_C( 56), INT16_C( 162), INT16_C( 10), + INT16_C( 163), INT16_C( 170), INT16_C( 172), INT16_C( 9) } }, + { { INT8_C( 47), -INT8_C( 101), INT8_C( 77), -INT8_C( 79), + -INT8_C( 55), -INT8_C( 110), -INT8_C( 59), -INT8_C( 82), + INT8_C( 18), INT8_C( 120), INT8_C( 8), -INT8_C( 20), + INT8_C( 124), -INT8_C( 31), -INT8_C( 84), INT8_C( 2) }, + { -INT8_C( 122), INT8_C( 112), INT8_C( 74), -INT8_C( 92), + -INT8_C( 79), -INT8_C( 127), -INT8_C( 117), -INT8_C( 27), + -INT8_C( 115), -INT8_C( 87), -INT8_C( 1), -INT8_C( 77), + INT8_C( 63), -INT8_C( 74), -INT8_C( 59), INT8_C( 1) }, + { INT16_C( 133), INT16_C( 207), INT16_C( 9), INT16_C( 57), + INT16_C( 61), INT16_C( 43), INT16_C( 25), INT16_C( 1) } }, + { { INT8_C( 7), -INT8_C( 84), INT8_C( 67), INT8_C( 1), + INT8_C( 58), -INT8_C( 90), -INT8_C( 26), -INT8_C( 90), + INT8_C( 14), INT8_C( 57), -INT8_C( 49), INT8_C( 93), + INT8_C( 119), INT8_C( 27), -INT8_C( 90), INT8_C( 107) }, + { -INT8_C( 97), INT8_C( 82), INT8_C( 84), INT8_C( 
62), + INT8_C( 126), -INT8_C( 10), INT8_C( 87), INT8_C( 70), + -INT8_C( 23), INT8_C( 5), -INT8_C( 49), -INT8_C( 28), + -INT8_C( 37), -INT8_C( 26), -INT8_C( 45), INT8_C( 75) }, + { INT16_C( 37), INT16_C( 52), INT16_C( 0), INT16_C( 121), + INT16_C( 156), INT16_C( 53), INT16_C( 45), INT16_C( 32) } }, + { { INT8_C( 56), -INT8_C( 12), INT8_C( 78), INT8_C( 19), + -INT8_C( 99), INT8_C( 116), INT8_C( 27), INT8_C( 60), + INT8_C( 7), INT8_C( 82), INT8_C( 82), INT8_C( 32), + INT8_C( 65), INT8_C( 86), INT8_C( 116), -INT8_C( 105) }, + { INT8_C( 104), -INT8_C( 124), INT8_C( 29), -INT8_C( 107), + -INT8_C( 8), INT8_C( 24), -INT8_C( 120), -INT8_C( 118), + -INT8_C( 64), INT8_C( 120), INT8_C( 16), INT8_C( 33), + -INT8_C( 65), -INT8_C( 26), -INT8_C( 77), -INT8_C( 107) }, + { INT16_C( 71), INT16_C( 38), INT16_C( 66), INT16_C( 1), + INT16_C( 130), INT16_C( 112), INT16_C( 193), INT16_C( 2) } }, + { { -INT8_C( 48), -INT8_C( 94), -INT8_C( 19), INT8_C( 64), + -INT8_C( 66), -INT8_C( 42), -INT8_C( 85), -INT8_C( 75), + INT8_C( 63), INT8_C( 76), -INT8_C( 116), INT8_C( 86), + INT8_C( 48), -INT8_C( 38), -INT8_C( 11), -INT8_C( 63) }, + { INT8_C( 36), -INT8_C( 15), INT8_C( 10), INT8_C( 125), + INT8_C( 25), INT8_C( 7), -INT8_C( 113), INT8_C( 18), + -INT8_C( 78), INT8_C( 19), INT8_C( 10), -INT8_C( 39), + INT8_C( 33), -INT8_C( 116), -INT8_C( 88), -INT8_C( 117) }, + { INT16_C( 141), INT16_C( 57), INT16_C( 126), INT16_C( 125), + INT16_C( 15), INT16_C( 78), INT16_C( 77), INT16_C( 54) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + simde_int16x8_t r = simde_vabdl_high_s8(a, b); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + 
simde_int16x8_t r = simde_vabdl_high_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vabdl_high_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a[8]; + int16_t b[8]; + int32_t r[4]; + } test_vec[] = { + { { INT16_C( 29690), -INT16_C( 24168), -INT16_C( 27667), INT16_C( 8070), + -INT16_C( 21906), INT16_C( 4540), INT16_C( 29724), INT16_C( 24943) }, + { -INT16_C( 1819), INT16_C( 9889), -INT16_C( 26106), INT16_C( 5615), + INT16_C( 19638), INT16_C( 27513), INT16_C( 2962), INT16_C( 21043) }, + { INT32_C( 41544), INT32_C( 22973), INT32_C( 26762), INT32_C( 3900) } }, + { { INT16_C( 28652), INT16_C( 18761), INT16_C( 4451), INT16_C( 10847), + -INT16_C( 5071), INT16_C( 3419), -INT16_C( 32086), INT16_C( 21511) }, + { -INT16_C( 19278), -INT16_C( 305), -INT16_C( 22484), INT16_C( 2209), + INT16_C( 6312), -INT16_C( 26943), INT16_C( 3625), INT16_C( 2183) }, + { INT32_C( 11383), INT32_C( 30362), INT32_C( 35711), INT32_C( 19328) } }, + { { INT16_C( 22486), INT16_C( 29554), -INT16_C( 10737), INT16_C( 3632), + INT16_C( 2333), INT16_C( 14499), -INT16_C( 22389), -INT16_C( 22353) }, + { -INT16_C( 8960), -INT16_C( 30270), INT16_C( 19487), INT16_C( 17272), + -INT16_C( 12445), -INT16_C( 20518), -INT16_C( 28048), INT16_C( 26583) }, + { INT32_C( 14778), INT32_C( 35017), INT32_C( 5659), INT32_C( 48936) } }, + { { INT16_C( 14410), INT16_C( 5324), -INT16_C( 23116), -INT16_C( 25881), + INT16_C( 5142), INT16_C( 5975), -INT16_C( 22642), -INT16_C( 9474) }, + { INT16_C( 30417), -INT16_C( 24641), -INT16_C( 29549), -INT16_C( 17979), + INT16_C( 29468), INT16_C( 15706), INT16_C( 29466), -INT16_C( 2986) }, + { INT32_C( 24326), INT32_C( 9731), INT32_C( 52108), INT32_C( 6488) } }, + { { -INT16_C( 17906), INT16_C( 24882), -INT16_C( 18998), -INT16_C( 31125), + INT16_C( 
19250), INT16_C( 6092), INT16_C( 7400), -INT16_C( 20958) }, + { INT16_C( 8828), INT16_C( 31854), -INT16_C( 26593), INT16_C( 29376), + INT16_C( 13278), -INT16_C( 30888), -INT16_C( 10545), -INT16_C( 20911) }, + { INT32_C( 5972), INT32_C( 36980), INT32_C( 17945), INT32_C( 47) } }, + { { -INT16_C( 12386), INT16_C( 10034), INT16_C( 16722), -INT16_C( 30032), + INT16_C( 32463), -INT16_C( 32041), -INT16_C( 25016), -INT16_C( 9365) }, + { INT16_C( 11965), -INT16_C( 25381), INT16_C( 9794), INT16_C( 3483), + -INT16_C( 18921), INT16_C( 25033), INT16_C( 15507), INT16_C( 31746) }, + { INT32_C( 51384), INT32_C( 57074), INT32_C( 40523), INT32_C( 41111) } }, + { { -INT16_C( 32282), INT16_C( 14736), INT16_C( 6831), -INT16_C( 31451), + -INT16_C( 32288), INT16_C( 4220), INT16_C( 3340), INT16_C( 18843) }, + { -INT16_C( 25272), INT16_C( 401), -INT16_C( 29291), INT16_C( 1110), + -INT16_C( 19746), INT16_C( 27566), -INT16_C( 25524), -INT16_C( 20491) }, + { INT32_C( 12542), INT32_C( 23346), INT32_C( 28864), INT32_C( 39334) } }, + { { -INT16_C( 16675), INT16_C( 13257), INT16_C( 19538), INT16_C( 11877), + -INT16_C( 31293), INT16_C( 15882), -INT16_C( 15409), -INT16_C( 7949) }, + { -INT16_C( 22845), -INT16_C( 6299), -INT16_C( 10856), -INT16_C( 31293), + -INT16_C( 20761), INT16_C( 17929), -INT16_C( 3424), -INT16_C( 27554) }, + { INT32_C( 10532), INT32_C( 2047), INT32_C( 11985), INT32_C( 19605) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int32x4_t r = simde_vabdl_high_s16(a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int32x4_t r = simde_vabdl_high_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, 
a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vabdl_high_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a[4]; + int32_t b[4]; + int64_t r[2]; + } test_vec[] = { + { { -INT32_C( 787468699), INT32_C( 1695271439), -INT32_C( 351095935), -INT32_C( 1545598394) }, + { -INT32_C( 1783679309), -INT32_C( 970531182), INT32_C( 175616594), INT32_C( 1309057832) }, + { INT64_C( 526712529), INT64_C( 2854656226) } }, + { { INT32_C( 1080367040), INT32_C( 1130061194), -INT32_C( 960325369), -INT32_C( 460539914) }, + { -INT32_C( 1121247508), -INT32_C( 1971610234), INT32_C( 1201086722), -INT32_C( 864366830) }, + { INT64_C( 2161412091), INT64_C( 403826916) } }, + { { -INT32_C( 1575160001), INT32_C( 282864985), INT32_C( 1075615256), -INT32_C( 432396887) }, + { INT32_C( 2109360031), -INT32_C( 1753323785), INT32_C( 1229001961), INT32_C( 202062281) }, + { INT64_C( 153386705), INT64_C( 634459168) } }, + { { INT32_C( 68291402), -INT32_C( 1313041192), INT32_C( 329338864), INT32_C( 297630675) }, + { INT32_C( 542845242), -INT32_C( 1361117108), INT32_C( 171758027), INT32_C( 1488836943) }, + { INT64_C( 157580837), INT64_C( 1191206268) } }, + { { INT32_C( 1239184031), INT32_C( 276088380), INT32_C( 366348024), -INT32_C( 977026686) }, + { -INT32_C( 1788467255), -INT32_C( 1337699041), INT32_C( 1941530698), INT32_C( 1724505255) }, + { INT64_C( 1575182674), INT64_C( 2701531941) } }, + { { -INT32_C( 394449041), -INT32_C( 1496706648), INT32_C( 461438308), -INT32_C( 462426104) }, + { INT32_C( 316700335), INT32_C( 2112040702), -INT32_C( 812534998), INT32_C( 363376033) }, + { INT64_C( 1273973306), INT64_C( 825802137) } }, + { { INT32_C( 1732933553), -INT32_C( 496211310), INT32_C( 689320739), -INT32_C( 1431777230) }, + { INT32_C( 34189302), -INT32_C( 778655550), INT32_C( 1839840728), INT32_C( 1359603307) }, + { INT64_C( 
1150519989), INT64_C( 2791380537) } }, + { { INT32_C( 543026096), -INT32_C( 2143144503), INT32_C( 962996740), -INT32_C( 359161913) }, + { INT32_C( 1590535608), -INT32_C( 2105115010), INT32_C( 1494859055), INT32_C( 292780821) }, + { INT64_C( 531862315), INT64_C( 651942734) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int64x2_t r = simde_vabdl_high_s32(a, b); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int64x2_t r = simde_vabdl_high_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vabdl_high_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t a[16]; + uint8_t b[16]; + uint16_t r[8]; + } test_vec[] = { + { { UINT8_C( 227), UINT8_C( 184), UINT8_C( 17), UINT8_C( 37), + UINT8_C( 21), UINT8_C( 160), UINT8_C( 222), UINT8_C( 160), + UINT8_C( 79), UINT8_C( 122), UINT8_C( 82), UINT8_C( 237), + UINT8_C( 70), UINT8_C( 55), UINT8_C( 190), UINT8_C( 188) }, + { UINT8_C( 17), UINT8_C( 235), UINT8_C( 28), UINT8_C( 202), + UINT8_C( 74), UINT8_C( 105), UINT8_C( 129), UINT8_C( 67), + UINT8_C( 70), UINT8_C( 191), UINT8_C( 51), UINT8_C( 88), + UINT8_C( 234), UINT8_C( 164), UINT8_C( 168), UINT8_C( 83) }, + { UINT16_C( 9), UINT16_C( 69), UINT16_C( 31), UINT16_C( 149), + UINT16_C( 164), UINT16_C( 109), UINT16_C( 22), UINT16_C( 105) } }, + { { UINT8_C( 138), UINT8_C( 229), UINT8_C( 107), UINT8_C( 243), + UINT8_C( 0), UINT8_C( 181), UINT8_C( 227), UINT8_C( 95), + 
UINT8_C( 5), UINT8_C( 124), UINT8_C( 79), UINT8_C( 131), + UINT8_C( 38), UINT8_C( 197), UINT8_C( 101), UINT8_C( 192) }, + { UINT8_C( 176), UINT8_C( 255), UINT8_C( 71), UINT8_C( 90), + UINT8_C( 167), UINT8_C( 36), UINT8_C( 133), UINT8_C( 99), + UINT8_C( 56), UINT8_C( 250), UINT8_C( 193), UINT8_C( 5), + UINT8_C( 19), UINT8_C( 123), UINT8_C( 185), UINT8_C( 128) }, + { UINT16_C( 51), UINT16_C( 126), UINT16_C( 114), UINT16_C( 126), + UINT16_C( 19), UINT16_C( 74), UINT16_C( 84), UINT16_C( 64) } }, + { { UINT8_C( 55), UINT8_C( 102), UINT8_C( 236), UINT8_C( 4), + UINT8_C( 119), UINT8_C( 65), UINT8_C( 67), UINT8_C( 67), + UINT8_C( 225), UINT8_C( 99), UINT8_C( 7), UINT8_C( 10), + UINT8_C( 96), UINT8_C( 114), UINT8_C( 244), UINT8_C( 218) }, + { UINT8_C( 30), UINT8_C( 137), UINT8_C( 111), UINT8_C( 227), + UINT8_C( 232), UINT8_C( 120), UINT8_C( 229), UINT8_C( 115), + UINT8_C( 155), UINT8_C( 50), UINT8_C( 250), UINT8_C( 70), + UINT8_C( 200), UINT8_C( 95), UINT8_C( 184), UINT8_C( 254) }, + { UINT16_C( 70), UINT16_C( 49), UINT16_C( 243), UINT16_C( 60), + UINT16_C( 104), UINT16_C( 19), UINT16_C( 60), UINT16_C( 36) } }, + { { UINT8_C( 213), UINT8_C( 223), UINT8_C( 163), UINT8_C( 143), + UINT8_C( 214), UINT8_C( 173), UINT8_C( 75), UINT8_C( 16), + UINT8_C( 174), UINT8_C( 196), UINT8_C( 192), UINT8_C( 153), + UINT8_C( 123), UINT8_C( 10), UINT8_C( 161), UINT8_C( 250) }, + { UINT8_C( 255), UINT8_C( 49), UINT8_C( 201), UINT8_C( 125), + UINT8_C( 165), UINT8_C( 241), UINT8_C( 100), UINT8_C( 249), + UINT8_C( 133), UINT8_C( 74), UINT8_C( 34), UINT8_C( 109), + UINT8_C( 226), UINT8_C( 60), UINT8_C( 157), UINT8_C( 118) }, + { UINT16_C( 41), UINT16_C( 122), UINT16_C( 158), UINT16_C( 44), + UINT16_C( 103), UINT16_C( 50), UINT16_C( 4), UINT16_C( 132) } }, + { { UINT8_C( 100), UINT8_C( 76), UINT8_C( 199), UINT8_C( 194), + UINT8_C( 148), UINT8_C( 6), UINT8_C( 174), UINT8_C( 180), + UINT8_C( 217), UINT8_C( 107), UINT8_C( 111), UINT8_C( 196), + UINT8_C( 166), UINT8_C( 234), UINT8_C( 207), UINT8_C( 98) 
}, + { UINT8_C( 171), UINT8_C( 231), UINT8_C( 153), UINT8_C( 116), + UINT8_C( 140), UINT8_C( 194), UINT8_C( 233), UINT8_C( 242), + UINT8_C( 79), UINT8_C( 37), UINT8_C( 70), UINT8_C( 26), + UINT8_C( 105), UINT8_C( 40), UINT8_C( 135), UINT8_C( 131) }, + { UINT16_C( 138), UINT16_C( 70), UINT16_C( 41), UINT16_C( 170), + UINT16_C( 61), UINT16_C( 194), UINT16_C( 72), UINT16_C( 33) } }, + { { UINT8_C( 87), UINT8_C( 46), UINT8_C( 59), UINT8_C( 163), + UINT8_C( 6), UINT8_C( 216), UINT8_C( 223), UINT8_C( 146), + UINT8_C( 208), UINT8_C( 83), UINT8_C( 84), UINT8_C( 233), + UINT8_C( 193), UINT8_C( 19), UINT8_C( 196), UINT8_C( 3) }, + { UINT8_C( 18), UINT8_C( 57), UINT8_C( 85), UINT8_C( 23), + UINT8_C( 176), UINT8_C( 178), UINT8_C( 31), UINT8_C( 46), + UINT8_C( 189), UINT8_C( 148), UINT8_C( 249), UINT8_C( 205), + UINT8_C( 86), UINT8_C( 240), UINT8_C( 175), UINT8_C( 171) }, + { UINT16_C( 19), UINT16_C( 65), UINT16_C( 165), UINT16_C( 28), + UINT16_C( 107), UINT16_C( 221), UINT16_C( 21), UINT16_C( 168) } }, + { { UINT8_C( 34), UINT8_C( 208), UINT8_C( 154), UINT8_C( 166), + UINT8_C( 95), UINT8_C( 255), UINT8_C( 138), UINT8_C( 144), + UINT8_C( 185), UINT8_C( 107), UINT8_C( 94), UINT8_C( 107), + UINT8_C( 1), UINT8_C( 117), UINT8_C( 3), UINT8_C( 144) }, + { UINT8_C( 88), UINT8_C( 134), UINT8_C( 39), UINT8_C( 53), + UINT8_C( 61), UINT8_C( 79), UINT8_C( 253), UINT8_C( 177), + UINT8_C( 253), UINT8_C( 17), UINT8_C( 1), UINT8_C( 101), + UINT8_C( 210), UINT8_C( 128), UINT8_C( 255), UINT8_C( 45) }, + { UINT16_C( 68), UINT16_C( 90), UINT16_C( 93), UINT16_C( 6), + UINT16_C( 209), UINT16_C( 11), UINT16_C( 252), UINT16_C( 99) } }, + { { UINT8_C( 126), UINT8_C( 225), UINT8_C( 243), UINT8_C( 203), + UINT8_C( 180), UINT8_C( 184), UINT8_C( 187), UINT8_C( 61), + UINT8_C( 246), UINT8_C( 189), UINT8_C( 128), UINT8_C( 135), + UINT8_C( 167), UINT8_C( 92), UINT8_C( 40), UINT8_C( 168) }, + { UINT8_C( 25), UINT8_C( 142), UINT8_C( 132), UINT8_C( 14), + UINT8_C( 29), UINT8_C( 204), UINT8_C( 58), UINT8_C( 229), 
+ UINT8_C( 65), UINT8_C( 238), UINT8_C( 210), UINT8_C( 45), + UINT8_C( 191), UINT8_C( 63), UINT8_C( 220), UINT8_C( 200) }, + { UINT16_C( 181), UINT16_C( 49), UINT16_C( 82), UINT16_C( 90), + UINT16_C( 24), UINT16_C( 29), UINT16_C( 180), UINT16_C( 32) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); + simde_uint16x8_t r = simde_vabdl_high_u8(a, b); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint16x8_t r = simde_vabdl_high_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vabdl_high_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint32_t r[4]; + } test_vec[] = { + { { UINT16_C( 11080), UINT16_C( 28941), UINT16_C( 54244), UINT16_C( 2424), + UINT16_C( 6185), UINT16_C( 35176), UINT16_C( 9035), UINT16_C( 6428) }, + { UINT16_C( 65511), UINT16_C( 46505), UINT16_C( 37008), UINT16_C( 6083), + UINT16_C( 57861), UINT16_C( 8256), UINT16_C( 49759), UINT16_C( 43953) }, + { UINT32_C( 51676), UINT32_C( 26920), UINT32_C( 40724), UINT32_C( 37525) } }, + { { UINT16_C( 610), UINT16_C( 19991), UINT16_C( 48540), UINT16_C( 23933), + UINT16_C( 17176), UINT16_C( 27734), UINT16_C( 2781), UINT16_C( 9794) }, + { UINT16_C( 14579), UINT16_C( 50810), UINT16_C( 25818), UINT16_C( 54221), + UINT16_C( 13934), UINT16_C( 50029), UINT16_C( 28040), UINT16_C( 54984) }, + { UINT32_C( 3242), UINT32_C( 22295), UINT32_C( 25259), UINT32_C( 45190) } }, + { { 
UINT16_C( 20635), UINT16_C( 15524), UINT16_C( 56681), UINT16_C( 34063), + UINT16_C( 12313), UINT16_C( 34359), UINT16_C( 22139), UINT16_C( 62709) }, + { UINT16_C( 48540), UINT16_C( 49524), UINT16_C( 26037), UINT16_C( 33636), + UINT16_C( 2174), UINT16_C( 30778), UINT16_C( 32366), UINT16_C( 36639) }, + { UINT32_C( 10139), UINT32_C( 3581), UINT32_C( 10227), UINT32_C( 26070) } }, + { { UINT16_C( 7906), UINT16_C( 59201), UINT16_C( 11503), UINT16_C( 30882), + UINT16_C( 36534), UINT16_C( 53191), UINT16_C( 2796), UINT16_C( 42790) }, + { UINT16_C( 17653), UINT16_C( 53464), UINT16_C( 5406), UINT16_C( 42682), + UINT16_C( 61443), UINT16_C( 36702), UINT16_C( 62252), UINT16_C( 33301) }, + { UINT32_C( 24909), UINT32_C( 16489), UINT32_C( 59456), UINT32_C( 9489) } }, + { { UINT16_C( 368), UINT16_C( 28659), UINT16_C( 17363), UINT16_C( 52426), + UINT16_C( 18831), UINT16_C( 31144), UINT16_C( 26653), UINT16_C( 4962) }, + { UINT16_C( 13272), UINT16_C( 56638), UINT16_C( 26816), UINT16_C( 9319), + UINT16_C( 33450), UINT16_C( 46002), UINT16_C( 46512), UINT16_C( 11194) }, + { UINT32_C( 14619), UINT32_C( 14858), UINT32_C( 19859), UINT32_C( 6232) } }, + { { UINT16_C( 24373), UINT16_C( 3749), UINT16_C( 30479), UINT16_C( 53729), + UINT16_C( 36062), UINT16_C( 2091), UINT16_C( 20756), UINT16_C( 37273) }, + { UINT16_C( 14682), UINT16_C( 15683), UINT16_C( 59327), UINT16_C( 55116), + UINT16_C( 48897), UINT16_C( 34602), UINT16_C( 26495), UINT16_C( 13053) }, + { UINT32_C( 12835), UINT32_C( 32511), UINT32_C( 5739), UINT32_C( 24220) } }, + { { UINT16_C( 61763), UINT16_C( 15289), UINT16_C( 14482), UINT16_C( 35178), + UINT16_C( 28312), UINT16_C( 40394), UINT16_C( 54355), UINT16_C( 8397) }, + { UINT16_C( 31322), UINT16_C( 46229), UINT16_C( 21846), UINT16_C( 5486), + UINT16_C( 783), UINT16_C( 16002), UINT16_C( 57330), UINT16_C( 30354) }, + { UINT32_C( 27529), UINT32_C( 24392), UINT32_C( 2975), UINT32_C( 21957) } }, + { { UINT16_C( 61340), UINT16_C( 10896), UINT16_C( 25008), UINT16_C( 62657), + UINT16_C( 
6688), UINT16_C( 40315), UINT16_C( 60584), UINT16_C( 9942) }, + { UINT16_C( 49306), UINT16_C( 62288), UINT16_C( 5856), UINT16_C( 51005), + UINT16_C( 57343), UINT16_C( 24018), UINT16_C( 20003), UINT16_C( 26261) }, + { UINT32_C( 50655), UINT32_C( 16297), UINT32_C( 40581), UINT32_C( 16319) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint32x4_t r = simde_vabdl_high_u16(a, b); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint32x4_t r = simde_vabdl_high_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vabdl_high_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint64_t r[2]; + } test_vec[] = { + { { UINT32_C( 4207167503), UINT32_C( 4182969940), UINT32_C( 4265939436), UINT32_C( 3572856407) }, + { UINT32_C( 1122616623), UINT32_C( 1433619022), UINT32_C( 3926591544), UINT32_C( 238681857) }, + { UINT64_C( 339347892), UINT64_C( 3334174550) } }, + { { UINT32_C( 3618013472), UINT32_C( 2859644811), UINT32_C( 3674865098), UINT32_C( 2171453939) }, + { UINT32_C( 3082235870), UINT32_C( 3143735662), UINT32_C( 1101178394), UINT32_C( 1015750680) }, + { UINT64_C( 2573686704), UINT64_C( 1155703259) } }, + { { UINT32_C( 2881588591), UINT32_C( 1972897176), UINT32_C( 1922891383), UINT32_C( 4044065985) }, + { UINT32_C( 27808731), UINT32_C( 3506496718), UINT32_C( 354305683), UINT32_C( 1811920773) }, + { UINT64_C( 1568585700), 
UINT64_C( 2232145212) } }, + { { UINT32_C( 796289424), UINT32_C( 3657819838), UINT32_C( 882914741), UINT32_C( 590761261) }, + { UINT32_C( 916488915), UINT32_C( 3250147444), UINT32_C( 4194695410), UINT32_C( 2666509233) }, + { UINT64_C( 3311780669), UINT64_C( 2075747972) } }, + { { UINT32_C( 1593704277), UINT32_C( 353382449), UINT32_C( 300269132), UINT32_C( 466013123) }, + { UINT32_C( 3161726502), UINT32_C( 2871668464), UINT32_C( 3932181037), UINT32_C( 593185941) }, + { UINT64_C( 3631911905), UINT64_C( 127172818) } }, + { { UINT32_C( 1353149015), UINT32_C( 2743917811), UINT32_C( 1661571991), UINT32_C( 2186200682) }, + { UINT32_C( 705050360), UINT32_C( 1373863029), UINT32_C( 3778121108), UINT32_C( 4074917396) }, + { UINT64_C( 2116549117), UINT64_C( 1888716714) } }, + { { UINT32_C( 3636552503), UINT32_C( 2704297856), UINT32_C( 2100074352), UINT32_C( 3166585636) }, + { UINT32_C( 1117640888), UINT32_C( 3540546728), UINT32_C( 3237396642), UINT32_C( 564732391) }, + { UINT64_C( 1137322290), UINT64_C( 2601853245) } }, + { { UINT32_C( 4050420069), UINT32_C( 159811253), UINT32_C( 3375491289), UINT32_C( 3543754642) }, + { UINT32_C( 719089014), UINT32_C( 2621554727), UINT32_C( 2869382109), UINT32_C( 269948350) }, + { UINT64_C( 506109180), UINT64_C( 3273806292) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint64x2_t r = simde_vabdl_high_u32(a, b); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint64x2_t r = simde_vabdl_high_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + 
simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vabdl_high_u32) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/abs.c b/test/arm/neon/abs.c index beda70cb3..72154a27d 100644 --- a/test/arm/neon/abs.c +++ b/test/arm/neon/abs.c @@ -3,6 +3,52 @@ #include "test-neon.h" #include "../../../simde/arm/neon/abs.h" +static int +test_simde_vabsh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + simde_float16 r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( - 24.786), + SIMDE_FLOAT16_VALUE( 24.786) }, + { SIMDE_FLOAT16_VALUE( 27.135), + SIMDE_FLOAT16_VALUE( 27.135) }, + { SIMDE_FLOAT16_VALUE( 16.821), + SIMDE_FLOAT16_VALUE( 16.821) }, + { SIMDE_FLOAT16_VALUE( - 25.652), + SIMDE_FLOAT16_VALUE( 25.652) }, + { SIMDE_FLOAT16_VALUE( 8.252), + SIMDE_FLOAT16_VALUE( 8.252) }, + { SIMDE_FLOAT16_VALUE( 1.127), + SIMDE_FLOAT16_VALUE( 1.127) }, + { SIMDE_FLOAT16_VALUE( - 26.641), + SIMDE_FLOAT16_VALUE( 26.641) }, + { SIMDE_FLOAT16_VALUE( - 2.663), + SIMDE_FLOAT16_VALUE( 2.663) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t r = simde_vabsh_f16(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_float16_t r = simde_vabsh_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vabsd_s64 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -50,6 +96,7 @@ test_simde_vabsd_s64 
(SIMDE_MUNIT_TEST_ARGS) { static int test_simde_vabs_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[4]; simde_float16 r[4]; @@ -84,10 +131,23 @@ test_simde_vabs_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vabs_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[2]; simde_float32 r[2]; @@ -118,10 +178,23 @@ test_simde_vabs_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vabs_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[1]; simde_float64 r[1]; @@ -152,10 +225,23 @@ test_simde_vabs_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x1_t r = simde_vabs_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int8_t a[8]; int8_t r[8]; @@ -186,10 +272,23 @@ test_simde_vabs_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = 
simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vabs_s8(a); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int16_t a[4]; int16_t r[4]; @@ -220,10 +319,23 @@ test_simde_vabs_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vabs_s16(a); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int32_t a[2]; int32_t r[2]; @@ -254,10 +366,23 @@ test_simde_vabs_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vabs_s32(a); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabs_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int64_t a[1]; int64_t r[1]; @@ -288,10 +413,23 @@ test_simde_vabs_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); + simde_int64x1_t r = simde_vabs_s64(a); + + simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[8]; simde_float16 r[8]; @@ -325,10 +463,23 @@ test_simde_vabsq_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + 
fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vabsq_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[4]; simde_float32 r[4]; @@ -358,10 +509,23 @@ test_simde_vabsq_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vabsq_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[2]; simde_float64 r[2]; @@ -392,10 +556,23 @@ test_simde_vabsq_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r = simde_vabsq_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int8_t a[16]; int8_t r[16]; @@ -442,10 +619,23 @@ test_simde_vabsq_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = simde_vabsq_s8(a); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } 
static int test_simde_vabsq_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int16_t a[8]; int16_t r[8]; @@ -476,10 +666,23 @@ test_simde_vabsq_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vabsq_s16(a); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int32_t a[4]; int32_t r[4]; @@ -509,10 +712,23 @@ test_simde_vabsq_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vabsq_s32(a); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vabsq_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int64_t a[2]; int64_t r[2]; @@ -542,9 +758,22 @@ test_simde_vabsq_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vabsq_s64(a); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vabsh_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vabsd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vabs_f16) diff --git a/test/arm/neon/add_testgen.py b/test/arm/neon/add_testgen.py deleted file mode 100644 index 66d6ad5ad..000000000 --- a/test/arm/neon/add_testgen.py +++ /dev/null @@ -1,114 +0,0 @@ -import os -import re -import csv - - -def gen_test(v_type_list, v_ele_list, v_name_list, 
func_name): - print(v_type_list) - print(v_ele_list) - print(v_name_list) - test_content = ''' -#else - fputc('\\n', stdout); - for (int i = 0 ; i < 8 ; i++) {\n''' - for i in range(len(v_type_list)-1): - test_content = test_content+' simde_'+v_type_list[i]+v_ele_list[i]+'_t '+v_name_list[i]+' = simde_test_arm_neon_random_'+v_type_list[i][0]+v_ele_list[i]+'();\n' - test_content = test_content+' simde_'+v_type_list[-1]+v_ele_list[-1]+'_t '+v_name_list[-1]+' = '+func_name+'(' - for i in range(len(v_name_list)-1): - if i != len(v_name_list)-2: - test_content = test_content+v_name_list[i]+', ' - else: - test_content = test_content+v_name_list[i]+');\n\n' - - for i in range(len(v_name_list)): - if i == 0: - test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_FIRST);\n' - elif i == len(v_name_list)-1: - test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_LAST);\n' - else: - test_content = test_content + ' simde_test_arm_neon_write_'+v_type_list[i][0]+v_ele_list[i]+'(2, '+v_name_list[i]+', SIMDE_TEST_VEC_POS_MIDDLE);\n' - test_content = test_content + ' }\n return 1;\n#endif\n' - - return test_content - - -type_list = [["float16"], - ["float32", "float"], - ["float64", "double"], - ["uint8"], - ["uint16"], - ["uint32", "unsigned int", "unsigned"], - ["uint64"], - ["int8"], - ["int16"], - ["int32", "int"], - ["int64"]] - -dic_type_list = {"float16":["float", "16"], - "float32":["float", "32"],} - -def main_gen(file_path): - # Open the file for reading - with open(file_path, 'r') as file: - lines = file.readlines() - for i in range(len(lines)): - if "static int" in lines[i]: - if "#if 1" not in lines[i+2]: - func_name = lines[i+1][5:lines[i+1].find(' ')] - lines.insert(i+2, '#if 1\n') - print(f"line numbers: {i}, {func_name}") - # get input para - v_type_list = [] # ex. 
float16 or uint32 - v_ele_list = [] # ex. 32x2 - v_name_list = [] # ex. a - for j in range(i, i+1000, 1): - if "struct" in lines[j]: - while 'test_vec' not in lines[j]: - j += 1 - # get type - found = False - variable_len = ['1'] - for rows in range(len(type_list)): - if not found: - for cols in range(len(type_list[rows])): - if type_list[rows][cols] in lines[j]: - v_type = '' - for c in type_list[rows][0]: - if c.isdigit(): - break - v_type += c - v_type_list.append(v_type) - found = True - variable_len = re.findall(r'\d+', type_list[rows][0]) - break - else: - break - # get elements - if '[' in lines[j] and '}' not in lines[j]: - v_ele_list.append(variable_len[0]+'x'+lines[j][lines[j].find('[')+1:lines[j].find(']')]) - v_name_list.append(lines[j][lines[j].rfind(' ')+1:lines[j].rfind('[')]) - elif '}' not in lines[j]: - v_ele_list.append(variable_len[0]+'x1') - v_name_list.append(lines[j][lines[j].rfind(' ')+1:lines[j].rfind(';')]) - if "return" in lines[j]: - # Add gen_test function - add_content = gen_test(v_type_list, v_ele_list, v_name_list, func_name) - lines.insert(j+1, add_content) - break - # Write the modified content back to the file - with open(file_path, 'w') as file: - file.writelines(lines) - - pass - - -if __name__ == '__main__': - # Open the modify_c.txt file and read its contents - with open('modify_c.txt', 'r') as modify_c_file: - file_names = modify_c_file.read().splitlines() - - for file_name in file_names: - print(f'Start {file_name}') - main_gen(file_name) - print(f'Done {file_name}') - diff --git a/test/arm/neon/addhn_high.c b/test/arm/neon/addhn_high.c new file mode 100644 index 000000000..ab415341a --- /dev/null +++ b/test/arm/neon/addhn_high.c @@ -0,0 +1,554 @@ +#define SIMDE_TEST_ARM_NEON_INSN addhn_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/addhn_high.h" + +static int +test_simde_vaddhn_high_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t origin_r[8]; + int16_t a[8]; + int16_t b[8]; + int8_t 
r[16]; + } test_vec[] = { + { { -INT8_C( 61), -INT8_C( 36), INT8_C( 101), INT8_C( 118), + INT8_C( 5), INT8_C( 38), INT8_C( 90), -INT8_C( 48) }, + { -INT16_C( 8244), -INT16_C( 939), INT16_C( 7799), INT16_C( 10369), + INT16_C( 10076), INT16_C( 15809), -INT16_C( 15725), INT16_C( 16407) }, + { INT16_C( 19796), -INT16_C( 5464), -INT16_C( 28478), INT16_C( 11563), + -INT16_C( 10609), -INT16_C( 21211), -INT16_C( 28058), INT16_C( 14771) }, + { -INT8_C( 61), -INT8_C( 36), INT8_C( 101), INT8_C( 118), + INT8_C( 5), INT8_C( 38), INT8_C( 90), -INT8_C( 48), + INT8_C( 45), -INT8_C( 26), -INT8_C( 81), INT8_C( 85), + -INT8_C( 3), -INT8_C( 22), INT8_C( 84), INT8_C( 121) } }, + { { -INT8_C( 97), -INT8_C( 63), -INT8_C( 29), INT8_C( 38), + INT8_C( 64), -INT8_C( 66), -INT8_C( 24), -INT8_C( 43) }, + { INT16_C( 24705), INT16_C( 30976), INT16_C( 631), -INT16_C( 31504), + INT16_C( 9667), INT16_C( 2488), INT16_C( 8715), -INT16_C( 199) }, + { -INT16_C( 24960), INT16_C( 20198), INT16_C( 22009), -INT16_C( 409), + INT16_C( 5277), INT16_C( 12713), INT16_C( 14944), INT16_C( 32237) }, + { -INT8_C( 97), -INT8_C( 63), -INT8_C( 29), INT8_C( 38), + INT8_C( 64), -INT8_C( 66), -INT8_C( 24), -INT8_C( 43), + -INT8_C( 1), -INT8_C( 57), INT8_C( 88), -INT8_C( 125), + INT8_C( 58), INT8_C( 59), INT8_C( 92), INT8_C( 125) } }, + { { INT8_C( 72), INT8_C( 73), -INT8_C( 107), -INT8_C( 96), + -INT8_C( 30), INT8_C( 39), -INT8_C( 123), INT8_C( 21) }, + { -INT16_C( 21549), INT16_C( 21299), -INT16_C( 11846), -INT16_C( 11763), + -INT16_C( 17359), INT16_C( 23796), -INT16_C( 27921), INT16_C( 7298) }, + { -INT16_C( 17251), INT16_C( 29846), -INT16_C( 19967), -INT16_C( 6977), + -INT16_C( 14403), INT16_C( 20581), -INT16_C( 21783), INT16_C( 8203) }, + { INT8_C( 72), INT8_C( 73), -INT8_C( 107), -INT8_C( 96), + -INT8_C( 30), INT8_C( 39), -INT8_C( 123), INT8_C( 21), + INT8_C( 104), -INT8_C( 57), -INT8_C( 125), -INT8_C( 74), + -INT8_C( 125), -INT8_C( 83), INT8_C( 61), INT8_C( 60) } }, + { { INT8_C( 52), INT8_C( 56), -INT8_C( 1), 
-INT8_C( 32), + INT8_C( 88), -INT8_C( 36), -INT8_C( 98), -INT8_C( 75) }, + { -INT16_C( 1503), INT16_C( 11183), -INT16_C( 26958), -INT16_C( 23638), + -INT16_C( 27007), -INT16_C( 4796), INT16_C( 1261), -INT16_C( 18964) }, + { -INT16_C( 15248), INT16_C( 13875), INT16_C( 18453), INT16_C( 26114), + INT16_C( 13003), -INT16_C( 24538), -INT16_C( 11457), INT16_C( 2464) }, + { INT8_C( 52), INT8_C( 56), -INT8_C( 1), -INT8_C( 32), + INT8_C( 88), -INT8_C( 36), -INT8_C( 98), -INT8_C( 75), + -INT8_C( 66), INT8_C( 97), -INT8_C( 34), INT8_C( 9), + -INT8_C( 55), -INT8_C( 115), -INT8_C( 40), -INT8_C( 65) } }, + { { -INT8_C( 84), -INT8_C( 1), INT8_C( 50), -INT8_C( 44), + INT8_C( 83), -INT8_C( 31), INT8_C( 8), INT8_C( 71) }, + { -INT16_C( 6417), -INT16_C( 17538), INT16_C( 29763), INT16_C( 1214), + -INT16_C( 28461), INT16_C( 1968), -INT16_C( 9702), -INT16_C( 26363) }, + { INT16_C( 31827), -INT16_C( 14828), -INT16_C( 28361), -INT16_C( 24697), + -INT16_C( 25550), INT16_C( 18231), -INT16_C( 19362), INT16_C( 19659) }, + { -INT8_C( 84), -INT8_C( 1), INT8_C( 50), -INT8_C( 44), + INT8_C( 83), -INT8_C( 31), INT8_C( 8), INT8_C( 71), + INT8_C( 99), -INT8_C( 127), INT8_C( 5), -INT8_C( 92), + INT8_C( 45), INT8_C( 78), -INT8_C( 114), -INT8_C( 27) } }, + { { -INT8_C( 41), -INT8_C( 7), -INT8_C( 118), INT8_C( 82), + INT8_C( 21), INT8_C( 79), INT8_C( 37), INT8_C( 96) }, + { -INT16_C( 14532), -INT16_C( 2875), INT16_C( 27061), -INT16_C( 4795), + INT16_C( 9795), INT16_C( 8274), INT16_C( 3770), -INT16_C( 15366) }, + { INT16_C( 15622), INT16_C( 26195), INT16_C( 32415), INT16_C( 30676), + INT16_C( 5556), INT16_C( 16742), -INT16_C( 3588), INT16_C( 24022) }, + { -INT8_C( 41), -INT8_C( 7), -INT8_C( 118), INT8_C( 82), + INT8_C( 21), INT8_C( 79), INT8_C( 37), INT8_C( 96), + INT8_C( 4), INT8_C( 91), -INT8_C( 24), INT8_C( 101), + INT8_C( 59), INT8_C( 97), INT8_C( 0), INT8_C( 33) } }, + { { -INT8_C( 59), -INT8_C( 2), INT8_C( 9), INT8_C( 108), + -INT8_C( 99), -INT8_C( 43), -INT8_C( 16), INT8_C( 89) }, + { INT16_C( 
13559), -INT16_C( 904), INT16_C( 6886), INT16_C( 19984), + INT16_C( 18666), -INT16_C( 8533), INT16_C( 22436), -INT16_C( 15261) }, + { INT16_C( 7691), INT16_C( 13814), INT16_C( 14534), INT16_C( 23813), + INT16_C( 23486), INT16_C( 5908), -INT16_C( 20988), -INT16_C( 25417) }, + { -INT8_C( 59), -INT8_C( 2), INT8_C( 9), INT8_C( 108), + -INT8_C( 99), -INT8_C( 43), -INT8_C( 16), INT8_C( 89), + INT8_C( 83), INT8_C( 50), INT8_C( 83), -INT8_C( 85), + -INT8_C( 92), -INT8_C( 11), INT8_C( 5), INT8_C( 97) } }, + { { -INT8_C( 57), -INT8_C( 64), -INT8_C( 98), INT8_C( 99), + INT8_C( 4), -INT8_C( 85), -INT8_C( 15), INT8_C( 26) }, + { -INT16_C( 28861), INT16_C( 4168), -INT16_C( 2113), INT16_C( 23408), + INT16_C( 2195), -INT16_C( 25357), INT16_C( 4606), -INT16_C( 5610) }, + { -INT16_C( 29669), -INT16_C( 957), INT16_C( 20282), INT16_C( 18151), + -INT16_C( 29362), INT16_C( 18807), -INT16_C( 20997), INT16_C( 24015) }, + { -INT8_C( 57), -INT8_C( 64), -INT8_C( 98), INT8_C( 99), + INT8_C( 4), -INT8_C( 85), -INT8_C( 15), INT8_C( 26), + INT8_C( 27), INT8_C( 12), INT8_C( 70), -INT8_C( 94), + -INT8_C( 107), -INT8_C( 26), -INT8_C( 65), INT8_C( 71) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t origin_r = simde_vld1_s8(test_vec[i].origin_r); + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int8x16_t r = simde_vaddhn_high_s16(origin_r, a, b); + + simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t origin_r = simde_test_arm_neon_random_i8x8(); + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int8x16_t r = simde_vaddhn_high_s16(origin_r, a, b); + + simde_test_arm_neon_write_i8x8(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, a, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vaddhn_high_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t origin_r[4]; + int32_t a[4]; + int32_t b[4]; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C( 24669), INT16_C( 18457), INT16_C( 18918), -INT16_C( 14001) }, + { INT32_C( 1483207355), -INT32_C( 456511157), INT32_C( 1424887713), INT32_C( 94302488) }, + { -INT32_C( 1132458979), -INT32_C( 2126257994), INT32_C( 1514575984), -INT32_C( 1335835137) }, + { -INT16_C( 24669), INT16_C( 18457), INT16_C( 18918), -INT16_C( 14001), + INT16_C( 5351), INT16_C( 26126), -INT16_C( 20684), -INT16_C( 18945) } }, + { { INT16_C( 31189), -INT16_C( 11485), INT16_C( 4407), -INT16_C( 16586) }, + { INT32_C( 215140660), -INT32_C( 1171538740), INT32_C( 1718439732), INT32_C( 815282501) }, + { INT32_C( 404107251), INT32_C( 914811459), INT32_C( 251170030), -INT32_C( 360609987) }, + { INT16_C( 31189), -INT16_C( 11485), INT16_C( 4407), -INT16_C( 16586), + INT16_C( 9448), -INT16_C( 3918), INT16_C( 30053), INT16_C( 6937) } }, + { { INT16_C( 15685), INT16_C( 30826), -INT16_C( 24755), INT16_C( 25262) }, + { INT32_C( 613112243), -INT32_C( 284779600), -INT32_C( 515248826), INT32_C( 642309127) }, + { -INT32_C( 1339990145), INT32_C( 576195649), -INT32_C( 1767316590), -INT32_C( 2037097214) }, + { INT16_C( 15685), INT16_C( 30826), -INT16_C( 24755), INT16_C( 25262), + -INT16_C( 11092), INT16_C( 4446), INT16_C( 30706), -INT16_C( 21283) } }, + { { INT16_C( 6325), -INT16_C( 19117), INT16_C( 24681), INT16_C( 21422) }, + { -INT32_C( 2066760783), -INT32_C( 1270926266), -INT32_C( 1737064360), INT32_C( 1326893499) }, + { -INT32_C( 1770139956), INT32_C( 303972981), -INT32_C( 1658815640), INT32_C( 67382087) }, + { INT16_C( 6325), -INT16_C( 19117), INT16_C( 24681), INT16_C( 21422), + INT16_C( 6989), -INT16_C( 14755), INT16_C( 
13718), INT16_C( 21274) } }, + { { INT16_C( 9569), -INT16_C( 13533), -INT16_C( 53), -INT16_C( 24443) }, + { -INT32_C( 856343336), INT32_C( 490191900), -INT32_C( 795751106), -INT32_C( 1123832281) }, + { -INT32_C( 526858630), -INT32_C( 2098625289), -INT32_C( 1069818864), -INT32_C( 1888359905) }, + { INT16_C( 9569), -INT16_C( 13533), -INT16_C( 53), -INT16_C( 24443), + -INT16_C( 21106), -INT16_C( 24543), -INT16_C( 28467), INT16_C( 19573) } }, + { { -INT16_C( 9906), INT16_C( 2769), -INT16_C( 32487), INT16_C( 28195) }, + { -INT32_C( 358150056), INT32_C( 610719560), -INT32_C( 1263704656), INT32_C( 414821454) }, + { INT32_C( 2109161422), INT32_C( 705138128), -INT32_C( 1263227665), -INT32_C( 1697222027) }, + { -INT16_C( 9906), INT16_C( 2769), -INT16_C( 32487), INT16_C( 28195), + INT16_C( 26718), INT16_C( 20078), INT16_C( 26978), -INT16_C( 19568) } }, + { { INT16_C( 5638), -INT16_C( 20590), -INT16_C( 15113), -INT16_C( 16728) }, + { INT32_C( 1222471517), -INT32_C( 703191341), -INT32_C( 837435852), INT32_C( 1063583750) }, + { INT32_C( 125790875), INT32_C( 433330324), INT32_C( 1232993791), -INT32_C( 602419856) }, + { INT16_C( 5638), -INT16_C( 20590), -INT16_C( 15113), -INT16_C( 16728), + INT16_C( 20572), -INT16_C( 4118), INT16_C( 6035), INT16_C( 7036) } }, + { { INT16_C( 22367), -INT16_C( 9818), INT16_C( 19398), INT16_C( 5711) }, + { INT32_C( 959448246), -INT32_C( 1653704784), INT32_C( 1771944911), -INT32_C( 1736697356) }, + { INT32_C( 204323872), INT32_C( 1921740113), INT32_C( 183457932), -INT32_C( 1347390424) }, + { INT16_C( 22367), -INT16_C( 9818), INT16_C( 19398), INT16_C( 5711), + INT16_C( 17757), INT16_C( 4089), INT16_C( 29837), INT16_C( 18476) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t origin_r = simde_vld1_s16(test_vec[i].origin_r); + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int16x8_t r = simde_vaddhn_high_s32(origin_r, a, b); + + 
simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t origin_r = simde_test_arm_neon_random_i16x4(); + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int16x8_t r = simde_vaddhn_high_s32(origin_r, a, b); + + simde_test_arm_neon_write_i16x4(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vaddhn_high_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t origin_r[2]; + int64_t a[2]; + int64_t b[2]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 875919724), -INT32_C( 611701371) }, + { -INT64_C( 5363233495659755355), -INT64_C( 2481988998786220685) }, + { INT64_C( 7239945546212710468), -INT64_C( 5153377132087920374) }, + { -INT32_C( 875919724), -INT32_C( 611701371), INT32_C( 436956074), -INT32_C( 1777747211) } }, + { { INT32_C( 1546325407), INT32_C( 25514132) }, + { INT64_C( 2045254645663010519), -INT64_C( 2356451622031062525) }, + { -INT64_C( 1683308634378662434), INT64_C( 1426644783803511503) }, + { INT32_C( 1546325407), INT32_C( 25514132), INT32_C( 84272122), -INT32_C( 216487525) } }, + { { -INT32_C( 1877683093), INT32_C( 1644070792) }, + { INT64_C( 7916377094681146216), INT64_C( 5937992222963267175) }, + { -INT64_C( 242985364762285855), -INT64_C( 1604448757119539491) }, + { -INT32_C( 1877683093), INT32_C( 1644070792), INT32_C( 1786600735), INT32_C( 1008981714) } }, + { { -INT32_C( 208383898), -INT32_C( 341136280) }, + { -INT64_C( 8252129662742766661), INT64_C( 1604094474450927037) }, + { INT64_C( 4050857789190689228), -INT64_C( 1684910522907015914) }, + { -INT32_C( 208383898), -INT32_C( 341136280), -INT32_C( 
978184835), -INT32_C( 18816453) } }, + { { -INT32_C( 1817703838), INT32_C( 1140769713) }, + { -INT64_C( 1055906823835588567), INT64_C( 1204751403051979576) }, + { -INT64_C( 290103604780850678), INT64_C( 7712064513047616751) }, + { -INT32_C( 1817703838), INT32_C( 1140769713), -INT32_C( 313392475), INT32_C( 2076107989) } }, + { { -INT32_C( 486477035), -INT32_C( 23379121) }, + { -INT64_C( 1523463849335043181), -INT64_C( 5118519349782276590) }, + { INT64_C( 2340765730881937810), -INT64_C( 5575603933151014689) }, + { -INT32_C( 486477035), -INT32_C( 23379121), INT32_C( 190292923), INT32_C( 1805047688) } }, + { { -INT32_C( 1828626319), -INT32_C( 574613224) }, + { -INT64_C( 451861104164026903), INT64_C( 3418646987013530109) }, + { INT64_C( 2607517827968759110), -INT64_C( 6646145769758301701) }, + { -INT32_C( 1828626319), -INT32_C( 574613224), INT32_C( 501902942), -INT32_C( 751460619) } }, + { { INT32_C( 284641515), INT32_C( 291374305) }, + { INT64_C( 1991838246836783916), -INT64_C( 3225070997924962494) }, + { INT64_C( 989020284874251523), -INT64_C( 2365308517439811907) }, + { INT32_C( 284641515), INT32_C( 291374305), INT32_C( 694035210), -INT32_C( 1301611661) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t origin_r = simde_vld1_s32(test_vec[i].origin_r); + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int32x4_t r = simde_vaddhn_high_s64(origin_r, a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t origin_r = simde_test_arm_neon_random_i32x2(); + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int32x4_t r = simde_vaddhn_high_s64(origin_r, a, b); + + simde_test_arm_neon_write_i32x2(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vaddhn_high_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t origin_r[8]; + uint16_t a[8]; + uint16_t b[8]; + uint8_t r[16]; + } test_vec[] = { + { { UINT8_C( 235), UINT8_C( 148), UINT8_C( 52), UINT8_C( 228), + UINT8_C( 229), UINT8_C( 76), UINT8_C( 37), UINT8_C( 43) }, + { UINT16_C( 59991), UINT16_C( 10679), UINT16_C( 58806), UINT16_C( 11644), + UINT16_C( 59568), UINT16_C( 4439), UINT16_C( 62018), UINT16_C( 6926) }, + { UINT16_C( 32490), UINT16_C( 54533), UINT16_C( 28256), UINT16_C( 61377), + UINT16_C( 56063), UINT16_C( 58874), UINT16_C( 47058), UINT16_C( 41345) }, + { UINT8_C( 235), UINT8_C( 148), UINT8_C( 52), UINT8_C( 228), + UINT8_C( 229), UINT8_C( 76), UINT8_C( 37), UINT8_C( 43), + UINT8_C( 105), UINT8_C( 254), UINT8_C( 84), UINT8_C( 29), + UINT8_C( 195), UINT8_C( 247), UINT8_C( 170), UINT8_C( 188) } }, + { { UINT8_C( 154), UINT8_C( 69), UINT8_C( 254), UINT8_C( 158), + UINT8_C( 171), UINT8_C( 8), UINT8_C( 83), UINT8_C( 247) }, + { UINT16_C( 39091), UINT16_C( 96), UINT16_C( 16665), UINT16_C( 61050), + UINT16_C( 63106), UINT16_C( 5836), UINT16_C( 58088), UINT16_C( 41694) }, + { UINT16_C( 10893), UINT16_C( 21538), UINT16_C( 1339), UINT16_C( 25269), + UINT16_C( 29881), UINT16_C( 25995), UINT16_C( 57072), UINT16_C( 1949) }, + { UINT8_C( 154), UINT8_C( 69), UINT8_C( 254), UINT8_C( 158), + UINT8_C( 171), UINT8_C( 8), UINT8_C( 83), UINT8_C( 247), + UINT8_C( 195), UINT8_C( 84), UINT8_C( 70), UINT8_C( 81), + UINT8_C( 107), UINT8_C( 124), UINT8_C( 193), UINT8_C( 170) } }, + { { UINT8_C( 229), UINT8_C( 66), UINT8_C( 141), UINT8_C( 174), + UINT8_C( 106), UINT8_C( 214), UINT8_C( 238), UINT8_C( 221) }, + { UINT16_C( 14687), UINT16_C( 16815), UINT16_C( 25086), UINT16_C( 7268), + UINT16_C( 64446), UINT16_C( 
39053), UINT16_C( 6226), UINT16_C( 52078) }, + { UINT16_C( 48646), UINT16_C( 53755), UINT16_C( 16387), UINT16_C( 857), + UINT16_C( 11253), UINT16_C( 59544), UINT16_C( 42058), UINT16_C( 56009) }, + { UINT8_C( 229), UINT8_C( 66), UINT8_C( 141), UINT8_C( 174), + UINT8_C( 106), UINT8_C( 214), UINT8_C( 238), UINT8_C( 221), + UINT8_C( 247), UINT8_C( 19), UINT8_C( 162), UINT8_C( 31), + UINT8_C( 39), UINT8_C( 129), UINT8_C( 188), UINT8_C( 166) } }, + { { UINT8_C( 145), UINT8_C( 112), UINT8_C( 93), UINT8_C( 126), + UINT8_C( 170), UINT8_C( 107), UINT8_C( 141), UINT8_C( 241) }, + { UINT16_C( 55228), UINT16_C( 54638), UINT16_C( 8684), UINT16_C( 64081), + UINT16_C( 24013), UINT16_C( 61659), UINT16_C( 37575), UINT16_C( 55057) }, + { UINT16_C( 16933), UINT16_C( 18833), UINT16_C( 16187), UINT16_C( 23898), + UINT16_C( 13390), UINT16_C( 4974), UINT16_C( 51021), UINT16_C( 17832) }, + { UINT8_C( 145), UINT8_C( 112), UINT8_C( 93), UINT8_C( 126), + UINT8_C( 170), UINT8_C( 107), UINT8_C( 141), UINT8_C( 241), + UINT8_C( 25), UINT8_C( 30), UINT8_C( 97), UINT8_C( 87), + UINT8_C( 146), UINT8_C( 4), UINT8_C( 90), UINT8_C( 28) } }, + { { UINT8_C( 220), UINT8_C( 44), UINT8_C( 37), UINT8_C( 53), + UINT8_C( 136), UINT8_C( 114), UINT8_C( 33), UINT8_C( 93) }, + { UINT16_C( 18411), UINT16_C( 57288), UINT16_C( 61228), UINT16_C( 10173), + UINT16_C( 18310), UINT16_C( 59072), UINT16_C( 43164), UINT16_C( 56049) }, + { UINT16_C( 40950), UINT16_C( 60464), UINT16_C( 61096), UINT16_C( 61247), + UINT16_C( 22991), UINT16_C( 41963), UINT16_C( 30287), UINT16_C( 15373) }, + { UINT8_C( 220), UINT8_C( 44), UINT8_C( 37), UINT8_C( 53), + UINT8_C( 136), UINT8_C( 114), UINT8_C( 33), UINT8_C( 93), + UINT8_C( 231), UINT8_C( 203), UINT8_C( 221), UINT8_C( 22), + UINT8_C( 161), UINT8_C( 138), UINT8_C( 30), UINT8_C( 22) } }, + { { UINT8_C( 101), UINT8_C( 113), UINT8_C( 9), UINT8_C( 195), + UINT8_C( 242), UINT8_C( 223), UINT8_C( 120), UINT8_C( 29) }, + { UINT16_C( 45990), UINT16_C( 17262), UINT16_C( 20427), UINT16_C( 9484), + 
UINT16_C( 6727), UINT16_C( 63330), UINT16_C( 1127), UINT16_C( 21155) }, + { UINT16_C( 35209), UINT16_C( 55926), UINT16_C( 60255), UINT16_C( 36713), + UINT16_C( 58877), UINT16_C( 4044), UINT16_C( 33835), UINT16_C( 25741) }, + { UINT8_C( 101), UINT8_C( 113), UINT8_C( 9), UINT8_C( 195), + UINT8_C( 242), UINT8_C( 223), UINT8_C( 120), UINT8_C( 29), + UINT8_C( 61), UINT8_C( 29), UINT8_C( 59), UINT8_C( 180), + UINT8_C( 0), UINT8_C( 7), UINT8_C( 136), UINT8_C( 183) } }, + { { UINT8_C( 251), UINT8_C( 182), UINT8_C( 137), UINT8_C( 181), + UINT8_C( 102), UINT8_C( 216), UINT8_C( 34), UINT8_C( 214) }, + { UINT16_C( 59002), UINT16_C( 42857), UINT16_C( 34543), UINT16_C( 12998), + UINT16_C( 17765), UINT16_C( 18315), UINT16_C( 55739), UINT16_C( 54044) }, + { UINT16_C( 50115), UINT16_C( 53343), UINT16_C( 37227), UINT16_C( 38681), + UINT16_C( 51396), UINT16_C( 65434), UINT16_C( 14319), UINT16_C( 37063) }, + { UINT8_C( 251), UINT8_C( 182), UINT8_C( 137), UINT8_C( 181), + UINT8_C( 102), UINT8_C( 216), UINT8_C( 34), UINT8_C( 214), + UINT8_C( 170), UINT8_C( 119), UINT8_C( 24), UINT8_C( 201), + UINT8_C( 14), UINT8_C( 71), UINT8_C( 17), UINT8_C( 99) } }, + { { UINT8_C( 59), UINT8_C( 235), UINT8_C( 41), UINT8_C( 151), + UINT8_C( 200), UINT8_C( 100), UINT8_C( 90), UINT8_C( 81) }, + { UINT16_C( 496), UINT16_C( 28246), UINT16_C( 17686), UINT16_C( 6514), + UINT16_C( 2532), UINT16_C( 19169), UINT16_C( 55181), UINT16_C( 51073) }, + { UINT16_C( 48042), UINT16_C( 49131), UINT16_C( 26293), UINT16_C( 36384), + UINT16_C( 32401), UINT16_C( 16635), UINT16_C( 62305), UINT16_C( 48077) }, + { UINT8_C( 59), UINT8_C( 235), UINT8_C( 41), UINT8_C( 151), + UINT8_C( 200), UINT8_C( 100), UINT8_C( 90), UINT8_C( 81), + UINT8_C( 189), UINT8_C( 46), UINT8_C( 171), UINT8_C( 167), + UINT8_C( 136), UINT8_C( 139), UINT8_C( 202), UINT8_C( 131) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t origin_r = simde_vld1_u8(test_vec[i].origin_r); + simde_uint16x8_t a = 
simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint8x16_t r = simde_vaddhn_high_u16(origin_r, a, b); + + simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t origin_r = simde_test_arm_neon_random_u8x8(); + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint8x16_t r = simde_vaddhn_high_u16(origin_r, a, b); + + simde_test_arm_neon_write_u8x8(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vaddhn_high_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t origin_r[4]; + uint32_t a[4]; + uint32_t b[4]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 22402), UINT16_C( 19447), UINT16_C( 29079), UINT16_C( 14209) }, + { UINT32_C( 1929567810), UINT32_C( 1266888035), UINT32_C( 1849623859), UINT32_C( 3726673809) }, + { UINT32_C( 2519677390), UINT32_C( 2059703146), UINT32_C( 2573172414), UINT32_C( 1595598271) }, + { UINT16_C( 22402), UINT16_C( 19447), UINT16_C( 29079), UINT16_C( 14209), + UINT16_C( 2354), UINT16_C( 50759), UINT16_C( 1950), UINT16_C( 15675) } }, + { { UINT16_C( 618), UINT16_C( 51138), UINT16_C( 65366), UINT16_C( 31419) }, + { UINT32_C( 1868195489), UINT32_C( 1420069982), UINT32_C( 4044850792), UINT32_C( 3232126363) }, + { UINT32_C( 948569143), UINT32_C( 2414434135), UINT32_C( 3377393960), UINT32_C( 3007343278) }, + { UINT16_C( 618), UINT16_C( 51138), UINT16_C( 65366), UINT16_C( 31419), + UINT16_C( 42980), UINT16_C( 58509), UINT16_C( 47718), UINT16_C( 29670) } }, + { { UINT16_C( 29712), UINT16_C( 62413), UINT16_C( 3556), UINT16_C( 12336) }, + { UINT32_C( 
3579194679), UINT32_C( 3597667256), UINT32_C( 3534168561), UINT32_C( 586088586) }, + { UINT32_C( 3730139011), UINT32_C( 4072807016), UINT32_C( 1665138045), UINT32_C( 2345160034) }, + { UINT16_C( 29712), UINT16_C( 62413), UINT16_C( 3556), UINT16_C( 12336), + UINT16_C( 45995), UINT16_C( 51506), UINT16_C( 13799), UINT16_C( 44727) } }, + { { UINT16_C( 47199), UINT16_C( 8365), UINT16_C( 17491), UINT16_C( 36856) }, + { UINT32_C( 3633156783), UINT32_C( 4249061138), UINT32_C( 2972923334), UINT32_C( 2067160322) }, + { UINT32_C( 924301717), UINT32_C( 4091983461), UINT32_C( 2873169028), UINT32_C( 3375663598) }, + { UINT16_C( 47199), UINT16_C( 8365), UINT16_C( 17491), UINT16_C( 36856), + UINT16_C( 4005), UINT16_C( 61738), UINT16_C( 23668), UINT16_C( 17514) } }, + { { UINT16_C( 49769), UINT16_C( 33766), UINT16_C( 57708), UINT16_C( 57639) }, + { UINT32_C( 2701853101), UINT32_C( 1016519639), UINT32_C( 360812144), UINT32_C( 61524080) }, + { UINT32_C( 3779201450), UINT32_C( 3717588777), UINT32_C( 3560643117), UINT32_C( 1620040156) }, + { UINT16_C( 49769), UINT16_C( 33766), UINT16_C( 57708), UINT16_C( 57639), + UINT16_C( 33357), UINT16_C( 6700), UINT16_C( 59836), UINT16_C( 25658) } }, + { { UINT16_C( 17387), UINT16_C( 23001), UINT16_C( 18935), UINT16_C( 33412) }, + { UINT32_C( 1640049796), UINT32_C( 669756688), UINT32_C( 966637419), UINT32_C( 131869858) }, + { UINT32_C( 857685405), UINT32_C( 1691950446), UINT32_C( 977461260), UINT32_C( 1655627858) }, + { UINT16_C( 17387), UINT16_C( 23001), UINT16_C( 18935), UINT16_C( 33412), + UINT16_C( 38112), UINT16_C( 36036), UINT16_C( 29664), UINT16_C( 27275) } }, + { { UINT16_C( 59350), UINT16_C( 33131), UINT16_C( 29709), UINT16_C( 15264) }, + { UINT32_C( 331712893), UINT32_C( 307801489), UINT32_C( 3795005980), UINT32_C( 1037830267) }, + { UINT32_C( 1418078308), UINT32_C( 4288944375), UINT32_C( 1176888264), UINT32_C( 2500393476) }, + { UINT16_C( 59350), UINT16_C( 33131), UINT16_C( 29709), UINT16_C( 15264), + UINT16_C( 26699), UINT16_C( 4604), 
UINT16_C( 10329), UINT16_C( 53989) } }, + { { UINT16_C( 42489), UINT16_C( 62752), UINT16_C( 12059), UINT16_C( 6768) }, + { UINT32_C( 3028104468), UINT32_C( 3015341958), UINT32_C( 1166329203), UINT32_C( 2318732010) }, + { UINT32_C( 968805234), UINT32_C( 3483825356), UINT32_C( 1767838217), UINT32_C( 1323995871) }, + { UINT16_C( 42489), UINT16_C( 62752), UINT16_C( 12059), UINT16_C( 6768), + UINT16_C( 60988), UINT16_C( 33633), UINT16_C( 44771), UINT16_C( 55583) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t origin_r = simde_vld1_u16(test_vec[i].origin_r); + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint16x8_t r = simde_vaddhn_high_u32(origin_r, a, b); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t origin_r = simde_test_arm_neon_random_u16x4(); + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint16x8_t r = simde_vaddhn_high_u32(origin_r, a, b); + + simde_test_arm_neon_write_u16x4(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vaddhn_high_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t origin_r[2]; + uint64_t a[2]; + uint64_t b[2]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 1614372443), UINT32_C( 3760914029) }, + { UINT64_C(14111524090166271938), UINT64_C(11987192328408582711) }, + { UINT64_C( 1979811475086130307), UINT64_C( 5079953232012328519) }, + { UINT32_C( 1614372443), UINT32_C( 3760914029), UINT32_C( 3746556016), UINT32_C( 3973754486) } }, + { { 
UINT32_C( 1671544856), UINT32_C( 621225568) }, + { UINT64_C( 2454814551482193407), UINT64_C(15126065627233573393) }, + { UINT64_C(13389663609935176150), UINT64_C( 5067323661827478264) }, + { UINT32_C( 1671544856), UINT32_C( 621225568), UINT32_C( 3689080048), UINT32_C( 406672529) } }, + { { UINT32_C( 4157953561), UINT32_C( 3598934380) }, + { UINT64_C(13862906695549580271), UINT64_C( 1364886238029594751) }, + { UINT64_C( 9277431902693012927), UINT64_C( 5665327731565945524) }, + { UINT32_C( 4157953561), UINT32_C( 3598934380), UINT32_C( 1092812634), UINT32_C( 1636849243) } }, + { { UINT32_C( 598455139), UINT32_C( 1760953140) }, + { UINT64_C(11137287007818636556), UINT64_C(16646019750688770140) }, + { UINT64_C(15703041268694317777), UINT64_C( 9055302360228056339) }, + { UINT32_C( 598455139), UINT32_C( 1760953140), UINT32_C( 1954283612), UINT32_C( 1689088073) } }, + { { UINT32_C( 4035395122), UINT32_C( 1344874899) }, + { UINT64_C(16349648078972151750), UINT64_C(13793035675891121759) }, + { UINT64_C(18200340564669114955), UINT64_C(11322608606770847581) }, + { UINT32_C( 4035395122), UINT32_C( 1344874899), UINT32_C( 3749328798), UINT32_C( 1552724328) } }, + { { UINT32_C( 3563821591), UINT32_C( 2566264915) }, + { UINT64_C( 517198210232281093), UINT64_C(13450896300641508922) }, + { UINT64_C(15869387284253798122), UINT64_C( 8980549949703342540) }, + { UINT32_C( 3563821591), UINT32_C( 2566264915), UINT32_C( 3815299247), UINT32_C( 927760772) } }, + { { UINT32_C( 3561202493), UINT32_C( 1572226616) }, + { UINT64_C( 2151790066031981522), UINT64_C(14801720919763243321) }, + { UINT64_C(14397953553210680038), UINT64_C(10049855790183662030) }, + { UINT32_C( 3561202493), UINT32_C( 1572226616), UINT32_C( 3853287459), UINT32_C( 1491241305) } }, + { { UINT32_C( 1863803273), UINT32_C( 3450561043) }, + { UINT64_C(10341098681536271081), UINT64_C( 8356083269093513182) }, + { UINT64_C( 6589842696213060292), UINT64_C(12490602975102226734) }, + { UINT32_C( 1863803273), UINT32_C( 3450561043), 
UINT32_C( 3942041978), UINT32_C( 558780080) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t origin_r = simde_vld1_u32(test_vec[i].origin_r); + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + simde_uint32x4_t r = simde_vaddhn_high_u64(origin_r, a, b); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t origin_r = simde_test_arm_neon_random_u32x2(); + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint32x4_t r = simde_vaddhn_high_u64(origin_r, a, b); + + simde_test_arm_neon_write_u32x2(2, origin_r, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_s64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vaddhn_high_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/cgez.c b/test/arm/neon/cgez.c index d78778ef5..9749d33d9 100644 --- a/test/arm/neon/cgez.c +++ b/test/arm/neon/cgez.c @@ -9,6 +9,53 @@ #include "../../../simde/arm/neon.h" #endif +static int +test_simde_vcgez_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -29.138), SIMDE_FLOAT16_VALUE( -21.302), SIMDE_FLOAT16_VALUE( 9.731), SIMDE_FLOAT16_VALUE( 7.200) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { 
SIMDE_FLOAT16_VALUE(13.547), SIMDE_FLOAT16_VALUE(21.721), SIMDE_FLOAT16_VALUE(20.327), SIMDE_FLOAT16_VALUE(14.625) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 5.193), SIMDE_FLOAT16_VALUE( 6.652), SIMDE_FLOAT16_VALUE( 5.809), SIMDE_FLOAT16_VALUE(25.819) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(19.948), SIMDE_FLOAT16_VALUE( -12.175), SIMDE_FLOAT16_VALUE( 1.189), SIMDE_FLOAT16_VALUE( -13.534) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -26.028), SIMDE_FLOAT16_VALUE(26.314), SIMDE_FLOAT16_VALUE(13.049), SIMDE_FLOAT16_VALUE( - 5.316) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(14.137), SIMDE_FLOAT16_VALUE( -20.428), SIMDE_FLOAT16_VALUE( -28.249), SIMDE_FLOAT16_VALUE( -29.967) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( - 5.889), SIMDE_FLOAT16_VALUE( -14.727), SIMDE_FLOAT16_VALUE(28.104), SIMDE_FLOAT16_VALUE( 1.839) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 6.364), SIMDE_FLOAT16_VALUE( -22.098), SIMDE_FLOAT16_VALUE( 1.153), SIMDE_FLOAT16_VALUE( 4.894) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcgez_f16(a); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_uint16x4_t r = simde_vcgez_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcgez_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ 
-333,6 +380,69 @@ test_simde_vcgez_s64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcgezq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 4.672), SIMDE_FLOAT16_VALUE( - 4.393), SIMDE_FLOAT16_VALUE( - 2.459), SIMDE_FLOAT16_VALUE(17.741), + SIMDE_FLOAT16_VALUE( -23.092), SIMDE_FLOAT16_VALUE( - 1.619), SIMDE_FLOAT16_VALUE(24.559), SIMDE_FLOAT16_VALUE(16.008) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(16.244), SIMDE_FLOAT16_VALUE( - 9.222), SIMDE_FLOAT16_VALUE( 9.959), SIMDE_FLOAT16_VALUE(28.196), + SIMDE_FLOAT16_VALUE(18.095), SIMDE_FLOAT16_VALUE( 6.379), SIMDE_FLOAT16_VALUE(20.858), SIMDE_FLOAT16_VALUE(16.596) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 6.235), SIMDE_FLOAT16_VALUE(19.480), SIMDE_FLOAT16_VALUE( - 9.343), SIMDE_FLOAT16_VALUE( -24.237), + SIMDE_FLOAT16_VALUE( 5.971), SIMDE_FLOAT16_VALUE(21.376), SIMDE_FLOAT16_VALUE( -24.306), SIMDE_FLOAT16_VALUE( -27.147) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( - 9.624), SIMDE_FLOAT16_VALUE( -23.992), SIMDE_FLOAT16_VALUE( - 5.807), SIMDE_FLOAT16_VALUE(16.083), + SIMDE_FLOAT16_VALUE( - 3.714), SIMDE_FLOAT16_VALUE( -14.069), SIMDE_FLOAT16_VALUE(11.006), SIMDE_FLOAT16_VALUE(16.313) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 8.730), SIMDE_FLOAT16_VALUE( 9.904), SIMDE_FLOAT16_VALUE( -12.342), SIMDE_FLOAT16_VALUE( -28.297), + SIMDE_FLOAT16_VALUE( - 6.298), SIMDE_FLOAT16_VALUE(23.263), SIMDE_FLOAT16_VALUE( 2.989), SIMDE_FLOAT16_VALUE( - 2.717) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_MAX, 
UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(23.160), SIMDE_FLOAT16_VALUE( -19.910), SIMDE_FLOAT16_VALUE(14.770), SIMDE_FLOAT16_VALUE( - 7.653), + SIMDE_FLOAT16_VALUE(12.325), SIMDE_FLOAT16_VALUE(27.268), SIMDE_FLOAT16_VALUE( -20.187), SIMDE_FLOAT16_VALUE( -15.730) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -14.195), SIMDE_FLOAT16_VALUE( -11.151), SIMDE_FLOAT16_VALUE( -15.147), SIMDE_FLOAT16_VALUE( -17.527), + SIMDE_FLOAT16_VALUE(24.002), SIMDE_FLOAT16_VALUE( - 4.927), SIMDE_FLOAT16_VALUE( -27.014), SIMDE_FLOAT16_VALUE( - 4.630) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -20.267), SIMDE_FLOAT16_VALUE( -16.263), SIMDE_FLOAT16_VALUE(22.561), SIMDE_FLOAT16_VALUE(29.945), + SIMDE_FLOAT16_VALUE( 0.612), SIMDE_FLOAT16_VALUE( -18.683), SIMDE_FLOAT16_VALUE(27.621), SIMDE_FLOAT16_VALUE( -20.027) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcgezq_f16(a); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_uint16x8_t r = simde_vcgezq_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcgezq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -805,7 +915,53 @@ test_simde_vcgezs_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcgezh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + 
simde_float16_t a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( -13.269), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -16.135), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE(11.274), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -12.903), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 7.352), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( 7.827), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(29.401), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -17.429), + UINT16_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcgezh_f16(test_vec[i].a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcgezh_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s8) @@ -813,6 +969,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgez_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s8) @@ -823,6 +980,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcgezq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgezs_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcgezh_f16) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/cgtz.c b/test/arm/neon/cgtz.c index eb7c6820b..a6901303b 100644 --- a/test/arm/neon/cgtz.c +++ b/test/arm/neon/cgtz.c @@ -9,6 +9,53 @@ #include "../../../simde/arm/neon.h" #endif +static int +test_simde_vcgtz_f16 (SIMDE_MUNIT_TEST_ARGS) { 
+#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 9.473), SIMDE_FLOAT16_VALUE( -14.640), SIMDE_FLOAT16_VALUE( - 7.967), SIMDE_FLOAT16_VALUE( -23.200) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(10.878), SIMDE_FLOAT16_VALUE( -23.457), SIMDE_FLOAT16_VALUE(26.306), SIMDE_FLOAT16_VALUE( -22.584) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 1.308), SIMDE_FLOAT16_VALUE(21.518), SIMDE_FLOAT16_VALUE( - 0.314), SIMDE_FLOAT16_VALUE( -27.151) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(28.216), SIMDE_FLOAT16_VALUE(10.608), SIMDE_FLOAT16_VALUE( 1.568), SIMDE_FLOAT16_VALUE( 6.133) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -10.926), SIMDE_FLOAT16_VALUE( - 1.431), SIMDE_FLOAT16_VALUE( -24.284), SIMDE_FLOAT16_VALUE(16.395) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(16.278), SIMDE_FLOAT16_VALUE( -26.233), SIMDE_FLOAT16_VALUE( -16.260), SIMDE_FLOAT16_VALUE(14.480) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(22.265), SIMDE_FLOAT16_VALUE(10.034), SIMDE_FLOAT16_VALUE( -29.299), SIMDE_FLOAT16_VALUE( 3.379) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -23.089), SIMDE_FLOAT16_VALUE(14.378), SIMDE_FLOAT16_VALUE(19.162), SIMDE_FLOAT16_VALUE( - 5.240) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcgtz_f16(a); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + 
simde_uint16x4_t r = simde_vcgtz_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcgtz_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -333,6 +380,69 @@ test_simde_vcgtz_s64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcgtzq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 6.928), SIMDE_FLOAT16_VALUE(11.681), SIMDE_FLOAT16_VALUE( 8.633), SIMDE_FLOAT16_VALUE( - 5.431), + SIMDE_FLOAT16_VALUE( -23.547), SIMDE_FLOAT16_VALUE( - 0.180), SIMDE_FLOAT16_VALUE( - 7.273), SIMDE_FLOAT16_VALUE( - 9.610) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0), + UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -29.882), SIMDE_FLOAT16_VALUE( -29.123), SIMDE_FLOAT16_VALUE( -29.037), SIMDE_FLOAT16_VALUE( 1.019), + SIMDE_FLOAT16_VALUE( - 5.271), SIMDE_FLOAT16_VALUE( 9.973), SIMDE_FLOAT16_VALUE(25.139), SIMDE_FLOAT16_VALUE( -13.522) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -10.945), SIMDE_FLOAT16_VALUE( 8.447), SIMDE_FLOAT16_VALUE( -16.769), SIMDE_FLOAT16_VALUE(21.783), + SIMDE_FLOAT16_VALUE( -11.988), SIMDE_FLOAT16_VALUE(21.388), SIMDE_FLOAT16_VALUE( - 7.251), SIMDE_FLOAT16_VALUE( -11.695) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -14.490), SIMDE_FLOAT16_VALUE(26.771), SIMDE_FLOAT16_VALUE( -12.493), SIMDE_FLOAT16_VALUE( -16.313), + SIMDE_FLOAT16_VALUE( 5.500), SIMDE_FLOAT16_VALUE( -12.452), SIMDE_FLOAT16_VALUE(13.667), SIMDE_FLOAT16_VALUE( 7.351) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 1.234), 
SIMDE_FLOAT16_VALUE(15.157), SIMDE_FLOAT16_VALUE( - 8.083), SIMDE_FLOAT16_VALUE( 9.647), + SIMDE_FLOAT16_VALUE( -10.742), SIMDE_FLOAT16_VALUE( -17.108), SIMDE_FLOAT16_VALUE( 4.144), SIMDE_FLOAT16_VALUE(25.679) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -21.979), SIMDE_FLOAT16_VALUE(19.728), SIMDE_FLOAT16_VALUE( -23.139), SIMDE_FLOAT16_VALUE( - 2.597), + SIMDE_FLOAT16_VALUE( -14.903), SIMDE_FLOAT16_VALUE( - 0.047), SIMDE_FLOAT16_VALUE( - 1.270), SIMDE_FLOAT16_VALUE(27.934) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -14.065), SIMDE_FLOAT16_VALUE(16.118), SIMDE_FLOAT16_VALUE( -22.444), SIMDE_FLOAT16_VALUE( -13.467), + SIMDE_FLOAT16_VALUE(26.493), SIMDE_FLOAT16_VALUE( 0.379), SIMDE_FLOAT16_VALUE( -22.537), SIMDE_FLOAT16_VALUE( -28.404) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( - 2.779), SIMDE_FLOAT16_VALUE(24.997), SIMDE_FLOAT16_VALUE( -13.904), SIMDE_FLOAT16_VALUE( - 6.204), + SIMDE_FLOAT16_VALUE( -19.614), SIMDE_FLOAT16_VALUE( - 6.603), SIMDE_FLOAT16_VALUE( 5.443), SIMDE_FLOAT16_VALUE( - 1.841) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcgtzq_f16(a); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_uint16x8_t r = simde_vcgtzq_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcgtzq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -805,7 +915,53 @@ test_simde_vcgtzs_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcgtzh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16_t a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(27.364), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -13.960), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE(19.245), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(17.486), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(19.457), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -23.488), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE(15.527), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( - 2.123), + UINT16_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcgtzh_f16(test_vec[i].a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcgtzh_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s8) @@ -813,6 +969,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtz_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s8) @@ -823,6 +980,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzs_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcgtzh_f16) 
SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/cle.c b/test/arm/neon/cle.c index 946e11030..149d0070d 100644 --- a/test/arm/neon/cle.c +++ b/test/arm/neon/cle.c @@ -9,6 +9,65 @@ #include "../../../simde/arm/neon.h" #endif +static int +test_simde_vcle_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 8.447), SIMDE_FLOAT16_VALUE( - 8.659), SIMDE_FLOAT16_VALUE( - 2.615), SIMDE_FLOAT16_VALUE(11.602) }, + { SIMDE_FLOAT16_VALUE(10.611), SIMDE_FLOAT16_VALUE( - 0.755), SIMDE_FLOAT16_VALUE( - 9.784), SIMDE_FLOAT16_VALUE(27.142) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(17.098), SIMDE_FLOAT16_VALUE(10.956), SIMDE_FLOAT16_VALUE( -28.072), SIMDE_FLOAT16_VALUE(29.289) }, + { SIMDE_FLOAT16_VALUE( -19.728), SIMDE_FLOAT16_VALUE( 8.380), SIMDE_FLOAT16_VALUE( -11.126), SIMDE_FLOAT16_VALUE(16.167) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(21.932), SIMDE_FLOAT16_VALUE( 9.638), SIMDE_FLOAT16_VALUE( 4.463), SIMDE_FLOAT16_VALUE(20.865) }, + { SIMDE_FLOAT16_VALUE( -11.698), SIMDE_FLOAT16_VALUE(18.815), SIMDE_FLOAT16_VALUE(15.699), SIMDE_FLOAT16_VALUE(13.508) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -20.159), SIMDE_FLOAT16_VALUE(12.844), SIMDE_FLOAT16_VALUE( - 8.921), SIMDE_FLOAT16_VALUE( - 7.798) }, + { SIMDE_FLOAT16_VALUE( - 1.258), SIMDE_FLOAT16_VALUE( -17.170), SIMDE_FLOAT16_VALUE(16.333), SIMDE_FLOAT16_VALUE( - 8.197) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -11.632), SIMDE_FLOAT16_VALUE( - 8.390), SIMDE_FLOAT16_VALUE(20.714), SIMDE_FLOAT16_VALUE(24.154) }, + { SIMDE_FLOAT16_VALUE(22.192), SIMDE_FLOAT16_VALUE(22.191), SIMDE_FLOAT16_VALUE( 6.143), SIMDE_FLOAT16_VALUE(20.987) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { 
SIMDE_FLOAT16_VALUE( - 2.982), SIMDE_FLOAT16_VALUE( - 4.497), SIMDE_FLOAT16_VALUE( -15.911), SIMDE_FLOAT16_VALUE( -26.749) }, + { SIMDE_FLOAT16_VALUE( 7.832), SIMDE_FLOAT16_VALUE(20.355), SIMDE_FLOAT16_VALUE( -18.865), SIMDE_FLOAT16_VALUE( - 8.786) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -19.847), SIMDE_FLOAT16_VALUE( -26.969), SIMDE_FLOAT16_VALUE( -14.823), SIMDE_FLOAT16_VALUE( -21.042) }, + { SIMDE_FLOAT16_VALUE( -28.679), SIMDE_FLOAT16_VALUE( -22.401), SIMDE_FLOAT16_VALUE( -10.468), SIMDE_FLOAT16_VALUE( 1.884) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -13.976), SIMDE_FLOAT16_VALUE(16.503), SIMDE_FLOAT16_VALUE( -18.929), SIMDE_FLOAT16_VALUE( -21.485) }, + { SIMDE_FLOAT16_VALUE(27.388), SIMDE_FLOAT16_VALUE(15.954), SIMDE_FLOAT16_VALUE( -18.157), SIMDE_FLOAT16_VALUE(27.831) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_uint16x4_t r = simde_vcle_f16(a, b); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_uint16x4_t r = simde_vcle_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcle_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -679,6 +738,89 @@ test_simde_vcle_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcleq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 
a[8]; + simde_float16 b[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 6.052), SIMDE_FLOAT16_VALUE( -21.631), SIMDE_FLOAT16_VALUE( -16.927), SIMDE_FLOAT16_VALUE(20.755), + SIMDE_FLOAT16_VALUE( -15.631), SIMDE_FLOAT16_VALUE( - 6.240), SIMDE_FLOAT16_VALUE(23.509), SIMDE_FLOAT16_VALUE( -29.481) }, + { SIMDE_FLOAT16_VALUE(16.025), SIMDE_FLOAT16_VALUE(24.334), SIMDE_FLOAT16_VALUE( - 6.508), SIMDE_FLOAT16_VALUE(17.524), + SIMDE_FLOAT16_VALUE( -15.973), SIMDE_FLOAT16_VALUE( 0.216), SIMDE_FLOAT16_VALUE( - 7.741), SIMDE_FLOAT16_VALUE( 7.130) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), + UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 5.096), SIMDE_FLOAT16_VALUE( -10.602), SIMDE_FLOAT16_VALUE(27.999), SIMDE_FLOAT16_VALUE( - 1.075), + SIMDE_FLOAT16_VALUE( -23.145), SIMDE_FLOAT16_VALUE( -23.578), SIMDE_FLOAT16_VALUE(25.404), SIMDE_FLOAT16_VALUE( -25.074) }, + { SIMDE_FLOAT16_VALUE( 8.365), SIMDE_FLOAT16_VALUE( -15.037), SIMDE_FLOAT16_VALUE( -16.985), SIMDE_FLOAT16_VALUE( 0.374), + SIMDE_FLOAT16_VALUE(13.526), SIMDE_FLOAT16_VALUE( - 2.567), SIMDE_FLOAT16_VALUE(23.870), SIMDE_FLOAT16_VALUE( -27.272) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(11.955), SIMDE_FLOAT16_VALUE( -21.297), SIMDE_FLOAT16_VALUE(19.909), SIMDE_FLOAT16_VALUE( 2.327), + SIMDE_FLOAT16_VALUE(10.833), SIMDE_FLOAT16_VALUE( - 0.112), SIMDE_FLOAT16_VALUE(14.899), SIMDE_FLOAT16_VALUE( - 5.837) }, + { SIMDE_FLOAT16_VALUE(11.019), SIMDE_FLOAT16_VALUE( - 3.706), SIMDE_FLOAT16_VALUE( -17.685), SIMDE_FLOAT16_VALUE( 3.519), + SIMDE_FLOAT16_VALUE( - 9.207), SIMDE_FLOAT16_VALUE(10.370), SIMDE_FLOAT16_VALUE( -11.560), SIMDE_FLOAT16_VALUE( - 2.552) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -25.799), SIMDE_FLOAT16_VALUE( -16.301), SIMDE_FLOAT16_VALUE( 
-15.560), SIMDE_FLOAT16_VALUE( -12.832), + SIMDE_FLOAT16_VALUE( -18.070), SIMDE_FLOAT16_VALUE( -26.679), SIMDE_FLOAT16_VALUE(29.328), SIMDE_FLOAT16_VALUE( 5.986) }, + { SIMDE_FLOAT16_VALUE(15.847), SIMDE_FLOAT16_VALUE(27.710), SIMDE_FLOAT16_VALUE( - 7.357), SIMDE_FLOAT16_VALUE( 8.789), + SIMDE_FLOAT16_VALUE( 9.556), SIMDE_FLOAT16_VALUE(26.672), SIMDE_FLOAT16_VALUE( - 7.994), SIMDE_FLOAT16_VALUE( 7.517) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 5.306), SIMDE_FLOAT16_VALUE( - 0.813), SIMDE_FLOAT16_VALUE( 6.000), SIMDE_FLOAT16_VALUE( -23.238), + SIMDE_FLOAT16_VALUE(29.249), SIMDE_FLOAT16_VALUE(25.928), SIMDE_FLOAT16_VALUE( -25.742), SIMDE_FLOAT16_VALUE(28.893) }, + { SIMDE_FLOAT16_VALUE(19.282), SIMDE_FLOAT16_VALUE(14.169), SIMDE_FLOAT16_VALUE( - 8.290), SIMDE_FLOAT16_VALUE(17.378), + SIMDE_FLOAT16_VALUE( - 1.797), SIMDE_FLOAT16_VALUE( 1.777), SIMDE_FLOAT16_VALUE( 5.564), SIMDE_FLOAT16_VALUE(28.078) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -20.592), SIMDE_FLOAT16_VALUE(20.779), SIMDE_FLOAT16_VALUE(10.541), SIMDE_FLOAT16_VALUE( -21.060), + SIMDE_FLOAT16_VALUE(23.945), SIMDE_FLOAT16_VALUE(24.970), SIMDE_FLOAT16_VALUE( -28.557), SIMDE_FLOAT16_VALUE(22.210) }, + { SIMDE_FLOAT16_VALUE(23.015), SIMDE_FLOAT16_VALUE( -29.722), SIMDE_FLOAT16_VALUE( -20.947), SIMDE_FLOAT16_VALUE( -10.237), + SIMDE_FLOAT16_VALUE(14.317), SIMDE_FLOAT16_VALUE( -23.364), SIMDE_FLOAT16_VALUE( - 2.156), SIMDE_FLOAT16_VALUE( 6.988) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_MAX, + UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -16.548), SIMDE_FLOAT16_VALUE( -18.913), SIMDE_FLOAT16_VALUE( 5.054), SIMDE_FLOAT16_VALUE(16.187), + SIMDE_FLOAT16_VALUE(20.355), SIMDE_FLOAT16_VALUE( -12.692), SIMDE_FLOAT16_VALUE( - 8.127), SIMDE_FLOAT16_VALUE( -29.320) }, + { 
SIMDE_FLOAT16_VALUE( - 3.252), SIMDE_FLOAT16_VALUE( 7.592), SIMDE_FLOAT16_VALUE(14.255), SIMDE_FLOAT16_VALUE( -25.319), + SIMDE_FLOAT16_VALUE( -11.153), SIMDE_FLOAT16_VALUE(29.563), SIMDE_FLOAT16_VALUE(11.098), SIMDE_FLOAT16_VALUE( -18.346) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0), + UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 6.142), SIMDE_FLOAT16_VALUE( -16.113), SIMDE_FLOAT16_VALUE( -25.442), SIMDE_FLOAT16_VALUE(10.388), + SIMDE_FLOAT16_VALUE( 9.547), SIMDE_FLOAT16_VALUE( -26.224), SIMDE_FLOAT16_VALUE( - 2.331), SIMDE_FLOAT16_VALUE( -14.642) }, + { SIMDE_FLOAT16_VALUE( - 8.097), SIMDE_FLOAT16_VALUE( 5.808), SIMDE_FLOAT16_VALUE(19.975), SIMDE_FLOAT16_VALUE(28.291), + SIMDE_FLOAT16_VALUE( 1.823), SIMDE_FLOAT16_VALUE(25.174), SIMDE_FLOAT16_VALUE(23.571), SIMDE_FLOAT16_VALUE( - 7.197) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_uint16x8_t r = simde_vcleq_f16(a, b); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_uint16x8_t r = simde_vcleq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcleq_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1617,7 +1759,64 @@ test_simde_vcles_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcleh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 
1 + struct { + simde_float16_t a; + simde_float16_t b; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE(17.427), + SIMDE_FLOAT16_VALUE( -29.394), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -19.443), + SIMDE_FLOAT16_VALUE(14.852), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(19.083), + SIMDE_FLOAT16_VALUE(16.002), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( - 2.002), + SIMDE_FLOAT16_VALUE(28.082), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(11.500), + SIMDE_FLOAT16_VALUE( - 4.960), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.968), + SIMDE_FLOAT16_VALUE(14.557), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -25.518), + SIMDE_FLOAT16_VALUE( -16.212), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -26.015), + SIMDE_FLOAT16_VALUE(16.100), + UINT16_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcleh_f16(test_vec[i].a, test_vec[i].b); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_float16_t b = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcleh_f16(a, b); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcle_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_s8) @@ -1629,6 +1828,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vcle_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcleq_s8) @@ -1644,6 +1844,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcled_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcled_s64) 
SIMDE_TEST_FUNC_LIST_ENTRY(vcled_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcles_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcleh_f16) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/cltz.c b/test/arm/neon/cltz.c index 0f117fa4a..cec9c5166 100644 --- a/test/arm/neon/cltz.c +++ b/test/arm/neon/cltz.c @@ -4,8 +4,56 @@ #include "../../../simde/arm/neon/cltz.h" #include "../../../simde/arm/neon/reinterpret.h" +static int +test_simde_vcltz_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 9.414), SIMDE_FLOAT16_VALUE( -22.697), SIMDE_FLOAT16_VALUE( - 2.044), SIMDE_FLOAT16_VALUE(10.309) }, + { UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( - 0.842), SIMDE_FLOAT16_VALUE( -24.135), SIMDE_FLOAT16_VALUE( 4.853), SIMDE_FLOAT16_VALUE( -25.116) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(15.876), SIMDE_FLOAT16_VALUE(14.021), SIMDE_FLOAT16_VALUE( -20.571), SIMDE_FLOAT16_VALUE( -16.980) }, + { UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE(20.714), SIMDE_FLOAT16_VALUE( - 9.050), SIMDE_FLOAT16_VALUE(22.773), SIMDE_FLOAT16_VALUE( -25.134) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -10.853), SIMDE_FLOAT16_VALUE( -13.065), SIMDE_FLOAT16_VALUE( -19.550), SIMDE_FLOAT16_VALUE(25.965) }, + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -18.329), SIMDE_FLOAT16_VALUE(16.423), SIMDE_FLOAT16_VALUE( - 2.286), SIMDE_FLOAT16_VALUE( -27.386) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -21.617), SIMDE_FLOAT16_VALUE( -20.611), SIMDE_FLOAT16_VALUE(11.553), SIMDE_FLOAT16_VALUE(10.292) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE(27.032), SIMDE_FLOAT16_VALUE( -20.587), SIMDE_FLOAT16_VALUE(20.330), SIMDE_FLOAT16_VALUE(23.132) 
}, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcltz_f16(a); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_uint16x4_t r = simde_vcltz_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcltz_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[2]; uint32_t r[2]; @@ -36,10 +84,23 @@ test_simde_vcltz_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_uint32x2_t r = simde_vcltz_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltz_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[1]; uint64_t r[1]; @@ -70,10 +131,23 @@ test_simde_vcltz_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x1_t r = simde_vcltz_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltz_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int8_t a[8]; uint8_t r[8]; @@ -104,10 +178,23 @@ test_simde_vcltz_s8 (SIMDE_MUNIT_TEST_ARGS) { 
} return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_uint8x8_t r = simde_vcltz_s8(a); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltz_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int16_t a[4]; uint16_t r[4]; @@ -138,10 +225,23 @@ test_simde_vcltz_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_uint16x4_t r = simde_vcltz_s16(a); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltz_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int32_t a[2]; uint32_t r[2]; @@ -172,10 +272,23 @@ test_simde_vcltz_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_uint32x2_t r = simde_vcltz_s32(a); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltz_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int64_t a[1]; uint64_t r[1]; @@ -206,10 +319,85 @@ test_simde_vcltz_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); + simde_uint64x1_t r = simde_vcltz_s64(a); + + simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcltzq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + 
simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 1.527), SIMDE_FLOAT16_VALUE( - 7.156), SIMDE_FLOAT16_VALUE( 0.644), SIMDE_FLOAT16_VALUE( 5.397), + SIMDE_FLOAT16_VALUE(20.270), SIMDE_FLOAT16_VALUE( 7.110), SIMDE_FLOAT16_VALUE(22.060), SIMDE_FLOAT16_VALUE( -28.149) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -23.412), SIMDE_FLOAT16_VALUE(26.697), SIMDE_FLOAT16_VALUE( -19.659), SIMDE_FLOAT16_VALUE( 7.755), + SIMDE_FLOAT16_VALUE( -18.384), SIMDE_FLOAT16_VALUE( 2.923), SIMDE_FLOAT16_VALUE( -25.086), SIMDE_FLOAT16_VALUE( -16.079) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0), + UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 1.734), SIMDE_FLOAT16_VALUE( 9.920), SIMDE_FLOAT16_VALUE(15.824), SIMDE_FLOAT16_VALUE( 1.851), + SIMDE_FLOAT16_VALUE( 8.740), SIMDE_FLOAT16_VALUE(28.782), SIMDE_FLOAT16_VALUE(24.278), SIMDE_FLOAT16_VALUE(20.947) }, + { UINT16_MAX, UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 2.211), SIMDE_FLOAT16_VALUE( - 2.949), SIMDE_FLOAT16_VALUE(28.515), SIMDE_FLOAT16_VALUE( -11.428), + SIMDE_FLOAT16_VALUE( -17.855), SIMDE_FLOAT16_VALUE( - 2.077), SIMDE_FLOAT16_VALUE( 8.552), SIMDE_FLOAT16_VALUE( - 6.580) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 3.912), SIMDE_FLOAT16_VALUE( -13.497), SIMDE_FLOAT16_VALUE( 6.232), SIMDE_FLOAT16_VALUE( 0.255), + SIMDE_FLOAT16_VALUE( 5.858), SIMDE_FLOAT16_VALUE( - 2.388), SIMDE_FLOAT16_VALUE( -13.003), SIMDE_FLOAT16_VALUE( - 2.953) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( -13.006), SIMDE_FLOAT16_VALUE( - 6.291), SIMDE_FLOAT16_VALUE(25.169), 
SIMDE_FLOAT16_VALUE(20.013), + SIMDE_FLOAT16_VALUE( -19.562), SIMDE_FLOAT16_VALUE(11.841), SIMDE_FLOAT16_VALUE( - 0.035), SIMDE_FLOAT16_VALUE( 3.541) }, + { UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( -14.891), SIMDE_FLOAT16_VALUE(19.771), SIMDE_FLOAT16_VALUE( -18.229), SIMDE_FLOAT16_VALUE( -15.832), + SIMDE_FLOAT16_VALUE( - 7.117), SIMDE_FLOAT16_VALUE( - 6.054), SIMDE_FLOAT16_VALUE( -16.799), SIMDE_FLOAT16_VALUE( -13.868) }, + { UINT16_MAX, UINT16_C( 0), UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 5.872), SIMDE_FLOAT16_VALUE( - 3.436), SIMDE_FLOAT16_VALUE(29.629), SIMDE_FLOAT16_VALUE(14.318), + SIMDE_FLOAT16_VALUE( -20.699), SIMDE_FLOAT16_VALUE( -26.955), SIMDE_FLOAT16_VALUE( - 6.992), SIMDE_FLOAT16_VALUE( -22.497) }, + { UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0), + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcltzq_f16(a); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_uint16x8_t r = simde_vcltzq_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[4]; uint32_t r[4]; @@ -239,10 +427,23 @@ test_simde_vcltzq_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_uint32x4_t r = 
simde_vcltzq_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[2]; uint64_t r[2]; @@ -273,10 +474,23 @@ test_simde_vcltzq_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x2_t r = simde_vcltzq_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int8_t a[16]; uint8_t r[16]; @@ -323,10 +537,23 @@ test_simde_vcltzq_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_uint8x16_t r = simde_vcltzq_s8(a); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int16_t a[8]; uint16_t r[8]; @@ -357,10 +584,23 @@ test_simde_vcltzq_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_uint16x8_t r = simde_vcltzq_s16(a); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int32_t a[4]; uint32_t r[4]; @@ -390,10 +630,23 @@ test_simde_vcltzq_s32 
(SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_uint32x4_t r = simde_vcltzq_s32(a); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcltzq_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int64_t a[2]; uint64_t r[2]; @@ -423,6 +676,18 @@ test_simde_vcltzq_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_uint64x2_t r = simde_vcltzq_s64(a); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int @@ -557,7 +822,53 @@ test_simde_vcltzs_f32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vcltzh_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16_t a; + uint16_t r; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( -10.632), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(21.069), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( - 6.106), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(18.507), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( -15.011), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -25.511), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(22.656), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE(29.863), + UINT16_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vcltzh_f16(test_vec[i].a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcltzh_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, 
r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s8) @@ -565,6 +876,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltz_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s8) @@ -575,6 +887,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcltzq_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzd_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzd_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vcltzs_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcltzh_f16) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/copy_lane.c b/test/arm/neon/copy_lane.c new file mode 100644 index 000000000..9f863a253 --- /dev/null +++ b/test/arm/neon/copy_lane.c @@ -0,0 +1,4179 @@ +#define SIMDE_TEST_ARM_NEON_INSN copy_lane + +#include "test-neon.h" +#include "../../../simde/arm/neon/copy_lane.h" +#include "../../../simde/arm/neon/reinterpret.h" + +static int +test_simde_vcopy_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[8]; + int8_t b[8]; + int8_t r00[8]; + int8_t r13[8]; + int8_t r35[8]; + int8_t r67[8]; + int8_t r71[8]; + } test_vec[] = { + { { -INT8_C( 78), -INT8_C( 72), INT8_C( 48), INT8_C( 97), + -INT8_C( 62), INT8_C( 121), -INT8_C( 59), -INT8_C( 109) }, + { -INT8_C( 56), -INT8_C( 86), INT8_C( 123), INT8_C( 105), + -INT8_C( 32), INT8_C( 96), -INT8_C( 23), INT8_C( 105) }, + { -INT8_C( 56), -INT8_C( 72), INT8_C( 48), INT8_C( 97), + -INT8_C( 62), INT8_C( 121), -INT8_C( 59), -INT8_C( 109) }, + { -INT8_C( 78), INT8_C( 105), INT8_C( 48), INT8_C( 97), + -INT8_C( 62), INT8_C( 121), -INT8_C( 59), -INT8_C( 109) }, + { -INT8_C( 78), -INT8_C( 72), INT8_C( 48), INT8_C( 96), + -INT8_C( 62), INT8_C( 121), -INT8_C( 59), -INT8_C( 109) }, + { -INT8_C( 78), -INT8_C( 72), 
INT8_C( 48), INT8_C( 97), + -INT8_C( 62), INT8_C( 121), INT8_C( 105), -INT8_C( 109) }, + { -INT8_C( 78), -INT8_C( 72), INT8_C( 48), INT8_C( 97), + -INT8_C( 62), INT8_C( 121), -INT8_C( 59), -INT8_C( 86) } }, + { { -INT8_C( 117), -INT8_C( 45), -INT8_C( 85), INT8_C( 8), + -INT8_C( 53), INT8_C( 73), INT8_C( 39), INT8_C( 87) }, + { -INT8_C( 93), -INT8_C( 73), INT8_C( 58), INT8_C( 17), + INT8_C( 48), INT8_C( 96), -INT8_C( 107), INT8_C( 12) }, + { -INT8_C( 93), -INT8_C( 45), -INT8_C( 85), INT8_C( 8), + -INT8_C( 53), INT8_C( 73), INT8_C( 39), INT8_C( 87) }, + { -INT8_C( 117), INT8_C( 17), -INT8_C( 85), INT8_C( 8), + -INT8_C( 53), INT8_C( 73), INT8_C( 39), INT8_C( 87) }, + { -INT8_C( 117), -INT8_C( 45), -INT8_C( 85), INT8_C( 96), + -INT8_C( 53), INT8_C( 73), INT8_C( 39), INT8_C( 87) }, + { -INT8_C( 117), -INT8_C( 45), -INT8_C( 85), INT8_C( 8), + -INT8_C( 53), INT8_C( 73), INT8_C( 12), INT8_C( 87) }, + { -INT8_C( 117), -INT8_C( 45), -INT8_C( 85), INT8_C( 8), + -INT8_C( 53), INT8_C( 73), INT8_C( 39), -INT8_C( 73) } }, + { { INT8_C( 115), INT8_C( 81), -INT8_C( 94), INT8_C( 115), + -INT8_C( 26), INT8_C( 33), INT8_C( 109), -INT8_C( 113) }, + { INT8_C( 115), -INT8_C( 5), INT8_C( 1), INT8_C( 9), + INT8_C( 47), -INT8_C( 57), INT8_C( 3), -INT8_C( 50) }, + { INT8_C( 115), INT8_C( 81), -INT8_C( 94), INT8_C( 115), + -INT8_C( 26), INT8_C( 33), INT8_C( 109), -INT8_C( 113) }, + { INT8_C( 115), INT8_C( 9), -INT8_C( 94), INT8_C( 115), + -INT8_C( 26), INT8_C( 33), INT8_C( 109), -INT8_C( 113) }, + { INT8_C( 115), INT8_C( 81), -INT8_C( 94), -INT8_C( 57), + -INT8_C( 26), INT8_C( 33), INT8_C( 109), -INT8_C( 113) }, + { INT8_C( 115), INT8_C( 81), -INT8_C( 94), INT8_C( 115), + -INT8_C( 26), INT8_C( 33), -INT8_C( 50), -INT8_C( 113) }, + { INT8_C( 115), INT8_C( 81), -INT8_C( 94), INT8_C( 115), + -INT8_C( 26), INT8_C( 33), INT8_C( 109), -INT8_C( 5) } }, + { { -INT8_C( 90), INT8_C( 59), -INT8_C( 106), -INT8_C( 127), + INT8_C( 1), INT8_C( 70), INT8_C( 24), -INT8_C( 55) }, + { -INT8_C( 85), INT8_C( 64), 
-INT8_C( 70), INT8_C( 88), + INT8_C( 110), -INT8_C( 17), INT8_C( 56), -INT8_C( 81) }, + { -INT8_C( 85), INT8_C( 59), -INT8_C( 106), -INT8_C( 127), + INT8_C( 1), INT8_C( 70), INT8_C( 24), -INT8_C( 55) }, + { -INT8_C( 90), INT8_C( 88), -INT8_C( 106), -INT8_C( 127), + INT8_C( 1), INT8_C( 70), INT8_C( 24), -INT8_C( 55) }, + { -INT8_C( 90), INT8_C( 59), -INT8_C( 106), -INT8_C( 17), + INT8_C( 1), INT8_C( 70), INT8_C( 24), -INT8_C( 55) }, + { -INT8_C( 90), INT8_C( 59), -INT8_C( 106), -INT8_C( 127), + INT8_C( 1), INT8_C( 70), -INT8_C( 81), -INT8_C( 55) }, + { -INT8_C( 90), INT8_C( 59), -INT8_C( 106), -INT8_C( 127), + INT8_C( 1), INT8_C( 70), INT8_C( 24), INT8_C( 64) } }, + { { -INT8_C( 69), -INT8_C( 46), -INT8_C( 114), INT8_C( 40), + -INT8_C( 68), INT8_C( 99), -INT8_C( 54), INT8_C( 24) }, + { -INT8_C( 6), INT8_C( 13), INT8_C( 89), -INT8_C( 127), + INT8_C( 11), -INT8_C( 122), INT8_C( 51), -INT8_C( 46) }, + { -INT8_C( 6), -INT8_C( 46), -INT8_C( 114), INT8_C( 40), + -INT8_C( 68), INT8_C( 99), -INT8_C( 54), INT8_C( 24) }, + { -INT8_C( 69), -INT8_C( 127), -INT8_C( 114), INT8_C( 40), + -INT8_C( 68), INT8_C( 99), -INT8_C( 54), INT8_C( 24) }, + { -INT8_C( 69), -INT8_C( 46), -INT8_C( 114), -INT8_C( 122), + -INT8_C( 68), INT8_C( 99), -INT8_C( 54), INT8_C( 24) }, + { -INT8_C( 69), -INT8_C( 46), -INT8_C( 114), INT8_C( 40), + -INT8_C( 68), INT8_C( 99), -INT8_C( 46), INT8_C( 24) }, + { -INT8_C( 69), -INT8_C( 46), -INT8_C( 114), INT8_C( 40), + -INT8_C( 68), INT8_C( 99), -INT8_C( 54), INT8_C( 13) } }, + { { INT8_C( 118), -INT8_C( 13), INT8_C( 7), -INT8_C( 30), + -INT8_C( 56), -INT8_C( 127), INT8_C( 34), INT8_C( 6) }, + { INT8_C( 118), INT8_C( 56), -INT8_C( 47), INT8_C( 71), + -INT8_C( 67), -INT8_C( 102), -INT8_C( 49), INT8_C( 23) }, + { INT8_C( 118), -INT8_C( 13), INT8_C( 7), -INT8_C( 30), + -INT8_C( 56), -INT8_C( 127), INT8_C( 34), INT8_C( 6) }, + { INT8_C( 118), INT8_C( 71), INT8_C( 7), -INT8_C( 30), + -INT8_C( 56), -INT8_C( 127), INT8_C( 34), INT8_C( 6) }, + { INT8_C( 118), -INT8_C( 
13), INT8_C( 7), -INT8_C( 102), + -INT8_C( 56), -INT8_C( 127), INT8_C( 34), INT8_C( 6) }, + { INT8_C( 118), -INT8_C( 13), INT8_C( 7), -INT8_C( 30), + -INT8_C( 56), -INT8_C( 127), INT8_C( 23), INT8_C( 6) }, + { INT8_C( 118), -INT8_C( 13), INT8_C( 7), -INT8_C( 30), + -INT8_C( 56), -INT8_C( 127), INT8_C( 34), INT8_C( 56) } }, + { { INT8_C( 23), -INT8_C( 45), INT8_C( 85), INT8_C( 18), + INT8_C( 119), INT8_MAX, INT8_C( 70), INT8_C( 119) }, + { -INT8_C( 13), -INT8_C( 123), -INT8_C( 4), INT8_C( 87), + INT8_C( 21), INT8_C( 63), INT8_C( 115), INT8_C( 3) }, + { -INT8_C( 13), -INT8_C( 45), INT8_C( 85), INT8_C( 18), + INT8_C( 119), INT8_MAX, INT8_C( 70), INT8_C( 119) }, + { INT8_C( 23), INT8_C( 87), INT8_C( 85), INT8_C( 18), + INT8_C( 119), INT8_MAX, INT8_C( 70), INT8_C( 119) }, + { INT8_C( 23), -INT8_C( 45), INT8_C( 85), INT8_C( 63), + INT8_C( 119), INT8_MAX, INT8_C( 70), INT8_C( 119) }, + { INT8_C( 23), -INT8_C( 45), INT8_C( 85), INT8_C( 18), + INT8_C( 119), INT8_MAX, INT8_C( 3), INT8_C( 119) }, + { INT8_C( 23), -INT8_C( 45), INT8_C( 85), INT8_C( 18), + INT8_C( 119), INT8_MAX, INT8_C( 70), -INT8_C( 123) } }, + { { INT8_C( 93), -INT8_C( 117), -INT8_C( 15), INT8_C( 74), + -INT8_C( 71), INT8_C( 82), INT8_C( 67), -INT8_C( 59) }, + { -INT8_C( 24), INT8_C( 126), INT8_C( 78), -INT8_C( 104), + -INT8_C( 114), -INT8_C( 31), INT8_C( 4), INT8_C( 6) }, + { -INT8_C( 24), -INT8_C( 117), -INT8_C( 15), INT8_C( 74), + -INT8_C( 71), INT8_C( 82), INT8_C( 67), -INT8_C( 59) }, + { INT8_C( 93), -INT8_C( 104), -INT8_C( 15), INT8_C( 74), + -INT8_C( 71), INT8_C( 82), INT8_C( 67), -INT8_C( 59) }, + { INT8_C( 93), -INT8_C( 117), -INT8_C( 15), -INT8_C( 31), + -INT8_C( 71), INT8_C( 82), INT8_C( 67), -INT8_C( 59) }, + { INT8_C( 93), -INT8_C( 117), -INT8_C( 15), INT8_C( 74), + -INT8_C( 71), INT8_C( 82), INT8_C( 6), -INT8_C( 59) }, + { INT8_C( 93), -INT8_C( 117), -INT8_C( 15), INT8_C( 74), + -INT8_C( 71), INT8_C( 82), INT8_C( 67), INT8_C( 126) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + + simde_int8x8_t r00 = simde_vcopy_lane_s8(a, 0, b, 0); + simde_int8x8_t r13 = simde_vcopy_lane_s8(a, 1, b, 3); + simde_int8x8_t r35 = simde_vcopy_lane_s8(a, 3, b, 5); + simde_int8x8_t r67 = simde_vcopy_lane_s8(a, 6, b, 7); + simde_int8x8_t r71 = simde_vcopy_lane_s8(a, 7, b, 1); + + simde_test_arm_neon_assert_equal_i8x8(r00, simde_vld1_s8(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_i8x8(r13, simde_vld1_s8(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i8x8(r35, simde_vld1_s8(test_vec[i].r35)); + simde_test_arm_neon_assert_equal_i8x8(r67, simde_vld1_s8(test_vec[i].r67)); + simde_test_arm_neon_assert_equal_i8x8(r71, simde_vld1_s8(test_vec[i].r71)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b[4]; + int16_t r00[4]; + int16_t r01[4]; + int16_t r13[4]; + int16_t r22[4]; + int16_t r33[4]; + } test_vec[] = { + { { -INT16_C( 16325), INT16_C( 17773), -INT16_C( 20430), INT16_C( 27334) }, + { -INT16_C( 32517), -INT16_C( 27026), INT16_C( 30460), INT16_C( 14613) }, + { -INT16_C( 32517), INT16_C( 17773), -INT16_C( 20430), INT16_C( 27334) }, + { -INT16_C( 27026), INT16_C( 17773), -INT16_C( 20430), INT16_C( 27334) }, + { -INT16_C( 16325), INT16_C( 14613), -INT16_C( 20430), INT16_C( 27334) }, + { -INT16_C( 16325), INT16_C( 17773), INT16_C( 30460), INT16_C( 27334) }, + { -INT16_C( 16325), INT16_C( 17773), -INT16_C( 20430), INT16_C( 14613) } }, + { { INT16_C( 10522), -INT16_C( 20353), -INT16_C( 11907), INT16_C( 29433) }, + { INT16_C( 26264), -INT16_C( 19178), INT16_C( 29217), INT16_C( 9061) }, + { INT16_C( 26264), -INT16_C( 20353), -INT16_C( 11907), INT16_C( 29433) }, + { -INT16_C( 19178), -INT16_C( 20353), -INT16_C( 11907), INT16_C( 29433) }, + { INT16_C( 10522), INT16_C( 9061), -INT16_C( 11907), INT16_C( 29433) }, + { INT16_C( 10522), 
-INT16_C( 20353), INT16_C( 29217), INT16_C( 29433) }, + { INT16_C( 10522), -INT16_C( 20353), -INT16_C( 11907), INT16_C( 9061) } }, + { { -INT16_C( 4995), INT16_C( 14378), -INT16_C( 3303), -INT16_C( 17484) }, + { -INT16_C( 10776), INT16_C( 28644), -INT16_C( 30101), INT16_C( 2823) }, + { -INT16_C( 10776), INT16_C( 14378), -INT16_C( 3303), -INT16_C( 17484) }, + { INT16_C( 28644), INT16_C( 14378), -INT16_C( 3303), -INT16_C( 17484) }, + { -INT16_C( 4995), INT16_C( 2823), -INT16_C( 3303), -INT16_C( 17484) }, + { -INT16_C( 4995), INT16_C( 14378), -INT16_C( 30101), -INT16_C( 17484) }, + { -INT16_C( 4995), INT16_C( 14378), -INT16_C( 3303), INT16_C( 2823) } }, + { { INT16_C( 5225), INT16_C( 28725), -INT16_C( 5626), -INT16_C( 26400) }, + { -INT16_C( 7612), INT16_C( 17719), INT16_C( 9923), -INT16_C( 31132) }, + { -INT16_C( 7612), INT16_C( 28725), -INT16_C( 5626), -INT16_C( 26400) }, + { INT16_C( 17719), INT16_C( 28725), -INT16_C( 5626), -INT16_C( 26400) }, + { INT16_C( 5225), -INT16_C( 31132), -INT16_C( 5626), -INT16_C( 26400) }, + { INT16_C( 5225), INT16_C( 28725), INT16_C( 9923), -INT16_C( 26400) }, + { INT16_C( 5225), INT16_C( 28725), -INT16_C( 5626), -INT16_C( 31132) } }, + { { INT16_C( 17703), INT16_C( 16816), -INT16_C( 29954), INT16_C( 20252) }, + { -INT16_C( 9088), -INT16_C( 13417), -INT16_C( 6695), -INT16_C( 24796) }, + { -INT16_C( 9088), INT16_C( 16816), -INT16_C( 29954), INT16_C( 20252) }, + { -INT16_C( 13417), INT16_C( 16816), -INT16_C( 29954), INT16_C( 20252) }, + { INT16_C( 17703), -INT16_C( 24796), -INT16_C( 29954), INT16_C( 20252) }, + { INT16_C( 17703), INT16_C( 16816), -INT16_C( 6695), INT16_C( 20252) }, + { INT16_C( 17703), INT16_C( 16816), -INT16_C( 29954), -INT16_C( 24796) } }, + { { INT16_C( 666), INT16_C( 31820), -INT16_C( 16597), -INT16_C( 19256) }, + { INT16_C( 22386), -INT16_C( 16949), -INT16_C( 7283), -INT16_C( 15292) }, + { INT16_C( 22386), INT16_C( 31820), -INT16_C( 16597), -INT16_C( 19256) }, + { -INT16_C( 16949), INT16_C( 31820), -INT16_C( 16597), 
-INT16_C( 19256) }, + { INT16_C( 666), -INT16_C( 15292), -INT16_C( 16597), -INT16_C( 19256) }, + { INT16_C( 666), INT16_C( 31820), -INT16_C( 7283), -INT16_C( 19256) }, + { INT16_C( 666), INT16_C( 31820), -INT16_C( 16597), -INT16_C( 15292) } }, + { { INT16_C( 10439), INT16_C( 1873), INT16_C( 4081), INT16_C( 17141) }, + { INT16_C( 16281), -INT16_C( 18813), INT16_C( 988), INT16_C( 29177) }, + { INT16_C( 16281), INT16_C( 1873), INT16_C( 4081), INT16_C( 17141) }, + { -INT16_C( 18813), INT16_C( 1873), INT16_C( 4081), INT16_C( 17141) }, + { INT16_C( 10439), INT16_C( 29177), INT16_C( 4081), INT16_C( 17141) }, + { INT16_C( 10439), INT16_C( 1873), INT16_C( 988), INT16_C( 17141) }, + { INT16_C( 10439), INT16_C( 1873), INT16_C( 4081), INT16_C( 29177) } }, + { { INT16_C( 20517), -INT16_C( 1538), INT16_C( 8824), -INT16_C( 86) }, + { -INT16_C( 3854), -INT16_C( 18339), -INT16_C( 7976), -INT16_C( 23518) }, + { -INT16_C( 3854), -INT16_C( 1538), INT16_C( 8824), -INT16_C( 86) }, + { -INT16_C( 18339), -INT16_C( 1538), INT16_C( 8824), -INT16_C( 86) }, + { INT16_C( 20517), -INT16_C( 23518), INT16_C( 8824), -INT16_C( 86) }, + { INT16_C( 20517), -INT16_C( 1538), -INT16_C( 7976), -INT16_C( 86) }, + { INT16_C( 20517), -INT16_C( 1538), INT16_C( 8824), -INT16_C( 23518) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + + simde_int16x4_t r00 = simde_vcopy_lane_s16(a, 0, b, 0); + simde_int16x4_t r01 = simde_vcopy_lane_s16(a, 0, b, 1); + simde_int16x4_t r13 = simde_vcopy_lane_s16(a, 1, b, 3); + simde_int16x4_t r22 = simde_vcopy_lane_s16(a, 2, b, 2); + simde_int16x4_t r33 = simde_vcopy_lane_s16(a, 3, b, 3); + + simde_test_arm_neon_assert_equal_i16x4(r00, simde_vld1_s16(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_i16x4(r01, simde_vld1_s16(test_vec[i].r01)); + simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); + 
simde_test_arm_neon_assert_equal_i16x4(r22, simde_vld1_s16(test_vec[i].r22)); + simde_test_arm_neon_assert_equal_i16x4(r33, simde_vld1_s16(test_vec[i].r33)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b[2]; + int32_t r00[2]; + int32_t r01[2]; + int32_t r10[2]; + int32_t r11[2]; + } test_vec[] = { + { { -INT32_C( 161600086), INT32_C( 847429630) }, + { INT32_C( 258110270), INT32_C( 589323412) }, + { INT32_C( 258110270), INT32_C( 847429630) }, + { INT32_C( 589323412), INT32_C( 847429630) }, + { -INT32_C( 161600086), INT32_C( 258110270) }, + { -INT32_C( 161600086), INT32_C( 589323412) } }, + { { INT32_C( 1335877463), -INT32_C( 1387277215) }, + { INT32_C( 1261596096), INT32_C( 118232150) }, + { INT32_C( 1261596096), -INT32_C( 1387277215) }, + { INT32_C( 118232150), -INT32_C( 1387277215) }, + { INT32_C( 1335877463), INT32_C( 1261596096) }, + { INT32_C( 1335877463), INT32_C( 118232150) } }, + { { INT32_C( 1784099357), INT32_C( 1027096747) }, + { -INT32_C( 326542255), -INT32_C( 973205552) }, + { -INT32_C( 326542255), INT32_C( 1027096747) }, + { -INT32_C( 973205552), INT32_C( 1027096747) }, + { INT32_C( 1784099357), -INT32_C( 326542255) }, + { INT32_C( 1784099357), -INT32_C( 973205552) } }, + { { INT32_C( 587291785), -INT32_C( 2004974741) }, + { -INT32_C( 1494547193), -INT32_C( 402964197) }, + { -INT32_C( 1494547193), -INT32_C( 2004974741) }, + { -INT32_C( 402964197), -INT32_C( 2004974741) }, + { INT32_C( 587291785), -INT32_C( 1494547193) }, + { INT32_C( 587291785), -INT32_C( 402964197) } }, + { { INT32_C( 1305831887), INT32_C( 923587366) }, + { INT32_C( 1791728394), INT32_C( 259186719) }, + { INT32_C( 1791728394), INT32_C( 923587366) }, + { INT32_C( 259186719), INT32_C( 923587366) }, + { INT32_C( 1305831887), INT32_C( 1791728394) }, + { INT32_C( 1305831887), INT32_C( 259186719) } }, + { { -INT32_C( 2137273571), INT32_C( 1583898736) }, + { -INT32_C( 1494663023), -INT32_C( 
99880334) }, + { -INT32_C( 1494663023), INT32_C( 1583898736) }, + { -INT32_C( 99880334), INT32_C( 1583898736) }, + { -INT32_C( 2137273571), -INT32_C( 1494663023) }, + { -INT32_C( 2137273571), -INT32_C( 99880334) } }, + { { INT32_C( 1186905933), -INT32_C( 783851572) }, + { -INT32_C( 589687716), -INT32_C( 1794063581) }, + { -INT32_C( 589687716), -INT32_C( 783851572) }, + { -INT32_C( 1794063581), -INT32_C( 783851572) }, + { INT32_C( 1186905933), -INT32_C( 589687716) }, + { INT32_C( 1186905933), -INT32_C( 1794063581) } }, + { { -INT32_C( 1701680446), -INT32_C( 1171445663) }, + { INT32_C( 225795531), -INT32_C( 1238600191) }, + { INT32_C( 225795531), -INT32_C( 1171445663) }, + { -INT32_C( 1238600191), -INT32_C( 1171445663) }, + { -INT32_C( 1701680446), INT32_C( 225795531) }, + { -INT32_C( 1701680446), -INT32_C( 1238600191) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + + simde_int32x2_t r00 = simde_vcopy_lane_s32(a, 0, b, 0); + simde_int32x2_t r01 = simde_vcopy_lane_s32(a, 0, b, 1); + simde_int32x2_t r10 = simde_vcopy_lane_s32(a, 1, b, 0); + simde_int32x2_t r11 = simde_vcopy_lane_s32(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_i32x2(r00, simde_vld1_s32(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_i32x2(r01, simde_vld1_s32(test_vec[i].r01)); + simde_test_arm_neon_assert_equal_i32x2(r10, simde_vld1_s32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i32x2(r11, simde_vld1_s32(test_vec[i].r11)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int64_t b[1]; + int64_t r00[1]; + } test_vec[] = { + { { INT64_C(8113828097391158095) }, + { -INT64_C(590582389174642709) }, + { -INT64_C(590582389174642709) } }, + { { -INT64_C(139101902982008402) }, + { -INT64_C(2324368737080581745) }, + { -INT64_C(2324368737080581745) } }, + { { 
INT64_C(1501602908344634063) }, + { INT64_C(567943954333098302) }, + { INT64_C(567943954333098302) } }, + { { -INT64_C(3453637575015359548) }, + { INT64_C(3441876951233273467) }, + { INT64_C(3441876951233273467) } }, + { { -INT64_C(4915331051121941957) }, + { INT64_C(8920438462121261266) }, + { INT64_C(8920438462121261266) } }, + { { INT64_C(7630574466277382030) }, + { -INT64_C(7035549761693185500) }, + { -INT64_C(7035549761693185500) } }, + { { -INT64_C(8761313260935879654) }, + { -INT64_C(4811158338647994587) }, + { -INT64_C(4811158338647994587) } }, + { { INT64_C(2316745406423380095) }, + { INT64_C(1501791097429643221) }, + { INT64_C(1501791097429643221) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); + + simde_int64x1_t r00 = simde_vcopy_lane_s64(a, 0, b, 0); + + simde_test_arm_neon_assert_equal_i64x1(r00, simde_vld1_s64(test_vec[i].r00)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[8]; + uint8_t b[8]; + uint8_t r00[8]; + uint8_t r13[8]; + uint8_t r35[8]; + uint8_t r67[8]; + uint8_t r71[8]; + } test_vec[] = { + { { UINT8_C( 28), UINT8_C( 131), UINT8_C( 188), UINT8_C( 60), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 8), UINT8_C( 235) }, + { UINT8_C( 28), UINT8_C( 32), UINT8_C( 205), UINT8_C( 168), + UINT8_C( 242), UINT8_C( 204), UINT8_C( 93), UINT8_C( 235) }, + { UINT8_C( 28), UINT8_C( 131), UINT8_C( 188), UINT8_C( 60), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 8), UINT8_C( 235) }, + { UINT8_C( 28), UINT8_C( 168), UINT8_C( 188), UINT8_C( 60), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 8), UINT8_C( 235) }, + { UINT8_C( 28), UINT8_C( 131), UINT8_C( 188), UINT8_C( 204), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 8), UINT8_C( 235) }, + { UINT8_C( 28), UINT8_C( 131), UINT8_C( 188), UINT8_C( 60), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 235), UINT8_C( 
235) }, + { UINT8_C( 28), UINT8_C( 131), UINT8_C( 188), UINT8_C( 60), + UINT8_C( 207), UINT8_C( 214), UINT8_C( 8), UINT8_C( 32) } }, + { { UINT8_C( 101), UINT8_C( 32), UINT8_C( 14), UINT8_C( 224), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 63), UINT8_C( 202) }, + { UINT8_C( 60), UINT8_C( 130), UINT8_C( 95), UINT8_C( 209), + UINT8_C( 229), UINT8_C( 184), UINT8_C( 227), UINT8_C( 12) }, + { UINT8_C( 60), UINT8_C( 32), UINT8_C( 14), UINT8_C( 224), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 63), UINT8_C( 202) }, + { UINT8_C( 101), UINT8_C( 209), UINT8_C( 14), UINT8_C( 224), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 63), UINT8_C( 202) }, + { UINT8_C( 101), UINT8_C( 32), UINT8_C( 14), UINT8_C( 184), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 63), UINT8_C( 202) }, + { UINT8_C( 101), UINT8_C( 32), UINT8_C( 14), UINT8_C( 224), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 12), UINT8_C( 202) }, + { UINT8_C( 101), UINT8_C( 32), UINT8_C( 14), UINT8_C( 224), + UINT8_C( 185), UINT8_C( 161), UINT8_C( 63), UINT8_C( 130) } }, + { { UINT8_C( 172), UINT8_C( 54), UINT8_C( 173), UINT8_C( 209), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 13), UINT8_C( 59) }, + { UINT8_C( 88), UINT8_C( 109), UINT8_C( 203), UINT8_C( 38), + UINT8_C( 153), UINT8_C( 233), UINT8_C( 84), UINT8_C( 56) }, + { UINT8_C( 88), UINT8_C( 54), UINT8_C( 173), UINT8_C( 209), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 13), UINT8_C( 59) }, + { UINT8_C( 172), UINT8_C( 38), UINT8_C( 173), UINT8_C( 209), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 13), UINT8_C( 59) }, + { UINT8_C( 172), UINT8_C( 54), UINT8_C( 173), UINT8_C( 233), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 13), UINT8_C( 59) }, + { UINT8_C( 172), UINT8_C( 54), UINT8_C( 173), UINT8_C( 209), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 56), UINT8_C( 59) }, + { UINT8_C( 172), UINT8_C( 54), UINT8_C( 173), UINT8_C( 209), + UINT8_C( 185), UINT8_C( 49), UINT8_C( 13), UINT8_C( 109) } }, + { { UINT8_C( 128), UINT8_C( 179), UINT8_C( 226), UINT8_C( 249), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 195), 
UINT8_C( 252) }, + { UINT8_C( 137), UINT8_C( 122), UINT8_C( 243), UINT8_C( 28), + UINT8_C( 171), UINT8_C( 202), UINT8_C( 109), UINT8_C( 75) }, + { UINT8_C( 137), UINT8_C( 179), UINT8_C( 226), UINT8_C( 249), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 195), UINT8_C( 252) }, + { UINT8_C( 128), UINT8_C( 28), UINT8_C( 226), UINT8_C( 249), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 195), UINT8_C( 252) }, + { UINT8_C( 128), UINT8_C( 179), UINT8_C( 226), UINT8_C( 202), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 195), UINT8_C( 252) }, + { UINT8_C( 128), UINT8_C( 179), UINT8_C( 226), UINT8_C( 249), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 75), UINT8_C( 252) }, + { UINT8_C( 128), UINT8_C( 179), UINT8_C( 226), UINT8_C( 249), + UINT8_C( 55), UINT8_C( 53), UINT8_C( 195), UINT8_C( 122) } }, + { { UINT8_C( 197), UINT8_C( 148), UINT8_C( 89), UINT8_C( 212), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 254), UINT8_C( 179) }, + { UINT8_C( 30), UINT8_C( 170), UINT8_C( 21), UINT8_C( 164), + UINT8_C( 167), UINT8_C( 25), UINT8_C( 80), UINT8_C( 95) }, + { UINT8_C( 30), UINT8_C( 148), UINT8_C( 89), UINT8_C( 212), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 254), UINT8_C( 179) }, + { UINT8_C( 197), UINT8_C( 164), UINT8_C( 89), UINT8_C( 212), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 254), UINT8_C( 179) }, + { UINT8_C( 197), UINT8_C( 148), UINT8_C( 89), UINT8_C( 25), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 254), UINT8_C( 179) }, + { UINT8_C( 197), UINT8_C( 148), UINT8_C( 89), UINT8_C( 212), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 95), UINT8_C( 179) }, + { UINT8_C( 197), UINT8_C( 148), UINT8_C( 89), UINT8_C( 212), + UINT8_C( 240), UINT8_C( 71), UINT8_C( 254), UINT8_C( 170) } }, + { { UINT8_C( 5), UINT8_C( 43), UINT8_C( 38), UINT8_C( 233), + UINT8_C( 100), UINT8_C( 180), UINT8_C( 31), UINT8_C( 55) }, + { UINT8_C( 70), UINT8_C( 169), UINT8_C( 168), UINT8_C( 163), + UINT8_C( 92), UINT8_C( 80), UINT8_C( 39), UINT8_C( 246) }, + { UINT8_C( 70), UINT8_C( 43), UINT8_C( 38), UINT8_C( 233), + UINT8_C( 100), UINT8_C( 180), 
UINT8_C( 31), UINT8_C( 55) }, + { UINT8_C( 5), UINT8_C( 163), UINT8_C( 38), UINT8_C( 233), + UINT8_C( 100), UINT8_C( 180), UINT8_C( 31), UINT8_C( 55) }, + { UINT8_C( 5), UINT8_C( 43), UINT8_C( 38), UINT8_C( 80), + UINT8_C( 100), UINT8_C( 180), UINT8_C( 31), UINT8_C( 55) }, + { UINT8_C( 5), UINT8_C( 43), UINT8_C( 38), UINT8_C( 233), + UINT8_C( 100), UINT8_C( 180), UINT8_C( 246), UINT8_C( 55) }, + { UINT8_C( 5), UINT8_C( 43), UINT8_C( 38), UINT8_C( 233), + UINT8_C( 100), UINT8_C( 180), UINT8_C( 31), UINT8_C( 169) } }, + { { UINT8_C( 4), UINT8_C( 200), UINT8_C( 242), UINT8_C( 164), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 35), UINT8_C( 159) }, + { UINT8_C( 208), UINT8_C( 92), UINT8_C( 52), UINT8_C( 62), + UINT8_C( 71), UINT8_C( 150), UINT8_C( 218), UINT8_C( 187) }, + { UINT8_C( 208), UINT8_C( 200), UINT8_C( 242), UINT8_C( 164), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 35), UINT8_C( 159) }, + { UINT8_C( 4), UINT8_C( 62), UINT8_C( 242), UINT8_C( 164), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 35), UINT8_C( 159) }, + { UINT8_C( 4), UINT8_C( 200), UINT8_C( 242), UINT8_C( 150), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 35), UINT8_C( 159) }, + { UINT8_C( 4), UINT8_C( 200), UINT8_C( 242), UINT8_C( 164), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 187), UINT8_C( 159) }, + { UINT8_C( 4), UINT8_C( 200), UINT8_C( 242), UINT8_C( 164), + UINT8_C( 145), UINT8_C( 212), UINT8_C( 35), UINT8_C( 92) } }, + { { UINT8_C( 182), UINT8_C( 17), UINT8_C( 219), UINT8_C( 25), + UINT8_C( 214), UINT8_C( 210), UINT8_C( 208), UINT8_C( 22) }, + { UINT8_C( 183), UINT8_C( 91), UINT8_C( 242), UINT8_C( 21), + UINT8_C( 129), UINT8_C( 215), UINT8_C( 52), UINT8_C( 219) }, + { UINT8_C( 183), UINT8_C( 17), UINT8_C( 219), UINT8_C( 25), + UINT8_C( 214), UINT8_C( 210), UINT8_C( 208), UINT8_C( 22) }, + { UINT8_C( 182), UINT8_C( 21), UINT8_C( 219), UINT8_C( 25), + UINT8_C( 214), UINT8_C( 210), UINT8_C( 208), UINT8_C( 22) }, + { UINT8_C( 182), UINT8_C( 17), UINT8_C( 219), UINT8_C( 215), + UINT8_C( 214), UINT8_C( 210), 
UINT8_C( 208), UINT8_C( 22) }, + { UINT8_C( 182), UINT8_C( 17), UINT8_C( 219), UINT8_C( 25), + UINT8_C( 214), UINT8_C( 210), UINT8_C( 219), UINT8_C( 22) }, + { UINT8_C( 182), UINT8_C( 17), UINT8_C( 219), UINT8_C( 25), + UINT8_C( 214), UINT8_C( 210), UINT8_C( 208), UINT8_C( 91) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); + simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); + + simde_uint8x8_t r00 = simde_vcopy_lane_u8(a, 0, b, 0); + simde_uint8x8_t r13 = simde_vcopy_lane_u8(a, 1, b, 3); + simde_uint8x8_t r35 = simde_vcopy_lane_u8(a, 3, b, 5); + simde_uint8x8_t r67 = simde_vcopy_lane_u8(a, 6, b, 7); + simde_uint8x8_t r71 = simde_vcopy_lane_u8(a, 7, b, 1); + + simde_test_arm_neon_assert_equal_u8x8(r00, simde_vld1_u8(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_u8x8(r13, simde_vld1_u8(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u8x8(r35, simde_vld1_u8(test_vec[i].r35)); + simde_test_arm_neon_assert_equal_u8x8(r67, simde_vld1_u8(test_vec[i].r67)); + simde_test_arm_neon_assert_equal_u8x8(r71, simde_vld1_u8(test_vec[i].r71)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t b[4]; + uint16_t r00[4]; + uint16_t r01[4]; + uint16_t r13[4]; + uint16_t r22[4]; + uint16_t r33[4]; + } test_vec[] = { + { { UINT16_C( 48859), UINT16_C( 37110), UINT16_C( 57365), UINT16_C( 56893) }, + { UINT16_C( 44198), UINT16_C( 17208), UINT16_C( 42682), UINT16_C( 14084) }, + { UINT16_C( 44198), UINT16_C( 37110), UINT16_C( 57365), UINT16_C( 56893) }, + { UINT16_C( 17208), UINT16_C( 37110), UINT16_C( 57365), UINT16_C( 56893) }, + { UINT16_C( 48859), UINT16_C( 14084), UINT16_C( 57365), UINT16_C( 56893) }, + { UINT16_C( 48859), UINT16_C( 37110), UINT16_C( 42682), UINT16_C( 56893) }, + { UINT16_C( 48859), UINT16_C( 37110), UINT16_C( 57365), UINT16_C( 14084) } }, + { { UINT16_C( 32518), UINT16_C( 
27430), UINT16_C( 25434), UINT16_C( 15734) }, + { UINT16_C( 17681), UINT16_C( 1758), UINT16_C( 14425), UINT16_C( 18152) }, + { UINT16_C( 17681), UINT16_C( 27430), UINT16_C( 25434), UINT16_C( 15734) }, + { UINT16_C( 1758), UINT16_C( 27430), UINT16_C( 25434), UINT16_C( 15734) }, + { UINT16_C( 32518), UINT16_C( 18152), UINT16_C( 25434), UINT16_C( 15734) }, + { UINT16_C( 32518), UINT16_C( 27430), UINT16_C( 14425), UINT16_C( 15734) }, + { UINT16_C( 32518), UINT16_C( 27430), UINT16_C( 25434), UINT16_C( 18152) } }, + { { UINT16_C( 35029), UINT16_C( 40800), UINT16_C( 57927), UINT16_C( 58796) }, + { UINT16_C( 38243), UINT16_C( 28472), UINT16_C( 61138), UINT16_C( 52923) }, + { UINT16_C( 38243), UINT16_C( 40800), UINT16_C( 57927), UINT16_C( 58796) }, + { UINT16_C( 28472), UINT16_C( 40800), UINT16_C( 57927), UINT16_C( 58796) }, + { UINT16_C( 35029), UINT16_C( 52923), UINT16_C( 57927), UINT16_C( 58796) }, + { UINT16_C( 35029), UINT16_C( 40800), UINT16_C( 61138), UINT16_C( 58796) }, + { UINT16_C( 35029), UINT16_C( 40800), UINT16_C( 57927), UINT16_C( 52923) } }, + { { UINT16_C( 14462), UINT16_C( 31329), UINT16_C( 44021), UINT16_C( 37135) }, + { UINT16_C( 36643), UINT16_C( 8499), UINT16_C( 55329), UINT16_C( 45901) }, + { UINT16_C( 36643), UINT16_C( 31329), UINT16_C( 44021), UINT16_C( 37135) }, + { UINT16_C( 8499), UINT16_C( 31329), UINT16_C( 44021), UINT16_C( 37135) }, + { UINT16_C( 14462), UINT16_C( 45901), UINT16_C( 44021), UINT16_C( 37135) }, + { UINT16_C( 14462), UINT16_C( 31329), UINT16_C( 55329), UINT16_C( 37135) }, + { UINT16_C( 14462), UINT16_C( 31329), UINT16_C( 44021), UINT16_C( 45901) } }, + { { UINT16_C( 18155), UINT16_C( 29451), UINT16_C( 9594), UINT16_C( 47476) }, + { UINT16_C( 36782), UINT16_C( 10647), UINT16_C( 14223), UINT16_C( 3446) }, + { UINT16_C( 36782), UINT16_C( 29451), UINT16_C( 9594), UINT16_C( 47476) }, + { UINT16_C( 10647), UINT16_C( 29451), UINT16_C( 9594), UINT16_C( 47476) }, + { UINT16_C( 18155), UINT16_C( 3446), UINT16_C( 9594), UINT16_C( 47476) }, + 
{ UINT16_C( 18155), UINT16_C( 29451), UINT16_C( 14223), UINT16_C( 47476) }, + { UINT16_C( 18155), UINT16_C( 29451), UINT16_C( 9594), UINT16_C( 3446) } }, + { { UINT16_C( 39055), UINT16_C( 58824), UINT16_C( 16709), UINT16_C( 26292) }, + { UINT16_C( 52894), UINT16_C( 53794), UINT16_C( 27364), UINT16_C( 9410) }, + { UINT16_C( 52894), UINT16_C( 58824), UINT16_C( 16709), UINT16_C( 26292) }, + { UINT16_C( 53794), UINT16_C( 58824), UINT16_C( 16709), UINT16_C( 26292) }, + { UINT16_C( 39055), UINT16_C( 9410), UINT16_C( 16709), UINT16_C( 26292) }, + { UINT16_C( 39055), UINT16_C( 58824), UINT16_C( 27364), UINT16_C( 26292) }, + { UINT16_C( 39055), UINT16_C( 58824), UINT16_C( 16709), UINT16_C( 9410) } }, + { { UINT16_C( 3714), UINT16_C( 45544), UINT16_C( 10782), UINT16_C( 53336) }, + { UINT16_C( 53247), UINT16_C( 29686), UINT16_C( 23527), UINT16_C( 52801) }, + { UINT16_C( 53247), UINT16_C( 45544), UINT16_C( 10782), UINT16_C( 53336) }, + { UINT16_C( 29686), UINT16_C( 45544), UINT16_C( 10782), UINT16_C( 53336) }, + { UINT16_C( 3714), UINT16_C( 52801), UINT16_C( 10782), UINT16_C( 53336) }, + { UINT16_C( 3714), UINT16_C( 45544), UINT16_C( 23527), UINT16_C( 53336) }, + { UINT16_C( 3714), UINT16_C( 45544), UINT16_C( 10782), UINT16_C( 52801) } }, + { { UINT16_C( 956), UINT16_C( 38216), UINT16_C( 64583), UINT16_C( 19844) }, + { UINT16_C( 55529), UINT16_C( 5655), UINT16_C( 39007), UINT16_C( 36038) }, + { UINT16_C( 55529), UINT16_C( 38216), UINT16_C( 64583), UINT16_C( 19844) }, + { UINT16_C( 5655), UINT16_C( 38216), UINT16_C( 64583), UINT16_C( 19844) }, + { UINT16_C( 956), UINT16_C( 36038), UINT16_C( 64583), UINT16_C( 19844) }, + { UINT16_C( 956), UINT16_C( 38216), UINT16_C( 39007), UINT16_C( 19844) }, + { UINT16_C( 956), UINT16_C( 38216), UINT16_C( 64583), UINT16_C( 36038) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); + + simde_uint16x4_t 
r00 = simde_vcopy_lane_u16(a, 0, b, 0); + simde_uint16x4_t r01 = simde_vcopy_lane_u16(a, 0, b, 1); + simde_uint16x4_t r13 = simde_vcopy_lane_u16(a, 1, b, 3); + simde_uint16x4_t r22 = simde_vcopy_lane_u16(a, 2, b, 2); + simde_uint16x4_t r33 = simde_vcopy_lane_u16(a, 3, b, 3); + + simde_test_arm_neon_assert_equal_u16x4(r00, simde_vld1_u16(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_u16x4(r01, simde_vld1_u16(test_vec[i].r01)); + simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x4(r22, simde_vld1_u16(test_vec[i].r22)); + simde_test_arm_neon_assert_equal_u16x4(r33, simde_vld1_u16(test_vec[i].r33)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t b[2]; + uint32_t r00[2]; + uint32_t r01[2]; + uint32_t r10[2]; + uint32_t r11[2]; + } test_vec[] = { + { { UINT32_C( 1845930776), UINT32_C( 3844134345) }, + { UINT32_C( 1849881073), UINT32_C( 3913321945) }, + { UINT32_C( 1849881073), UINT32_C( 3844134345) }, + { UINT32_C( 3913321945), UINT32_C( 3844134345) }, + { UINT32_C( 1845930776), UINT32_C( 1849881073) }, + { UINT32_C( 1845930776), UINT32_C( 3913321945) } }, + { { UINT32_C( 2795833089), UINT32_C( 3073381716) }, + { UINT32_C( 3744189582), UINT32_C( 2223479106) }, + { UINT32_C( 3744189582), UINT32_C( 3073381716) }, + { UINT32_C( 2223479106), UINT32_C( 3073381716) }, + { UINT32_C( 2795833089), UINT32_C( 3744189582) }, + { UINT32_C( 2795833089), UINT32_C( 2223479106) } }, + { { UINT32_C( 850448814), UINT32_C( 3018325402) }, + { UINT32_C( 4276357940), UINT32_C( 1105496326) }, + { UINT32_C( 4276357940), UINT32_C( 3018325402) }, + { UINT32_C( 1105496326), UINT32_C( 3018325402) }, + { UINT32_C( 850448814), UINT32_C( 4276357940) }, + { UINT32_C( 850448814), UINT32_C( 1105496326) } }, + { { UINT32_C( 316065699), UINT32_C( 3151513231) }, + { UINT32_C( 361459012), UINT32_C( 3957792290) }, + { UINT32_C( 361459012), 
UINT32_C( 3151513231) }, + { UINT32_C( 3957792290), UINT32_C( 3151513231) }, + { UINT32_C( 316065699), UINT32_C( 361459012) }, + { UINT32_C( 316065699), UINT32_C( 3957792290) } }, + { { UINT32_C( 4188653768), UINT32_C( 4220115240) }, + { UINT32_C( 923671752), UINT32_C( 2829963684) }, + { UINT32_C( 923671752), UINT32_C( 4220115240) }, + { UINT32_C( 2829963684), UINT32_C( 4220115240) }, + { UINT32_C( 4188653768), UINT32_C( 923671752) }, + { UINT32_C( 4188653768), UINT32_C( 2829963684) } }, + { { UINT32_C( 2782303633), UINT32_C( 2680224312) }, + { UINT32_C( 2278952676), UINT32_C( 3042603857) }, + { UINT32_C( 2278952676), UINT32_C( 2680224312) }, + { UINT32_C( 3042603857), UINT32_C( 2680224312) }, + { UINT32_C( 2782303633), UINT32_C( 2278952676) }, + { UINT32_C( 2782303633), UINT32_C( 3042603857) } }, + { { UINT32_C( 2497286721), UINT32_C( 2408904758) }, + { UINT32_C( 1300967812), UINT32_C( 2801168690) }, + { UINT32_C( 1300967812), UINT32_C( 2408904758) }, + { UINT32_C( 2801168690), UINT32_C( 2408904758) }, + { UINT32_C( 2497286721), UINT32_C( 1300967812) }, + { UINT32_C( 2497286721), UINT32_C( 2801168690) } }, + { { UINT32_C( 3765890668), UINT32_C( 3157704832) }, + { UINT32_C( 3135164014), UINT32_C( 1487743293) }, + { UINT32_C( 3135164014), UINT32_C( 3157704832) }, + { UINT32_C( 1487743293), UINT32_C( 3157704832) }, + { UINT32_C( 3765890668), UINT32_C( 3135164014) }, + { UINT32_C( 3765890668), UINT32_C( 1487743293) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); + + simde_uint32x2_t r00 = simde_vcopy_lane_u32(a, 0, b, 0); + simde_uint32x2_t r01 = simde_vcopy_lane_u32(a, 0, b, 1); + simde_uint32x2_t r10 = simde_vcopy_lane_u32(a, 1, b, 0); + simde_uint32x2_t r11 = simde_vcopy_lane_u32(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_u32x2(r00, simde_vld1_u32(test_vec[i].r00)); + simde_test_arm_neon_assert_equal_u32x2(r01, 
simde_vld1_u32(test_vec[i].r01)); + simde_test_arm_neon_assert_equal_u32x2(r10, simde_vld1_u32(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u32x2(r11, simde_vld1_u32(test_vec[i].r11)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[1]; + uint64_t b[1]; + uint64_t r00[1]; + } test_vec[] = { + { { UINT64_C( 3055880545951251893) }, + { UINT64_C( 4769400864024572261) }, + { UINT64_C( 4769400864024572261) } }, + { { UINT64_C(10022377050862213090) }, + { UINT64_C( 709100166864470872) }, + { UINT64_C( 709100166864470872) } }, + { { UINT64_C(14501613329003973833) }, + { UINT64_C( 1949783300048066942) }, + { UINT64_C( 1949783300048066942) } }, + { { UINT64_C( 4269142848878481025) }, + { UINT64_C(13395296486420650805) }, + { UINT64_C(13395296486420650805) } }, + { { UINT64_C(16135277955010204355) }, + { UINT64_C( 8074912141013526827) }, + { UINT64_C( 8074912141013526827) } }, + { { UINT64_C( 2540591871860474721) }, + { UINT64_C( 3018004574689373093) }, + { UINT64_C( 3018004574689373093) } }, + { { UINT64_C( 6078212644320502818) }, + { UINT64_C( 5640613611397537144) }, + { UINT64_C( 5640613611397537144) } }, + { { UINT64_C(17738531384755593185) }, + { UINT64_C(12574375413058459129) }, + { UINT64_C(12574375413058459129) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); + + simde_uint64x1_t r00 = simde_vcopy_lane_u64(a, 0, b, 0); + + simde_test_arm_neon_assert_equal_u64x1(r00, simde_vld1_u64(test_vec[i].r00)); + } + + return 0; +} + +static int +test_simde_vcopy_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[2]; + simde_float32_t b[2]; + simde_float32_t r00[2]; + simde_float32_t r01[2]; + simde_float32_t r10[2]; + simde_float32_t r11[2]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C(7872.434), -SIMDE_FLOAT32_C(5331.952) 
}, + { -SIMDE_FLOAT32_C( 676.709), SIMDE_FLOAT32_C(3911.896) }, + { -SIMDE_FLOAT32_C( 676.709), -SIMDE_FLOAT32_C(5331.952) }, + { SIMDE_FLOAT32_C(3911.896), -SIMDE_FLOAT32_C(5331.952) }, + { -SIMDE_FLOAT32_C(7872.434), -SIMDE_FLOAT32_C( 676.709) }, + { -SIMDE_FLOAT32_C(7872.434), SIMDE_FLOAT32_C(3911.896) } }, + { { -SIMDE_FLOAT32_C(7795.166), SIMDE_FLOAT32_C(5870.333) }, + { SIMDE_FLOAT32_C(7542.590), SIMDE_FLOAT32_C(9623.578) }, + { SIMDE_FLOAT32_C(7542.590), SIMDE_FLOAT32_C(5870.333) }, + { SIMDE_FLOAT32_C(9623.578), SIMDE_FLOAT32_C(5870.333) }, + { -SIMDE_FLOAT32_C(7795.166), SIMDE_FLOAT32_C(7542.590) }, + { -SIMDE_FLOAT32_C(7795.166), SIMDE_FLOAT32_C(9623.578) } }, + { { SIMDE_FLOAT32_C(9280.762), -SIMDE_FLOAT32_C(9574.360) }, + { SIMDE_FLOAT32_C(1067.386), SIMDE_FLOAT32_C(4955.505) }, + { SIMDE_FLOAT32_C(1067.386), -SIMDE_FLOAT32_C(9574.360) }, + { SIMDE_FLOAT32_C(4955.505), -SIMDE_FLOAT32_C(9574.360) }, + { SIMDE_FLOAT32_C(9280.762), SIMDE_FLOAT32_C(1067.386) }, + { SIMDE_FLOAT32_C(9280.762), SIMDE_FLOAT32_C(4955.505) } }, + { { -SIMDE_FLOAT32_C(5985.849), SIMDE_FLOAT32_C(4135.134) }, + { -SIMDE_FLOAT32_C(1607.159), -SIMDE_FLOAT32_C(7509.242) }, + { -SIMDE_FLOAT32_C(1607.159), SIMDE_FLOAT32_C(4135.134) }, + { -SIMDE_FLOAT32_C(7509.242), SIMDE_FLOAT32_C(4135.134) }, + { -SIMDE_FLOAT32_C(5985.849), -SIMDE_FLOAT32_C(1607.159) }, + { -SIMDE_FLOAT32_C(5985.849), -SIMDE_FLOAT32_C(7509.242) } }, + { { -SIMDE_FLOAT32_C(4353.862), SIMDE_FLOAT32_C(3343.175) }, + { SIMDE_FLOAT32_C(6301.056), SIMDE_FLOAT32_C(6250.665) }, + { SIMDE_FLOAT32_C(6301.056), SIMDE_FLOAT32_C(3343.175) }, + { SIMDE_FLOAT32_C(6250.665), SIMDE_FLOAT32_C(3343.175) }, + { -SIMDE_FLOAT32_C(4353.862), SIMDE_FLOAT32_C(6301.056) }, + { -SIMDE_FLOAT32_C(4353.862), SIMDE_FLOAT32_C(6250.665) } }, + { { -SIMDE_FLOAT32_C(2707.744), -SIMDE_FLOAT32_C(8746.154) }, + { -SIMDE_FLOAT32_C(4845.834), -SIMDE_FLOAT32_C(5771.376) }, + { -SIMDE_FLOAT32_C(4845.834), -SIMDE_FLOAT32_C(8746.154) }, + { 
-SIMDE_FLOAT32_C(5771.376), -SIMDE_FLOAT32_C(8746.154) }, + { -SIMDE_FLOAT32_C(2707.744), -SIMDE_FLOAT32_C(4845.834) }, + { -SIMDE_FLOAT32_C(2707.744), -SIMDE_FLOAT32_C(5771.376) } }, + { { -SIMDE_FLOAT32_C(5351.658), SIMDE_FLOAT32_C(4436.026) }, + { SIMDE_FLOAT32_C( 568.685), SIMDE_FLOAT32_C(2344.722) }, + { SIMDE_FLOAT32_C( 568.685), SIMDE_FLOAT32_C(4436.026) }, + { SIMDE_FLOAT32_C(2344.722), SIMDE_FLOAT32_C(4436.026) }, + { -SIMDE_FLOAT32_C(5351.658), SIMDE_FLOAT32_C( 568.685) }, + { -SIMDE_FLOAT32_C(5351.658), SIMDE_FLOAT32_C(2344.722) } }, + { { -SIMDE_FLOAT32_C(4624.979), -SIMDE_FLOAT32_C(3015.579) }, + { SIMDE_FLOAT32_C(7762.184), SIMDE_FLOAT32_C(3041.834) }, + { SIMDE_FLOAT32_C(7762.184), -SIMDE_FLOAT32_C(3015.579) }, + { SIMDE_FLOAT32_C(3041.834), -SIMDE_FLOAT32_C(3015.579) }, + { -SIMDE_FLOAT32_C(4624.979), SIMDE_FLOAT32_C(7762.184) }, + { -SIMDE_FLOAT32_C(4624.979), SIMDE_FLOAT32_C(3041.834) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + + simde_float32x2_t r00 = simde_vcopy_lane_f32(a, 0, b, 0); + simde_float32x2_t r01 = simde_vcopy_lane_f32(a, 0, b, 1); + simde_float32x2_t r10 = simde_vcopy_lane_f32(a, 1, b, 0); + simde_float32x2_t r11 = simde_vcopy_lane_f32(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_f32x2(r00, simde_vld1_f32(test_vec[i].r00), INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r01, simde_vld1_f32(test_vec[i].r01), INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r10, simde_vld1_f32(test_vec[i].r10), INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r11, simde_vld1_f32(test_vec[i].r11), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopy_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[1]; + simde_float64_t b[1]; + simde_float64_t r00[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 75436.250) }, + { -SIMDE_FLOAT64_C( 308233.375) }, + { 
-SIMDE_FLOAT64_C( 308233.375) } }, + { { SIMDE_FLOAT64_C( 679840.750) }, + { -SIMDE_FLOAT64_C( 665586.875) }, + { -SIMDE_FLOAT64_C( 665586.875) } }, + { { SIMDE_FLOAT64_C( 211015.500) }, + { -SIMDE_FLOAT64_C( 944742.812) }, + { -SIMDE_FLOAT64_C( 944742.812) } }, + { { -SIMDE_FLOAT64_C( 696791.812) }, + { SIMDE_FLOAT64_C( 948299.000) }, + { SIMDE_FLOAT64_C( 948299.000) } }, + { { -SIMDE_FLOAT64_C( 100363.250) }, + { SIMDE_FLOAT64_C( 741415.125) }, + { SIMDE_FLOAT64_C( 741415.125) } }, + { { SIMDE_FLOAT64_C( 832180.750) }, + { SIMDE_FLOAT64_C( 235085.625) }, + { SIMDE_FLOAT64_C( 235085.625) } }, + { { -SIMDE_FLOAT64_C( 800552.562) }, + { -SIMDE_FLOAT64_C( 864425.500) }, + { -SIMDE_FLOAT64_C( 864425.500) } }, + { { SIMDE_FLOAT64_C( 434821.625) }, + { SIMDE_FLOAT64_C( 765610.500) }, + { SIMDE_FLOAT64_C( 765610.500) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + + simde_float64x1_t r00 = simde_vcopy_lane_f64(a, 0, b, 0); + + simde_test_arm_neon_assert_equal_f64x1(r00, simde_vld1_f64(test_vec[i].r00), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[8]; + int8_t b[16]; + int8_t r0_0[8]; + int8_t r1_3[8]; + int8_t r3_8[8]; + int8_t r6_12[8]; + int8_t r7_15[8]; + } test_vec[] = { + { { -INT8_C( 35), INT8_C( 102), -INT8_C( 10), -INT8_C( 58), + INT8_C( 85), INT8_C( 88), INT8_C( 9), -INT8_C( 69) }, + { -INT8_C( 55), -INT8_C( 1), -INT8_C( 62), INT8_C( 105), + INT8_C( 72), -INT8_C( 82), -INT8_C( 33), INT8_C( 53), + INT8_C( 74), -INT8_C( 56), INT8_C( 88), INT8_C( 16), + -INT8_C( 32), INT8_C( 35), INT8_C( 39), -INT8_C( 2) }, + { -INT8_C( 55), INT8_C( 102), -INT8_C( 10), -INT8_C( 58), + INT8_C( 85), INT8_C( 88), INT8_C( 9), -INT8_C( 69) }, + { -INT8_C( 35), INT8_C( 105), -INT8_C( 10), -INT8_C( 58), + INT8_C( 85), INT8_C( 88), INT8_C( 9), -INT8_C( 
69) }, + { -INT8_C( 35), INT8_C( 102), -INT8_C( 10), INT8_C( 74), + INT8_C( 85), INT8_C( 88), INT8_C( 9), -INT8_C( 69) }, + { -INT8_C( 35), INT8_C( 102), -INT8_C( 10), -INT8_C( 58), + INT8_C( 85), INT8_C( 88), -INT8_C( 32), -INT8_C( 69) }, + { -INT8_C( 35), INT8_C( 102), -INT8_C( 10), -INT8_C( 58), + INT8_C( 85), INT8_C( 88), INT8_C( 9), -INT8_C( 2) } }, + { { -INT8_C( 65), -INT8_C( 111), INT8_C( 3), -INT8_C( 57), + -INT8_C( 57), INT8_MIN, -INT8_C( 116), -INT8_C( 29) }, + { -INT8_C( 115), -INT8_C( 86), -INT8_C( 27), INT8_C( 110), + INT8_C( 88), INT8_C( 80), INT8_C( 102), -INT8_C( 9), + -INT8_C( 78), -INT8_C( 88), -INT8_C( 17), -INT8_C( 111), + INT8_C( 124), -INT8_C( 6), INT8_C( 54), INT8_C( 36) }, + { -INT8_C( 115), -INT8_C( 111), INT8_C( 3), -INT8_C( 57), + -INT8_C( 57), INT8_MIN, -INT8_C( 116), -INT8_C( 29) }, + { -INT8_C( 65), INT8_C( 110), INT8_C( 3), -INT8_C( 57), + -INT8_C( 57), INT8_MIN, -INT8_C( 116), -INT8_C( 29) }, + { -INT8_C( 65), -INT8_C( 111), INT8_C( 3), -INT8_C( 78), + -INT8_C( 57), INT8_MIN, -INT8_C( 116), -INT8_C( 29) }, + { -INT8_C( 65), -INT8_C( 111), INT8_C( 3), -INT8_C( 57), + -INT8_C( 57), INT8_MIN, INT8_C( 124), -INT8_C( 29) }, + { -INT8_C( 65), -INT8_C( 111), INT8_C( 3), -INT8_C( 57), + -INT8_C( 57), INT8_MIN, -INT8_C( 116), INT8_C( 36) } }, + { { -INT8_C( 1), -INT8_C( 3), -INT8_C( 46), -INT8_C( 19), + -INT8_C( 58), INT8_C( 65), -INT8_C( 22), -INT8_C( 118) }, + { -INT8_C( 30), INT8_C( 120), INT8_C( 48), -INT8_C( 112), + -INT8_C( 117), -INT8_C( 67), INT8_C( 61), INT8_C( 19), + INT8_C( 99), INT8_C( 114), INT8_C( 100), -INT8_C( 50), + INT8_C( 110), INT8_C( 89), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 30), -INT8_C( 3), -INT8_C( 46), -INT8_C( 19), + -INT8_C( 58), INT8_C( 65), -INT8_C( 22), -INT8_C( 118) }, + { -INT8_C( 1), -INT8_C( 112), -INT8_C( 46), -INT8_C( 19), + -INT8_C( 58), INT8_C( 65), -INT8_C( 22), -INT8_C( 118) }, + { -INT8_C( 1), -INT8_C( 3), -INT8_C( 46), INT8_C( 99), + -INT8_C( 58), INT8_C( 65), -INT8_C( 22), -INT8_C( 118) }, + { 
-INT8_C( 1), -INT8_C( 3), -INT8_C( 46), -INT8_C( 19), + -INT8_C( 58), INT8_C( 65), INT8_C( 110), -INT8_C( 118) }, + { -INT8_C( 1), -INT8_C( 3), -INT8_C( 46), -INT8_C( 19), + -INT8_C( 58), INT8_C( 65), -INT8_C( 22), -INT8_C( 65) } }, + { { INT8_C( 19), INT8_C( 70), INT8_C( 77), INT8_C( 8), + INT8_C( 50), -INT8_C( 109), INT8_C( 80), INT8_C( 45) }, + { INT8_C( 118), -INT8_C( 88), INT8_C( 104), INT8_C( 19), + -INT8_C( 118), -INT8_C( 113), -INT8_C( 59), INT8_C( 8), + -INT8_C( 64), -INT8_C( 37), INT8_C( 34), -INT8_C( 33), + -INT8_C( 123), -INT8_C( 39), INT8_C( 7), INT8_C( 35) }, + { INT8_C( 118), INT8_C( 70), INT8_C( 77), INT8_C( 8), + INT8_C( 50), -INT8_C( 109), INT8_C( 80), INT8_C( 45) }, + { INT8_C( 19), INT8_C( 19), INT8_C( 77), INT8_C( 8), + INT8_C( 50), -INT8_C( 109), INT8_C( 80), INT8_C( 45) }, + { INT8_C( 19), INT8_C( 70), INT8_C( 77), -INT8_C( 64), + INT8_C( 50), -INT8_C( 109), INT8_C( 80), INT8_C( 45) }, + { INT8_C( 19), INT8_C( 70), INT8_C( 77), INT8_C( 8), + INT8_C( 50), -INT8_C( 109), -INT8_C( 123), INT8_C( 45) }, + { INT8_C( 19), INT8_C( 70), INT8_C( 77), INT8_C( 8), + INT8_C( 50), -INT8_C( 109), INT8_C( 80), INT8_C( 35) } }, + { { -INT8_C( 20), INT8_C( 35), -INT8_C( 98), INT8_C( 102), + INT8_C( 105), -INT8_C( 62), -INT8_C( 46), INT8_C( 1) }, + { -INT8_C( 49), -INT8_C( 82), INT8_C( 116), INT8_C( 35), + -INT8_C( 40), -INT8_C( 127), -INT8_C( 23), -INT8_C( 22), + INT8_C( 94), -INT8_C( 63), -INT8_C( 77), INT8_C( 101), + INT8_C( 123), -INT8_C( 24), -INT8_C( 51), INT8_C( 56) }, + { -INT8_C( 49), INT8_C( 35), -INT8_C( 98), INT8_C( 102), + INT8_C( 105), -INT8_C( 62), -INT8_C( 46), INT8_C( 1) }, + { -INT8_C( 20), INT8_C( 35), -INT8_C( 98), INT8_C( 102), + INT8_C( 105), -INT8_C( 62), -INT8_C( 46), INT8_C( 1) }, + { -INT8_C( 20), INT8_C( 35), -INT8_C( 98), INT8_C( 94), + INT8_C( 105), -INT8_C( 62), -INT8_C( 46), INT8_C( 1) }, + { -INT8_C( 20), INT8_C( 35), -INT8_C( 98), INT8_C( 102), + INT8_C( 105), -INT8_C( 62), INT8_C( 123), INT8_C( 1) }, + { -INT8_C( 20), INT8_C( 
35), -INT8_C( 98), INT8_C( 102), + INT8_C( 105), -INT8_C( 62), -INT8_C( 46), INT8_C( 56) } }, + { { -INT8_C( 83), -INT8_C( 91), -INT8_C( 122), -INT8_C( 55), + INT8_C( 71), -INT8_C( 34), INT8_C( 2), -INT8_C( 43) }, + { INT8_C( 36), INT8_C( 74), INT8_C( 23), INT8_C( 4), + INT8_C( 54), -INT8_C( 8), -INT8_C( 68), -INT8_C( 46), + INT8_C( 110), INT8_C( 0), INT8_C( 46), -INT8_C( 123), + INT8_C( 54), INT8_C( 39), INT8_C( 121), -INT8_C( 55) }, + { INT8_C( 36), -INT8_C( 91), -INT8_C( 122), -INT8_C( 55), + INT8_C( 71), -INT8_C( 34), INT8_C( 2), -INT8_C( 43) }, + { -INT8_C( 83), INT8_C( 4), -INT8_C( 122), -INT8_C( 55), + INT8_C( 71), -INT8_C( 34), INT8_C( 2), -INT8_C( 43) }, + { -INT8_C( 83), -INT8_C( 91), -INT8_C( 122), INT8_C( 110), + INT8_C( 71), -INT8_C( 34), INT8_C( 2), -INT8_C( 43) }, + { -INT8_C( 83), -INT8_C( 91), -INT8_C( 122), -INT8_C( 55), + INT8_C( 71), -INT8_C( 34), INT8_C( 54), -INT8_C( 43) }, + { -INT8_C( 83), -INT8_C( 91), -INT8_C( 122), -INT8_C( 55), + INT8_C( 71), -INT8_C( 34), INT8_C( 2), -INT8_C( 55) } }, + { { -INT8_C( 48), -INT8_C( 42), -INT8_C( 114), -INT8_C( 127), + INT8_C( 2), -INT8_C( 91), -INT8_C( 1), INT8_C( 31) }, + { INT8_C( 72), -INT8_C( 60), INT8_C( 68), INT8_C( 40), + INT8_C( 103), INT8_C( 92), -INT8_C( 104), -INT8_C( 105), + INT8_C( 5), -INT8_C( 10), INT8_C( 101), INT8_C( 15), + -INT8_C( 100), INT8_C( 101), -INT8_C( 12), -INT8_C( 31) }, + { INT8_C( 72), -INT8_C( 42), -INT8_C( 114), -INT8_C( 127), + INT8_C( 2), -INT8_C( 91), -INT8_C( 1), INT8_C( 31) }, + { -INT8_C( 48), INT8_C( 40), -INT8_C( 114), -INT8_C( 127), + INT8_C( 2), -INT8_C( 91), -INT8_C( 1), INT8_C( 31) }, + { -INT8_C( 48), -INT8_C( 42), -INT8_C( 114), INT8_C( 5), + INT8_C( 2), -INT8_C( 91), -INT8_C( 1), INT8_C( 31) }, + { -INT8_C( 48), -INT8_C( 42), -INT8_C( 114), -INT8_C( 127), + INT8_C( 2), -INT8_C( 91), -INT8_C( 100), INT8_C( 31) }, + { -INT8_C( 48), -INT8_C( 42), -INT8_C( 114), -INT8_C( 127), + INT8_C( 2), -INT8_C( 91), -INT8_C( 1), -INT8_C( 31) } }, + { { -INT8_C( 117), 
-INT8_C( 59), -INT8_C( 110), -INT8_C( 122), + INT8_C( 97), -INT8_C( 17), INT8_C( 15), INT8_C( 106) }, + { INT8_C( 81), -INT8_C( 59), INT8_C( 121), -INT8_C( 42), + INT8_C( 97), -INT8_C( 50), -INT8_C( 102), -INT8_C( 32), + INT8_C( 49), -INT8_C( 103), -INT8_C( 84), INT8_C( 2), + -INT8_C( 33), INT8_MIN, -INT8_C( 60), INT8_C( 81) }, + { INT8_C( 81), -INT8_C( 59), -INT8_C( 110), -INT8_C( 122), + INT8_C( 97), -INT8_C( 17), INT8_C( 15), INT8_C( 106) }, + { -INT8_C( 117), -INT8_C( 42), -INT8_C( 110), -INT8_C( 122), + INT8_C( 97), -INT8_C( 17), INT8_C( 15), INT8_C( 106) }, + { -INT8_C( 117), -INT8_C( 59), -INT8_C( 110), INT8_C( 49), + INT8_C( 97), -INT8_C( 17), INT8_C( 15), INT8_C( 106) }, + { -INT8_C( 117), -INT8_C( 59), -INT8_C( 110), -INT8_C( 122), + INT8_C( 97), -INT8_C( 17), -INT8_C( 33), INT8_C( 106) }, + { -INT8_C( 117), -INT8_C( 59), -INT8_C( 110), -INT8_C( 122), + INT8_C( 97), -INT8_C( 17), INT8_C( 15), INT8_C( 81) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + + simde_int8x8_t r0_0 = simde_vcopy_laneq_s8(a, 0, b, 0); + simde_int8x8_t r1_3 = simde_vcopy_laneq_s8(a, 1, b, 3); + simde_int8x8_t r3_8 = simde_vcopy_laneq_s8(a, 3, b, 8); + simde_int8x8_t r6_12 = simde_vcopy_laneq_s8(a, 6, b, 12); + simde_int8x8_t r7_15 = simde_vcopy_laneq_s8(a, 7, b, 15); + + simde_test_arm_neon_assert_equal_i8x8(r0_0, simde_vld1_s8(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i8x8(r1_3, simde_vld1_s8(test_vec[i].r1_3)); + simde_test_arm_neon_assert_equal_i8x8(r3_8, simde_vld1_s8(test_vec[i].r3_8)); + simde_test_arm_neon_assert_equal_i8x8(r6_12, simde_vld1_s8(test_vec[i].r6_12)); + simde_test_arm_neon_assert_equal_i8x8(r7_15, simde_vld1_s8(test_vec[i].r7_15)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t b[8]; + int16_t r0_0[4]; + int16_t 
r1_3[4]; + int16_t r2_5[4]; + int16_t r3_7[4]; + } test_vec[] = { + { { INT16_C( 11568), INT16_C( 1189), -INT16_C( 31827), -INT16_C( 15215) }, + { -INT16_C( 7146), -INT16_C( 9374), -INT16_C( 14179), INT16_C( 15036), + -INT16_C( 20968), INT16_C( 10399), -INT16_C( 25296), -INT16_C( 27806) }, + { -INT16_C( 7146), INT16_C( 1189), -INT16_C( 31827), -INT16_C( 15215) }, + { INT16_C( 11568), INT16_C( 15036), -INT16_C( 31827), -INT16_C( 15215) }, + { INT16_C( 11568), INT16_C( 1189), INT16_C( 10399), -INT16_C( 15215) }, + { INT16_C( 11568), INT16_C( 1189), -INT16_C( 31827), -INT16_C( 27806) } }, + { { -INT16_C( 29466), INT16_C( 12540), -INT16_C( 32471), INT16_C( 25566) }, + { INT16_C( 3723), INT16_C( 12785), -INT16_C( 31701), -INT16_C( 20907), + -INT16_C( 15423), INT16_C( 7301), INT16_C( 23738), INT16_C( 27813) }, + { INT16_C( 3723), INT16_C( 12540), -INT16_C( 32471), INT16_C( 25566) }, + { -INT16_C( 29466), -INT16_C( 20907), -INT16_C( 32471), INT16_C( 25566) }, + { -INT16_C( 29466), INT16_C( 12540), INT16_C( 7301), INT16_C( 25566) }, + { -INT16_C( 29466), INT16_C( 12540), -INT16_C( 32471), INT16_C( 27813) } }, + { { -INT16_C( 26183), -INT16_C( 5597), -INT16_C( 2230), -INT16_C( 10015) }, + { INT16_C( 26576), -INT16_C( 8824), -INT16_C( 28847), INT16_C( 4173), + INT16_C( 25260), INT16_C( 12404), INT16_C( 31395), INT16_C( 21002) }, + { INT16_C( 26576), -INT16_C( 5597), -INT16_C( 2230), -INT16_C( 10015) }, + { -INT16_C( 26183), INT16_C( 4173), -INT16_C( 2230), -INT16_C( 10015) }, + { -INT16_C( 26183), -INT16_C( 5597), INT16_C( 12404), -INT16_C( 10015) }, + { -INT16_C( 26183), -INT16_C( 5597), -INT16_C( 2230), INT16_C( 21002) } }, + { { INT16_C( 6672), INT16_C( 15408), -INT16_C( 2985), -INT16_C( 17248) }, + { INT16_C( 7532), -INT16_C( 4273), INT16_C( 31970), -INT16_C( 3753), + INT16_C( 17390), INT16_C( 14312), INT16_C( 5983), -INT16_C( 19903) }, + { INT16_C( 7532), INT16_C( 15408), -INT16_C( 2985), -INT16_C( 17248) }, + { INT16_C( 6672), -INT16_C( 3753), -INT16_C( 2985), 
-INT16_C( 17248) }, + { INT16_C( 6672), INT16_C( 15408), INT16_C( 14312), -INT16_C( 17248) }, + { INT16_C( 6672), INT16_C( 15408), -INT16_C( 2985), -INT16_C( 19903) } }, + { { INT16_C( 10634), INT16_C( 13785), -INT16_C( 21124), INT16_C( 18819) }, + { -INT16_C( 23606), -INT16_C( 23575), INT16_C( 23736), -INT16_C( 14515), + -INT16_C( 28372), -INT16_C( 21675), -INT16_C( 29450), -INT16_C( 8419) }, + { -INT16_C( 23606), INT16_C( 13785), -INT16_C( 21124), INT16_C( 18819) }, + { INT16_C( 10634), -INT16_C( 14515), -INT16_C( 21124), INT16_C( 18819) }, + { INT16_C( 10634), INT16_C( 13785), -INT16_C( 21675), INT16_C( 18819) }, + { INT16_C( 10634), INT16_C( 13785), -INT16_C( 21124), -INT16_C( 8419) } }, + { { -INT16_C( 2153), INT16_C( 11458), -INT16_C( 5624), -INT16_C( 18131) }, + { -INT16_C( 25804), -INT16_C( 24558), -INT16_C( 10190), INT16_C( 16869), + INT16_C( 21833), INT16_C( 10951), -INT16_C( 4960), -INT16_C( 24993) }, + { -INT16_C( 25804), INT16_C( 11458), -INT16_C( 5624), -INT16_C( 18131) }, + { -INT16_C( 2153), INT16_C( 16869), -INT16_C( 5624), -INT16_C( 18131) }, + { -INT16_C( 2153), INT16_C( 11458), INT16_C( 10951), -INT16_C( 18131) }, + { -INT16_C( 2153), INT16_C( 11458), -INT16_C( 5624), -INT16_C( 24993) } }, + { { -INT16_C( 12465), INT16_C( 11890), INT16_C( 12124), INT16_C( 10524) }, + { INT16_C( 29032), -INT16_C( 41), INT16_C( 12620), INT16_C( 26779), + INT16_C( 10646), INT16_C( 12590), -INT16_C( 29637), -INT16_C( 18553) }, + { INT16_C( 29032), INT16_C( 11890), INT16_C( 12124), INT16_C( 10524) }, + { -INT16_C( 12465), INT16_C( 26779), INT16_C( 12124), INT16_C( 10524) }, + { -INT16_C( 12465), INT16_C( 11890), INT16_C( 12590), INT16_C( 10524) }, + { -INT16_C( 12465), INT16_C( 11890), INT16_C( 12124), -INT16_C( 18553) } }, + { { -INT16_C( 6289), INT16_C( 21955), -INT16_C( 10116), INT16_C( 9614) }, + { INT16_C( 25780), -INT16_C( 2709), -INT16_C( 29858), INT16_C( 17932), + INT16_C( 27686), -INT16_C( 30566), -INT16_C( 1086), -INT16_C( 10968) }, + { INT16_C( 25780), 
INT16_C( 21955), -INT16_C( 10116), INT16_C( 9614) }, + { -INT16_C( 6289), INT16_C( 17932), -INT16_C( 10116), INT16_C( 9614) }, + { -INT16_C( 6289), INT16_C( 21955), -INT16_C( 30566), INT16_C( 9614) }, + { -INT16_C( 6289), INT16_C( 21955), -INT16_C( 10116), -INT16_C( 10968) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + + simde_int16x4_t r0_0 = simde_vcopy_laneq_s16(a, 0, b, 0); + simde_int16x4_t r1_3 = simde_vcopy_laneq_s16(a, 1, b, 3); + simde_int16x4_t r2_5 = simde_vcopy_laneq_s16(a, 2, b, 5); + simde_int16x4_t r3_7 = simde_vcopy_laneq_s16(a, 3, b, 7); + + simde_test_arm_neon_assert_equal_i16x4(r0_0, simde_vld1_s16(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i16x4(r1_3, simde_vld1_s16(test_vec[i].r1_3)); + simde_test_arm_neon_assert_equal_i16x4(r2_5, simde_vld1_s16(test_vec[i].r2_5)); + simde_test_arm_neon_assert_equal_i16x4(r3_7, simde_vld1_s16(test_vec[i].r3_7)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t b[4]; + int32_t r0_0[2]; + int32_t r0_2[2]; + int32_t r1_1[2]; + int32_t r1_3[2]; + } test_vec[] = { + { { INT32_C( 1084595629), -INT32_C( 1950361537) }, + { INT32_C( 1443362392), INT32_C( 1640750526), INT32_C( 895649900), INT32_C( 1359771437) }, + { INT32_C( 1443362392), -INT32_C( 1950361537) }, + { INT32_C( 895649900), -INT32_C( 1950361537) }, + { INT32_C( 1084595629), INT32_C( 1640750526) }, + { INT32_C( 1084595629), INT32_C( 1359771437) } }, + { { -INT32_C( 1748970842), INT32_C( 2053463719) }, + { INT32_C( 19379512), -INT32_C( 1861133592), -INT32_C( 529529470), INT32_C( 388681637) }, + { INT32_C( 19379512), INT32_C( 2053463719) }, + { -INT32_C( 529529470), INT32_C( 2053463719) }, + { -INT32_C( 1748970842), -INT32_C( 1861133592) }, + { -INT32_C( 1748970842), INT32_C( 388681637) } }, + { { INT32_C( 
1476418479), -INT32_C( 1924452234) }, + { INT32_C( 2070023191), -INT32_C( 53868389), -INT32_C( 266392323), -INT32_C( 474980198) }, + { INT32_C( 2070023191), -INT32_C( 1924452234) }, + { -INT32_C( 266392323), -INT32_C( 1924452234) }, + { INT32_C( 1476418479), -INT32_C( 53868389) }, + { INT32_C( 1476418479), -INT32_C( 474980198) } }, + { { INT32_C( 629708170), -INT32_C( 1981353517) }, + { INT32_C( 376672318), -INT32_C( 1618425121), INT32_C( 34344684), -INT32_C( 2024844639) }, + { INT32_C( 376672318), -INT32_C( 1981353517) }, + { INT32_C( 34344684), -INT32_C( 1981353517) }, + { INT32_C( 629708170), -INT32_C( 1618425121) }, + { INT32_C( 629708170), -INT32_C( 2024844639) } }, + { { -INT32_C( 1168139645), -INT32_C( 1639571907) }, + { INT32_C( 1818508399), INT32_C( 1912849895), -INT32_C( 81367675), -INT32_C( 933616398) }, + { INT32_C( 1818508399), -INT32_C( 1639571907) }, + { -INT32_C( 81367675), -INT32_C( 1639571907) }, + { -INT32_C( 1168139645), INT32_C( 1912849895) }, + { -INT32_C( 1168139645), -INT32_C( 933616398) } }, + { { INT32_C( 1391980757), INT32_C( 34941896) }, + { -INT32_C( 428457330), INT32_C( 205780721), -INT32_C( 331850439), INT32_C( 1610682246) }, + { -INT32_C( 428457330), INT32_C( 34941896) }, + { -INT32_C( 331850439), INT32_C( 34941896) }, + { INT32_C( 1391980757), INT32_C( 205780721) }, + { INT32_C( 1391980757), INT32_C( 1610682246) } }, + { { -INT32_C( 332770313), -INT32_C( 40080371) }, + { -INT32_C( 1900488554), INT32_C( 497830222), -INT32_C( 522537965), -INT32_C( 1036126695) }, + { -INT32_C( 1900488554), -INT32_C( 40080371) }, + { -INT32_C( 522537965), -INT32_C( 40080371) }, + { -INT32_C( 332770313), INT32_C( 497830222) }, + { -INT32_C( 332770313), -INT32_C( 1036126695) } }, + { { -INT32_C( 1146060113), INT32_C( 1919349998) }, + { INT32_C( 1840725219), -INT32_C( 1422378091), -INT32_C( 1992513290), -INT32_C( 21119508) }, + { INT32_C( 1840725219), INT32_C( 1919349998) }, + { -INT32_C( 1992513290), INT32_C( 1919349998) }, + { -INT32_C( 1146060113), 
-INT32_C( 1422378091) }, + { -INT32_C( 1146060113), -INT32_C( 21119508) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + + simde_int32x2_t r0_0 = simde_vcopy_laneq_s32(a, 0, b, 0); + simde_int32x2_t r0_2 = simde_vcopy_laneq_s32(a, 0, b, 2); + simde_int32x2_t r1_1 = simde_vcopy_laneq_s32(a, 1, b, 1); + simde_int32x2_t r1_3 = simde_vcopy_laneq_s32(a, 1, b, 3); + + simde_test_arm_neon_assert_equal_i32x2(r0_0, simde_vld1_s32(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i32x2(r0_2, simde_vld1_s32(test_vec[i].r0_2)); + simde_test_arm_neon_assert_equal_i32x2(r1_1, simde_vld1_s32(test_vec[i].r1_1)); + simde_test_arm_neon_assert_equal_i32x2(r1_3, simde_vld1_s32(test_vec[i].r1_3)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int64_t b[2]; + int64_t r0_0[1]; + int64_t r0_1[1]; + } test_vec[] = { + { { -INT64_C( 5524975992096568302) }, + { INT64_C( 5928209276499224928), INT64_C( 3570587797784554636) }, + { INT64_C( 5928209276499224928) }, + { INT64_C( 3570587797784554636) } }, + { { -INT64_C( 5698335035773134399) }, + { INT64_C( 4781888000487025832), INT64_C( 1507882669637173008) }, + { INT64_C( 4781888000487025832) }, + { INT64_C( 1507882669637173008) } }, + { { -INT64_C( 2144038387548032181) }, + { -INT64_C( 8135906999096004888), -INT64_C( 3560370611244798922) }, + { -INT64_C( 8135906999096004888) }, + { -INT64_C( 3560370611244798922) } }, + { { -INT64_C( 117785309047793137) }, + { -INT64_C( 1882610051046624115), INT64_C( 7186331988604967381) }, + { -INT64_C( 1882610051046624115) }, + { INT64_C( 7186331988604967381) } }, + { { INT64_C( 4066640187066630269) }, + { -INT64_C( 4494233011176271376), -INT64_C( 3865763807733887293) }, + { -INT64_C( 4494233011176271376) }, + { -INT64_C( 3865763807733887293) } }, + { { -INT64_C( 
6040197928986056700) }, + { INT64_C( 1006231244329940143), INT64_C( 2476886300863804510) }, + { INT64_C( 1006231244329940143) }, + { INT64_C( 2476886300863804510) } }, + { { -INT64_C( 7084452747101498475) }, + { -INT64_C( 7001274548870075067), INT64_C( 4002777076668709871) }, + { -INT64_C( 7001274548870075067) }, + { INT64_C( 4002777076668709871) } }, + { { INT64_C( 163938470204546022) }, + { -INT64_C( 5694319174922918088), -INT64_C( 1823313391809624358) }, + { -INT64_C( 5694319174922918088) }, + { -INT64_C( 1823313391809624358) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + + simde_int64x1_t r0_0 = simde_vcopy_laneq_s64(a, 0, b, 0); + simde_int64x1_t r0_1 = simde_vcopy_laneq_s64(a, 0, b, 1); + + simde_test_arm_neon_assert_equal_i64x1(r0_0, simde_vld1_s64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i64x1(r0_1, simde_vld1_s64(test_vec[i].r0_1)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[8]; + uint8_t b[16]; + uint8_t r0_0[8]; + uint8_t r1_3[8]; + uint8_t r3_8[8]; + uint8_t r6_12[8]; + uint8_t r7_15[8]; + } test_vec[] = { + { { UINT8_C( 99), UINT8_C( 40), UINT8_C( 75), UINT8_C( 35), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 87), UINT8_C( 222) }, + { UINT8_C( 52), UINT8_C( 1), UINT8_C( 224), UINT8_C( 221), + UINT8_C( 253), UINT8_C( 198), UINT8_C( 240), UINT8_C( 81), + UINT8_C( 184), UINT8_C( 118), UINT8_C( 1), UINT8_C( 200), + UINT8_C( 52), UINT8_C( 165), UINT8_C( 148), UINT8_C( 99) }, + { UINT8_C( 52), UINT8_C( 40), UINT8_C( 75), UINT8_C( 35), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 87), UINT8_C( 222) }, + { UINT8_C( 99), UINT8_C( 221), UINT8_C( 75), UINT8_C( 35), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 87), UINT8_C( 222) }, + { UINT8_C( 99), UINT8_C( 40), UINT8_C( 75), UINT8_C( 184), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 87), 
UINT8_C( 222) }, + { UINT8_C( 99), UINT8_C( 40), UINT8_C( 75), UINT8_C( 35), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 52), UINT8_C( 222) }, + { UINT8_C( 99), UINT8_C( 40), UINT8_C( 75), UINT8_C( 35), + UINT8_C( 30), UINT8_C( 240), UINT8_C( 87), UINT8_C( 99) } }, + { { UINT8_C( 212), UINT8_C( 86), UINT8_C( 230), UINT8_C( 248), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 189), UINT8_C( 69) }, + { UINT8_C( 189), UINT8_C( 213), UINT8_C( 28), UINT8_C( 243), + UINT8_C( 192), UINT8_C( 83), UINT8_C( 82), UINT8_C( 86), + UINT8_C( 253), UINT8_C( 168), UINT8_C( 184), UINT8_C( 57), + UINT8_C( 162), UINT8_C( 132), UINT8_C( 109), UINT8_C( 69) }, + { UINT8_C( 189), UINT8_C( 86), UINT8_C( 230), UINT8_C( 248), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 189), UINT8_C( 69) }, + { UINT8_C( 212), UINT8_C( 243), UINT8_C( 230), UINT8_C( 248), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 189), UINT8_C( 69) }, + { UINT8_C( 212), UINT8_C( 86), UINT8_C( 230), UINT8_C( 253), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 189), UINT8_C( 69) }, + { UINT8_C( 212), UINT8_C( 86), UINT8_C( 230), UINT8_C( 248), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 162), UINT8_C( 69) }, + { UINT8_C( 212), UINT8_C( 86), UINT8_C( 230), UINT8_C( 248), + UINT8_C( 64), UINT8_C( 245), UINT8_C( 189), UINT8_C( 69) } }, + { { UINT8_C( 29), UINT8_C( 103), UINT8_C( 152), UINT8_C( 196), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 206), UINT8_C( 196) }, + { UINT8_C( 66), UINT8_C( 242), UINT8_C( 97), UINT8_C( 23), + UINT8_C( 124), UINT8_C( 30), UINT8_C( 240), UINT8_C( 31), + UINT8_C( 105), UINT8_C( 243), UINT8_C( 21), UINT8_C( 146), + UINT8_C( 3), UINT8_C( 218), UINT8_C( 80), UINT8_C( 224) }, + { UINT8_C( 66), UINT8_C( 103), UINT8_C( 152), UINT8_C( 196), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 206), UINT8_C( 196) }, + { UINT8_C( 29), UINT8_C( 23), UINT8_C( 152), UINT8_C( 196), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 206), UINT8_C( 196) }, + { UINT8_C( 29), UINT8_C( 103), UINT8_C( 152), UINT8_C( 105), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 206), UINT8_C( 
196) }, + { UINT8_C( 29), UINT8_C( 103), UINT8_C( 152), UINT8_C( 196), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 3), UINT8_C( 196) }, + { UINT8_C( 29), UINT8_C( 103), UINT8_C( 152), UINT8_C( 196), + UINT8_C( 89), UINT8_C( 91), UINT8_C( 206), UINT8_C( 224) } }, + { { UINT8_C( 21), UINT8_C( 26), UINT8_C( 161), UINT8_C( 44), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 7), UINT8_C( 181) }, + { UINT8_C( 69), UINT8_C( 115), UINT8_C( 53), UINT8_C( 237), + UINT8_C( 116), UINT8_C( 223), UINT8_C( 252), UINT8_C( 212), + UINT8_C( 149), UINT8_C( 72), UINT8_C( 106), UINT8_C( 27), + UINT8_C( 249), UINT8_C( 7), UINT8_C( 66), UINT8_C( 133) }, + { UINT8_C( 69), UINT8_C( 26), UINT8_C( 161), UINT8_C( 44), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 7), UINT8_C( 181) }, + { UINT8_C( 21), UINT8_C( 237), UINT8_C( 161), UINT8_C( 44), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 7), UINT8_C( 181) }, + { UINT8_C( 21), UINT8_C( 26), UINT8_C( 161), UINT8_C( 149), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 7), UINT8_C( 181) }, + { UINT8_C( 21), UINT8_C( 26), UINT8_C( 161), UINT8_C( 44), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 249), UINT8_C( 181) }, + { UINT8_C( 21), UINT8_C( 26), UINT8_C( 161), UINT8_C( 44), + UINT8_C( 217), UINT8_C( 170), UINT8_C( 7), UINT8_C( 133) } }, + { { UINT8_C( 99), UINT8_C( 126), UINT8_C( 4), UINT8_MAX, + UINT8_C( 179), UINT8_C( 35), UINT8_C( 241), UINT8_C( 40) }, + { UINT8_C( 5), UINT8_C( 120), UINT8_C( 120), UINT8_C( 177), + UINT8_C( 245), UINT8_C( 56), UINT8_C( 251), UINT8_C( 121), + UINT8_C( 185), UINT8_C( 18), UINT8_C( 149), UINT8_C( 67), + UINT8_C( 176), UINT8_C( 7), UINT8_C( 139), UINT8_C( 203) }, + { UINT8_C( 5), UINT8_C( 126), UINT8_C( 4), UINT8_MAX, + UINT8_C( 179), UINT8_C( 35), UINT8_C( 241), UINT8_C( 40) }, + { UINT8_C( 99), UINT8_C( 177), UINT8_C( 4), UINT8_MAX, + UINT8_C( 179), UINT8_C( 35), UINT8_C( 241), UINT8_C( 40) }, + { UINT8_C( 99), UINT8_C( 126), UINT8_C( 4), UINT8_C( 185), + UINT8_C( 179), UINT8_C( 35), UINT8_C( 241), UINT8_C( 40) }, + { UINT8_C( 99), 
UINT8_C( 126), UINT8_C( 4), UINT8_MAX, + UINT8_C( 179), UINT8_C( 35), UINT8_C( 176), UINT8_C( 40) }, + { UINT8_C( 99), UINT8_C( 126), UINT8_C( 4), UINT8_MAX, + UINT8_C( 179), UINT8_C( 35), UINT8_C( 241), UINT8_C( 203) } }, + { { UINT8_C( 207), UINT8_C( 15), UINT8_C( 186), UINT8_C( 59), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 169), UINT8_C( 170) }, + { UINT8_C( 135), UINT8_C( 89), UINT8_C( 92), UINT8_C( 84), + UINT8_C( 98), UINT8_C( 83), UINT8_C( 54), UINT8_C( 70), + UINT8_C( 74), UINT8_C( 190), UINT8_C( 21), UINT8_C( 204), + UINT8_C( 114), UINT8_C( 121), UINT8_C( 146), UINT8_C( 53) }, + { UINT8_C( 135), UINT8_C( 15), UINT8_C( 186), UINT8_C( 59), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 169), UINT8_C( 170) }, + { UINT8_C( 207), UINT8_C( 84), UINT8_C( 186), UINT8_C( 59), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 169), UINT8_C( 170) }, + { UINT8_C( 207), UINT8_C( 15), UINT8_C( 186), UINT8_C( 74), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 169), UINT8_C( 170) }, + { UINT8_C( 207), UINT8_C( 15), UINT8_C( 186), UINT8_C( 59), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 114), UINT8_C( 170) }, + { UINT8_C( 207), UINT8_C( 15), UINT8_C( 186), UINT8_C( 59), + UINT8_C( 190), UINT8_C( 250), UINT8_C( 169), UINT8_C( 53) } }, + { { UINT8_C( 212), UINT8_C( 125), UINT8_C( 231), UINT8_C( 195), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 167), UINT8_C( 186) }, + { UINT8_C( 9), UINT8_C( 138), UINT8_C( 171), UINT8_C( 97), + UINT8_C( 120), UINT8_C( 72), UINT8_C( 155), UINT8_C( 29), + UINT8_C( 5), UINT8_C( 14), UINT8_C( 10), UINT8_C( 220), + UINT8_C( 253), UINT8_C( 194), UINT8_C( 240), UINT8_C( 218) }, + { UINT8_C( 9), UINT8_C( 125), UINT8_C( 231), UINT8_C( 195), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 167), UINT8_C( 186) }, + { UINT8_C( 212), UINT8_C( 97), UINT8_C( 231), UINT8_C( 195), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 167), UINT8_C( 186) }, + { UINT8_C( 212), UINT8_C( 125), UINT8_C( 231), UINT8_C( 5), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 167), UINT8_C( 186) }, + { UINT8_C( 212), 
UINT8_C( 125), UINT8_C( 231), UINT8_C( 195), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 253), UINT8_C( 186) }, + { UINT8_C( 212), UINT8_C( 125), UINT8_C( 231), UINT8_C( 195), + UINT8_C( 124), UINT8_C( 208), UINT8_C( 167), UINT8_C( 218) } }, + { { UINT8_C( 43), UINT8_C( 179), UINT8_C( 5), UINT8_C( 62), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 232), UINT8_C( 247) }, + { UINT8_C( 190), UINT8_C( 199), UINT8_C( 65), UINT8_C( 133), + UINT8_C( 222), UINT8_C( 218), UINT8_C( 184), UINT8_C( 55), + UINT8_C( 217), UINT8_C( 12), UINT8_C( 216), UINT8_C( 57), + UINT8_C( 204), UINT8_C( 112), UINT8_C( 59), UINT8_C( 253) }, + { UINT8_C( 190), UINT8_C( 179), UINT8_C( 5), UINT8_C( 62), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 232), UINT8_C( 247) }, + { UINT8_C( 43), UINT8_C( 133), UINT8_C( 5), UINT8_C( 62), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 232), UINT8_C( 247) }, + { UINT8_C( 43), UINT8_C( 179), UINT8_C( 5), UINT8_C( 217), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 232), UINT8_C( 247) }, + { UINT8_C( 43), UINT8_C( 179), UINT8_C( 5), UINT8_C( 62), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 204), UINT8_C( 247) }, + { UINT8_C( 43), UINT8_C( 179), UINT8_C( 5), UINT8_C( 62), + UINT8_C( 203), UINT8_C( 46), UINT8_C( 232), UINT8_C( 253) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); + simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); + + simde_uint8x8_t r0_0 = simde_vcopy_laneq_u8(a, 0, b, 0); + simde_uint8x8_t r1_3 = simde_vcopy_laneq_u8(a, 1, b, 3); + simde_uint8x8_t r3_8 = simde_vcopy_laneq_u8(a, 3, b, 8); + simde_uint8x8_t r6_12 = simde_vcopy_laneq_u8(a, 6, b, 12); + simde_uint8x8_t r7_15 = simde_vcopy_laneq_u8(a, 7, b, 15); + + simde_test_arm_neon_assert_equal_u8x8(r0_0, simde_vld1_u8(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u8x8(r1_3, simde_vld1_u8(test_vec[i].r1_3)); + simde_test_arm_neon_assert_equal_u8x8(r3_8, simde_vld1_u8(test_vec[i].r3_8)); + 
simde_test_arm_neon_assert_equal_u8x8(r6_12, simde_vld1_u8(test_vec[i].r6_12)); + simde_test_arm_neon_assert_equal_u8x8(r7_15, simde_vld1_u8(test_vec[i].r7_15)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + uint16_t b[8]; + uint16_t r0_0[4]; + uint16_t r1_3[4]; + uint16_t r2_5[4]; + uint16_t r3_7[4]; + } test_vec[] = { + { { UINT16_C( 14090), UINT16_C( 43348), UINT16_C( 44694), UINT16_C( 30737) }, + { UINT16_C( 38617), UINT16_C( 33902), UINT16_C( 90), UINT16_C( 41210), + UINT16_C( 36570), UINT16_C( 20021), UINT16_C( 3767), UINT16_C( 62186) }, + { UINT16_C( 38617), UINT16_C( 43348), UINT16_C( 44694), UINT16_C( 30737) }, + { UINT16_C( 14090), UINT16_C( 41210), UINT16_C( 44694), UINT16_C( 30737) }, + { UINT16_C( 14090), UINT16_C( 43348), UINT16_C( 20021), UINT16_C( 30737) }, + { UINT16_C( 14090), UINT16_C( 43348), UINT16_C( 44694), UINT16_C( 62186) } }, + { { UINT16_C( 61109), UINT16_C( 30682), UINT16_C( 62985), UINT16_C( 21741) }, + { UINT16_C( 19675), UINT16_C( 47463), UINT16_C( 48866), UINT16_C( 48845), + UINT16_C( 50845), UINT16_C( 43216), UINT16_C( 9981), UINT16_C( 42973) }, + { UINT16_C( 19675), UINT16_C( 30682), UINT16_C( 62985), UINT16_C( 21741) }, + { UINT16_C( 61109), UINT16_C( 48845), UINT16_C( 62985), UINT16_C( 21741) }, + { UINT16_C( 61109), UINT16_C( 30682), UINT16_C( 43216), UINT16_C( 21741) }, + { UINT16_C( 61109), UINT16_C( 30682), UINT16_C( 62985), UINT16_C( 42973) } }, + { { UINT16_C( 31349), UINT16_C( 64794), UINT16_C( 25390), UINT16_C( 50931) }, + { UINT16_C( 20875), UINT16_C( 26295), UINT16_C( 47031), UINT16_C( 37393), + UINT16_C( 8989), UINT16_C( 1142), UINT16_C( 891), UINT16_C( 41568) }, + { UINT16_C( 20875), UINT16_C( 64794), UINT16_C( 25390), UINT16_C( 50931) }, + { UINT16_C( 31349), UINT16_C( 37393), UINT16_C( 25390), UINT16_C( 50931) }, + { UINT16_C( 31349), UINT16_C( 64794), UINT16_C( 1142), UINT16_C( 50931) }, + { UINT16_C( 31349), UINT16_C( 64794), 
UINT16_C( 25390), UINT16_C( 41568) } }, + { { UINT16_C( 27978), UINT16_C( 43843), UINT16_C( 22177), UINT16_C( 31195) }, + { UINT16_C( 13000), UINT16_C( 19847), UINT16_C( 29376), UINT16_C( 26702), + UINT16_C( 12549), UINT16_C( 33267), UINT16_C( 27103), UINT16_C( 28719) }, + { UINT16_C( 13000), UINT16_C( 43843), UINT16_C( 22177), UINT16_C( 31195) }, + { UINT16_C( 27978), UINT16_C( 26702), UINT16_C( 22177), UINT16_C( 31195) }, + { UINT16_C( 27978), UINT16_C( 43843), UINT16_C( 33267), UINT16_C( 31195) }, + { UINT16_C( 27978), UINT16_C( 43843), UINT16_C( 22177), UINT16_C( 28719) } }, + { { UINT16_C( 1114), UINT16_C( 44831), UINT16_C( 23387), UINT16_C( 26671) }, + { UINT16_C( 62402), UINT16_C( 40206), UINT16_C( 5589), UINT16_C( 46365), + UINT16_C( 15546), UINT16_C( 56079), UINT16_C( 10655), UINT16_C( 28459) }, + { UINT16_C( 62402), UINT16_C( 44831), UINT16_C( 23387), UINT16_C( 26671) }, + { UINT16_C( 1114), UINT16_C( 46365), UINT16_C( 23387), UINT16_C( 26671) }, + { UINT16_C( 1114), UINT16_C( 44831), UINT16_C( 56079), UINT16_C( 26671) }, + { UINT16_C( 1114), UINT16_C( 44831), UINT16_C( 23387), UINT16_C( 28459) } }, + { { UINT16_C( 15907), UINT16_C( 19190), UINT16_C( 53939), UINT16_C( 23137) }, + { UINT16_C( 31842), UINT16_C( 25173), UINT16_C( 4547), UINT16_C( 2456), + UINT16_C( 47549), UINT16_C( 26853), UINT16_C( 42826), UINT16_C( 39063) }, + { UINT16_C( 31842), UINT16_C( 19190), UINT16_C( 53939), UINT16_C( 23137) }, + { UINT16_C( 15907), UINT16_C( 2456), UINT16_C( 53939), UINT16_C( 23137) }, + { UINT16_C( 15907), UINT16_C( 19190), UINT16_C( 26853), UINT16_C( 23137) }, + { UINT16_C( 15907), UINT16_C( 19190), UINT16_C( 53939), UINT16_C( 39063) } }, + { { UINT16_C( 35181), UINT16_C( 17415), UINT16_C( 59315), UINT16_C( 49844) }, + { UINT16_C( 32240), UINT16_C( 12816), UINT16_C( 48038), UINT16_C( 32248), + UINT16_C( 21982), UINT16_C( 58359), UINT16_C( 59704), UINT16_C( 41823) }, + { UINT16_C( 32240), UINT16_C( 17415), UINT16_C( 59315), UINT16_C( 49844) }, + { UINT16_C( 
35181), UINT16_C( 32248), UINT16_C( 59315), UINT16_C( 49844) }, + { UINT16_C( 35181), UINT16_C( 17415), UINT16_C( 58359), UINT16_C( 49844) }, + { UINT16_C( 35181), UINT16_C( 17415), UINT16_C( 59315), UINT16_C( 41823) } }, + { { UINT16_C( 10135), UINT16_C( 9895), UINT16_C( 14931), UINT16_C( 6474) }, + { UINT16_C( 48682), UINT16_C( 7265), UINT16_C( 8191), UINT16_C( 33426), + UINT16_C( 41840), UINT16_C( 56821), UINT16_C( 7954), UINT16_C( 43755) }, + { UINT16_C( 48682), UINT16_C( 9895), UINT16_C( 14931), UINT16_C( 6474) }, + { UINT16_C( 10135), UINT16_C( 33426), UINT16_C( 14931), UINT16_C( 6474) }, + { UINT16_C( 10135), UINT16_C( 9895), UINT16_C( 56821), UINT16_C( 6474) }, + { UINT16_C( 10135), UINT16_C( 9895), UINT16_C( 14931), UINT16_C( 43755) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + + simde_uint16x4_t r0_0 = simde_vcopy_laneq_u16(a, 0, b, 0); + simde_uint16x4_t r1_3 = simde_vcopy_laneq_u16(a, 1, b, 3); + simde_uint16x4_t r2_5 = simde_vcopy_laneq_u16(a, 2, b, 5); + simde_uint16x4_t r3_7 = simde_vcopy_laneq_u16(a, 3, b, 7); + + simde_test_arm_neon_assert_equal_u16x4(r0_0, simde_vld1_u16(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u16x4(r1_3, simde_vld1_u16(test_vec[i].r1_3)); + simde_test_arm_neon_assert_equal_u16x4(r2_5, simde_vld1_u16(test_vec[i].r2_5)); + simde_test_arm_neon_assert_equal_u16x4(r3_7, simde_vld1_u16(test_vec[i].r3_7)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + uint32_t b[4]; + uint32_t r0_0[2]; + uint32_t r0_2[2]; + uint32_t r1_1[2]; + uint32_t r1_3[2]; + } test_vec[] = { + { { UINT32_C( 1263327646), UINT32_C( 3936930385) }, + { UINT32_C( 1009741332), UINT32_C( 1271791302), UINT32_C( 2117473016), UINT32_C( 1209805040) }, + { UINT32_C( 1009741332), UINT32_C( 3936930385) }, + { UINT32_C( 
2117473016), UINT32_C( 3936930385) }, + { UINT32_C( 1263327646), UINT32_C( 1271791302) }, + { UINT32_C( 1263327646), UINT32_C( 1209805040) } }, + { { UINT32_C( 3169852858), UINT32_C( 4047976040) }, + { UINT32_C( 3469589779), UINT32_C( 319323338), UINT32_C( 2782162475), UINT32_C( 3540513941) }, + { UINT32_C( 3469589779), UINT32_C( 4047976040) }, + { UINT32_C( 2782162475), UINT32_C( 4047976040) }, + { UINT32_C( 3169852858), UINT32_C( 319323338) }, + { UINT32_C( 3169852858), UINT32_C( 3540513941) } }, + { { UINT32_C( 723977372), UINT32_C( 1213626978) }, + { UINT32_C( 2388609191), UINT32_C( 1459060976), UINT32_C( 1783941758), UINT32_C( 514486587) }, + { UINT32_C( 2388609191), UINT32_C( 1213626978) }, + { UINT32_C( 1783941758), UINT32_C( 1213626978) }, + { UINT32_C( 723977372), UINT32_C( 1459060976) }, + { UINT32_C( 723977372), UINT32_C( 514486587) } }, + { { UINT32_C( 2520531324), UINT32_C( 2850749730) }, + { UINT32_C( 888192105), UINT32_C( 1739139179), UINT32_C( 230879930), UINT32_C( 1965453646) }, + { UINT32_C( 888192105), UINT32_C( 2850749730) }, + { UINT32_C( 230879930), UINT32_C( 2850749730) }, + { UINT32_C( 2520531324), UINT32_C( 1739139179) }, + { UINT32_C( 2520531324), UINT32_C( 1965453646) } }, + { { UINT32_C( 2822169713), UINT32_C( 1803041930) }, + { UINT32_C( 2701651503), UINT32_C( 1720551226), UINT32_C( 2473213009), UINT32_C( 1517886129) }, + { UINT32_C( 2701651503), UINT32_C( 1803041930) }, + { UINT32_C( 2473213009), UINT32_C( 1803041930) }, + { UINT32_C( 2822169713), UINT32_C( 1720551226) }, + { UINT32_C( 2822169713), UINT32_C( 1517886129) } }, + { { UINT32_C( 1141343430), UINT32_C( 3637108750) }, + { UINT32_C( 3503443812), UINT32_C( 4186632742), UINT32_C( 1721722610), UINT32_C( 1039187286) }, + { UINT32_C( 3503443812), UINT32_C( 3637108750) }, + { UINT32_C( 1721722610), UINT32_C( 3637108750) }, + { UINT32_C( 1141343430), UINT32_C( 4186632742) }, + { UINT32_C( 1141343430), UINT32_C( 1039187286) } }, + { { UINT32_C( 678901130), UINT32_C( 1376760353) }, + { 
UINT32_C( 56937070), UINT32_C( 3336802856), UINT32_C( 3559984617), UINT32_C( 1829425083) }, + { UINT32_C( 56937070), UINT32_C( 1376760353) }, + { UINT32_C( 3559984617), UINT32_C( 1376760353) }, + { UINT32_C( 678901130), UINT32_C( 3336802856) }, + { UINT32_C( 678901130), UINT32_C( 1829425083) } }, + { { UINT32_C( 566419176), UINT32_C( 1835229255) }, + { UINT32_C( 2657473149), UINT32_C( 3998120314), UINT32_C( 3755826214), UINT32_C( 1740123970) }, + { UINT32_C( 2657473149), UINT32_C( 1835229255) }, + { UINT32_C( 3755826214), UINT32_C( 1835229255) }, + { UINT32_C( 566419176), UINT32_C( 3998120314) }, + { UINT32_C( 566419176), UINT32_C( 1740123970) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + + simde_uint32x2_t r0_0 = simde_vcopy_laneq_u32(a, 0, b, 0); + simde_uint32x2_t r0_2 = simde_vcopy_laneq_u32(a, 0, b, 2); + simde_uint32x2_t r1_1 = simde_vcopy_laneq_u32(a, 1, b, 1); + simde_uint32x2_t r1_3 = simde_vcopy_laneq_u32(a, 1, b, 3); + + simde_test_arm_neon_assert_equal_u32x2(r0_0, simde_vld1_u32(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u32x2(r0_2, simde_vld1_u32(test_vec[i].r0_2)); + simde_test_arm_neon_assert_equal_u32x2(r1_1, simde_vld1_u32(test_vec[i].r1_1)); + simde_test_arm_neon_assert_equal_u32x2(r1_3, simde_vld1_u32(test_vec[i].r1_3)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[1]; + uint64_t b[2]; + uint64_t r0_0[1]; + uint64_t r0_1[1]; + } test_vec[] = { + { { UINT64_C( 4774845596606571093) }, + { UINT64_C( 8217638833653863154), UINT64_C(17418031248681962729) }, + { UINT64_C( 8217638833653863154) }, + { UINT64_C(17418031248681962729) } }, + { { UINT64_C(17601450605500208340) }, + { UINT64_C( 2768201381815185750), UINT64_C(16070785927570141580) }, + { UINT64_C( 2768201381815185750) }, + { 
UINT64_C(16070785927570141580) } }, + { { UINT64_C( 5515676140731327920) }, + { UINT64_C(10667193760066591791), UINT64_C( 5614703629073485915) }, + { UINT64_C(10667193760066591791) }, + { UINT64_C( 5614703629073485915) } }, + { { UINT64_C( 8834081084411742948) }, + { UINT64_C( 9878499915507241938), UINT64_C(12395589140738461358) }, + { UINT64_C( 9878499915507241938) }, + { UINT64_C(12395589140738461358) } }, + { { UINT64_C(14076324571948981968) }, + { UINT64_C(13707984266428261295), UINT64_C(16651015344936509577) }, + { UINT64_C(13707984266428261295) }, + { UINT64_C(16651015344936509577) } }, + { { UINT64_C( 7401032381136844628) }, + { UINT64_C(13595471166024805103), UINT64_C(12253610641519224975) }, + { UINT64_C(13595471166024805103) }, + { UINT64_C(12253610641519224975) } }, + { { UINT64_C( 3043905301656645740) }, + { UINT64_C(13594318462557334235), UINT64_C( 878904381009645699) }, + { UINT64_C(13594318462557334235) }, + { UINT64_C( 878904381009645699) } }, + { { UINT64_C( 7588891631142822727) }, + { UINT64_C( 1673981919106237551), UINT64_C(12358823236528999795) }, + { UINT64_C( 1673981919106237551) }, + { UINT64_C(12358823236528999795) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + + simde_uint64x1_t r0_0 = simde_vcopy_laneq_u64(a, 0, b, 0); + simde_uint64x1_t r0_1 = simde_vcopy_laneq_u64(a, 0, b, 1); + + simde_test_arm_neon_assert_equal_u64x1(r0_0, simde_vld1_u64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u64x1(r0_1, simde_vld1_u64(test_vec[i].r0_1)); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[2]; + simde_float32_t b[4]; + simde_float32_t r0_0[2]; + simde_float32_t r0_2[2]; + simde_float32_t r1_1[2]; + simde_float32_t r1_3[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 9416.182), -SIMDE_FLOAT32_C( 8833.327) }, + 
{ -SIMDE_FLOAT32_C( 8422.051), -SIMDE_FLOAT32_C( 1474.965), SIMDE_FLOAT32_C( 22.255), SIMDE_FLOAT32_C( 1862.729) }, + { -SIMDE_FLOAT32_C( 8422.051), -SIMDE_FLOAT32_C( 8833.327) }, + { SIMDE_FLOAT32_C( 22.255), -SIMDE_FLOAT32_C( 8833.327) }, + { SIMDE_FLOAT32_C( 9416.182), -SIMDE_FLOAT32_C( 1474.965) }, + { SIMDE_FLOAT32_C( 9416.182), SIMDE_FLOAT32_C( 1862.729) } }, + { { -SIMDE_FLOAT32_C( 9615.762), -SIMDE_FLOAT32_C( 14.217) }, + { -SIMDE_FLOAT32_C( 5132.670), SIMDE_FLOAT32_C( 3844.124), SIMDE_FLOAT32_C( 8234.064), -SIMDE_FLOAT32_C( 7859.389) }, + { -SIMDE_FLOAT32_C( 5132.670), -SIMDE_FLOAT32_C( 14.217) }, + { SIMDE_FLOAT32_C( 8234.064), -SIMDE_FLOAT32_C( 14.217) }, + { -SIMDE_FLOAT32_C( 9615.762), SIMDE_FLOAT32_C( 3844.124) }, + { -SIMDE_FLOAT32_C( 9615.762), -SIMDE_FLOAT32_C( 7859.389) } }, + { { -SIMDE_FLOAT32_C( 4940.350), -SIMDE_FLOAT32_C( 8895.818) }, + { -SIMDE_FLOAT32_C( 4031.934), SIMDE_FLOAT32_C( 2112.525), SIMDE_FLOAT32_C( 6157.536), SIMDE_FLOAT32_C( 6249.606) }, + { -SIMDE_FLOAT32_C( 4031.934), -SIMDE_FLOAT32_C( 8895.818) }, + { SIMDE_FLOAT32_C( 6157.536), -SIMDE_FLOAT32_C( 8895.818) }, + { -SIMDE_FLOAT32_C( 4940.350), SIMDE_FLOAT32_C( 2112.525) }, + { -SIMDE_FLOAT32_C( 4940.350), SIMDE_FLOAT32_C( 6249.606) } }, + { { SIMDE_FLOAT32_C( 9344.492), SIMDE_FLOAT32_C( 2826.421) }, + { -SIMDE_FLOAT32_C( 5138.487), SIMDE_FLOAT32_C( 5406.190), SIMDE_FLOAT32_C( 8334.525), SIMDE_FLOAT32_C( 6643.959) }, + { -SIMDE_FLOAT32_C( 5138.487), SIMDE_FLOAT32_C( 2826.421) }, + { SIMDE_FLOAT32_C( 8334.525), SIMDE_FLOAT32_C( 2826.421) }, + { SIMDE_FLOAT32_C( 9344.492), SIMDE_FLOAT32_C( 5406.190) }, + { SIMDE_FLOAT32_C( 9344.492), SIMDE_FLOAT32_C( 6643.959) } }, + { { -SIMDE_FLOAT32_C( 6024.062), SIMDE_FLOAT32_C( 6388.316) }, + { SIMDE_FLOAT32_C( 2465.654), SIMDE_FLOAT32_C( 3539.057), -SIMDE_FLOAT32_C( 6462.303), -SIMDE_FLOAT32_C( 2503.313) }, + { SIMDE_FLOAT32_C( 2465.654), SIMDE_FLOAT32_C( 6388.316) }, + { -SIMDE_FLOAT32_C( 6462.303), SIMDE_FLOAT32_C( 6388.316) }, + { 
-SIMDE_FLOAT32_C( 6024.062), SIMDE_FLOAT32_C( 3539.057) }, + { -SIMDE_FLOAT32_C( 6024.062), -SIMDE_FLOAT32_C( 2503.313) } }, + { { -SIMDE_FLOAT32_C( 2609.983), -SIMDE_FLOAT32_C( 2150.719) }, + { -SIMDE_FLOAT32_C( 2105.146), -SIMDE_FLOAT32_C( 4271.761), SIMDE_FLOAT32_C( 4775.484), -SIMDE_FLOAT32_C( 8675.755) }, + { -SIMDE_FLOAT32_C( 2105.146), -SIMDE_FLOAT32_C( 2150.719) }, + { SIMDE_FLOAT32_C( 4775.484), -SIMDE_FLOAT32_C( 2150.719) }, + { -SIMDE_FLOAT32_C( 2609.983), -SIMDE_FLOAT32_C( 4271.761) }, + { -SIMDE_FLOAT32_C( 2609.983), -SIMDE_FLOAT32_C( 8675.755) } }, + { { -SIMDE_FLOAT32_C( 7533.391), SIMDE_FLOAT32_C( 2646.042) }, + { -SIMDE_FLOAT32_C( 4768.051), -SIMDE_FLOAT32_C( 9232.611), SIMDE_FLOAT32_C( 5796.161), SIMDE_FLOAT32_C( 319.964) }, + { -SIMDE_FLOAT32_C( 4768.051), SIMDE_FLOAT32_C( 2646.042) }, + { SIMDE_FLOAT32_C( 5796.161), SIMDE_FLOAT32_C( 2646.042) }, + { -SIMDE_FLOAT32_C( 7533.391), -SIMDE_FLOAT32_C( 9232.611) }, + { -SIMDE_FLOAT32_C( 7533.391), SIMDE_FLOAT32_C( 319.964) } }, + { { -SIMDE_FLOAT32_C( 9847.213), SIMDE_FLOAT32_C( 6520.873) }, + { SIMDE_FLOAT32_C( 9756.117), SIMDE_FLOAT32_C( 625.608), -SIMDE_FLOAT32_C( 979.024), -SIMDE_FLOAT32_C( 183.463) }, + { SIMDE_FLOAT32_C( 9756.117), SIMDE_FLOAT32_C( 6520.873) }, + { -SIMDE_FLOAT32_C( 979.024), SIMDE_FLOAT32_C( 6520.873) }, + { -SIMDE_FLOAT32_C( 9847.213), SIMDE_FLOAT32_C( 625.608) }, + { -SIMDE_FLOAT32_C( 9847.213), -SIMDE_FLOAT32_C( 183.463) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + + simde_float32x2_t r0_0 = simde_vcopy_laneq_f32(a, 0, b, 0); + simde_float32x2_t r0_2 = simde_vcopy_laneq_f32(a, 0, b, 2); + simde_float32x2_t r1_1 = simde_vcopy_laneq_f32(a, 1, b, 1); + simde_float32x2_t r1_3 = simde_vcopy_laneq_f32(a, 1, b, 3); + + simde_test_arm_neon_assert_equal_f32x2(r0_0, simde_vld1_f32(test_vec[i].r0_0), INT_MAX); + 
simde_test_arm_neon_assert_equal_f32x2(r0_2, simde_vld1_f32(test_vec[i].r0_2), INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r1_1, simde_vld1_f32(test_vec[i].r1_1), INT_MAX); + simde_test_arm_neon_assert_equal_f32x2(r1_3, simde_vld1_f32(test_vec[i].r1_3), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopy_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[1]; + simde_float64_t b[2]; + simde_float64_t r0_0[1]; + simde_float64_t r0_1[1]; + } test_vec[] = { + { { -SIMDE_FLOAT64_C( 739568.812) }, + { -SIMDE_FLOAT64_C( 918096.188), -SIMDE_FLOAT64_C( 636033.688) }, + { -SIMDE_FLOAT64_C( 918096.188) }, + { -SIMDE_FLOAT64_C( 636033.688) } }, + { { SIMDE_FLOAT64_C( 347996.500) }, + { -SIMDE_FLOAT64_C( 789037.625), SIMDE_FLOAT64_C( 410753.000) }, + { -SIMDE_FLOAT64_C( 789037.625) }, + { SIMDE_FLOAT64_C( 410753.000) } }, + { { -SIMDE_FLOAT64_C( 519933.062) }, + { SIMDE_FLOAT64_C( 858011.500), SIMDE_FLOAT64_C( 664039.875) }, + { SIMDE_FLOAT64_C( 858011.500) }, + { SIMDE_FLOAT64_C( 664039.875) } }, + { { -SIMDE_FLOAT64_C( 836484.250) }, + { SIMDE_FLOAT64_C( 343727.500), -SIMDE_FLOAT64_C( 654913.188) }, + { SIMDE_FLOAT64_C( 343727.500) }, + { -SIMDE_FLOAT64_C( 654913.188) } }, + { { SIMDE_FLOAT64_C( 462913.500) }, + { -SIMDE_FLOAT64_C( 509992.406), -SIMDE_FLOAT64_C( 610779.125) }, + { -SIMDE_FLOAT64_C( 509992.406) }, + { -SIMDE_FLOAT64_C( 610779.125) } }, + { { -SIMDE_FLOAT64_C( 265341.438) }, + { SIMDE_FLOAT64_C( 503597.625), SIMDE_FLOAT64_C( 215071.125) }, + { SIMDE_FLOAT64_C( 503597.625) }, + { SIMDE_FLOAT64_C( 215071.125) } }, + { { -SIMDE_FLOAT64_C( 582044.625) }, + { SIMDE_FLOAT64_C( 731499.250), -SIMDE_FLOAT64_C( 730278.875) }, + { SIMDE_FLOAT64_C( 731499.250) }, + { -SIMDE_FLOAT64_C( 730278.875) } }, + { { -SIMDE_FLOAT64_C( 991497.875) }, + { -SIMDE_FLOAT64_C( 653912.188), SIMDE_FLOAT64_C( 50960.625) }, + { -SIMDE_FLOAT64_C( 653912.188) }, + { SIMDE_FLOAT64_C( 50960.625) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + + simde_float64x1_t r0_0 = simde_vcopy_laneq_f64(a, 0, b, 0); + simde_float64x1_t r0_1 = simde_vcopy_laneq_f64(a, 0, b, 1); + + simde_test_arm_neon_assert_equal_f64x1(r0_0, simde_vld1_f64(test_vec[i].r0_0), INT_MAX); + simde_test_arm_neon_assert_equal_f64x1(r0_1, simde_vld1_f64(test_vec[i].r0_1), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[16]; + int8_t b[8]; + int8_t r0_7[16]; + int8_t r4_6[16]; + int8_t r8_4[16]; + int8_t r12_3[16]; + int8_t r15_1[16]; + } test_vec[] = { + { { INT8_C( 7), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + -INT8_C( 105), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + INT8_C( 32), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 83), -INT8_C( 39), INT8_C( 22), INT8_C( 28) }, + { -INT8_C( 93), INT8_C( 1), -INT8_C( 111), -INT8_C( 96), + -INT8_C( 106), -INT8_C( 86), INT8_C( 43), INT8_C( 48) }, + { INT8_C( 48), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + -INT8_C( 105), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + INT8_C( 32), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 83), -INT8_C( 39), INT8_C( 22), INT8_C( 28) }, + { INT8_C( 7), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + INT8_C( 43), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + INT8_C( 32), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 83), -INT8_C( 39), INT8_C( 22), INT8_C( 28) }, + { INT8_C( 7), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + -INT8_C( 105), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + -INT8_C( 106), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 83), -INT8_C( 39), INT8_C( 22), INT8_C( 28) }, + { INT8_C( 7), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + -INT8_C( 105), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + INT8_C( 32), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 96), -INT8_C( 39), INT8_C( 22), INT8_C( 28) }, + { INT8_C( 
7), -INT8_C( 118), INT8_C( 62), -INT8_C( 50), + -INT8_C( 105), -INT8_C( 76), -INT8_C( 19), INT8_C( 111), + INT8_C( 32), INT8_C( 20), -INT8_C( 96), -INT8_C( 77), + -INT8_C( 83), -INT8_C( 39), INT8_C( 22), INT8_C( 1) } }, + { { INT8_C( 96), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 33), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + -INT8_C( 65), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 25), -INT8_C( 117), INT8_C( 123), -INT8_C( 19) }, + { -INT8_C( 31), INT8_C( 116), INT8_C( 103), -INT8_C( 112), + INT8_C( 118), INT8_C( 34), -INT8_C( 70), -INT8_C( 51) }, + { -INT8_C( 51), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 33), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + -INT8_C( 65), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 25), -INT8_C( 117), INT8_C( 123), -INT8_C( 19) }, + { INT8_C( 96), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 70), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + -INT8_C( 65), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 25), -INT8_C( 117), INT8_C( 123), -INT8_C( 19) }, + { INT8_C( 96), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 33), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + INT8_C( 118), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 25), -INT8_C( 117), INT8_C( 123), -INT8_C( 19) }, + { INT8_C( 96), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 33), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + -INT8_C( 65), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 112), -INT8_C( 117), INT8_C( 123), -INT8_C( 19) }, + { INT8_C( 96), INT8_C( 23), INT8_C( 106), INT8_C( 101), + -INT8_C( 33), -INT8_C( 74), -INT8_C( 78), INT8_C( 22), + -INT8_C( 65), INT8_C( 29), -INT8_C( 8), -INT8_C( 48), + -INT8_C( 25), -INT8_C( 117), INT8_C( 123), INT8_C( 116) } }, + { { -INT8_C( 29), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + -INT8_C( 18), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 37), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + -INT8_C( 89), -INT8_C( 18), -INT8_C( 68), -INT8_C( 102) }, + { INT8_C( 43), 
-INT8_C( 14), -INT8_C( 83), INT8_C( 108), + -INT8_C( 89), INT8_C( 34), INT8_C( 58), INT8_C( 0) }, + { INT8_C( 0), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + -INT8_C( 18), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 37), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + -INT8_C( 89), -INT8_C( 18), -INT8_C( 68), -INT8_C( 102) }, + { -INT8_C( 29), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + INT8_C( 58), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 37), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + -INT8_C( 89), -INT8_C( 18), -INT8_C( 68), -INT8_C( 102) }, + { -INT8_C( 29), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + -INT8_C( 18), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 89), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + -INT8_C( 89), -INT8_C( 18), -INT8_C( 68), -INT8_C( 102) }, + { -INT8_C( 29), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + -INT8_C( 18), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 37), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + INT8_C( 108), -INT8_C( 18), -INT8_C( 68), -INT8_C( 102) }, + { -INT8_C( 29), -INT8_C( 7), -INT8_C( 116), -INT8_C( 65), + -INT8_C( 18), -INT8_C( 27), -INT8_C( 92), -INT8_C( 16), + -INT8_C( 37), INT8_C( 116), INT8_C( 54), -INT8_C( 34), + -INT8_C( 89), -INT8_C( 18), -INT8_C( 68), -INT8_C( 14) } }, + { { -INT8_C( 41), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 17), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 124), -INT8_C( 59), INT8_C( 63), -INT8_C( 2), + INT8_C( 20), -INT8_C( 48), -INT8_C( 9), INT8_C( 105) }, + { INT8_C( 63), -INT8_C( 79), -INT8_C( 114), INT8_C( 6), + INT8_C( 40), -INT8_C( 108), -INT8_C( 46), INT8_C( 54) }, + { INT8_C( 54), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 17), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 124), -INT8_C( 59), INT8_C( 63), -INT8_C( 2), + INT8_C( 20), -INT8_C( 48), -INT8_C( 9), INT8_C( 105) }, + { -INT8_C( 41), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 46), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 124), -INT8_C( 59), 
INT8_C( 63), -INT8_C( 2), + INT8_C( 20), -INT8_C( 48), -INT8_C( 9), INT8_C( 105) }, + { -INT8_C( 41), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 17), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 40), -INT8_C( 59), INT8_C( 63), -INT8_C( 2), + INT8_C( 20), -INT8_C( 48), -INT8_C( 9), INT8_C( 105) }, + { -INT8_C( 41), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 17), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 124), -INT8_C( 59), INT8_C( 63), -INT8_C( 2), + INT8_C( 6), -INT8_C( 48), -INT8_C( 9), INT8_C( 105) }, + { -INT8_C( 41), INT8_C( 20), INT8_C( 87), INT8_C( 37), + -INT8_C( 17), -INT8_C( 85), INT8_C( 120), -INT8_C( 41), + INT8_C( 124), -INT8_C( 59), INT8_C( 63), -INT8_C( 2), + INT8_C( 20), -INT8_C( 48), -INT8_C( 9), -INT8_C( 79) } }, + { { INT8_C( 48), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + -INT8_C( 51), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + INT8_C( 52), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 85), -INT8_C( 18), -INT8_C( 25), INT8_C( 27) }, + { INT8_C( 30), INT8_C( 77), INT8_C( 53), INT8_C( 9), + -INT8_C( 64), -INT8_C( 87), INT8_C( 56), INT8_C( 58) }, + { INT8_C( 58), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + -INT8_C( 51), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + INT8_C( 52), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 85), -INT8_C( 18), -INT8_C( 25), INT8_C( 27) }, + { INT8_C( 48), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + INT8_C( 56), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + INT8_C( 52), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 85), -INT8_C( 18), -INT8_C( 25), INT8_C( 27) }, + { INT8_C( 48), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + -INT8_C( 51), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + -INT8_C( 64), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 85), -INT8_C( 18), -INT8_C( 25), INT8_C( 27) }, + { INT8_C( 48), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + -INT8_C( 51), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + INT8_C( 52), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 9), 
-INT8_C( 18), -INT8_C( 25), INT8_C( 27) }, + { INT8_C( 48), INT8_C( 105), INT8_C( 63), -INT8_C( 110), + -INT8_C( 51), -INT8_C( 88), -INT8_C( 45), INT8_C( 22), + INT8_C( 52), INT8_C( 62), -INT8_C( 123), INT8_C( 67), + INT8_C( 85), -INT8_C( 18), -INT8_C( 25), INT8_C( 77) } }, + { { INT8_C( 96), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + INT8_C( 117), INT8_C( 119), INT8_C( 90), INT8_C( 103), + -INT8_C( 71), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 58), INT8_C( 11), -INT8_C( 68), -INT8_C( 36) }, + { INT8_C( 10), INT8_C( 92), INT8_C( 122), INT8_C( 88), + INT8_C( 63), INT8_C( 40), -INT8_C( 15), -INT8_C( 67) }, + { -INT8_C( 67), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + INT8_C( 117), INT8_C( 119), INT8_C( 90), INT8_C( 103), + -INT8_C( 71), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 58), INT8_C( 11), -INT8_C( 68), -INT8_C( 36) }, + { INT8_C( 96), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + -INT8_C( 15), INT8_C( 119), INT8_C( 90), INT8_C( 103), + -INT8_C( 71), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 58), INT8_C( 11), -INT8_C( 68), -INT8_C( 36) }, + { INT8_C( 96), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + INT8_C( 117), INT8_C( 119), INT8_C( 90), INT8_C( 103), + INT8_C( 63), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 58), INT8_C( 11), -INT8_C( 68), -INT8_C( 36) }, + { INT8_C( 96), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + INT8_C( 117), INT8_C( 119), INT8_C( 90), INT8_C( 103), + -INT8_C( 71), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 88), INT8_C( 11), -INT8_C( 68), -INT8_C( 36) }, + { INT8_C( 96), INT8_C( 87), -INT8_C( 47), INT8_C( 101), + INT8_C( 117), INT8_C( 119), INT8_C( 90), INT8_C( 103), + -INT8_C( 71), INT8_C( 27), INT8_C( 71), INT8_C( 69), + INT8_C( 58), INT8_C( 11), -INT8_C( 68), INT8_C( 92) } }, + { { INT8_C( 111), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 72), INT8_C( 23), INT8_C( 57), INT8_C( 61), + -INT8_C( 14), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + -INT8_C( 80), -INT8_C( 79), INT8_C( 93), -INT8_C( 38) }, + { 
INT8_C( 73), INT8_C( 117), INT8_C( 112), INT8_C( 26), + INT8_C( 55), -INT8_C( 15), INT8_C( 28), -INT8_C( 61) }, + { -INT8_C( 61), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 72), INT8_C( 23), INT8_C( 57), INT8_C( 61), + -INT8_C( 14), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + -INT8_C( 80), -INT8_C( 79), INT8_C( 93), -INT8_C( 38) }, + { INT8_C( 111), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 28), INT8_C( 23), INT8_C( 57), INT8_C( 61), + -INT8_C( 14), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + -INT8_C( 80), -INT8_C( 79), INT8_C( 93), -INT8_C( 38) }, + { INT8_C( 111), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 72), INT8_C( 23), INT8_C( 57), INT8_C( 61), + INT8_C( 55), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + -INT8_C( 80), -INT8_C( 79), INT8_C( 93), -INT8_C( 38) }, + { INT8_C( 111), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 72), INT8_C( 23), INT8_C( 57), INT8_C( 61), + -INT8_C( 14), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + INT8_C( 26), -INT8_C( 79), INT8_C( 93), -INT8_C( 38) }, + { INT8_C( 111), -INT8_C( 99), -INT8_C( 23), INT8_C( 126), + INT8_C( 72), INT8_C( 23), INT8_C( 57), INT8_C( 61), + -INT8_C( 14), INT8_C( 97), -INT8_C( 98), -INT8_C( 84), + -INT8_C( 80), -INT8_C( 79), INT8_C( 93), INT8_C( 117) } }, + { { -INT8_C( 105), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + INT8_C( 121), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + -INT8_C( 85), INT8_C( 63), -INT8_C( 15), -INT8_C( 29), + INT8_C( 77), INT8_C( 121), -INT8_C( 66), INT8_C( 74) }, + { INT8_C( 71), -INT8_C( 82), INT8_C( 94), INT8_C( 107), + INT8_C( 33), INT8_C( 106), -INT8_C( 96), -INT8_C( 85) }, + { -INT8_C( 85), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + INT8_C( 121), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + -INT8_C( 85), INT8_C( 63), -INT8_C( 15), -INT8_C( 29), + INT8_C( 77), INT8_C( 121), -INT8_C( 66), INT8_C( 74) }, + { -INT8_C( 105), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + -INT8_C( 96), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + -INT8_C( 85), INT8_C( 63), -INT8_C( 
15), -INT8_C( 29), + INT8_C( 77), INT8_C( 121), -INT8_C( 66), INT8_C( 74) }, + { -INT8_C( 105), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + INT8_C( 121), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + INT8_C( 33), INT8_C( 63), -INT8_C( 15), -INT8_C( 29), + INT8_C( 77), INT8_C( 121), -INT8_C( 66), INT8_C( 74) }, + { -INT8_C( 105), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + INT8_C( 121), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + -INT8_C( 85), INT8_C( 63), -INT8_C( 15), -INT8_C( 29), + INT8_C( 107), INT8_C( 121), -INT8_C( 66), INT8_C( 74) }, + { -INT8_C( 105), INT8_C( 101), -INT8_C( 89), -INT8_C( 90), + INT8_C( 121), -INT8_C( 68), INT8_C( 2), -INT8_C( 4), + -INT8_C( 85), INT8_C( 63), -INT8_C( 15), -INT8_C( 29), + INT8_C( 77), INT8_C( 121), -INT8_C( 66), -INT8_C( 82) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + + simde_int8x16_t r0_7 = simde_vcopyq_lane_s8(a, 0, b, 7); + simde_int8x16_t r4_6 = simde_vcopyq_lane_s8(a, 4, b, 6); + simde_int8x16_t r8_4 = simde_vcopyq_lane_s8(a, 8, b, 4); + simde_int8x16_t r12_3 = simde_vcopyq_lane_s8(a, 12, b, 3); + simde_int8x16_t r15_1 = simde_vcopyq_lane_s8(a, 15, b, 1); + + simde_test_arm_neon_assert_equal_i8x16(r0_7, simde_vld1q_s8(test_vec[i].r0_7)); + simde_test_arm_neon_assert_equal_i8x16(r4_6, simde_vld1q_s8(test_vec[i].r4_6)); + simde_test_arm_neon_assert_equal_i8x16(r8_4, simde_vld1q_s8(test_vec[i].r8_4)); + simde_test_arm_neon_assert_equal_i8x16(r12_3, simde_vld1q_s8(test_vec[i].r12_3)); + simde_test_arm_neon_assert_equal_i8x16(r15_1, simde_vld1q_s8(test_vec[i].r15_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[4]; + int16_t r0_3[8]; + int16_t r3_2[8]; + int16_t r5_1[8]; + int16_t r7_0[8]; + } test_vec[] = { + { { INT16_C( 27029), INT16_C( 26579), INT16_C( 28585), INT16_C( 25552), + 
INT16_C( 16551), -INT16_C( 7338), INT16_C( 19387), INT16_C( 12129) }, + { -INT16_C( 17263), INT16_C( 17994), INT16_C( 21673), INT16_C( 26674) }, + { INT16_C( 26674), INT16_C( 26579), INT16_C( 28585), INT16_C( 25552), + INT16_C( 16551), -INT16_C( 7338), INT16_C( 19387), INT16_C( 12129) }, + { INT16_C( 27029), INT16_C( 26579), INT16_C( 28585), INT16_C( 21673), + INT16_C( 16551), -INT16_C( 7338), INT16_C( 19387), INT16_C( 12129) }, + { INT16_C( 27029), INT16_C( 26579), INT16_C( 28585), INT16_C( 25552), + INT16_C( 16551), INT16_C( 17994), INT16_C( 19387), INT16_C( 12129) }, + { INT16_C( 27029), INT16_C( 26579), INT16_C( 28585), INT16_C( 25552), + INT16_C( 16551), -INT16_C( 7338), INT16_C( 19387), -INT16_C( 17263) } }, + { { INT16_C( 22253), -INT16_C( 22260), INT16_C( 32244), -INT16_C( 26177), + -INT16_C( 8811), INT16_C( 15417), INT16_C( 5213), -INT16_C( 28635) }, + { -INT16_C( 7193), -INT16_C( 30400), -INT16_C( 17625), -INT16_C( 25516) }, + { -INT16_C( 25516), -INT16_C( 22260), INT16_C( 32244), -INT16_C( 26177), + -INT16_C( 8811), INT16_C( 15417), INT16_C( 5213), -INT16_C( 28635) }, + { INT16_C( 22253), -INT16_C( 22260), INT16_C( 32244), -INT16_C( 17625), + -INT16_C( 8811), INT16_C( 15417), INT16_C( 5213), -INT16_C( 28635) }, + { INT16_C( 22253), -INT16_C( 22260), INT16_C( 32244), -INT16_C( 26177), + -INT16_C( 8811), -INT16_C( 30400), INT16_C( 5213), -INT16_C( 28635) }, + { INT16_C( 22253), -INT16_C( 22260), INT16_C( 32244), -INT16_C( 26177), + -INT16_C( 8811), INT16_C( 15417), INT16_C( 5213), -INT16_C( 7193) } }, + { { -INT16_C( 23776), INT16_C( 6518), -INT16_C( 6832), -INT16_C( 29422), + -INT16_C( 28847), -INT16_C( 13217), INT16_C( 31430), -INT16_C( 8848) }, + { -INT16_C( 3922), INT16_C( 22802), -INT16_C( 16279), -INT16_C( 13177) }, + { -INT16_C( 13177), INT16_C( 6518), -INT16_C( 6832), -INT16_C( 29422), + -INT16_C( 28847), -INT16_C( 13217), INT16_C( 31430), -INT16_C( 8848) }, + { -INT16_C( 23776), INT16_C( 6518), -INT16_C( 6832), -INT16_C( 16279), + -INT16_C( 
28847), -INT16_C( 13217), INT16_C( 31430), -INT16_C( 8848) }, + { -INT16_C( 23776), INT16_C( 6518), -INT16_C( 6832), -INT16_C( 29422), + -INT16_C( 28847), INT16_C( 22802), INT16_C( 31430), -INT16_C( 8848) }, + { -INT16_C( 23776), INT16_C( 6518), -INT16_C( 6832), -INT16_C( 29422), + -INT16_C( 28847), -INT16_C( 13217), INT16_C( 31430), -INT16_C( 3922) } }, + { { INT16_C( 25863), INT16_C( 13259), -INT16_C( 30865), INT16_C( 15801), + INT16_C( 11683), -INT16_C( 16303), INT16_C( 25175), -INT16_C( 21172) }, + { INT16_C( 29037), INT16_C( 28656), INT16_C( 6282), INT16_C( 198) }, + { INT16_C( 198), INT16_C( 13259), -INT16_C( 30865), INT16_C( 15801), + INT16_C( 11683), -INT16_C( 16303), INT16_C( 25175), -INT16_C( 21172) }, + { INT16_C( 25863), INT16_C( 13259), -INT16_C( 30865), INT16_C( 6282), + INT16_C( 11683), -INT16_C( 16303), INT16_C( 25175), -INT16_C( 21172) }, + { INT16_C( 25863), INT16_C( 13259), -INT16_C( 30865), INT16_C( 15801), + INT16_C( 11683), INT16_C( 28656), INT16_C( 25175), -INT16_C( 21172) }, + { INT16_C( 25863), INT16_C( 13259), -INT16_C( 30865), INT16_C( 15801), + INT16_C( 11683), -INT16_C( 16303), INT16_C( 25175), INT16_C( 29037) } }, + { { -INT16_C( 30988), INT16_C( 27521), -INT16_C( 7197), -INT16_C( 13039), + -INT16_C( 14170), -INT16_C( 2528), -INT16_C( 15234), INT16_C( 13381) }, + { -INT16_C( 32112), INT16_C( 31755), -INT16_C( 27872), INT16_C( 13351) }, + { INT16_C( 13351), INT16_C( 27521), -INT16_C( 7197), -INT16_C( 13039), + -INT16_C( 14170), -INT16_C( 2528), -INT16_C( 15234), INT16_C( 13381) }, + { -INT16_C( 30988), INT16_C( 27521), -INT16_C( 7197), -INT16_C( 27872), + -INT16_C( 14170), -INT16_C( 2528), -INT16_C( 15234), INT16_C( 13381) }, + { -INT16_C( 30988), INT16_C( 27521), -INT16_C( 7197), -INT16_C( 13039), + -INT16_C( 14170), INT16_C( 31755), -INT16_C( 15234), INT16_C( 13381) }, + { -INT16_C( 30988), INT16_C( 27521), -INT16_C( 7197), -INT16_C( 13039), + -INT16_C( 14170), -INT16_C( 2528), -INT16_C( 15234), -INT16_C( 32112) } }, + { { INT16_C( 
29081), -INT16_C( 1225), INT16_C( 17138), INT16_C( 12210), + -INT16_C( 17247), INT16_C( 2087), -INT16_C( 3722), -INT16_C( 22560) }, + { INT16_C( 2458), -INT16_C( 5128), INT16_C( 20732), INT16_C( 3292) }, + { INT16_C( 3292), -INT16_C( 1225), INT16_C( 17138), INT16_C( 12210), + -INT16_C( 17247), INT16_C( 2087), -INT16_C( 3722), -INT16_C( 22560) }, + { INT16_C( 29081), -INT16_C( 1225), INT16_C( 17138), INT16_C( 20732), + -INT16_C( 17247), INT16_C( 2087), -INT16_C( 3722), -INT16_C( 22560) }, + { INT16_C( 29081), -INT16_C( 1225), INT16_C( 17138), INT16_C( 12210), + -INT16_C( 17247), -INT16_C( 5128), -INT16_C( 3722), -INT16_C( 22560) }, + { INT16_C( 29081), -INT16_C( 1225), INT16_C( 17138), INT16_C( 12210), + -INT16_C( 17247), INT16_C( 2087), -INT16_C( 3722), INT16_C( 2458) } }, + { { -INT16_C( 25013), -INT16_C( 19961), -INT16_C( 18606), INT16_C( 11403), + -INT16_C( 24211), -INT16_C( 32295), INT16_C( 11289), INT16_C( 2644) }, + { -INT16_C( 24926), INT16_C( 13396), INT16_C( 13926), -INT16_C( 15521) }, + { -INT16_C( 15521), -INT16_C( 19961), -INT16_C( 18606), INT16_C( 11403), + -INT16_C( 24211), -INT16_C( 32295), INT16_C( 11289), INT16_C( 2644) }, + { -INT16_C( 25013), -INT16_C( 19961), -INT16_C( 18606), INT16_C( 13926), + -INT16_C( 24211), -INT16_C( 32295), INT16_C( 11289), INT16_C( 2644) }, + { -INT16_C( 25013), -INT16_C( 19961), -INT16_C( 18606), INT16_C( 11403), + -INT16_C( 24211), INT16_C( 13396), INT16_C( 11289), INT16_C( 2644) }, + { -INT16_C( 25013), -INT16_C( 19961), -INT16_C( 18606), INT16_C( 11403), + -INT16_C( 24211), -INT16_C( 32295), INT16_C( 11289), -INT16_C( 24926) } }, + { { INT16_C( 28225), -INT16_C( 13641), INT16_C( 12137), -INT16_C( 2051), + -INT16_C( 24470), -INT16_C( 8376), INT16_C( 22272), INT16_C( 20450) }, + { INT16_C( 31393), -INT16_C( 20326), INT16_C( 23241), INT16_C( 19500) }, + { INT16_C( 19500), -INT16_C( 13641), INT16_C( 12137), -INT16_C( 2051), + -INT16_C( 24470), -INT16_C( 8376), INT16_C( 22272), INT16_C( 20450) }, + { INT16_C( 28225), 
-INT16_C( 13641), INT16_C( 12137), INT16_C( 23241), + -INT16_C( 24470), -INT16_C( 8376), INT16_C( 22272), INT16_C( 20450) }, + { INT16_C( 28225), -INT16_C( 13641), INT16_C( 12137), -INT16_C( 2051), + -INT16_C( 24470), -INT16_C( 20326), INT16_C( 22272), INT16_C( 20450) }, + { INT16_C( 28225), -INT16_C( 13641), INT16_C( 12137), -INT16_C( 2051), + -INT16_C( 24470), -INT16_C( 8376), INT16_C( 22272), INT16_C( 31393) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + + simde_int16x8_t r0_3 = simde_vcopyq_lane_s16(a, 0, b, 3); + simde_int16x8_t r3_2 = simde_vcopyq_lane_s16(a, 3, b, 2); + simde_int16x8_t r5_1 = simde_vcopyq_lane_s16(a, 5, b, 1); + simde_int16x8_t r7_0 = simde_vcopyq_lane_s16(a, 7, b, 0); + + simde_test_arm_neon_assert_equal_i16x8(r0_3, simde_vld1q_s16(test_vec[i].r0_3)); + simde_test_arm_neon_assert_equal_i16x8(r3_2, simde_vld1q_s16(test_vec[i].r3_2)); + simde_test_arm_neon_assert_equal_i16x8(r5_1, simde_vld1q_s16(test_vec[i].r5_1)); + simde_test_arm_neon_assert_equal_i16x8(r7_0, simde_vld1q_s16(test_vec[i].r7_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b[2]; + int32_t r0_1[4]; + int32_t r1_0[4]; + int32_t r2_1[4]; + int32_t r3_0[4]; + } test_vec[] = { + { { -INT32_C( 1076912012), INT32_C( 618068284), INT32_C( 1042680298), -INT32_C( 1284688742) }, + { -INT32_C( 1538444613), -INT32_C( 1251735563) }, + { -INT32_C( 1251735563), INT32_C( 618068284), INT32_C( 1042680298), -INT32_C( 1284688742) }, + { -INT32_C( 1076912012), -INT32_C( 1538444613), INT32_C( 1042680298), -INT32_C( 1284688742) }, + { -INT32_C( 1076912012), INT32_C( 618068284), -INT32_C( 1251735563), -INT32_C( 1284688742) }, + { -INT32_C( 1076912012), INT32_C( 618068284), INT32_C( 1042680298), -INT32_C( 1538444613) } }, + { { INT32_C( 2026659719), -INT32_C( 
1368393889), INT32_C( 964220746), -INT32_C( 2040877007) }, + { INT32_C( 557196735), -INT32_C( 1587131242) }, + { -INT32_C( 1587131242), -INT32_C( 1368393889), INT32_C( 964220746), -INT32_C( 2040877007) }, + { INT32_C( 2026659719), INT32_C( 557196735), INT32_C( 964220746), -INT32_C( 2040877007) }, + { INT32_C( 2026659719), -INT32_C( 1368393889), -INT32_C( 1587131242), -INT32_C( 2040877007) }, + { INT32_C( 2026659719), -INT32_C( 1368393889), INT32_C( 964220746), INT32_C( 557196735) } }, + { { -INT32_C( 1558714266), -INT32_C( 1474286938), INT32_C( 2027944758), -INT32_C( 237725764) }, + { -INT32_C( 611299852), INT32_C( 1233220441) }, + { INT32_C( 1233220441), -INT32_C( 1474286938), INT32_C( 2027944758), -INT32_C( 237725764) }, + { -INT32_C( 1558714266), -INT32_C( 611299852), INT32_C( 2027944758), -INT32_C( 237725764) }, + { -INT32_C( 1558714266), -INT32_C( 1474286938), INT32_C( 1233220441), -INT32_C( 237725764) }, + { -INT32_C( 1558714266), -INT32_C( 1474286938), INT32_C( 2027944758), -INT32_C( 611299852) } }, + { { INT32_C( 510883444), INT32_C( 50993558), INT32_C( 615538984), -INT32_C( 1925132709) }, + { INT32_C( 17242711), INT32_C( 560137033) }, + { INT32_C( 560137033), INT32_C( 50993558), INT32_C( 615538984), -INT32_C( 1925132709) }, + { INT32_C( 510883444), INT32_C( 17242711), INT32_C( 615538984), -INT32_C( 1925132709) }, + { INT32_C( 510883444), INT32_C( 50993558), INT32_C( 560137033), -INT32_C( 1925132709) }, + { INT32_C( 510883444), INT32_C( 50993558), INT32_C( 615538984), INT32_C( 17242711) } }, + { { INT32_C( 1386115226), -INT32_C( 1963224204), INT32_C( 211420390), -INT32_C( 1694280984) }, + { -INT32_C( 1322169749), INT32_C( 836919814) }, + { INT32_C( 836919814), -INT32_C( 1963224204), INT32_C( 211420390), -INT32_C( 1694280984) }, + { INT32_C( 1386115226), -INT32_C( 1322169749), INT32_C( 211420390), -INT32_C( 1694280984) }, + { INT32_C( 1386115226), -INT32_C( 1963224204), INT32_C( 836919814), -INT32_C( 1694280984) }, + { INT32_C( 1386115226), -INT32_C( 
1963224204), INT32_C( 211420390), -INT32_C( 1322169749) } }, + { { INT32_C( 958226031), INT32_C( 93380822), INT32_C( 1144688739), -INT32_C( 234382015) }, + { INT32_C( 1749098987), -INT32_C( 179092004) }, + { -INT32_C( 179092004), INT32_C( 93380822), INT32_C( 1144688739), -INT32_C( 234382015) }, + { INT32_C( 958226031), INT32_C( 1749098987), INT32_C( 1144688739), -INT32_C( 234382015) }, + { INT32_C( 958226031), INT32_C( 93380822), -INT32_C( 179092004), -INT32_C( 234382015) }, + { INT32_C( 958226031), INT32_C( 93380822), INT32_C( 1144688739), INT32_C( 1749098987) } }, + { { -INT32_C( 635952031), -INT32_C( 1377334988), -INT32_C( 22978719), -INT32_C( 34765681) }, + { INT32_C( 1211106462), -INT32_C( 23115063) }, + { -INT32_C( 23115063), -INT32_C( 1377334988), -INT32_C( 22978719), -INT32_C( 34765681) }, + { -INT32_C( 635952031), INT32_C( 1211106462), -INT32_C( 22978719), -INT32_C( 34765681) }, + { -INT32_C( 635952031), -INT32_C( 1377334988), -INT32_C( 23115063), -INT32_C( 34765681) }, + { -INT32_C( 635952031), -INT32_C( 1377334988), -INT32_C( 22978719), INT32_C( 1211106462) } }, + { { -INT32_C( 701062361), INT32_C( 166953805), INT32_C( 1658614329), -INT32_C( 730083122) }, + { -INT32_C( 1322477008), INT32_C( 858166047) }, + { INT32_C( 858166047), INT32_C( 166953805), INT32_C( 1658614329), -INT32_C( 730083122) }, + { -INT32_C( 701062361), -INT32_C( 1322477008), INT32_C( 1658614329), -INT32_C( 730083122) }, + { -INT32_C( 701062361), INT32_C( 166953805), INT32_C( 858166047), -INT32_C( 730083122) }, + { -INT32_C( 701062361), INT32_C( 166953805), INT32_C( 1658614329), -INT32_C( 1322477008) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + + simde_int32x4_t r0_1 = simde_vcopyq_lane_s32(a, 0, b, 1); + simde_int32x4_t r1_0 = simde_vcopyq_lane_s32(a, 1, b, 0); + simde_int32x4_t r2_1 = simde_vcopyq_lane_s32(a, 2, b, 1); + simde_int32x4_t 
r3_0 = simde_vcopyq_lane_s32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_i32x4(r0_1, simde_vld1q_s32(test_vec[i].r0_1)); + simde_test_arm_neon_assert_equal_i32x4(r1_0, simde_vld1q_s32(test_vec[i].r1_0)); + simde_test_arm_neon_assert_equal_i32x4(r2_1, simde_vld1q_s32(test_vec[i].r2_1)); + simde_test_arm_neon_assert_equal_i32x4(r3_0, simde_vld1q_s32(test_vec[i].r3_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int64_t b[1]; + int64_t r0_0[2]; + int64_t r1_0[2]; + } test_vec[] = { + { { INT64_C( 5923506011110482733), -INT64_C( 2645285737381901732) }, + { -INT64_C( 7834399894314585151) }, + { -INT64_C( 7834399894314585151), -INT64_C( 2645285737381901732) }, + { INT64_C( 5923506011110482733), -INT64_C( 7834399894314585151) } }, + { { -INT64_C( 1016997391912317657), INT64_C( 387471655083973654) }, + { -INT64_C( 2303427801483812271) }, + { -INT64_C( 2303427801483812271), INT64_C( 387471655083973654) }, + { -INT64_C( 1016997391912317657), -INT64_C( 2303427801483812271) } }, + { { INT64_C( 7341233720419624579), INT64_C( 8440647527380018740) }, + { INT64_C( 6296444952903933582) }, + { INT64_C( 6296444952903933582), INT64_C( 8440647527380018740) }, + { INT64_C( 7341233720419624579), INT64_C( 6296444952903933582) } }, + { { -INT64_C( 768202849149799049), -INT64_C( 4683657666914593469) }, + { INT64_C( 5482397825823415064) }, + { INT64_C( 5482397825823415064), -INT64_C( 4683657666914593469) }, + { -INT64_C( 768202849149799049), INT64_C( 5482397825823415064) } }, + { { INT64_C( 6276973598659071226), INT64_C( 5439164051307246322) }, + { INT64_C( 7898197828517044658) }, + { INT64_C( 7898197828517044658), INT64_C( 5439164051307246322) }, + { INT64_C( 6276973598659071226), INT64_C( 7898197828517044658) } }, + { { -INT64_C( 5573085121491680858), INT64_C( 5680682011604371585) }, + { -INT64_C( 9149867884760669227) }, + { -INT64_C( 9149867884760669227), INT64_C( 5680682011604371585) }, + { 
-INT64_C( 5573085121491680858), -INT64_C( 9149867884760669227) } }, + { { INT64_C( 8622738552695243649), -INT64_C( 6638279232013153822) }, + { -INT64_C( 5812927542173415403) }, + { -INT64_C( 5812927542173415403), -INT64_C( 6638279232013153822) }, + { INT64_C( 8622738552695243649), -INT64_C( 5812927542173415403) } }, + { { -INT64_C( 2025031443728201743), INT64_C( 9148317493025265463) }, + { INT64_C( 5207511157286457380) }, + { INT64_C( 5207511157286457380), INT64_C( 9148317493025265463) }, + { -INT64_C( 2025031443728201743), INT64_C( 5207511157286457380) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); + + simde_int64x2_t r0_0 = simde_vcopyq_lane_s64(a, 0, b, 0); + simde_int64x2_t r1_0 = simde_vcopyq_lane_s64(a, 1, b, 0); + + simde_test_arm_neon_assert_equal_i64x2(r0_0, simde_vld1q_s64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i64x2(r1_0, simde_vld1q_s64(test_vec[i].r1_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[16]; + uint8_t b[8]; + uint8_t r0_7[16]; + uint8_t r4_6[16]; + uint8_t r8_4[16]; + uint8_t r12_3[16]; + uint8_t r15_1[16]; + } test_vec[] = { + { { UINT8_C( 59), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 78), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 32), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 206), UINT8_C( 10), UINT8_C( 184), UINT8_C( 187) }, + { UINT8_C( 70), UINT8_C( 103), UINT8_C( 193), UINT8_C( 36), + UINT8_C( 18), UINT8_C( 128), UINT8_C( 189), UINT8_C( 208) }, + { UINT8_C( 208), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 78), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 32), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 206), UINT8_C( 10), UINT8_C( 184), UINT8_C( 187) }, + { UINT8_C( 59), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 
189), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 32), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 206), UINT8_C( 10), UINT8_C( 184), UINT8_C( 187) }, + { UINT8_C( 59), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 78), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 18), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 206), UINT8_C( 10), UINT8_C( 184), UINT8_C( 187) }, + { UINT8_C( 59), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 78), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 32), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 36), UINT8_C( 10), UINT8_C( 184), UINT8_C( 187) }, + { UINT8_C( 59), UINT8_C( 146), UINT8_C( 87), UINT8_C( 29), + UINT8_C( 78), UINT8_C( 48), UINT8_C( 79), UINT8_C( 166), + UINT8_C( 32), UINT8_C( 121), UINT8_C( 103), UINT8_C( 178), + UINT8_C( 206), UINT8_C( 10), UINT8_C( 184), UINT8_C( 103) } }, + { { UINT8_C( 91), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 61), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 111), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 194), UINT8_C( 125), UINT8_C( 37), UINT8_C( 129) }, + { UINT8_C( 139), UINT8_C( 50), UINT8_C( 181), UINT8_C( 157), + UINT8_C( 169), UINT8_C( 160), UINT8_C( 180), UINT8_C( 165) }, + { UINT8_C( 165), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 61), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 111), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 194), UINT8_C( 125), UINT8_C( 37), UINT8_C( 129) }, + { UINT8_C( 91), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 180), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 111), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 194), UINT8_C( 125), UINT8_C( 37), UINT8_C( 129) }, + { UINT8_C( 91), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 61), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 169), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 194), UINT8_C( 125), UINT8_C( 
37), UINT8_C( 129) }, + { UINT8_C( 91), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 61), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 111), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 157), UINT8_C( 125), UINT8_C( 37), UINT8_C( 129) }, + { UINT8_C( 91), UINT8_C( 101), UINT8_C( 84), UINT8_C( 52), + UINT8_C( 61), UINT8_C( 205), UINT8_C( 72), UINT8_C( 179), + UINT8_C( 111), UINT8_C( 192), UINT8_C( 117), UINT8_C( 227), + UINT8_C( 194), UINT8_C( 125), UINT8_C( 37), UINT8_C( 50) } }, + { { UINT8_C( 105), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 196), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + UINT8_C( 132), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 16), UINT8_C( 48), UINT8_C( 49), UINT8_C( 129) }, + { UINT8_C( 43), UINT8_C( 107), UINT8_C( 122), UINT8_C( 203), + UINT8_C( 47), UINT8_C( 15), UINT8_C( 218), UINT8_C( 173) }, + { UINT8_C( 173), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 196), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + UINT8_C( 132), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 16), UINT8_C( 48), UINT8_C( 49), UINT8_C( 129) }, + { UINT8_C( 105), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 218), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + UINT8_C( 132), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 16), UINT8_C( 48), UINT8_C( 49), UINT8_C( 129) }, + { UINT8_C( 105), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 196), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + UINT8_C( 47), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 16), UINT8_C( 48), UINT8_C( 49), UINT8_C( 129) }, + { UINT8_C( 105), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 196), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + UINT8_C( 132), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 203), UINT8_C( 48), UINT8_C( 49), UINT8_C( 129) }, + { UINT8_C( 105), UINT8_C( 235), UINT8_C( 218), UINT8_C( 67), + UINT8_C( 196), UINT8_C( 121), UINT8_C( 249), UINT8_C( 221), + 
UINT8_C( 132), UINT8_C( 75), UINT8_C( 140), UINT8_C( 45), + UINT8_C( 16), UINT8_C( 48), UINT8_C( 49), UINT8_C( 107) } }, + { { UINT8_C( 7), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 193), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 61), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 101), UINT8_C( 249), UINT8_C( 217), UINT8_C( 254) }, + { UINT8_C( 46), UINT8_C( 151), UINT8_C( 116), UINT8_C( 149), + UINT8_C( 92), UINT8_C( 84), UINT8_C( 239), UINT8_C( 224) }, + { UINT8_C( 224), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 193), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 61), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 101), UINT8_C( 249), UINT8_C( 217), UINT8_C( 254) }, + { UINT8_C( 7), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 239), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 61), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 101), UINT8_C( 249), UINT8_C( 217), UINT8_C( 254) }, + { UINT8_C( 7), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 193), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 92), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 101), UINT8_C( 249), UINT8_C( 217), UINT8_C( 254) }, + { UINT8_C( 7), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 193), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 61), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 149), UINT8_C( 249), UINT8_C( 217), UINT8_C( 254) }, + { UINT8_C( 7), UINT8_C( 14), UINT8_C( 69), UINT8_C( 189), + UINT8_C( 193), UINT8_C( 0), UINT8_C( 183), UINT8_C( 117), + UINT8_C( 61), UINT8_C( 243), UINT8_C( 125), UINT8_C( 221), + UINT8_C( 101), UINT8_C( 249), UINT8_C( 217), UINT8_C( 151) } }, + { { UINT8_C( 129), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 235), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 224), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 39), UINT8_C( 35), UINT8_C( 204), UINT8_C( 158) }, + { UINT8_C( 61), UINT8_C( 
217), UINT8_C( 158), UINT8_C( 85), + UINT8_C( 22), UINT8_C( 54), UINT8_C( 63), UINT8_C( 57) }, + { UINT8_C( 57), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 235), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 224), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 39), UINT8_C( 35), UINT8_C( 204), UINT8_C( 158) }, + { UINT8_C( 129), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 63), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 224), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 39), UINT8_C( 35), UINT8_C( 204), UINT8_C( 158) }, + { UINT8_C( 129), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 235), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 22), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 39), UINT8_C( 35), UINT8_C( 204), UINT8_C( 158) }, + { UINT8_C( 129), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 235), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 224), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 85), UINT8_C( 35), UINT8_C( 204), UINT8_C( 158) }, + { UINT8_C( 129), UINT8_C( 32), UINT8_C( 181), UINT8_C( 85), + UINT8_C( 235), UINT8_C( 229), UINT8_C( 36), UINT8_C( 140), + UINT8_C( 224), UINT8_C( 213), UINT8_C( 182), UINT8_C( 144), + UINT8_C( 39), UINT8_C( 35), UINT8_C( 204), UINT8_C( 217) } }, + { { UINT8_C( 42), UINT8_C( 33), UINT8_C( 152), UINT8_C( 160), + UINT8_C( 201), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 152), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 248), UINT8_C( 92), UINT8_C( 158), UINT8_C( 137) }, + { UINT8_C( 101), UINT8_C( 207), UINT8_C( 250), UINT8_C( 139), + UINT8_C( 236), UINT8_C( 39), UINT8_C( 230), UINT8_C( 206) }, + { UINT8_C( 206), UINT8_C( 33), UINT8_C( 152), UINT8_C( 160), + UINT8_C( 201), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 152), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 248), UINT8_C( 92), UINT8_C( 158), UINT8_C( 137) }, + { UINT8_C( 42), UINT8_C( 33), UINT8_C( 152), 
UINT8_C( 160), + UINT8_C( 230), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 152), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 248), UINT8_C( 92), UINT8_C( 158), UINT8_C( 137) }, + { UINT8_C( 42), UINT8_C( 33), UINT8_C( 152), UINT8_C( 160), + UINT8_C( 201), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 236), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 248), UINT8_C( 92), UINT8_C( 158), UINT8_C( 137) }, + { UINT8_C( 42), UINT8_C( 33), UINT8_C( 152), UINT8_C( 160), + UINT8_C( 201), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 152), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 139), UINT8_C( 92), UINT8_C( 158), UINT8_C( 137) }, + { UINT8_C( 42), UINT8_C( 33), UINT8_C( 152), UINT8_C( 160), + UINT8_C( 201), UINT8_C( 17), UINT8_C( 113), UINT8_C( 21), + UINT8_C( 152), UINT8_C( 89), UINT8_C( 243), UINT8_C( 109), + UINT8_C( 248), UINT8_C( 92), UINT8_C( 158), UINT8_C( 207) } }, + { { UINT8_C( 14), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 120), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 79), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 118), UINT8_C( 99), UINT8_C( 223), UINT8_C( 239) }, + { UINT8_C( 87), UINT8_C( 213), UINT8_C( 68), UINT8_C( 244), + UINT8_C( 30), UINT8_C( 172), UINT8_C( 2), UINT8_C( 136) }, + { UINT8_C( 136), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 120), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 79), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 118), UINT8_C( 99), UINT8_C( 223), UINT8_C( 239) }, + { UINT8_C( 14), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 2), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 79), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 118), UINT8_C( 99), UINT8_C( 223), UINT8_C( 239) }, + { UINT8_C( 14), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 120), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 30), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 118), UINT8_C( 
99), UINT8_C( 223), UINT8_C( 239) }, + { UINT8_C( 14), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 120), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 79), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 244), UINT8_C( 99), UINT8_C( 223), UINT8_C( 239) }, + { UINT8_C( 14), UINT8_C( 0), UINT8_C( 142), UINT8_C( 110), + UINT8_C( 120), UINT8_C( 86), UINT8_C( 235), UINT8_C( 19), + UINT8_C( 79), UINT8_C( 222), UINT8_C( 38), UINT8_C( 88), + UINT8_C( 118), UINT8_C( 99), UINT8_C( 223), UINT8_C( 213) } }, + { { UINT8_C( 155), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 114), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + UINT8_C( 185), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 81), UINT8_C( 187), UINT8_C( 3), UINT8_C( 123) }, + { UINT8_C( 151), UINT8_C( 65), UINT8_C( 26), UINT8_C( 1), + UINT8_C( 212), UINT8_C( 113), UINT8_C( 67), UINT8_C( 18) }, + { UINT8_C( 18), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 114), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + UINT8_C( 185), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 81), UINT8_C( 187), UINT8_C( 3), UINT8_C( 123) }, + { UINT8_C( 155), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 67), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + UINT8_C( 185), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 81), UINT8_C( 187), UINT8_C( 3), UINT8_C( 123) }, + { UINT8_C( 155), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 114), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + UINT8_C( 212), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 81), UINT8_C( 187), UINT8_C( 3), UINT8_C( 123) }, + { UINT8_C( 155), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 114), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + UINT8_C( 185), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 1), UINT8_C( 187), UINT8_C( 3), UINT8_C( 123) }, + { UINT8_C( 155), UINT8_C( 176), UINT8_C( 228), UINT8_C( 41), + UINT8_C( 114), UINT8_C( 45), UINT8_C( 120), UINT8_C( 68), + 
UINT8_C( 185), UINT8_C( 174), UINT8_C( 129), UINT8_C( 46), + UINT8_C( 81), UINT8_C( 187), UINT8_C( 3), UINT8_C( 65) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); + + simde_uint8x16_t r0_7 = simde_vcopyq_lane_u8(a, 0, b, 7); + simde_uint8x16_t r4_6 = simde_vcopyq_lane_u8(a, 4, b, 6); + simde_uint8x16_t r8_4 = simde_vcopyq_lane_u8(a, 8, b, 4); + simde_uint8x16_t r12_3 = simde_vcopyq_lane_u8(a, 12, b, 3); + simde_uint8x16_t r15_1 = simde_vcopyq_lane_u8(a, 15, b, 1); + + simde_test_arm_neon_assert_equal_u8x16(r0_7, simde_vld1q_u8(test_vec[i].r0_7)); + simde_test_arm_neon_assert_equal_u8x16(r4_6, simde_vld1q_u8(test_vec[i].r4_6)); + simde_test_arm_neon_assert_equal_u8x16(r8_4, simde_vld1q_u8(test_vec[i].r8_4)); + simde_test_arm_neon_assert_equal_u8x16(r12_3, simde_vld1q_u8(test_vec[i].r12_3)); + simde_test_arm_neon_assert_equal_u8x16(r15_1, simde_vld1q_u8(test_vec[i].r15_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint16_t b[4]; + uint16_t r0_3[8]; + uint16_t r3_2[8]; + uint16_t r5_1[8]; + uint16_t r7_0[8]; + } test_vec[] = { + { { UINT16_C( 45106), UINT16_C( 38241), UINT16_C( 4836), UINT16_C( 40111), + UINT16_C( 37552), UINT16_C( 53688), UINT16_C( 17469), UINT16_C( 24387) }, + { UINT16_C( 47087), UINT16_C( 53333), UINT16_C( 17428), UINT16_C( 45727) }, + { UINT16_C( 45727), UINT16_C( 38241), UINT16_C( 4836), UINT16_C( 40111), + UINT16_C( 37552), UINT16_C( 53688), UINT16_C( 17469), UINT16_C( 24387) }, + { UINT16_C( 45106), UINT16_C( 38241), UINT16_C( 4836), UINT16_C( 17428), + UINT16_C( 37552), UINT16_C( 53688), UINT16_C( 17469), UINT16_C( 24387) }, + { UINT16_C( 45106), UINT16_C( 38241), UINT16_C( 4836), UINT16_C( 40111), + UINT16_C( 37552), UINT16_C( 53333), UINT16_C( 17469), UINT16_C( 24387) }, + { UINT16_C( 45106), UINT16_C( 
38241), UINT16_C( 4836), UINT16_C( 40111), + UINT16_C( 37552), UINT16_C( 53688), UINT16_C( 17469), UINT16_C( 47087) } }, + { { UINT16_C( 5206), UINT16_C( 60332), UINT16_C( 40277), UINT16_C( 19674), + UINT16_C( 6622), UINT16_C( 38757), UINT16_C( 64251), UINT16_C( 47386) }, + { UINT16_C( 8478), UINT16_C( 4749), UINT16_C( 53363), UINT16_C( 29869) }, + { UINT16_C( 29869), UINT16_C( 60332), UINT16_C( 40277), UINT16_C( 19674), + UINT16_C( 6622), UINT16_C( 38757), UINT16_C( 64251), UINT16_C( 47386) }, + { UINT16_C( 5206), UINT16_C( 60332), UINT16_C( 40277), UINT16_C( 53363), + UINT16_C( 6622), UINT16_C( 38757), UINT16_C( 64251), UINT16_C( 47386) }, + { UINT16_C( 5206), UINT16_C( 60332), UINT16_C( 40277), UINT16_C( 19674), + UINT16_C( 6622), UINT16_C( 4749), UINT16_C( 64251), UINT16_C( 47386) }, + { UINT16_C( 5206), UINT16_C( 60332), UINT16_C( 40277), UINT16_C( 19674), + UINT16_C( 6622), UINT16_C( 38757), UINT16_C( 64251), UINT16_C( 8478) } }, + { { UINT16_C( 59206), UINT16_C( 36412), UINT16_C( 15257), UINT16_C( 46944), + UINT16_C( 46282), UINT16_C( 17117), UINT16_C( 45233), UINT16_C( 4994) }, + { UINT16_C( 46395), UINT16_C( 50547), UINT16_C( 19830), UINT16_C( 44578) }, + { UINT16_C( 44578), UINT16_C( 36412), UINT16_C( 15257), UINT16_C( 46944), + UINT16_C( 46282), UINT16_C( 17117), UINT16_C( 45233), UINT16_C( 4994) }, + { UINT16_C( 59206), UINT16_C( 36412), UINT16_C( 15257), UINT16_C( 19830), + UINT16_C( 46282), UINT16_C( 17117), UINT16_C( 45233), UINT16_C( 4994) }, + { UINT16_C( 59206), UINT16_C( 36412), UINT16_C( 15257), UINT16_C( 46944), + UINT16_C( 46282), UINT16_C( 50547), UINT16_C( 45233), UINT16_C( 4994) }, + { UINT16_C( 59206), UINT16_C( 36412), UINT16_C( 15257), UINT16_C( 46944), + UINT16_C( 46282), UINT16_C( 17117), UINT16_C( 45233), UINT16_C( 46395) } }, + { { UINT16_C( 13682), UINT16_C( 54233), UINT16_C( 15911), UINT16_C( 26730), + UINT16_C( 502), UINT16_C( 64753), UINT16_C( 30819), UINT16_C( 45749) }, + { UINT16_C( 24828), UINT16_C( 29679), UINT16_C( 21740), 
UINT16_C( 34938) }, + { UINT16_C( 34938), UINT16_C( 54233), UINT16_C( 15911), UINT16_C( 26730), + UINT16_C( 502), UINT16_C( 64753), UINT16_C( 30819), UINT16_C( 45749) }, + { UINT16_C( 13682), UINT16_C( 54233), UINT16_C( 15911), UINT16_C( 21740), + UINT16_C( 502), UINT16_C( 64753), UINT16_C( 30819), UINT16_C( 45749) }, + { UINT16_C( 13682), UINT16_C( 54233), UINT16_C( 15911), UINT16_C( 26730), + UINT16_C( 502), UINT16_C( 29679), UINT16_C( 30819), UINT16_C( 45749) }, + { UINT16_C( 13682), UINT16_C( 54233), UINT16_C( 15911), UINT16_C( 26730), + UINT16_C( 502), UINT16_C( 64753), UINT16_C( 30819), UINT16_C( 24828) } }, + { { UINT16_C( 12489), UINT16_C( 1881), UINT16_C( 42262), UINT16_C( 53977), + UINT16_C( 19091), UINT16_C( 33756), UINT16_C( 22902), UINT16_C( 39968) }, + { UINT16_C( 373), UINT16_C( 54089), UINT16_C( 12533), UINT16_C( 41276) }, + { UINT16_C( 41276), UINT16_C( 1881), UINT16_C( 42262), UINT16_C( 53977), + UINT16_C( 19091), UINT16_C( 33756), UINT16_C( 22902), UINT16_C( 39968) }, + { UINT16_C( 12489), UINT16_C( 1881), UINT16_C( 42262), UINT16_C( 12533), + UINT16_C( 19091), UINT16_C( 33756), UINT16_C( 22902), UINT16_C( 39968) }, + { UINT16_C( 12489), UINT16_C( 1881), UINT16_C( 42262), UINT16_C( 53977), + UINT16_C( 19091), UINT16_C( 54089), UINT16_C( 22902), UINT16_C( 39968) }, + { UINT16_C( 12489), UINT16_C( 1881), UINT16_C( 42262), UINT16_C( 53977), + UINT16_C( 19091), UINT16_C( 33756), UINT16_C( 22902), UINT16_C( 373) } }, + { { UINT16_C( 8219), UINT16_C( 44521), UINT16_C( 54001), UINT16_C( 7888), + UINT16_C( 57460), UINT16_C( 14245), UINT16_C( 48545), UINT16_C( 24933) }, + { UINT16_C( 62999), UINT16_C( 52731), UINT16_C( 45056), UINT16_C( 13736) }, + { UINT16_C( 13736), UINT16_C( 44521), UINT16_C( 54001), UINT16_C( 7888), + UINT16_C( 57460), UINT16_C( 14245), UINT16_C( 48545), UINT16_C( 24933) }, + { UINT16_C( 8219), UINT16_C( 44521), UINT16_C( 54001), UINT16_C( 45056), + UINT16_C( 57460), UINT16_C( 14245), UINT16_C( 48545), UINT16_C( 24933) }, + { 
UINT16_C( 8219), UINT16_C( 44521), UINT16_C( 54001), UINT16_C( 7888), + UINT16_C( 57460), UINT16_C( 52731), UINT16_C( 48545), UINT16_C( 24933) }, + { UINT16_C( 8219), UINT16_C( 44521), UINT16_C( 54001), UINT16_C( 7888), + UINT16_C( 57460), UINT16_C( 14245), UINT16_C( 48545), UINT16_C( 62999) } }, + { { UINT16_C( 17803), UINT16_C( 23302), UINT16_C( 51288), UINT16_C( 6738), + UINT16_C( 26381), UINT16_C( 44920), UINT16_C( 438), UINT16_C( 12440) }, + { UINT16_C( 64520), UINT16_C( 6608), UINT16_C( 27242), UINT16_C( 9986) }, + { UINT16_C( 9986), UINT16_C( 23302), UINT16_C( 51288), UINT16_C( 6738), + UINT16_C( 26381), UINT16_C( 44920), UINT16_C( 438), UINT16_C( 12440) }, + { UINT16_C( 17803), UINT16_C( 23302), UINT16_C( 51288), UINT16_C( 27242), + UINT16_C( 26381), UINT16_C( 44920), UINT16_C( 438), UINT16_C( 12440) }, + { UINT16_C( 17803), UINT16_C( 23302), UINT16_C( 51288), UINT16_C( 6738), + UINT16_C( 26381), UINT16_C( 6608), UINT16_C( 438), UINT16_C( 12440) }, + { UINT16_C( 17803), UINT16_C( 23302), UINT16_C( 51288), UINT16_C( 6738), + UINT16_C( 26381), UINT16_C( 44920), UINT16_C( 438), UINT16_C( 64520) } }, + { { UINT16_C( 40969), UINT16_C( 63552), UINT16_C( 36257), UINT16_C( 5157), + UINT16_C( 49006), UINT16_C( 3329), UINT16_C( 25930), UINT16_C( 37093) }, + { UINT16_C( 24602), UINT16_C( 27093), UINT16_C( 33126), UINT16_C( 48029) }, + { UINT16_C( 48029), UINT16_C( 63552), UINT16_C( 36257), UINT16_C( 5157), + UINT16_C( 49006), UINT16_C( 3329), UINT16_C( 25930), UINT16_C( 37093) }, + { UINT16_C( 40969), UINT16_C( 63552), UINT16_C( 36257), UINT16_C( 33126), + UINT16_C( 49006), UINT16_C( 3329), UINT16_C( 25930), UINT16_C( 37093) }, + { UINT16_C( 40969), UINT16_C( 63552), UINT16_C( 36257), UINT16_C( 5157), + UINT16_C( 49006), UINT16_C( 27093), UINT16_C( 25930), UINT16_C( 37093) }, + { UINT16_C( 40969), UINT16_C( 63552), UINT16_C( 36257), UINT16_C( 5157), + UINT16_C( 49006), UINT16_C( 3329), UINT16_C( 25930), UINT16_C( 24602) } }, + }; + + for (size_t i = 0 ; i < 
(sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); + + simde_uint16x8_t r0_3 = simde_vcopyq_lane_u16(a, 0, b, 3); + simde_uint16x8_t r3_2 = simde_vcopyq_lane_u16(a, 3, b, 2); + simde_uint16x8_t r5_1 = simde_vcopyq_lane_u16(a, 5, b, 1); + simde_uint16x8_t r7_0 = simde_vcopyq_lane_u16(a, 7, b, 0); + + simde_test_arm_neon_assert_equal_u16x8(r0_3, simde_vld1q_u16(test_vec[i].r0_3)); + simde_test_arm_neon_assert_equal_u16x8(r3_2, simde_vld1q_u16(test_vec[i].r3_2)); + simde_test_arm_neon_assert_equal_u16x8(r5_1, simde_vld1q_u16(test_vec[i].r5_1)); + simde_test_arm_neon_assert_equal_u16x8(r7_0, simde_vld1q_u16(test_vec[i].r7_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint32_t b[2]; + uint32_t r0_1[4]; + uint32_t r1_0[4]; + uint32_t r2_1[4]; + uint32_t r3_0[4]; + } test_vec[] = { + { { UINT32_C( 361921892), UINT32_C( 3900928184), UINT32_C( 3837963448), UINT32_C( 429793356) }, + { UINT32_C( 26373424), UINT32_C( 3910484228) }, + { UINT32_C( 3910484228), UINT32_C( 3900928184), UINT32_C( 3837963448), UINT32_C( 429793356) }, + { UINT32_C( 361921892), UINT32_C( 26373424), UINT32_C( 3837963448), UINT32_C( 429793356) }, + { UINT32_C( 361921892), UINT32_C( 3900928184), UINT32_C( 3910484228), UINT32_C( 429793356) }, + { UINT32_C( 361921892), UINT32_C( 3900928184), UINT32_C( 3837963448), UINT32_C( 26373424) } }, + { { UINT32_C( 1396390629), UINT32_C( 3575792530), UINT32_C( 3584537359), UINT32_C( 2356347849) }, + { UINT32_C( 3791328885), UINT32_C( 2861189464) }, + { UINT32_C( 2861189464), UINT32_C( 3575792530), UINT32_C( 3584537359), UINT32_C( 2356347849) }, + { UINT32_C( 1396390629), UINT32_C( 3791328885), UINT32_C( 3584537359), UINT32_C( 2356347849) }, + { UINT32_C( 1396390629), UINT32_C( 3575792530), UINT32_C( 2861189464), UINT32_C( 2356347849) }, + { UINT32_C( 1396390629), UINT32_C( 
3575792530), UINT32_C( 3584537359), UINT32_C( 3791328885) } }, + { { UINT32_C( 2264455894), UINT32_C( 772844039), UINT32_C( 2992759396), UINT32_C( 2614908981) }, + { UINT32_C( 121133279), UINT32_C( 1252193851) }, + { UINT32_C( 1252193851), UINT32_C( 772844039), UINT32_C( 2992759396), UINT32_C( 2614908981) }, + { UINT32_C( 2264455894), UINT32_C( 121133279), UINT32_C( 2992759396), UINT32_C( 2614908981) }, + { UINT32_C( 2264455894), UINT32_C( 772844039), UINT32_C( 1252193851), UINT32_C( 2614908981) }, + { UINT32_C( 2264455894), UINT32_C( 772844039), UINT32_C( 2992759396), UINT32_C( 121133279) } }, + { { UINT32_C( 2880672670), UINT32_C( 57847964), UINT32_C( 4166813298), UINT32_C( 126229096) }, + { UINT32_C( 4065062230), UINT32_C( 1434580494) }, + { UINT32_C( 1434580494), UINT32_C( 57847964), UINT32_C( 4166813298), UINT32_C( 126229096) }, + { UINT32_C( 2880672670), UINT32_C( 4065062230), UINT32_C( 4166813298), UINT32_C( 126229096) }, + { UINT32_C( 2880672670), UINT32_C( 57847964), UINT32_C( 1434580494), UINT32_C( 126229096) }, + { UINT32_C( 2880672670), UINT32_C( 57847964), UINT32_C( 4166813298), UINT32_C( 4065062230) } }, + { { UINT32_C( 1340690585), UINT32_C( 946996966), UINT32_C( 551479920), UINT32_C( 3629071371) }, + { UINT32_C( 404629523), UINT32_C( 2776260397) }, + { UINT32_C( 2776260397), UINT32_C( 946996966), UINT32_C( 551479920), UINT32_C( 3629071371) }, + { UINT32_C( 1340690585), UINT32_C( 404629523), UINT32_C( 551479920), UINT32_C( 3629071371) }, + { UINT32_C( 1340690585), UINT32_C( 946996966), UINT32_C( 2776260397), UINT32_C( 3629071371) }, + { UINT32_C( 1340690585), UINT32_C( 946996966), UINT32_C( 551479920), UINT32_C( 404629523) } }, + { { UINT32_C( 2002654709), UINT32_C( 1050964603), UINT32_C( 1159950872), UINT32_C( 2906190267) }, + { UINT32_C( 258802985), UINT32_C( 3426756944) }, + { UINT32_C( 3426756944), UINT32_C( 1050964603), UINT32_C( 1159950872), UINT32_C( 2906190267) }, + { UINT32_C( 2002654709), UINT32_C( 258802985), UINT32_C( 1159950872), 
UINT32_C( 2906190267) }, + { UINT32_C( 2002654709), UINT32_C( 1050964603), UINT32_C( 3426756944), UINT32_C( 2906190267) }, + { UINT32_C( 2002654709), UINT32_C( 1050964603), UINT32_C( 1159950872), UINT32_C( 258802985) } }, + { { UINT32_C( 2945893748), UINT32_C( 402372422), UINT32_C( 2823976653), UINT32_C( 2361987444) }, + { UINT32_C( 73085720), UINT32_C( 3141146066) }, + { UINT32_C( 3141146066), UINT32_C( 402372422), UINT32_C( 2823976653), UINT32_C( 2361987444) }, + { UINT32_C( 2945893748), UINT32_C( 73085720), UINT32_C( 2823976653), UINT32_C( 2361987444) }, + { UINT32_C( 2945893748), UINT32_C( 402372422), UINT32_C( 3141146066), UINT32_C( 2361987444) }, + { UINT32_C( 2945893748), UINT32_C( 402372422), UINT32_C( 2823976653), UINT32_C( 73085720) } }, + { { UINT32_C( 703705882), UINT32_C( 1874556398), UINT32_C( 357072285), UINT32_C( 989687532) }, + { UINT32_C( 1441815222), UINT32_C( 4271316470) }, + { UINT32_C( 4271316470), UINT32_C( 1874556398), UINT32_C( 357072285), UINT32_C( 989687532) }, + { UINT32_C( 703705882), UINT32_C( 1441815222), UINT32_C( 357072285), UINT32_C( 989687532) }, + { UINT32_C( 703705882), UINT32_C( 1874556398), UINT32_C( 4271316470), UINT32_C( 989687532) }, + { UINT32_C( 703705882), UINT32_C( 1874556398), UINT32_C( 357072285), UINT32_C( 1441815222) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); + + simde_uint32x4_t r0_1 = simde_vcopyq_lane_u32(a, 0, b, 1); + simde_uint32x4_t r1_0 = simde_vcopyq_lane_u32(a, 1, b, 0); + simde_uint32x4_t r2_1 = simde_vcopyq_lane_u32(a, 2, b, 1); + simde_uint32x4_t r3_0 = simde_vcopyq_lane_u32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_u32x4(r0_1, simde_vld1q_u32(test_vec[i].r0_1)); + simde_test_arm_neon_assert_equal_u32x4(r1_0, simde_vld1q_u32(test_vec[i].r1_0)); + simde_test_arm_neon_assert_equal_u32x4(r2_1, simde_vld1q_u32(test_vec[i].r2_1)); + 
simde_test_arm_neon_assert_equal_u32x4(r3_0, simde_vld1q_u32(test_vec[i].r3_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint64_t b[1]; + uint64_t r0_0[2]; + uint64_t r1_0[2]; + } test_vec[] = { + { { UINT64_C(13088088903368892823), UINT64_C(11535793185305623270) }, + { UINT64_C( 5924534224249363076) }, + { UINT64_C( 5924534224249363076), UINT64_C(11535793185305623270) }, + { UINT64_C(13088088903368892823), UINT64_C( 5924534224249363076) } }, + { { UINT64_C(18271803387774738026), UINT64_C( 840258608695505479) }, + { UINT64_C(18149383589595975661) }, + { UINT64_C(18149383589595975661), UINT64_C( 840258608695505479) }, + { UINT64_C(18271803387774738026), UINT64_C(18149383589595975661) } }, + { { UINT64_C( 1426608908501472439), UINT64_C(17579859043434713714) }, + { UINT64_C( 3863851528950464347) }, + { UINT64_C( 3863851528950464347), UINT64_C(17579859043434713714) }, + { UINT64_C( 1426608908501472439), UINT64_C( 3863851528950464347) } }, + { { UINT64_C( 2887535003750440669), UINT64_C( 9706127502422434237) }, + { UINT64_C( 585016102035113102) }, + { UINT64_C( 585016102035113102), UINT64_C( 9706127502422434237) }, + { UINT64_C( 2887535003750440669), UINT64_C( 585016102035113102) } }, + { { UINT64_C(11767062204421784175), UINT64_C( 6611989932014277560) }, + { UINT64_C( 1728274990776995191) }, + { UINT64_C( 1728274990776995191), UINT64_C( 6611989932014277560) }, + { UINT64_C(11767062204421784175), UINT64_C( 1728274990776995191) } }, + { { UINT64_C(11529019270513437728), UINT64_C( 7695879568420221205) }, + { UINT64_C(15611393117687386357) }, + { UINT64_C(15611393117687386357), UINT64_C( 7695879568420221205) }, + { UINT64_C(11529019270513437728), UINT64_C(15611393117687386357) } }, + { { UINT64_C(11313965299073639025), UINT64_C( 5304856213118510799) }, + { UINT64_C( 4627035757479335269) }, + { UINT64_C( 4627035757479335269), UINT64_C( 5304856213118510799) }, + { 
UINT64_C(11313965299073639025), UINT64_C( 4627035757479335269) } }, + { { UINT64_C(10403068332418732573), UINT64_C(18189821306737318447) }, + { UINT64_C(15540049615037950397) }, + { UINT64_C(15540049615037950397), UINT64_C(18189821306737318447) }, + { UINT64_C(10403068332418732573), UINT64_C(15540049615037950397) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); + + simde_uint64x2_t r0_0 = simde_vcopyq_lane_u64(a, 0, b, 0); + simde_uint64x2_t r1_0 = simde_vcopyq_lane_u64(a, 1, b, 0); + + simde_test_arm_neon_assert_equal_u64x2(r0_0, simde_vld1q_u64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u64x2(r1_0, simde_vld1q_u64(test_vec[i].r1_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[4]; + simde_float32_t b[2]; + simde_float32_t r0_1[4]; + simde_float32_t r1_0[4]; + simde_float32_t r2_1[4]; + simde_float32_t r3_0[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 538.654), -SIMDE_FLOAT32_C( 4708.625), -SIMDE_FLOAT32_C( 9690.416), -SIMDE_FLOAT32_C( 2113.408) }, + { SIMDE_FLOAT32_C( 8132.408), SIMDE_FLOAT32_C( 5050.145) }, + { SIMDE_FLOAT32_C( 5050.145), -SIMDE_FLOAT32_C( 4708.625), -SIMDE_FLOAT32_C( 9690.416), -SIMDE_FLOAT32_C( 2113.408) }, + { SIMDE_FLOAT32_C( 538.654), SIMDE_FLOAT32_C( 8132.408), -SIMDE_FLOAT32_C( 9690.416), -SIMDE_FLOAT32_C( 2113.408) }, + { SIMDE_FLOAT32_C( 538.654), -SIMDE_FLOAT32_C( 4708.625), SIMDE_FLOAT32_C( 5050.145), -SIMDE_FLOAT32_C( 2113.408) }, + { SIMDE_FLOAT32_C( 538.654), -SIMDE_FLOAT32_C( 4708.625), -SIMDE_FLOAT32_C( 9690.416), SIMDE_FLOAT32_C( 8132.408) } }, + { { SIMDE_FLOAT32_C( 5966.624), SIMDE_FLOAT32_C( 7056.986), -SIMDE_FLOAT32_C( 1760.608), -SIMDE_FLOAT32_C( 656.592) }, + { SIMDE_FLOAT32_C( 7371.629), -SIMDE_FLOAT32_C( 974.229) }, + { -SIMDE_FLOAT32_C( 974.229), SIMDE_FLOAT32_C( 7056.986), 
-SIMDE_FLOAT32_C( 1760.608), -SIMDE_FLOAT32_C( 656.592) }, + { SIMDE_FLOAT32_C( 5966.624), SIMDE_FLOAT32_C( 7371.629), -SIMDE_FLOAT32_C( 1760.608), -SIMDE_FLOAT32_C( 656.592) }, + { SIMDE_FLOAT32_C( 5966.624), SIMDE_FLOAT32_C( 7056.986), -SIMDE_FLOAT32_C( 974.229), -SIMDE_FLOAT32_C( 656.592) }, + { SIMDE_FLOAT32_C( 5966.624), SIMDE_FLOAT32_C( 7056.986), -SIMDE_FLOAT32_C( 1760.608), SIMDE_FLOAT32_C( 7371.629) } }, + { { -SIMDE_FLOAT32_C( 2332.909), -SIMDE_FLOAT32_C( 12.015), SIMDE_FLOAT32_C( 958.198), SIMDE_FLOAT32_C( 3179.859) }, + { SIMDE_FLOAT32_C( 421.483), -SIMDE_FLOAT32_C( 391.272) }, + { -SIMDE_FLOAT32_C( 391.272), -SIMDE_FLOAT32_C( 12.015), SIMDE_FLOAT32_C( 958.198), SIMDE_FLOAT32_C( 3179.859) }, + { -SIMDE_FLOAT32_C( 2332.909), SIMDE_FLOAT32_C( 421.483), SIMDE_FLOAT32_C( 958.198), SIMDE_FLOAT32_C( 3179.859) }, + { -SIMDE_FLOAT32_C( 2332.909), -SIMDE_FLOAT32_C( 12.015), -SIMDE_FLOAT32_C( 391.272), SIMDE_FLOAT32_C( 3179.859) }, + { -SIMDE_FLOAT32_C( 2332.909), -SIMDE_FLOAT32_C( 12.015), SIMDE_FLOAT32_C( 958.198), SIMDE_FLOAT32_C( 421.483) } }, + { { SIMDE_FLOAT32_C( 5751.485), -SIMDE_FLOAT32_C( 4550.866), -SIMDE_FLOAT32_C( 5492.006), -SIMDE_FLOAT32_C( 3518.459) }, + { SIMDE_FLOAT32_C( 5851.975), SIMDE_FLOAT32_C( 7832.057) }, + { SIMDE_FLOAT32_C( 7832.057), -SIMDE_FLOAT32_C( 4550.866), -SIMDE_FLOAT32_C( 5492.006), -SIMDE_FLOAT32_C( 3518.459) }, + { SIMDE_FLOAT32_C( 5751.485), SIMDE_FLOAT32_C( 5851.975), -SIMDE_FLOAT32_C( 5492.006), -SIMDE_FLOAT32_C( 3518.459) }, + { SIMDE_FLOAT32_C( 5751.485), -SIMDE_FLOAT32_C( 4550.866), SIMDE_FLOAT32_C( 7832.057), -SIMDE_FLOAT32_C( 3518.459) }, + { SIMDE_FLOAT32_C( 5751.485), -SIMDE_FLOAT32_C( 4550.866), -SIMDE_FLOAT32_C( 5492.006), SIMDE_FLOAT32_C( 5851.975) } }, + { { SIMDE_FLOAT32_C( 2504.024), -SIMDE_FLOAT32_C( 4086.377), SIMDE_FLOAT32_C( 2309.051), SIMDE_FLOAT32_C( 1842.812) }, + { SIMDE_FLOAT32_C( 9351.285), -SIMDE_FLOAT32_C( 4461.544) }, + { -SIMDE_FLOAT32_C( 4461.544), -SIMDE_FLOAT32_C( 4086.377), SIMDE_FLOAT32_C( 
2309.051), SIMDE_FLOAT32_C( 1842.812) }, + { SIMDE_FLOAT32_C( 2504.024), SIMDE_FLOAT32_C( 9351.285), SIMDE_FLOAT32_C( 2309.051), SIMDE_FLOAT32_C( 1842.812) }, + { SIMDE_FLOAT32_C( 2504.024), -SIMDE_FLOAT32_C( 4086.377), -SIMDE_FLOAT32_C( 4461.544), SIMDE_FLOAT32_C( 1842.812) }, + { SIMDE_FLOAT32_C( 2504.024), -SIMDE_FLOAT32_C( 4086.377), SIMDE_FLOAT32_C( 2309.051), SIMDE_FLOAT32_C( 9351.285) } }, + { { -SIMDE_FLOAT32_C( 6719.923), SIMDE_FLOAT32_C( 7398.066), SIMDE_FLOAT32_C( 7624.779), -SIMDE_FLOAT32_C( 7913.260) }, + { SIMDE_FLOAT32_C( 9228.150), -SIMDE_FLOAT32_C( 6206.421) }, + { -SIMDE_FLOAT32_C( 6206.421), SIMDE_FLOAT32_C( 7398.066), SIMDE_FLOAT32_C( 7624.779), -SIMDE_FLOAT32_C( 7913.260) }, + { -SIMDE_FLOAT32_C( 6719.923), SIMDE_FLOAT32_C( 9228.150), SIMDE_FLOAT32_C( 7624.779), -SIMDE_FLOAT32_C( 7913.260) }, + { -SIMDE_FLOAT32_C( 6719.923), SIMDE_FLOAT32_C( 7398.066), -SIMDE_FLOAT32_C( 6206.421), -SIMDE_FLOAT32_C( 7913.260) }, + { -SIMDE_FLOAT32_C( 6719.923), SIMDE_FLOAT32_C( 7398.066), SIMDE_FLOAT32_C( 7624.779), SIMDE_FLOAT32_C( 9228.150) } }, + { { SIMDE_FLOAT32_C( 3809.922), SIMDE_FLOAT32_C( 3800.742), -SIMDE_FLOAT32_C( 1237.310), SIMDE_FLOAT32_C( 4963.046) }, + { -SIMDE_FLOAT32_C( 6177.402), SIMDE_FLOAT32_C( 3679.462) }, + { SIMDE_FLOAT32_C( 3679.462), SIMDE_FLOAT32_C( 3800.742), -SIMDE_FLOAT32_C( 1237.310), SIMDE_FLOAT32_C( 4963.046) }, + { SIMDE_FLOAT32_C( 3809.922), -SIMDE_FLOAT32_C( 6177.402), -SIMDE_FLOAT32_C( 1237.310), SIMDE_FLOAT32_C( 4963.046) }, + { SIMDE_FLOAT32_C( 3809.922), SIMDE_FLOAT32_C( 3800.742), SIMDE_FLOAT32_C( 3679.462), SIMDE_FLOAT32_C( 4963.046) }, + { SIMDE_FLOAT32_C( 3809.922), SIMDE_FLOAT32_C( 3800.742), -SIMDE_FLOAT32_C( 1237.310), -SIMDE_FLOAT32_C( 6177.402) } }, + { { SIMDE_FLOAT32_C( 2821.190), -SIMDE_FLOAT32_C( 5775.095), -SIMDE_FLOAT32_C( 3892.955), SIMDE_FLOAT32_C( 4093.751) }, + { SIMDE_FLOAT32_C( 3420.151), -SIMDE_FLOAT32_C( 6425.996) }, + { -SIMDE_FLOAT32_C( 6425.996), -SIMDE_FLOAT32_C( 5775.095), -SIMDE_FLOAT32_C( 
3892.955), SIMDE_FLOAT32_C( 4093.751) }, + { SIMDE_FLOAT32_C( 2821.190), SIMDE_FLOAT32_C( 3420.151), -SIMDE_FLOAT32_C( 3892.955), SIMDE_FLOAT32_C( 4093.751) }, + { SIMDE_FLOAT32_C( 2821.190), -SIMDE_FLOAT32_C( 5775.095), -SIMDE_FLOAT32_C( 6425.996), SIMDE_FLOAT32_C( 4093.751) }, + { SIMDE_FLOAT32_C( 2821.190), -SIMDE_FLOAT32_C( 5775.095), -SIMDE_FLOAT32_C( 3892.955), SIMDE_FLOAT32_C( 3420.151) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x2_t b = simde_vld1_f32(test_vec[i].b); + + simde_float32x4_t r0_1 = simde_vcopyq_lane_f32(a, 0, b, 1); + simde_float32x4_t r1_0 = simde_vcopyq_lane_f32(a, 1, b, 0); + simde_float32x4_t r2_1 = simde_vcopyq_lane_f32(a, 2, b, 1); + simde_float32x4_t r3_0 = simde_vcopyq_lane_f32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_f32x4(r0_1, simde_vld1q_f32(test_vec[i].r0_1), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r1_0, simde_vld1q_f32(test_vec[i].r1_0), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r2_1, simde_vld1q_f32(test_vec[i].r2_1), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r3_0, simde_vld1q_f32(test_vec[i].r3_0), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopyq_lane_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[2]; + simde_float64_t b[1]; + simde_float64_t r0_0[2]; + simde_float64_t r1_0[2]; + } test_vec[] = { + { { -SIMDE_FLOAT64_C( 157594.625), SIMDE_FLOAT64_C( 656117.375) }, + { -SIMDE_FLOAT64_C( 75540.062) }, + { -SIMDE_FLOAT64_C( 75540.062), SIMDE_FLOAT64_C( 656117.375) }, + { -SIMDE_FLOAT64_C( 157594.625), -SIMDE_FLOAT64_C( 75540.062) } }, + { { -SIMDE_FLOAT64_C( 646085.625), SIMDE_FLOAT64_C( 464808.125) }, + { SIMDE_FLOAT64_C( 806328.250) }, + { SIMDE_FLOAT64_C( 806328.250), SIMDE_FLOAT64_C( 464808.125) }, + { -SIMDE_FLOAT64_C( 646085.625), SIMDE_FLOAT64_C( 806328.250) } }, + { { SIMDE_FLOAT64_C( 502781.125), SIMDE_FLOAT64_C( 524179.125) }, + 
{ SIMDE_FLOAT64_C( 744040.125) }, + { SIMDE_FLOAT64_C( 744040.125), SIMDE_FLOAT64_C( 524179.125) }, + { SIMDE_FLOAT64_C( 502781.125), SIMDE_FLOAT64_C( 744040.125) } }, + { { SIMDE_FLOAT64_C( 621060.750), -SIMDE_FLOAT64_C( 924502.875) }, + { -SIMDE_FLOAT64_C( 581503.625) }, + { -SIMDE_FLOAT64_C( 581503.625), -SIMDE_FLOAT64_C( 924502.875) }, + { SIMDE_FLOAT64_C( 621060.750), -SIMDE_FLOAT64_C( 581503.625) } }, + { { -SIMDE_FLOAT64_C( 202743.125), SIMDE_FLOAT64_C( 94559.250) }, + { -SIMDE_FLOAT64_C( 791547.938) }, + { -SIMDE_FLOAT64_C( 791547.938), SIMDE_FLOAT64_C( 94559.250) }, + { -SIMDE_FLOAT64_C( 202743.125), -SIMDE_FLOAT64_C( 791547.938) } }, + { { SIMDE_FLOAT64_C( 242598.875), -SIMDE_FLOAT64_C( 699786.875) }, + { SIMDE_FLOAT64_C( 419738.000) }, + { SIMDE_FLOAT64_C( 419738.000), -SIMDE_FLOAT64_C( 699786.875) }, + { SIMDE_FLOAT64_C( 242598.875), SIMDE_FLOAT64_C( 419738.000) } }, + { { SIMDE_FLOAT64_C( 47666.812), -SIMDE_FLOAT64_C( 674149.875) }, + { -SIMDE_FLOAT64_C( 567563.562) }, + { -SIMDE_FLOAT64_C( 567563.562), -SIMDE_FLOAT64_C( 674149.875) }, + { SIMDE_FLOAT64_C( 47666.812), -SIMDE_FLOAT64_C( 567563.562) } }, + { { SIMDE_FLOAT64_C( 864337.875), SIMDE_FLOAT64_C( 108615.000) }, + { -SIMDE_FLOAT64_C( 346185.500) }, + { -SIMDE_FLOAT64_C( 346185.500), SIMDE_FLOAT64_C( 108615.000) }, + { SIMDE_FLOAT64_C( 864337.875), -SIMDE_FLOAT64_C( 346185.500) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x1_t b = simde_vld1_f64(test_vec[i].b); + + simde_float64x2_t r0_0 = simde_vcopyq_lane_f64(a, 0, b, 0); + simde_float64x2_t r1_0 = simde_vcopyq_lane_f64(a, 1, b, 0); + + simde_test_arm_neon_assert_equal_f64x2(r0_0, simde_vld1q_f64(test_vec[i].r0_0), INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r1_0, simde_vld1q_f64(test_vec[i].r1_0), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + 
int8_t a[16]; + int8_t b[16]; + int8_t r0_15[16]; + int8_t r3_12[16]; + int8_t r7_8[16]; + int8_t r11_4[16]; + int8_t r15_1[16]; + } test_vec[] = { + { { -INT8_C( 86), -INT8_C( 119), -INT8_C( 89), -INT8_C( 101), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), -INT8_C( 3), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 8), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 105), -INT8_C( 43), -INT8_C( 27), INT8_C( 85), + -INT8_C( 127), INT8_C( 28), -INT8_C( 6), INT8_C( 100), + INT8_C( 49), INT8_C( 98), INT8_C( 119), INT8_C( 45), + -INT8_C( 106), INT8_C( 22), INT8_C( 85), INT8_C( 97) }, + { INT8_C( 97), -INT8_C( 119), -INT8_C( 89), -INT8_C( 101), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), -INT8_C( 3), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 8), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 86), -INT8_C( 119), -INT8_C( 89), -INT8_C( 106), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), -INT8_C( 3), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 8), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 86), -INT8_C( 119), -INT8_C( 89), -INT8_C( 101), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), INT8_C( 49), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 8), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 86), -INT8_C( 119), -INT8_C( 89), -INT8_C( 101), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), -INT8_C( 3), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 127), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 65) }, + { -INT8_C( 86), -INT8_C( 119), -INT8_C( 89), -INT8_C( 101), + -INT8_C( 97), INT8_C( 71), INT8_C( 93), -INT8_C( 3), + -INT8_C( 57), -INT8_C( 99), -INT8_C( 121), -INT8_C( 8), + -INT8_C( 23), INT8_C( 60), INT8_C( 99), -INT8_C( 43) } }, + { { -INT8_C( 2), -INT8_C( 46), -INT8_C( 111), INT8_C( 44), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, -INT8_C( 124), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 91), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), 
INT8_C( 88) }, + { INT8_C( 88), -INT8_C( 33), INT8_C( 90), -INT8_C( 33), + INT8_C( 109), INT8_C( 74), INT8_C( 81), INT8_C( 100), + INT8_C( 39), INT8_C( 114), -INT8_C( 29), INT8_C( 93), + INT8_C( 59), -INT8_C( 78), INT8_C( 36), INT8_C( 4) }, + { INT8_C( 4), -INT8_C( 46), -INT8_C( 111), INT8_C( 44), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, -INT8_C( 124), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 91), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), INT8_C( 88) }, + { -INT8_C( 2), -INT8_C( 46), -INT8_C( 111), INT8_C( 59), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, -INT8_C( 124), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 91), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), INT8_C( 88) }, + { -INT8_C( 2), -INT8_C( 46), -INT8_C( 111), INT8_C( 44), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, INT8_C( 39), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 91), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), INT8_C( 88) }, + { -INT8_C( 2), -INT8_C( 46), -INT8_C( 111), INT8_C( 44), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, -INT8_C( 124), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 109), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), INT8_C( 88) }, + { -INT8_C( 2), -INT8_C( 46), -INT8_C( 111), INT8_C( 44), + -INT8_C( 104), -INT8_C( 127), INT8_MAX, -INT8_C( 124), + INT8_C( 126), INT8_C( 89), -INT8_C( 85), INT8_C( 91), + INT8_C( 57), -INT8_C( 44), -INT8_C( 31), -INT8_C( 33) } }, + { { -INT8_C( 111), INT8_C( 42), -INT8_C( 114), -INT8_C( 100), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 106), + -INT8_C( 82), -INT8_C( 67), INT8_C( 73), -INT8_C( 39), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), INT8_C( 100) }, + { INT8_C( 45), -INT8_C( 75), INT8_C( 39), INT8_C( 126), + INT8_C( 74), -INT8_C( 18), INT8_C( 80), INT8_C( 87), + -INT8_C( 33), -INT8_C( 86), -INT8_C( 75), -INT8_C( 14), + -INT8_C( 61), -INT8_C( 68), -INT8_C( 38), -INT8_C( 86) }, + { -INT8_C( 86), INT8_C( 42), -INT8_C( 114), -INT8_C( 100), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 106), + -INT8_C( 82), 
-INT8_C( 67), INT8_C( 73), -INT8_C( 39), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), INT8_C( 100) }, + { -INT8_C( 111), INT8_C( 42), -INT8_C( 114), -INT8_C( 61), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 106), + -INT8_C( 82), -INT8_C( 67), INT8_C( 73), -INT8_C( 39), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), INT8_C( 100) }, + { -INT8_C( 111), INT8_C( 42), -INT8_C( 114), -INT8_C( 100), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 33), + -INT8_C( 82), -INT8_C( 67), INT8_C( 73), -INT8_C( 39), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), INT8_C( 100) }, + { -INT8_C( 111), INT8_C( 42), -INT8_C( 114), -INT8_C( 100), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 106), + -INT8_C( 82), -INT8_C( 67), INT8_C( 73), INT8_C( 74), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), INT8_C( 100) }, + { -INT8_C( 111), INT8_C( 42), -INT8_C( 114), -INT8_C( 100), + -INT8_C( 58), -INT8_C( 2), INT8_C( 107), -INT8_C( 106), + -INT8_C( 82), -INT8_C( 67), INT8_C( 73), -INT8_C( 39), + INT8_C( 115), -INT8_C( 10), -INT8_C( 106), -INT8_C( 75) } }, + { { INT8_C( 26), INT8_C( 18), INT8_C( 90), INT8_C( 80), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), INT8_C( 22), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 110), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), -INT8_C( 86) }, + { -INT8_C( 45), INT8_C( 21), -INT8_C( 30), INT8_C( 94), + INT8_C( 77), -INT8_C( 94), -INT8_C( 67), -INT8_C( 43), + -INT8_C( 75), INT8_C( 49), -INT8_C( 118), INT8_C( 63), + -INT8_C( 2), -INT8_C( 73), INT8_C( 116), -INT8_C( 29) }, + { -INT8_C( 29), INT8_C( 18), INT8_C( 90), INT8_C( 80), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), INT8_C( 22), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 110), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), -INT8_C( 86) }, + { INT8_C( 26), INT8_C( 18), INT8_C( 90), -INT8_C( 2), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), INT8_C( 22), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 110), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), -INT8_C( 86) }, + { INT8_C( 26), 
INT8_C( 18), INT8_C( 90), INT8_C( 80), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), -INT8_C( 75), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 110), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), -INT8_C( 86) }, + { INT8_C( 26), INT8_C( 18), INT8_C( 90), INT8_C( 80), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), INT8_C( 22), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 77), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), -INT8_C( 86) }, + { INT8_C( 26), INT8_C( 18), INT8_C( 90), INT8_C( 80), + -INT8_C( 102), -INT8_C( 62), INT8_C( 113), INT8_C( 22), + -INT8_C( 13), INT8_C( 104), -INT8_C( 38), INT8_C( 110), + -INT8_C( 58), -INT8_C( 39), -INT8_C( 42), INT8_C( 21) } }, + { { INT8_C( 30), INT8_C( 53), -INT8_C( 1), INT8_C( 113), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 44), + INT8_C( 117), INT8_C( 63), INT8_C( 33), INT8_C( 5), + -INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 48) }, + { -INT8_C( 41), INT8_C( 25), -INT8_C( 111), -INT8_C( 24), + -INT8_C( 18), -INT8_C( 110), -INT8_C( 38), INT8_C( 50), + INT8_C( 54), -INT8_C( 27), INT8_C( 121), -INT8_C( 37), + -INT8_C( 56), INT8_C( 67), -INT8_C( 9), -INT8_C( 12) }, + { -INT8_C( 12), INT8_C( 53), -INT8_C( 1), INT8_C( 113), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 44), + INT8_C( 117), INT8_C( 63), INT8_C( 33), INT8_C( 5), + -INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 48) }, + { INT8_C( 30), INT8_C( 53), -INT8_C( 1), -INT8_C( 56), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 44), + INT8_C( 117), INT8_C( 63), INT8_C( 33), INT8_C( 5), + -INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 48) }, + { INT8_C( 30), INT8_C( 53), -INT8_C( 1), INT8_C( 113), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 54), + INT8_C( 117), INT8_C( 63), INT8_C( 33), INT8_C( 5), + -INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 48) }, + { INT8_C( 30), INT8_C( 53), -INT8_C( 1), INT8_C( 113), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 44), + INT8_C( 117), INT8_C( 63), INT8_C( 33), -INT8_C( 18), + 
-INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 48) }, + { INT8_C( 30), INT8_C( 53), -INT8_C( 1), INT8_C( 113), + INT8_C( 122), -INT8_C( 83), -INT8_C( 104), INT8_C( 44), + INT8_C( 117), INT8_C( 63), INT8_C( 33), INT8_C( 5), + -INT8_C( 56), INT8_C( 100), INT8_C( 3), INT8_C( 25) } }, + { { -INT8_C( 68), INT8_C( 107), -INT8_C( 28), -INT8_C( 87), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 41), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), -INT8_C( 44), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), INT8_MIN }, + { -INT8_C( 79), -INT8_C( 3), INT8_C( 1), -INT8_C( 70), + INT8_C( 13), INT8_C( 50), -INT8_C( 100), INT8_C( 3), + -INT8_C( 99), INT8_C( 54), INT8_C( 6), INT8_C( 114), + -INT8_C( 70), INT8_C( 81), -INT8_C( 40), INT8_C( 65) }, + { INT8_C( 65), INT8_C( 107), -INT8_C( 28), -INT8_C( 87), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 41), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), -INT8_C( 44), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), INT8_MIN }, + { -INT8_C( 68), INT8_C( 107), -INT8_C( 28), -INT8_C( 70), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 41), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), -INT8_C( 44), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), INT8_MIN }, + { -INT8_C( 68), INT8_C( 107), -INT8_C( 28), -INT8_C( 87), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 99), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), -INT8_C( 44), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), INT8_MIN }, + { -INT8_C( 68), INT8_C( 107), -INT8_C( 28), -INT8_C( 87), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 41), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), INT8_C( 13), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), INT8_MIN }, + { -INT8_C( 68), INT8_C( 107), -INT8_C( 28), -INT8_C( 87), + -INT8_C( 31), -INT8_C( 14), INT8_C( 112), -INT8_C( 41), + INT8_C( 106), -INT8_C( 43), -INT8_C( 101), -INT8_C( 44), + -INT8_C( 5), INT8_C( 73), -INT8_C( 125), -INT8_C( 3) } }, + { { -INT8_C( 77), INT8_C( 70), INT8_C( 64), INT8_C( 54), + INT8_C( 95), INT8_C( 0), INT8_C( 
81), INT8_C( 7), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 68), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), INT8_C( 75) }, + { INT8_C( 34), -INT8_C( 23), INT8_C( 73), INT8_C( 98), + INT8_C( 46), -INT8_C( 31), INT8_C( 75), INT8_C( 32), + INT8_C( 97), INT8_C( 6), -INT8_C( 61), -INT8_C( 33), + -INT8_C( 99), -INT8_C( 104), -INT8_C( 103), -INT8_C( 34) }, + { -INT8_C( 34), INT8_C( 70), INT8_C( 64), INT8_C( 54), + INT8_C( 95), INT8_C( 0), INT8_C( 81), INT8_C( 7), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 68), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), INT8_C( 75) }, + { -INT8_C( 77), INT8_C( 70), INT8_C( 64), -INT8_C( 99), + INT8_C( 95), INT8_C( 0), INT8_C( 81), INT8_C( 7), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 68), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), INT8_C( 75) }, + { -INT8_C( 77), INT8_C( 70), INT8_C( 64), INT8_C( 54), + INT8_C( 95), INT8_C( 0), INT8_C( 81), INT8_C( 97), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 68), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), INT8_C( 75) }, + { -INT8_C( 77), INT8_C( 70), INT8_C( 64), INT8_C( 54), + INT8_C( 95), INT8_C( 0), INT8_C( 81), INT8_C( 7), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 46), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), INT8_C( 75) }, + { -INT8_C( 77), INT8_C( 70), INT8_C( 64), INT8_C( 54), + INT8_C( 95), INT8_C( 0), INT8_C( 81), INT8_C( 7), + INT8_C( 24), -INT8_C( 47), INT8_C( 79), INT8_C( 68), + -INT8_C( 40), INT8_C( 62), INT8_C( 119), -INT8_C( 23) } }, + { { INT8_C( 92), -INT8_C( 112), -INT8_C( 75), INT8_C( 88), + INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 66), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 113), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), -INT8_C( 15) }, + { -INT8_C( 97), INT8_C( 123), INT8_C( 16), -INT8_C( 91), + -INT8_C( 99), INT8_C( 56), -INT8_C( 45), -INT8_C( 55), + -INT8_C( 10), -INT8_C( 123), -INT8_C( 7), INT8_C( 80), + INT8_C( 101), INT8_C( 44), -INT8_C( 15), INT8_C( 84) }, + { INT8_C( 84), -INT8_C( 112), -INT8_C( 75), INT8_C( 88), + 
INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 66), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 113), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), -INT8_C( 15) }, + { INT8_C( 92), -INT8_C( 112), -INT8_C( 75), INT8_C( 101), + INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 66), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 113), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), -INT8_C( 15) }, + { INT8_C( 92), -INT8_C( 112), -INT8_C( 75), INT8_C( 88), + INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 10), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 113), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), -INT8_C( 15) }, + { INT8_C( 92), -INT8_C( 112), -INT8_C( 75), INT8_C( 88), + INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 66), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 99), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), -INT8_C( 15) }, + { INT8_C( 92), -INT8_C( 112), -INT8_C( 75), INT8_C( 88), + INT8_C( 84), -INT8_C( 15), INT8_C( 41), -INT8_C( 66), + INT8_C( 45), -INT8_C( 70), -INT8_C( 35), -INT8_C( 113), + -INT8_C( 9), -INT8_C( 89), -INT8_C( 8), INT8_C( 123) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + + simde_int8x16_t r0_15 = simde_vcopyq_laneq_s8(a, 0, b, 15); + simde_int8x16_t r3_12 = simde_vcopyq_laneq_s8(a, 3, b, 12); + simde_int8x16_t r7_8 = simde_vcopyq_laneq_s8(a, 7, b, 8); + simde_int8x16_t r11_4 = simde_vcopyq_laneq_s8(a, 11, b, 4); + simde_int8x16_t r15_1 = simde_vcopyq_laneq_s8(a, 15, b, 1); + + simde_test_arm_neon_assert_equal_i8x16(r0_15, simde_vld1q_s8(test_vec[i].r0_15)); + simde_test_arm_neon_assert_equal_i8x16(r3_12, simde_vld1q_s8(test_vec[i].r3_12)); + simde_test_arm_neon_assert_equal_i8x16(r7_8, simde_vld1q_s8(test_vec[i].r7_8)); + simde_test_arm_neon_assert_equal_i8x16(r11_4, simde_vld1q_s8(test_vec[i].r11_4)); + simde_test_arm_neon_assert_equal_i8x16(r15_1, 
simde_vld1q_s8(test_vec[i].r15_1)); + } + + return 0; +} + + +static int +test_simde_vcopyq_laneq_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t b[8]; + int16_t r0_7[8]; + int16_t r1_6[8]; + int16_t r3_4[8]; + int16_t r5_3[8]; + int16_t r7_1[8]; + } test_vec[] = { + { { -INT16_C( 16276), INT16_C( 11398), -INT16_C( 29409), -INT16_C( 27267), + -INT16_C( 7128), -INT16_C( 21139), INT16_C( 13269), INT16_C( 12401) }, + { INT16_C( 9098), INT16_C( 20041), INT16_C( 26801), -INT16_C( 20544), + INT16_C( 3157), INT16_C( 28134), INT16_C( 11263), -INT16_C( 28347) }, + { -INT16_C( 28347), INT16_C( 11398), -INT16_C( 29409), -INT16_C( 27267), + -INT16_C( 7128), -INT16_C( 21139), INT16_C( 13269), INT16_C( 12401) }, + { -INT16_C( 16276), INT16_C( 11263), -INT16_C( 29409), -INT16_C( 27267), + -INT16_C( 7128), -INT16_C( 21139), INT16_C( 13269), INT16_C( 12401) }, + { -INT16_C( 16276), INT16_C( 11398), -INT16_C( 29409), INT16_C( 3157), + -INT16_C( 7128), -INT16_C( 21139), INT16_C( 13269), INT16_C( 12401) }, + { -INT16_C( 16276), INT16_C( 11398), -INT16_C( 29409), -INT16_C( 27267), + -INT16_C( 7128), -INT16_C( 20544), INT16_C( 13269), INT16_C( 12401) }, + { -INT16_C( 16276), INT16_C( 11398), -INT16_C( 29409), -INT16_C( 27267), + -INT16_C( 7128), -INT16_C( 21139), INT16_C( 13269), INT16_C( 20041) } }, + { { INT16_C( 1776), INT16_C( 18106), INT16_C( 3726), INT16_C( 10406), + -INT16_C( 25792), -INT16_C( 1873), INT16_C( 8778), -INT16_C( 32635) }, + { -INT16_C( 22146), -INT16_C( 16595), INT16_C( 1633), INT16_C( 16850), + INT16_C( 108), INT16_C( 17026), -INT16_C( 4147), -INT16_C( 13132) }, + { -INT16_C( 13132), INT16_C( 18106), INT16_C( 3726), INT16_C( 10406), + -INT16_C( 25792), -INT16_C( 1873), INT16_C( 8778), -INT16_C( 32635) }, + { INT16_C( 1776), -INT16_C( 4147), INT16_C( 3726), INT16_C( 10406), + -INT16_C( 25792), -INT16_C( 1873), INT16_C( 8778), -INT16_C( 32635) }, + { INT16_C( 1776), INT16_C( 18106), INT16_C( 3726), INT16_C( 108), + -INT16_C( 25792), 
-INT16_C( 1873), INT16_C( 8778), -INT16_C( 32635) }, + { INT16_C( 1776), INT16_C( 18106), INT16_C( 3726), INT16_C( 10406), + -INT16_C( 25792), INT16_C( 16850), INT16_C( 8778), -INT16_C( 32635) }, + { INT16_C( 1776), INT16_C( 18106), INT16_C( 3726), INT16_C( 10406), + -INT16_C( 25792), -INT16_C( 1873), INT16_C( 8778), -INT16_C( 16595) } }, + { { -INT16_C( 21426), -INT16_C( 5480), INT16_C( 21409), -INT16_C( 32739), + INT16_C( 19414), -INT16_C( 30534), -INT16_C( 8998), -INT16_C( 18036) }, + { INT16_C( 19199), -INT16_C( 12691), -INT16_C( 7618), INT16_C( 21179), + INT16_C( 9688), INT16_C( 5616), INT16_C( 23519), -INT16_C( 22371) }, + { -INT16_C( 22371), -INT16_C( 5480), INT16_C( 21409), -INT16_C( 32739), + INT16_C( 19414), -INT16_C( 30534), -INT16_C( 8998), -INT16_C( 18036) }, + { -INT16_C( 21426), INT16_C( 23519), INT16_C( 21409), -INT16_C( 32739), + INT16_C( 19414), -INT16_C( 30534), -INT16_C( 8998), -INT16_C( 18036) }, + { -INT16_C( 21426), -INT16_C( 5480), INT16_C( 21409), INT16_C( 9688), + INT16_C( 19414), -INT16_C( 30534), -INT16_C( 8998), -INT16_C( 18036) }, + { -INT16_C( 21426), -INT16_C( 5480), INT16_C( 21409), -INT16_C( 32739), + INT16_C( 19414), INT16_C( 21179), -INT16_C( 8998), -INT16_C( 18036) }, + { -INT16_C( 21426), -INT16_C( 5480), INT16_C( 21409), -INT16_C( 32739), + INT16_C( 19414), -INT16_C( 30534), -INT16_C( 8998), -INT16_C( 12691) } }, + { { INT16_C( 11127), INT16_C( 4281), INT16_C( 8108), -INT16_C( 11718), + -INT16_C( 8689), -INT16_C( 17258), INT16_C( 2511), INT16_C( 3672) }, + { -INT16_C( 29705), INT16_C( 24574), -INT16_C( 8515), INT16_C( 25788), + INT16_C( 9031), -INT16_C( 32656), INT16_C( 18167), -INT16_C( 3569) }, + { -INT16_C( 3569), INT16_C( 4281), INT16_C( 8108), -INT16_C( 11718), + -INT16_C( 8689), -INT16_C( 17258), INT16_C( 2511), INT16_C( 3672) }, + { INT16_C( 11127), INT16_C( 18167), INT16_C( 8108), -INT16_C( 11718), + -INT16_C( 8689), -INT16_C( 17258), INT16_C( 2511), INT16_C( 3672) }, + { INT16_C( 11127), INT16_C( 4281), INT16_C( 
8108), INT16_C( 9031), + -INT16_C( 8689), -INT16_C( 17258), INT16_C( 2511), INT16_C( 3672) }, + { INT16_C( 11127), INT16_C( 4281), INT16_C( 8108), -INT16_C( 11718), + -INT16_C( 8689), INT16_C( 25788), INT16_C( 2511), INT16_C( 3672) }, + { INT16_C( 11127), INT16_C( 4281), INT16_C( 8108), -INT16_C( 11718), + -INT16_C( 8689), -INT16_C( 17258), INT16_C( 2511), INT16_C( 24574) } }, + { { INT16_C( 2598), INT16_C( 13531), INT16_C( 24973), -INT16_C( 27774), + -INT16_C( 25114), INT16_C( 10082), INT16_C( 3018), INT16_C( 11447) }, + { -INT16_C( 29304), INT16_C( 22734), INT16_C( 1386), INT16_C( 29640), + -INT16_C( 27220), INT16_C( 20656), INT16_C( 20884), -INT16_C( 29958) }, + { -INT16_C( 29958), INT16_C( 13531), INT16_C( 24973), -INT16_C( 27774), + -INT16_C( 25114), INT16_C( 10082), INT16_C( 3018), INT16_C( 11447) }, + { INT16_C( 2598), INT16_C( 20884), INT16_C( 24973), -INT16_C( 27774), + -INT16_C( 25114), INT16_C( 10082), INT16_C( 3018), INT16_C( 11447) }, + { INT16_C( 2598), INT16_C( 13531), INT16_C( 24973), -INT16_C( 27220), + -INT16_C( 25114), INT16_C( 10082), INT16_C( 3018), INT16_C( 11447) }, + { INT16_C( 2598), INT16_C( 13531), INT16_C( 24973), -INT16_C( 27774), + -INT16_C( 25114), INT16_C( 29640), INT16_C( 3018), INT16_C( 11447) }, + { INT16_C( 2598), INT16_C( 13531), INT16_C( 24973), -INT16_C( 27774), + -INT16_C( 25114), INT16_C( 10082), INT16_C( 3018), INT16_C( 22734) } }, + { { INT16_C( 26821), -INT16_C( 29427), -INT16_C( 29199), -INT16_C( 23310), + INT16_C( 19461), INT16_C( 28471), INT16_C( 17652), INT16_C( 8495) }, + { INT16_C( 1907), INT16_C( 9481), INT16_C( 17372), INT16_C( 27351), + INT16_C( 27913), INT16_C( 15194), -INT16_C( 9916), -INT16_C( 22787) }, + { -INT16_C( 22787), -INT16_C( 29427), -INT16_C( 29199), -INT16_C( 23310), + INT16_C( 19461), INT16_C( 28471), INT16_C( 17652), INT16_C( 8495) }, + { INT16_C( 26821), -INT16_C( 9916), -INT16_C( 29199), -INT16_C( 23310), + INT16_C( 19461), INT16_C( 28471), INT16_C( 17652), INT16_C( 8495) }, + { INT16_C( 26821), 
-INT16_C( 29427), -INT16_C( 29199), INT16_C( 27913), + INT16_C( 19461), INT16_C( 28471), INT16_C( 17652), INT16_C( 8495) }, + { INT16_C( 26821), -INT16_C( 29427), -INT16_C( 29199), -INT16_C( 23310), + INT16_C( 19461), INT16_C( 27351), INT16_C( 17652), INT16_C( 8495) }, + { INT16_C( 26821), -INT16_C( 29427), -INT16_C( 29199), -INT16_C( 23310), + INT16_C( 19461), INT16_C( 28471), INT16_C( 17652), INT16_C( 9481) } }, + { { -INT16_C( 22367), -INT16_C( 26006), -INT16_C( 14130), INT16_C( 29416), + INT16_C( 9288), -INT16_C( 19099), -INT16_C( 19470), -INT16_C( 17968) }, + { -INT16_C( 12254), INT16_C( 844), INT16_C( 32114), INT16_C( 8794), + -INT16_C( 12642), -INT16_C( 21136), -INT16_C( 16890), INT16_C( 25350) }, + { INT16_C( 25350), -INT16_C( 26006), -INT16_C( 14130), INT16_C( 29416), + INT16_C( 9288), -INT16_C( 19099), -INT16_C( 19470), -INT16_C( 17968) }, + { -INT16_C( 22367), -INT16_C( 16890), -INT16_C( 14130), INT16_C( 29416), + INT16_C( 9288), -INT16_C( 19099), -INT16_C( 19470), -INT16_C( 17968) }, + { -INT16_C( 22367), -INT16_C( 26006), -INT16_C( 14130), -INT16_C( 12642), + INT16_C( 9288), -INT16_C( 19099), -INT16_C( 19470), -INT16_C( 17968) }, + { -INT16_C( 22367), -INT16_C( 26006), -INT16_C( 14130), INT16_C( 29416), + INT16_C( 9288), INT16_C( 8794), -INT16_C( 19470), -INT16_C( 17968) }, + { -INT16_C( 22367), -INT16_C( 26006), -INT16_C( 14130), INT16_C( 29416), + INT16_C( 9288), -INT16_C( 19099), -INT16_C( 19470), INT16_C( 844) } }, + { { -INT16_C( 4687), -INT16_C( 685), INT16_C( 32290), INT16_C( 8754), + -INT16_C( 4611), -INT16_C( 6814), -INT16_C( 8711), INT16_C( 31457) }, + { INT16_C( 14971), INT16_C( 27437), INT16_C( 6885), INT16_C( 11065), + INT16_C( 8411), -INT16_C( 31180), INT16_C( 29285), -INT16_C( 24834) }, + { -INT16_C( 24834), -INT16_C( 685), INT16_C( 32290), INT16_C( 8754), + -INT16_C( 4611), -INT16_C( 6814), -INT16_C( 8711), INT16_C( 31457) }, + { -INT16_C( 4687), INT16_C( 29285), INT16_C( 32290), INT16_C( 8754), + -INT16_C( 4611), -INT16_C( 6814), 
-INT16_C( 8711), INT16_C( 31457) }, + { -INT16_C( 4687), -INT16_C( 685), INT16_C( 32290), INT16_C( 8411), + -INT16_C( 4611), -INT16_C( 6814), -INT16_C( 8711), INT16_C( 31457) }, + { -INT16_C( 4687), -INT16_C( 685), INT16_C( 32290), INT16_C( 8754), + -INT16_C( 4611), INT16_C( 11065), -INT16_C( 8711), INT16_C( 31457) }, + { -INT16_C( 4687), -INT16_C( 685), INT16_C( 32290), INT16_C( 8754), + -INT16_C( 4611), -INT16_C( 6814), -INT16_C( 8711), INT16_C( 27437) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + + simde_int16x8_t r0_7 = simde_vcopyq_laneq_s16(a, 0, b, 7); + simde_int16x8_t r1_6 = simde_vcopyq_laneq_s16(a, 1, b, 6); + simde_int16x8_t r3_4 = simde_vcopyq_laneq_s16(a, 3, b, 4); + simde_int16x8_t r5_3 = simde_vcopyq_laneq_s16(a, 5, b, 3); + simde_int16x8_t r7_1 = simde_vcopyq_laneq_s16(a, 7, b, 1); + + simde_test_arm_neon_assert_equal_i16x8(r0_7, simde_vld1q_s16(test_vec[i].r0_7)); + simde_test_arm_neon_assert_equal_i16x8(r1_6, simde_vld1q_s16(test_vec[i].r1_6)); + simde_test_arm_neon_assert_equal_i16x8(r3_4, simde_vld1q_s16(test_vec[i].r3_4)); + simde_test_arm_neon_assert_equal_i16x8(r5_3, simde_vld1q_s16(test_vec[i].r5_3)); + simde_test_arm_neon_assert_equal_i16x8(r7_1, simde_vld1q_s16(test_vec[i].r7_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t b[4]; + int32_t r0_3[4]; + int32_t r1_2[4]; + int32_t r2_1[4]; + int32_t r3_0[4]; + } test_vec[] = { + { { -INT32_C( 1892583197), -INT32_C( 225454004), INT32_C( 1008306445), INT32_C( 1245890578) }, + { INT32_C( 731542339), -INT32_C( 857053337), INT32_C( 1016225573), INT32_C( 1824087776) }, + { INT32_C( 1824087776), -INT32_C( 225454004), INT32_C( 1008306445), INT32_C( 1245890578) }, + { -INT32_C( 1892583197), INT32_C( 1016225573), INT32_C( 1008306445), INT32_C( 1245890578) }, 
+ { -INT32_C( 1892583197), -INT32_C( 225454004), -INT32_C( 857053337), INT32_C( 1245890578) }, + { -INT32_C( 1892583197), -INT32_C( 225454004), INT32_C( 1008306445), INT32_C( 731542339) } }, + { { INT32_C( 1600614011), INT32_C( 240019305), -INT32_C( 2019582847), -INT32_C( 562177638) }, + { -INT32_C( 73458434), INT32_C( 715226345), -INT32_C( 1660159717), INT32_C( 1203496468) }, + { INT32_C( 1203496468), INT32_C( 240019305), -INT32_C( 2019582847), -INT32_C( 562177638) }, + { INT32_C( 1600614011), -INT32_C( 1660159717), -INT32_C( 2019582847), -INT32_C( 562177638) }, + { INT32_C( 1600614011), INT32_C( 240019305), INT32_C( 715226345), -INT32_C( 562177638) }, + { INT32_C( 1600614011), INT32_C( 240019305), -INT32_C( 2019582847), -INT32_C( 73458434) } }, + { { INT32_C( 1778478051), INT32_C( 630029678), INT32_C( 1918387593), -INT32_C( 898166499) }, + { -INT32_C( 2070028428), -INT32_C( 1057493739), -INT32_C( 1053820267), -INT32_C( 464877709) }, + { -INT32_C( 464877709), INT32_C( 630029678), INT32_C( 1918387593), -INT32_C( 898166499) }, + { INT32_C( 1778478051), -INT32_C( 1053820267), INT32_C( 1918387593), -INT32_C( 898166499) }, + { INT32_C( 1778478051), INT32_C( 630029678), -INT32_C( 1057493739), -INT32_C( 898166499) }, + { INT32_C( 1778478051), INT32_C( 630029678), INT32_C( 1918387593), -INT32_C( 2070028428) } }, + { { -INT32_C( 1933730243), -INT32_C( 179719064), -INT32_C( 1680784440), -INT32_C( 269744068) }, + { INT32_C( 627798432), INT32_C( 622441545), -INT32_C( 1933882589), -INT32_C( 1310730543) }, + { -INT32_C( 1310730543), -INT32_C( 179719064), -INT32_C( 1680784440), -INT32_C( 269744068) }, + { -INT32_C( 1933730243), -INT32_C( 1933882589), -INT32_C( 1680784440), -INT32_C( 269744068) }, + { -INT32_C( 1933730243), -INT32_C( 179719064), INT32_C( 622441545), -INT32_C( 269744068) }, + { -INT32_C( 1933730243), -INT32_C( 179719064), -INT32_C( 1680784440), INT32_C( 627798432) } }, + { { INT32_C( 2089054990), -INT32_C( 800367284), INT32_C( 604869219), -INT32_C( 1842421805) }, 
+ { -INT32_C( 1206028316), -INT32_C( 1825846196), -INT32_C( 677690043), INT32_C( 938506212) }, + { INT32_C( 938506212), -INT32_C( 800367284), INT32_C( 604869219), -INT32_C( 1842421805) }, + { INT32_C( 2089054990), -INT32_C( 677690043), INT32_C( 604869219), -INT32_C( 1842421805) }, + { INT32_C( 2089054990), -INT32_C( 800367284), -INT32_C( 1825846196), -INT32_C( 1842421805) }, + { INT32_C( 2089054990), -INT32_C( 800367284), INT32_C( 604869219), -INT32_C( 1206028316) } }, + { { INT32_C( 1368055385), INT32_C( 1200632878), -INT32_C( 1243554760), -INT32_C( 1709345942) }, + { -INT32_C( 1279975680), INT32_C( 97226001), INT32_C( 1718562473), -INT32_C( 150019811) }, + { -INT32_C( 150019811), INT32_C( 1200632878), -INT32_C( 1243554760), -INT32_C( 1709345942) }, + { INT32_C( 1368055385), INT32_C( 1718562473), -INT32_C( 1243554760), -INT32_C( 1709345942) }, + { INT32_C( 1368055385), INT32_C( 1200632878), INT32_C( 97226001), -INT32_C( 1709345942) }, + { INT32_C( 1368055385), INT32_C( 1200632878), -INT32_C( 1243554760), -INT32_C( 1279975680) } }, + { { INT32_C( 1816126290), -INT32_C( 227914872), -INT32_C( 964882524), INT32_C( 1928245859) }, + { -INT32_C( 112086283), INT32_C( 873557272), -INT32_C( 470253178), -INT32_C( 654993104) }, + { -INT32_C( 654993104), -INT32_C( 227914872), -INT32_C( 964882524), INT32_C( 1928245859) }, + { INT32_C( 1816126290), -INT32_C( 470253178), -INT32_C( 964882524), INT32_C( 1928245859) }, + { INT32_C( 1816126290), -INT32_C( 227914872), INT32_C( 873557272), INT32_C( 1928245859) }, + { INT32_C( 1816126290), -INT32_C( 227914872), -INT32_C( 964882524), -INT32_C( 112086283) } }, + { { INT32_C( 562043715), -INT32_C( 106864670), -INT32_C( 1846548877), INT32_C( 1744408579) }, + { -INT32_C( 774102303), -INT32_C( 1452959164), -INT32_C( 490774966), -INT32_C( 2057123680) }, + { -INT32_C( 2057123680), -INT32_C( 106864670), -INT32_C( 1846548877), INT32_C( 1744408579) }, + { INT32_C( 562043715), -INT32_C( 490774966), -INT32_C( 1846548877), INT32_C( 1744408579) }, + { 
INT32_C( 562043715), -INT32_C( 106864670), -INT32_C( 1452959164), INT32_C( 1744408579) }, + { INT32_C( 562043715), -INT32_C( 106864670), -INT32_C( 1846548877), -INT32_C( 774102303) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + + simde_int32x4_t r0_3 = simde_vcopyq_laneq_s32(a, 0, b, 3); + simde_int32x4_t r1_2 = simde_vcopyq_laneq_s32(a, 1, b, 2); + simde_int32x4_t r2_1 = simde_vcopyq_laneq_s32(a, 2, b, 1); + simde_int32x4_t r3_0 = simde_vcopyq_laneq_s32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_i32x4(r0_3, simde_vld1q_s32(test_vec[i].r0_3)); + simde_test_arm_neon_assert_equal_i32x4(r1_2, simde_vld1q_s32(test_vec[i].r1_2)); + simde_test_arm_neon_assert_equal_i32x4(r2_1, simde_vld1q_s32(test_vec[i].r2_1)); + simde_test_arm_neon_assert_equal_i32x4(r3_0, simde_vld1q_s32(test_vec[i].r3_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int64_t b[2]; + int64_t r0_0[2]; + int64_t r0_1[2]; + int64_t r1_0[2]; + int64_t r1_1[2]; + } test_vec[] = { + { { -INT64_C( 475864847648919097), INT64_C( 8417644638487745847) }, + { INT64_C( 9182198556432984409), INT64_C( 6177027272280840895) }, + { INT64_C( 9182198556432984409), INT64_C( 8417644638487745847) }, + { INT64_C( 6177027272280840895), INT64_C( 8417644638487745847) }, + { -INT64_C( 475864847648919097), INT64_C( 9182198556432984409) }, + { -INT64_C( 475864847648919097), INT64_C( 6177027272280840895) } }, + { { -INT64_C( 8929020168002432621), -INT64_C( 4477976040726240340) }, + { INT64_C( 5692018566259932777), INT64_C( 4316779177536635438) }, + { INT64_C( 5692018566259932777), -INT64_C( 4477976040726240340) }, + { INT64_C( 4316779177536635438), -INT64_C( 4477976040726240340) }, + { -INT64_C( 8929020168002432621), INT64_C( 5692018566259932777) }, + { -INT64_C( 8929020168002432621), 
INT64_C( 4316779177536635438) } }, + { { INT64_C( 4763414794636474503), INT64_C( 462179614187538409) }, + { INT64_C( 7917706878421569084), -INT64_C( 5171000984489091865) }, + { INT64_C( 7917706878421569084), INT64_C( 462179614187538409) }, + { -INT64_C( 5171000984489091865), INT64_C( 462179614187538409) }, + { INT64_C( 4763414794636474503), INT64_C( 7917706878421569084) }, + { INT64_C( 4763414794636474503), -INT64_C( 5171000984489091865) } }, + { { -INT64_C( 655109288458667793), -INT64_C( 337451926488198107) }, + { -INT64_C( 7854309330387571658), INT64_C( 8420743667125891946) }, + { -INT64_C( 7854309330387571658), -INT64_C( 337451926488198107) }, + { INT64_C( 8420743667125891946), -INT64_C( 337451926488198107) }, + { -INT64_C( 655109288458667793), -INT64_C( 7854309330387571658) }, + { -INT64_C( 655109288458667793), INT64_C( 8420743667125891946) } }, + { { -INT64_C( 6164247547247293913), INT64_C( 2253183027365453726) }, + { -INT64_C( 1821736987801891187), -INT64_C( 5278162077788122669) }, + { -INT64_C( 1821736987801891187), INT64_C( 2253183027365453726) }, + { -INT64_C( 5278162077788122669), INT64_C( 2253183027365453726) }, + { -INT64_C( 6164247547247293913), -INT64_C( 1821736987801891187) }, + { -INT64_C( 6164247547247293913), -INT64_C( 5278162077788122669) } }, + { { -INT64_C( 8639413444708581833), -INT64_C( 5606583741094413314) }, + { INT64_C( 5140422082660112584), -INT64_C( 3884945958717385980) }, + { INT64_C( 5140422082660112584), -INT64_C( 5606583741094413314) }, + { -INT64_C( 3884945958717385980), -INT64_C( 5606583741094413314) }, + { -INT64_C( 8639413444708581833), INT64_C( 5140422082660112584) }, + { -INT64_C( 8639413444708581833), -INT64_C( 3884945958717385980) } }, + { { INT64_C( 9084394517336304795), -INT64_C( 5982763725163385905) }, + { -INT64_C( 8482304762806811342), INT64_C( 8203438312388876529) }, + { -INT64_C( 8482304762806811342), -INT64_C( 5982763725163385905) }, + { INT64_C( 8203438312388876529), -INT64_C( 5982763725163385905) }, + { INT64_C( 
9084394517336304795), -INT64_C( 8482304762806811342) }, + { INT64_C( 9084394517336304795), INT64_C( 8203438312388876529) } }, + { { INT64_C( 1616920090545660434), INT64_C( 2830530565097296736) }, + { -INT64_C( 6774932576689688966), INT64_C( 4252959599116308083) }, + { -INT64_C( 6774932576689688966), INT64_C( 2830530565097296736) }, + { INT64_C( 4252959599116308083), INT64_C( 2830530565097296736) }, + { INT64_C( 1616920090545660434), -INT64_C( 6774932576689688966) }, + { INT64_C( 1616920090545660434), INT64_C( 4252959599116308083) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + + simde_int64x2_t r0_0 = simde_vcopyq_laneq_s64(a, 0, b, 0); + simde_int64x2_t r0_1 = simde_vcopyq_laneq_s64(a, 0, b, 1); + simde_int64x2_t r1_0 = simde_vcopyq_laneq_s64(a, 1, b, 0); + simde_int64x2_t r1_1 = simde_vcopyq_laneq_s64(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_i64x2(r0_0, simde_vld1q_s64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_i64x2(r0_1, simde_vld1q_s64(test_vec[i].r0_1)); + simde_test_arm_neon_assert_equal_i64x2(r1_0, simde_vld1q_s64(test_vec[i].r1_0)); + simde_test_arm_neon_assert_equal_i64x2(r1_1, simde_vld1q_s64(test_vec[i].r1_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[16]; + uint8_t b[16]; + uint8_t r0_15[16]; + uint8_t r3_12[16]; + uint8_t r7_8[16]; + uint8_t r11_4[16]; + uint8_t r15_1[16]; + } test_vec[] = { + { { UINT8_C( 246), UINT8_C( 247), UINT8_C( 144), UINT8_C( 17), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 150), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 93), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 228) }, + { UINT8_C( 163), UINT8_C( 179), UINT8_C( 104), UINT8_C( 196), + UINT8_C( 232), UINT8_C( 51), UINT8_C( 215), UINT8_C( 174), + UINT8_C( 37), UINT8_C( 181), UINT8_C( 
16), UINT8_C( 157), + UINT8_C( 67), UINT8_C( 118), UINT8_C( 181), UINT8_C( 84) }, + { UINT8_C( 84), UINT8_C( 247), UINT8_C( 144), UINT8_C( 17), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 150), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 93), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 228) }, + { UINT8_C( 246), UINT8_C( 247), UINT8_C( 144), UINT8_C( 67), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 150), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 93), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 228) }, + { UINT8_C( 246), UINT8_C( 247), UINT8_C( 144), UINT8_C( 17), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 37), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 93), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 228) }, + { UINT8_C( 246), UINT8_C( 247), UINT8_C( 144), UINT8_C( 17), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 150), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 232), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 228) }, + { UINT8_C( 246), UINT8_C( 247), UINT8_C( 144), UINT8_C( 17), + UINT8_C( 184), UINT8_C( 243), UINT8_C( 185), UINT8_C( 150), + UINT8_C( 75), UINT8_C( 48), UINT8_C( 193), UINT8_C( 93), + UINT8_C( 178), UINT8_C( 223), UINT8_C( 110), UINT8_C( 179) } }, + { { UINT8_C( 33), UINT8_C( 229), UINT8_C( 20), UINT8_C( 122), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 45), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 147), + UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 241) }, + { UINT8_C( 56), UINT8_C( 173), UINT8_C( 83), UINT8_C( 20), + UINT8_C( 151), UINT8_C( 249), UINT8_C( 195), UINT8_C( 179), + UINT8_C( 11), UINT8_C( 61), UINT8_C( 82), UINT8_C( 120), + UINT8_C( 20), UINT8_C( 192), UINT8_C( 93), UINT8_C( 178) }, + { UINT8_C( 178), UINT8_C( 229), UINT8_C( 20), UINT8_C( 122), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 45), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 147), 
+ UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 241) }, + { UINT8_C( 33), UINT8_C( 229), UINT8_C( 20), UINT8_C( 20), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 45), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 147), + UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 241) }, + { UINT8_C( 33), UINT8_C( 229), UINT8_C( 20), UINT8_C( 122), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 11), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 147), + UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 241) }, + { UINT8_C( 33), UINT8_C( 229), UINT8_C( 20), UINT8_C( 122), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 45), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 151), + UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 241) }, + { UINT8_C( 33), UINT8_C( 229), UINT8_C( 20), UINT8_C( 122), + UINT8_C( 136), UINT8_C( 32), UINT8_C( 196), UINT8_C( 45), + UINT8_C( 56), UINT8_C( 169), UINT8_C( 181), UINT8_C( 147), + UINT8_C( 247), UINT8_C( 103), UINT8_C( 243), UINT8_C( 173) } }, + { { UINT8_C( 85), UINT8_C( 42), UINT8_C( 232), UINT8_C( 22), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 207), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 196), + UINT8_C( 97), UINT8_C( 0), UINT8_C( 98), UINT8_C( 175) }, + { UINT8_C( 167), UINT8_C( 39), UINT8_C( 158), UINT8_C( 122), + UINT8_C( 43), UINT8_C( 90), UINT8_C( 131), UINT8_C( 127), + UINT8_C( 57), UINT8_C( 210), UINT8_C( 100), UINT8_C( 133), + UINT8_C( 98), UINT8_C( 145), UINT8_C( 33), UINT8_C( 80) }, + { UINT8_C( 80), UINT8_C( 42), UINT8_C( 232), UINT8_C( 22), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 207), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 196), + UINT8_C( 97), UINT8_C( 0), UINT8_C( 98), UINT8_C( 175) }, + { UINT8_C( 85), UINT8_C( 42), UINT8_C( 232), UINT8_C( 98), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 207), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 196), + UINT8_C( 97), UINT8_C( 0), 
UINT8_C( 98), UINT8_C( 175) }, + { UINT8_C( 85), UINT8_C( 42), UINT8_C( 232), UINT8_C( 22), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 57), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 196), + UINT8_C( 97), UINT8_C( 0), UINT8_C( 98), UINT8_C( 175) }, + { UINT8_C( 85), UINT8_C( 42), UINT8_C( 232), UINT8_C( 22), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 207), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 43), + UINT8_C( 97), UINT8_C( 0), UINT8_C( 98), UINT8_C( 175) }, + { UINT8_C( 85), UINT8_C( 42), UINT8_C( 232), UINT8_C( 22), + UINT8_C( 22), UINT8_C( 177), UINT8_C( 19), UINT8_C( 207), + UINT8_C( 243), UINT8_C( 35), UINT8_C( 139), UINT8_C( 196), + UINT8_C( 97), UINT8_C( 0), UINT8_C( 98), UINT8_C( 39) } }, + { { UINT8_C( 83), UINT8_C( 171), UINT8_C( 5), UINT8_C( 105), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 53), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 235), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 1) }, + { UINT8_C( 212), UINT8_C( 16), UINT8_C( 118), UINT8_C( 213), + UINT8_C( 68), UINT8_C( 168), UINT8_C( 78), UINT8_C( 94), + UINT8_C( 96), UINT8_C( 68), UINT8_C( 29), UINT8_C( 218), + UINT8_C( 191), UINT8_C( 115), UINT8_C( 146), UINT8_C( 235) }, + { UINT8_C( 235), UINT8_C( 171), UINT8_C( 5), UINT8_C( 105), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 53), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 235), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 1) }, + { UINT8_C( 83), UINT8_C( 171), UINT8_C( 5), UINT8_C( 191), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 53), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 235), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 1) }, + { UINT8_C( 83), UINT8_C( 171), UINT8_C( 5), UINT8_C( 105), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 96), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 235), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 1) }, + { UINT8_C( 83), UINT8_C( 
171), UINT8_C( 5), UINT8_C( 105), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 53), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 68), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 1) }, + { UINT8_C( 83), UINT8_C( 171), UINT8_C( 5), UINT8_C( 105), + UINT8_C( 211), UINT8_C( 1), UINT8_C( 43), UINT8_C( 53), + UINT8_C( 171), UINT8_C( 15), UINT8_C( 83), UINT8_C( 235), + UINT8_C( 252), UINT8_C( 134), UINT8_C( 244), UINT8_C( 16) } }, + { { UINT8_C( 110), UINT8_C( 242), UINT8_C( 72), UINT8_C( 45), + UINT8_C( 85), UINT8_C( 45), UINT8_C( 124), UINT8_C( 76), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 231), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 70) }, + { UINT8_C( 249), UINT8_C( 143), UINT8_C( 98), UINT8_C( 59), + UINT8_C( 2), UINT8_C( 33), UINT8_C( 73), UINT8_C( 97), + UINT8_C( 161), UINT8_C( 222), UINT8_C( 76), UINT8_C( 96), + UINT8_C( 205), UINT8_C( 112), UINT8_C( 99), UINT8_C( 202) }, + { UINT8_C( 202), UINT8_C( 242), UINT8_C( 72), UINT8_C( 45), + UINT8_C( 85), UINT8_C( 45), UINT8_C( 124), UINT8_C( 76), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 231), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 70) }, + { UINT8_C( 110), UINT8_C( 242), UINT8_C( 72), UINT8_C( 205), + UINT8_C( 85), UINT8_C( 45), UINT8_C( 124), UINT8_C( 76), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 231), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 70) }, + { UINT8_C( 110), UINT8_C( 242), UINT8_C( 72), UINT8_C( 45), + UINT8_C( 85), UINT8_C( 45), UINT8_C( 124), UINT8_C( 161), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 231), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 70) }, + { UINT8_C( 110), UINT8_C( 242), UINT8_C( 72), UINT8_C( 45), + UINT8_C( 85), UINT8_C( 45), UINT8_C( 124), UINT8_C( 76), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 2), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 70) }, + { UINT8_C( 110), UINT8_C( 242), UINT8_C( 72), UINT8_C( 45), + UINT8_C( 85), 
UINT8_C( 45), UINT8_C( 124), UINT8_C( 76), + UINT8_C( 205), UINT8_C( 127), UINT8_C( 154), UINT8_C( 231), + UINT8_C( 152), UINT8_C( 56), UINT8_C( 62), UINT8_C( 143) } }, + { { UINT8_C( 205), UINT8_C( 113), UINT8_C( 133), UINT8_C( 98), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 243), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 0), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 91) }, + { UINT8_C( 214), UINT8_C( 25), UINT8_C( 230), UINT8_C( 142), + UINT8_C( 76), UINT8_C( 97), UINT8_C( 57), UINT8_C( 7), + UINT8_C( 239), UINT8_C( 176), UINT8_C( 8), UINT8_C( 43), + UINT8_C( 54), UINT8_C( 231), UINT8_C( 29), UINT8_C( 24) }, + { UINT8_C( 24), UINT8_C( 113), UINT8_C( 133), UINT8_C( 98), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 243), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 0), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 91) }, + { UINT8_C( 205), UINT8_C( 113), UINT8_C( 133), UINT8_C( 54), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 243), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 0), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 91) }, + { UINT8_C( 205), UINT8_C( 113), UINT8_C( 133), UINT8_C( 98), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 239), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 0), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 91) }, + { UINT8_C( 205), UINT8_C( 113), UINT8_C( 133), UINT8_C( 98), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 243), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 76), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 91) }, + { UINT8_C( 205), UINT8_C( 113), UINT8_C( 133), UINT8_C( 98), + UINT8_C( 49), UINT8_C( 168), UINT8_C( 196), UINT8_C( 243), + UINT8_C( 48), UINT8_C( 38), UINT8_C( 75), UINT8_C( 0), + UINT8_C( 164), UINT8_C( 135), UINT8_C( 28), UINT8_C( 25) } }, + { { UINT8_C( 41), UINT8_C( 0), UINT8_C( 18), UINT8_C( 142), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 98), + 
UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 46), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 144) }, + { UINT8_C( 28), UINT8_C( 82), UINT8_C( 123), UINT8_C( 131), + UINT8_C( 204), UINT8_C( 199), UINT8_C( 247), UINT8_C( 126), + UINT8_C( 30), UINT8_C( 111), UINT8_C( 48), UINT8_C( 50), + UINT8_C( 252), UINT8_C( 65), UINT8_C( 21), UINT8_C( 254) }, + { UINT8_C( 254), UINT8_C( 0), UINT8_C( 18), UINT8_C( 142), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 98), + UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 46), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 144) }, + { UINT8_C( 41), UINT8_C( 0), UINT8_C( 18), UINT8_C( 252), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 98), + UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 46), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 144) }, + { UINT8_C( 41), UINT8_C( 0), UINT8_C( 18), UINT8_C( 142), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 30), + UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 46), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 144) }, + { UINT8_C( 41), UINT8_C( 0), UINT8_C( 18), UINT8_C( 142), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 98), + UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 204), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 144) }, + { UINT8_C( 41), UINT8_C( 0), UINT8_C( 18), UINT8_C( 142), + UINT8_C( 56), UINT8_C( 132), UINT8_C( 1), UINT8_C( 98), + UINT8_C( 122), UINT8_C( 59), UINT8_C( 66), UINT8_C( 46), + UINT8_C( 71), UINT8_C( 48), UINT8_C( 59), UINT8_C( 82) } }, + { { UINT8_C( 214), UINT8_C( 112), UINT8_C( 60), UINT8_C( 183), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 231), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 13), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 167) }, + { UINT8_C( 60), UINT8_C( 212), UINT8_C( 212), UINT8_C( 225), + UINT8_C( 136), UINT8_C( 233), UINT8_C( 4), UINT8_C( 29), + UINT8_C( 196), UINT8_C( 145), UINT8_C( 157), UINT8_C( 121), + UINT8_C( 
58), UINT8_C( 57), UINT8_C( 166), UINT8_C( 79) }, + { UINT8_C( 79), UINT8_C( 112), UINT8_C( 60), UINT8_C( 183), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 231), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 13), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 167) }, + { UINT8_C( 214), UINT8_C( 112), UINT8_C( 60), UINT8_C( 58), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 231), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 13), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 167) }, + { UINT8_C( 214), UINT8_C( 112), UINT8_C( 60), UINT8_C( 183), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 196), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 13), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 167) }, + { UINT8_C( 214), UINT8_C( 112), UINT8_C( 60), UINT8_C( 183), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 231), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 136), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 167) }, + { UINT8_C( 214), UINT8_C( 112), UINT8_C( 60), UINT8_C( 183), + UINT8_C( 213), UINT8_C( 143), UINT8_C( 98), UINT8_C( 231), + UINT8_C( 105), UINT8_C( 97), UINT8_C( 76), UINT8_C( 13), + UINT8_C( 190), UINT8_C( 12), UINT8_C( 121), UINT8_C( 212) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); + + simde_uint8x16_t r0_15 = simde_vcopyq_laneq_u8(a, 0, b, 15); + simde_uint8x16_t r3_12 = simde_vcopyq_laneq_u8(a, 3, b, 12); + simde_uint8x16_t r7_8 = simde_vcopyq_laneq_u8(a, 7, b, 8); + simde_uint8x16_t r11_4 = simde_vcopyq_laneq_u8(a, 11, b, 4); + simde_uint8x16_t r15_1 = simde_vcopyq_laneq_u8(a, 15, b, 1); + + simde_test_arm_neon_assert_equal_u8x16(r0_15, simde_vld1q_u8(test_vec[i].r0_15)); + simde_test_arm_neon_assert_equal_u8x16(r3_12, simde_vld1q_u8(test_vec[i].r3_12)); + 
simde_test_arm_neon_assert_equal_u8x16(r7_8, simde_vld1q_u8(test_vec[i].r7_8)); + simde_test_arm_neon_assert_equal_u8x16(r11_4, simde_vld1q_u8(test_vec[i].r11_4)); + simde_test_arm_neon_assert_equal_u8x16(r15_1, simde_vld1q_u8(test_vec[i].r15_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint16_t r0_7[8]; + uint16_t r1_6[8]; + uint16_t r3_4[8]; + uint16_t r5_3[8]; + uint16_t r7_1[8]; + } test_vec[] = { + { { UINT16_C( 25081), UINT16_C( 12133), UINT16_C( 16658), UINT16_C( 10269), + UINT16_C( 62695), UINT16_C( 42052), UINT16_C( 28583), UINT16_C( 55868) }, + { UINT16_C( 64179), UINT16_C( 3756), UINT16_C( 15151), UINT16_C( 33917), + UINT16_C( 33797), UINT16_C( 56243), UINT16_C( 48168), UINT16_C( 62591) }, + { UINT16_C( 62591), UINT16_C( 12133), UINT16_C( 16658), UINT16_C( 10269), + UINT16_C( 62695), UINT16_C( 42052), UINT16_C( 28583), UINT16_C( 55868) }, + { UINT16_C( 25081), UINT16_C( 48168), UINT16_C( 16658), UINT16_C( 10269), + UINT16_C( 62695), UINT16_C( 42052), UINT16_C( 28583), UINT16_C( 55868) }, + { UINT16_C( 25081), UINT16_C( 12133), UINT16_C( 16658), UINT16_C( 33797), + UINT16_C( 62695), UINT16_C( 42052), UINT16_C( 28583), UINT16_C( 55868) }, + { UINT16_C( 25081), UINT16_C( 12133), UINT16_C( 16658), UINT16_C( 10269), + UINT16_C( 62695), UINT16_C( 33917), UINT16_C( 28583), UINT16_C( 55868) }, + { UINT16_C( 25081), UINT16_C( 12133), UINT16_C( 16658), UINT16_C( 10269), + UINT16_C( 62695), UINT16_C( 42052), UINT16_C( 28583), UINT16_C( 3756) } }, + { { UINT16_C( 42773), UINT16_C( 60819), UINT16_C( 5780), UINT16_C( 38892), + UINT16_C( 50295), UINT16_C( 55830), UINT16_C( 29603), UINT16_C( 6234) }, + { UINT16_C( 14631), UINT16_C( 14975), UINT16_C( 18813), UINT16_C( 48357), + UINT16_C( 43565), UINT16_C( 52059), UINT16_C( 49414), UINT16_C( 62266) }, + { UINT16_C( 62266), UINT16_C( 60819), UINT16_C( 5780), UINT16_C( 38892), + UINT16_C( 50295), UINT16_C( 55830), 
UINT16_C( 29603), UINT16_C( 6234) }, + { UINT16_C( 42773), UINT16_C( 49414), UINT16_C( 5780), UINT16_C( 38892), + UINT16_C( 50295), UINT16_C( 55830), UINT16_C( 29603), UINT16_C( 6234) }, + { UINT16_C( 42773), UINT16_C( 60819), UINT16_C( 5780), UINT16_C( 43565), + UINT16_C( 50295), UINT16_C( 55830), UINT16_C( 29603), UINT16_C( 6234) }, + { UINT16_C( 42773), UINT16_C( 60819), UINT16_C( 5780), UINT16_C( 38892), + UINT16_C( 50295), UINT16_C( 48357), UINT16_C( 29603), UINT16_C( 6234) }, + { UINT16_C( 42773), UINT16_C( 60819), UINT16_C( 5780), UINT16_C( 38892), + UINT16_C( 50295), UINT16_C( 55830), UINT16_C( 29603), UINT16_C( 14975) } }, + { { UINT16_C( 59618), UINT16_C( 16122), UINT16_C( 25506), UINT16_C( 64566), + UINT16_C( 25795), UINT16_C( 55880), UINT16_C( 14803), UINT16_C( 20543) }, + { UINT16_C( 40023), UINT16_C( 25126), UINT16_C( 26717), UINT16_C( 35096), + UINT16_C( 32251), UINT16_C( 38772), UINT16_C( 8275), UINT16_C( 33753) }, + { UINT16_C( 33753), UINT16_C( 16122), UINT16_C( 25506), UINT16_C( 64566), + UINT16_C( 25795), UINT16_C( 55880), UINT16_C( 14803), UINT16_C( 20543) }, + { UINT16_C( 59618), UINT16_C( 8275), UINT16_C( 25506), UINT16_C( 64566), + UINT16_C( 25795), UINT16_C( 55880), UINT16_C( 14803), UINT16_C( 20543) }, + { UINT16_C( 59618), UINT16_C( 16122), UINT16_C( 25506), UINT16_C( 32251), + UINT16_C( 25795), UINT16_C( 55880), UINT16_C( 14803), UINT16_C( 20543) }, + { UINT16_C( 59618), UINT16_C( 16122), UINT16_C( 25506), UINT16_C( 64566), + UINT16_C( 25795), UINT16_C( 35096), UINT16_C( 14803), UINT16_C( 20543) }, + { UINT16_C( 59618), UINT16_C( 16122), UINT16_C( 25506), UINT16_C( 64566), + UINT16_C( 25795), UINT16_C( 55880), UINT16_C( 14803), UINT16_C( 25126) } }, + { { UINT16_C( 43675), UINT16_C( 35166), UINT16_C( 34629), UINT16_C( 36341), + UINT16_C( 64989), UINT16_C( 2813), UINT16_C( 39062), UINT16_C( 26525) }, + { UINT16_C( 57069), UINT16_C( 55607), UINT16_C( 20903), UINT16_C( 57247), + UINT16_C( 36244), UINT16_C( 62344), UINT16_C( 41912), 
UINT16_C( 44736) }, + { UINT16_C( 44736), UINT16_C( 35166), UINT16_C( 34629), UINT16_C( 36341), + UINT16_C( 64989), UINT16_C( 2813), UINT16_C( 39062), UINT16_C( 26525) }, + { UINT16_C( 43675), UINT16_C( 41912), UINT16_C( 34629), UINT16_C( 36341), + UINT16_C( 64989), UINT16_C( 2813), UINT16_C( 39062), UINT16_C( 26525) }, + { UINT16_C( 43675), UINT16_C( 35166), UINT16_C( 34629), UINT16_C( 36244), + UINT16_C( 64989), UINT16_C( 2813), UINT16_C( 39062), UINT16_C( 26525) }, + { UINT16_C( 43675), UINT16_C( 35166), UINT16_C( 34629), UINT16_C( 36341), + UINT16_C( 64989), UINT16_C( 57247), UINT16_C( 39062), UINT16_C( 26525) }, + { UINT16_C( 43675), UINT16_C( 35166), UINT16_C( 34629), UINT16_C( 36341), + UINT16_C( 64989), UINT16_C( 2813), UINT16_C( 39062), UINT16_C( 55607) } }, + { { UINT16_C( 63367), UINT16_C( 50618), UINT16_C( 52629), UINT16_C( 26557), + UINT16_C( 49457), UINT16_C( 55560), UINT16_C( 59740), UINT16_C( 54035) }, + { UINT16_C( 35397), UINT16_C( 27456), UINT16_C( 19596), UINT16_C( 23074), + UINT16_C( 44230), UINT16_C( 28458), UINT16_C( 34027), UINT16_C( 31882) }, + { UINT16_C( 31882), UINT16_C( 50618), UINT16_C( 52629), UINT16_C( 26557), + UINT16_C( 49457), UINT16_C( 55560), UINT16_C( 59740), UINT16_C( 54035) }, + { UINT16_C( 63367), UINT16_C( 34027), UINT16_C( 52629), UINT16_C( 26557), + UINT16_C( 49457), UINT16_C( 55560), UINT16_C( 59740), UINT16_C( 54035) }, + { UINT16_C( 63367), UINT16_C( 50618), UINT16_C( 52629), UINT16_C( 44230), + UINT16_C( 49457), UINT16_C( 55560), UINT16_C( 59740), UINT16_C( 54035) }, + { UINT16_C( 63367), UINT16_C( 50618), UINT16_C( 52629), UINT16_C( 26557), + UINT16_C( 49457), UINT16_C( 23074), UINT16_C( 59740), UINT16_C( 54035) }, + { UINT16_C( 63367), UINT16_C( 50618), UINT16_C( 52629), UINT16_C( 26557), + UINT16_C( 49457), UINT16_C( 55560), UINT16_C( 59740), UINT16_C( 27456) } }, + { { UINT16_C( 23413), UINT16_C( 40483), UINT16_C( 63), UINT16_C( 52152), + UINT16_C( 38959), UINT16_C( 4929), UINT16_C( 21435), UINT16_C( 62787) }, + { 
UINT16_C( 49317), UINT16_C( 18538), UINT16_C( 17793), UINT16_C( 61704), + UINT16_C( 41671), UINT16_C( 53824), UINT16_C( 25678), UINT16_C( 37125) }, + { UINT16_C( 37125), UINT16_C( 40483), UINT16_C( 63), UINT16_C( 52152), + UINT16_C( 38959), UINT16_C( 4929), UINT16_C( 21435), UINT16_C( 62787) }, + { UINT16_C( 23413), UINT16_C( 25678), UINT16_C( 63), UINT16_C( 52152), + UINT16_C( 38959), UINT16_C( 4929), UINT16_C( 21435), UINT16_C( 62787) }, + { UINT16_C( 23413), UINT16_C( 40483), UINT16_C( 63), UINT16_C( 41671), + UINT16_C( 38959), UINT16_C( 4929), UINT16_C( 21435), UINT16_C( 62787) }, + { UINT16_C( 23413), UINT16_C( 40483), UINT16_C( 63), UINT16_C( 52152), + UINT16_C( 38959), UINT16_C( 61704), UINT16_C( 21435), UINT16_C( 62787) }, + { UINT16_C( 23413), UINT16_C( 40483), UINT16_C( 63), UINT16_C( 52152), + UINT16_C( 38959), UINT16_C( 4929), UINT16_C( 21435), UINT16_C( 18538) } }, + { { UINT16_C( 41696), UINT16_C( 7636), UINT16_C( 15486), UINT16_C( 842), + UINT16_C( 14005), UINT16_C( 39402), UINT16_C( 58283), UINT16_C( 8456) }, + { UINT16_C( 47670), UINT16_C( 2324), UINT16_C( 19969), UINT16_C( 53005), + UINT16_C( 4511), UINT16_C( 36060), UINT16_C( 54065), UINT16_C( 35479) }, + { UINT16_C( 35479), UINT16_C( 7636), UINT16_C( 15486), UINT16_C( 842), + UINT16_C( 14005), UINT16_C( 39402), UINT16_C( 58283), UINT16_C( 8456) }, + { UINT16_C( 41696), UINT16_C( 54065), UINT16_C( 15486), UINT16_C( 842), + UINT16_C( 14005), UINT16_C( 39402), UINT16_C( 58283), UINT16_C( 8456) }, + { UINT16_C( 41696), UINT16_C( 7636), UINT16_C( 15486), UINT16_C( 4511), + UINT16_C( 14005), UINT16_C( 39402), UINT16_C( 58283), UINT16_C( 8456) }, + { UINT16_C( 41696), UINT16_C( 7636), UINT16_C( 15486), UINT16_C( 842), + UINT16_C( 14005), UINT16_C( 53005), UINT16_C( 58283), UINT16_C( 8456) }, + { UINT16_C( 41696), UINT16_C( 7636), UINT16_C( 15486), UINT16_C( 842), + UINT16_C( 14005), UINT16_C( 39402), UINT16_C( 58283), UINT16_C( 2324) } }, + { { UINT16_C( 31497), UINT16_C( 5172), UINT16_C( 59315), 
UINT16_C( 4122), + UINT16_C( 52072), UINT16_C( 14663), UINT16_C( 6643), UINT16_C( 19646) }, + { UINT16_C( 3117), UINT16_C( 6887), UINT16_C( 8411), UINT16_C( 34695), + UINT16_C( 37761), UINT16_C( 62501), UINT16_C( 1235), UINT16_C( 7430) }, + { UINT16_C( 7430), UINT16_C( 5172), UINT16_C( 59315), UINT16_C( 4122), + UINT16_C( 52072), UINT16_C( 14663), UINT16_C( 6643), UINT16_C( 19646) }, + { UINT16_C( 31497), UINT16_C( 1235), UINT16_C( 59315), UINT16_C( 4122), + UINT16_C( 52072), UINT16_C( 14663), UINT16_C( 6643), UINT16_C( 19646) }, + { UINT16_C( 31497), UINT16_C( 5172), UINT16_C( 59315), UINT16_C( 37761), + UINT16_C( 52072), UINT16_C( 14663), UINT16_C( 6643), UINT16_C( 19646) }, + { UINT16_C( 31497), UINT16_C( 5172), UINT16_C( 59315), UINT16_C( 4122), + UINT16_C( 52072), UINT16_C( 34695), UINT16_C( 6643), UINT16_C( 19646) }, + { UINT16_C( 31497), UINT16_C( 5172), UINT16_C( 59315), UINT16_C( 4122), + UINT16_C( 52072), UINT16_C( 14663), UINT16_C( 6643), UINT16_C( 6887) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + + simde_uint16x8_t r0_7 = simde_vcopyq_laneq_u16(a, 0, b, 7); + simde_uint16x8_t r1_6 = simde_vcopyq_laneq_u16(a, 1, b, 6); + simde_uint16x8_t r3_4 = simde_vcopyq_laneq_u16(a, 3, b, 4); + simde_uint16x8_t r5_3 = simde_vcopyq_laneq_u16(a, 5, b, 3); + simde_uint16x8_t r7_1 = simde_vcopyq_laneq_u16(a, 7, b, 1); + + simde_test_arm_neon_assert_equal_u16x8(r0_7, simde_vld1q_u16(test_vec[i].r0_7)); + simde_test_arm_neon_assert_equal_u16x8(r1_6, simde_vld1q_u16(test_vec[i].r1_6)); + simde_test_arm_neon_assert_equal_u16x8(r3_4, simde_vld1q_u16(test_vec[i].r3_4)); + simde_test_arm_neon_assert_equal_u16x8(r5_3, simde_vld1q_u16(test_vec[i].r5_3)); + simde_test_arm_neon_assert_equal_u16x8(r7_1, simde_vld1q_u16(test_vec[i].r7_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_u32 (SIMDE_MUNIT_TEST_ARGS) { + 
static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint32_t r0_3[4]; + uint32_t r1_2[4]; + uint32_t r2_1[4]; + uint32_t r3_0[4]; + } test_vec[] = { + { { UINT32_C( 2136215777), UINT32_C( 3630933059), UINT32_C( 2773291539), UINT32_C( 3690985421) }, + { UINT32_C( 892184183), UINT32_C( 193672311), UINT32_C( 2139275606), UINT32_C( 136188165) }, + { UINT32_C( 136188165), UINT32_C( 3630933059), UINT32_C( 2773291539), UINT32_C( 3690985421) }, + { UINT32_C( 2136215777), UINT32_C( 2139275606), UINT32_C( 2773291539), UINT32_C( 3690985421) }, + { UINT32_C( 2136215777), UINT32_C( 3630933059), UINT32_C( 193672311), UINT32_C( 3690985421) }, + { UINT32_C( 2136215777), UINT32_C( 3630933059), UINT32_C( 2773291539), UINT32_C( 892184183) } }, + { { UINT32_C( 2671810708), UINT32_C( 1685919905), UINT32_C( 2821922285), UINT32_C( 3960114487) }, + { UINT32_C( 3985986156), UINT32_C( 864598811), UINT32_C( 693528333), UINT32_C( 3592219988) }, + { UINT32_C( 3592219988), UINT32_C( 1685919905), UINT32_C( 2821922285), UINT32_C( 3960114487) }, + { UINT32_C( 2671810708), UINT32_C( 693528333), UINT32_C( 2821922285), UINT32_C( 3960114487) }, + { UINT32_C( 2671810708), UINT32_C( 1685919905), UINT32_C( 864598811), UINT32_C( 3960114487) }, + { UINT32_C( 2671810708), UINT32_C( 1685919905), UINT32_C( 2821922285), UINT32_C( 3985986156) } }, + { { UINT32_C( 817974392), UINT32_C( 501550872), UINT32_C( 1712818019), UINT32_C( 2965256612) }, + { UINT32_C( 1300216849), UINT32_C( 3932298120), UINT32_C( 3679577723), UINT32_C( 1127937645) }, + { UINT32_C( 1127937645), UINT32_C( 501550872), UINT32_C( 1712818019), UINT32_C( 2965256612) }, + { UINT32_C( 817974392), UINT32_C( 3679577723), UINT32_C( 1712818019), UINT32_C( 2965256612) }, + { UINT32_C( 817974392), UINT32_C( 501550872), UINT32_C( 3932298120), UINT32_C( 2965256612) }, + { UINT32_C( 817974392), UINT32_C( 501550872), UINT32_C( 1712818019), UINT32_C( 1300216849) } }, + { { UINT32_C( 2723508205), UINT32_C( 4208862096), UINT32_C( 2940436296), UINT32_C( 
1699082770) }, + { UINT32_C( 906232555), UINT32_C( 736207677), UINT32_C( 1564834480), UINT32_C( 634302632) }, + { UINT32_C( 634302632), UINT32_C( 4208862096), UINT32_C( 2940436296), UINT32_C( 1699082770) }, + { UINT32_C( 2723508205), UINT32_C( 1564834480), UINT32_C( 2940436296), UINT32_C( 1699082770) }, + { UINT32_C( 2723508205), UINT32_C( 4208862096), UINT32_C( 736207677), UINT32_C( 1699082770) }, + { UINT32_C( 2723508205), UINT32_C( 4208862096), UINT32_C( 2940436296), UINT32_C( 906232555) } }, + { { UINT32_C( 379503279), UINT32_C( 2857600816), UINT32_C( 275459501), UINT32_C( 304292775) }, + { UINT32_C( 1019543097), UINT32_C( 373140555), UINT32_C( 909261254), UINT32_C( 3547460328) }, + { UINT32_C( 3547460328), UINT32_C( 2857600816), UINT32_C( 275459501), UINT32_C( 304292775) }, + { UINT32_C( 379503279), UINT32_C( 909261254), UINT32_C( 275459501), UINT32_C( 304292775) }, + { UINT32_C( 379503279), UINT32_C( 2857600816), UINT32_C( 373140555), UINT32_C( 304292775) }, + { UINT32_C( 379503279), UINT32_C( 2857600816), UINT32_C( 275459501), UINT32_C( 1019543097) } }, + { { UINT32_C( 1203434097), UINT32_C( 114376390), UINT32_C( 168566558), UINT32_C( 3889127067) }, + { UINT32_C( 2410760924), UINT32_C( 2923650458), UINT32_C( 3855579023), UINT32_C( 4134326194) }, + { UINT32_C( 4134326194), UINT32_C( 114376390), UINT32_C( 168566558), UINT32_C( 3889127067) }, + { UINT32_C( 1203434097), UINT32_C( 3855579023), UINT32_C( 168566558), UINT32_C( 3889127067) }, + { UINT32_C( 1203434097), UINT32_C( 114376390), UINT32_C( 2923650458), UINT32_C( 3889127067) }, + { UINT32_C( 1203434097), UINT32_C( 114376390), UINT32_C( 168566558), UINT32_C( 2410760924) } }, + { { UINT32_C( 3869172885), UINT32_C( 2993434742), UINT32_C( 2905386218), UINT32_C( 2642883192) }, + { UINT32_C( 2870909542), UINT32_C( 1947325143), UINT32_C( 2317972867), UINT32_C( 1261263722) }, + { UINT32_C( 1261263722), UINT32_C( 2993434742), UINT32_C( 2905386218), UINT32_C( 2642883192) }, + { UINT32_C( 3869172885), UINT32_C( 
2317972867), UINT32_C( 2905386218), UINT32_C( 2642883192) }, + { UINT32_C( 3869172885), UINT32_C( 2993434742), UINT32_C( 1947325143), UINT32_C( 2642883192) }, + { UINT32_C( 3869172885), UINT32_C( 2993434742), UINT32_C( 2905386218), UINT32_C( 2870909542) } }, + { { UINT32_C( 1907398736), UINT32_C( 1863663803), UINT32_C( 3232279915), UINT32_C( 2788982881) }, + { UINT32_C( 483563107), UINT32_C( 22560959), UINT32_C( 3427567537), UINT32_C( 922192707) }, + { UINT32_C( 922192707), UINT32_C( 1863663803), UINT32_C( 3232279915), UINT32_C( 2788982881) }, + { UINT32_C( 1907398736), UINT32_C( 3427567537), UINT32_C( 3232279915), UINT32_C( 2788982881) }, + { UINT32_C( 1907398736), UINT32_C( 1863663803), UINT32_C( 22560959), UINT32_C( 2788982881) }, + { UINT32_C( 1907398736), UINT32_C( 1863663803), UINT32_C( 3232279915), UINT32_C( 483563107) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + + simde_uint32x4_t r0_3 = simde_vcopyq_laneq_u32(a, 0, b, 3); + simde_uint32x4_t r1_2 = simde_vcopyq_laneq_u32(a, 1, b, 2); + simde_uint32x4_t r2_1 = simde_vcopyq_laneq_u32(a, 2, b, 1); + simde_uint32x4_t r3_0 = simde_vcopyq_laneq_u32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_u32x4(r0_3, simde_vld1q_u32(test_vec[i].r0_3)); + simde_test_arm_neon_assert_equal_u32x4(r1_2, simde_vld1q_u32(test_vec[i].r1_2)); + simde_test_arm_neon_assert_equal_u32x4(r2_1, simde_vld1q_u32(test_vec[i].r2_1)); + simde_test_arm_neon_assert_equal_u32x4(r3_0, simde_vld1q_u32(test_vec[i].r3_0)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint64_t b[2]; + uint64_t r0_0[2]; + uint64_t r0_1[2]; + uint64_t r1_0[2]; + uint64_t r1_1[2]; + } test_vec[] = { + { { UINT64_C(13439609365108810888), UINT64_C(15937695336418566217) }, + { UINT64_C(11757999458065731058), 
UINT64_C(17975820540431480162) }, + { UINT64_C(11757999458065731058), UINT64_C(15937695336418566217) }, + { UINT64_C(17975820540431480162), UINT64_C(15937695336418566217) }, + { UINT64_C(13439609365108810888), UINT64_C(11757999458065731058) }, + { UINT64_C(13439609365108810888), UINT64_C(17975820540431480162) } }, + { { UINT64_C(16564546751237621820), UINT64_C(12241248961613285278) }, + { UINT64_C( 7641825135841022951), UINT64_C(12315251603794302270) }, + { UINT64_C( 7641825135841022951), UINT64_C(12241248961613285278) }, + { UINT64_C(12315251603794302270), UINT64_C(12241248961613285278) }, + { UINT64_C(16564546751237621820), UINT64_C( 7641825135841022951) }, + { UINT64_C(16564546751237621820), UINT64_C(12315251603794302270) } }, + { { UINT64_C(17704957791680247171), UINT64_C( 9371616720984263202) }, + { UINT64_C( 7276494259531683395), UINT64_C( 5859910920720482327) }, + { UINT64_C( 7276494259531683395), UINT64_C( 9371616720984263202) }, + { UINT64_C( 5859910920720482327), UINT64_C( 9371616720984263202) }, + { UINT64_C(17704957791680247171), UINT64_C( 7276494259531683395) }, + { UINT64_C(17704957791680247171), UINT64_C( 5859910920720482327) } }, + { { UINT64_C(14662575302122884717), UINT64_C( 3176330314367196783) }, + { UINT64_C( 7453175549508455517), UINT64_C( 4453488503906172319) }, + { UINT64_C( 7453175549508455517), UINT64_C( 3176330314367196783) }, + { UINT64_C( 4453488503906172319), UINT64_C( 3176330314367196783) }, + { UINT64_C(14662575302122884717), UINT64_C( 7453175549508455517) }, + { UINT64_C(14662575302122884717), UINT64_C( 4453488503906172319) } }, + { { UINT64_C(12249441119495819269), UINT64_C( 186417079881519408) }, + { UINT64_C(16358619248331576219), UINT64_C( 987352505179597987) }, + { UINT64_C(16358619248331576219), UINT64_C( 186417079881519408) }, + { UINT64_C( 987352505179597987), UINT64_C( 186417079881519408) }, + { UINT64_C(12249441119495819269), UINT64_C(16358619248331576219) }, + { UINT64_C(12249441119495819269), UINT64_C( 
987352505179597987) } }, + { { UINT64_C( 3211036171106824205), UINT64_C(17735964159800454359) }, + { UINT64_C(14856931664874395873), UINT64_C( 5092631228929164180) }, + { UINT64_C(14856931664874395873), UINT64_C(17735964159800454359) }, + { UINT64_C( 5092631228929164180), UINT64_C(17735964159800454359) }, + { UINT64_C( 3211036171106824205), UINT64_C(14856931664874395873) }, + { UINT64_C( 3211036171106824205), UINT64_C( 5092631228929164180) } }, + { { UINT64_C(16290890406714175431), UINT64_C( 7369026070235053466) }, + { UINT64_C(14159949742399780407), UINT64_C(12799840516808467205) }, + { UINT64_C(14159949742399780407), UINT64_C( 7369026070235053466) }, + { UINT64_C(12799840516808467205), UINT64_C( 7369026070235053466) }, + { UINT64_C(16290890406714175431), UINT64_C(14159949742399780407) }, + { UINT64_C(16290890406714175431), UINT64_C(12799840516808467205) } }, + { { UINT64_C(11596480201225729972), UINT64_C(15597658933811404841) }, + { UINT64_C( 4501987111170332097), UINT64_C(16539108248642911739) }, + { UINT64_C( 4501987111170332097), UINT64_C(15597658933811404841) }, + { UINT64_C(16539108248642911739), UINT64_C(15597658933811404841) }, + { UINT64_C(11596480201225729972), UINT64_C( 4501987111170332097) }, + { UINT64_C(11596480201225729972), UINT64_C(16539108248642911739) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + + simde_uint64x2_t r0_0 = simde_vcopyq_laneq_u64(a, 0, b, 0); + simde_uint64x2_t r0_1 = simde_vcopyq_laneq_u64(a, 0, b, 1); + simde_uint64x2_t r1_0 = simde_vcopyq_laneq_u64(a, 1, b, 0); + simde_uint64x2_t r1_1 = simde_vcopyq_laneq_u64(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_u64x2(r0_0, simde_vld1q_u64(test_vec[i].r0_0)); + simde_test_arm_neon_assert_equal_u64x2(r0_1, simde_vld1q_u64(test_vec[i].r0_1)); + simde_test_arm_neon_assert_equal_u64x2(r1_0, simde_vld1q_u64(test_vec[i].r1_0)); + 
simde_test_arm_neon_assert_equal_u64x2(r1_1, simde_vld1q_u64(test_vec[i].r1_1)); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_f32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float32_t a[4]; + simde_float32_t b[4]; + simde_float32_t r0_3[4]; + simde_float32_t r1_2[4]; + simde_float32_t r2_1[4]; + simde_float32_t r3_0[4]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C( 2288.813), -SIMDE_FLOAT32_C( 6828.664), -SIMDE_FLOAT32_C( 1743.203), SIMDE_FLOAT32_C( 5142.055) }, + { -SIMDE_FLOAT32_C( 7820.406), SIMDE_FLOAT32_C( 4065.967), -SIMDE_FLOAT32_C( 7156.866), SIMDE_FLOAT32_C( 1483.873) }, + { SIMDE_FLOAT32_C( 1483.873), -SIMDE_FLOAT32_C( 6828.664), -SIMDE_FLOAT32_C( 1743.203), SIMDE_FLOAT32_C( 5142.055) }, + { -SIMDE_FLOAT32_C( 2288.813), -SIMDE_FLOAT32_C( 7156.866), -SIMDE_FLOAT32_C( 1743.203), SIMDE_FLOAT32_C( 5142.055) }, + { -SIMDE_FLOAT32_C( 2288.813), -SIMDE_FLOAT32_C( 6828.664), SIMDE_FLOAT32_C( 4065.967), SIMDE_FLOAT32_C( 5142.055) }, + { -SIMDE_FLOAT32_C( 2288.813), -SIMDE_FLOAT32_C( 6828.664), -SIMDE_FLOAT32_C( 1743.203), -SIMDE_FLOAT32_C( 7820.406) } }, + { { SIMDE_FLOAT32_C( 8992.982), SIMDE_FLOAT32_C( 7814.275), SIMDE_FLOAT32_C( 9353.939), SIMDE_FLOAT32_C( 2474.339) }, + { SIMDE_FLOAT32_C( 7863.002), -SIMDE_FLOAT32_C( 412.511), SIMDE_FLOAT32_C( 196.362), SIMDE_FLOAT32_C( 8459.223) }, + { SIMDE_FLOAT32_C( 8459.223), SIMDE_FLOAT32_C( 7814.275), SIMDE_FLOAT32_C( 9353.939), SIMDE_FLOAT32_C( 2474.339) }, + { SIMDE_FLOAT32_C( 8992.982), SIMDE_FLOAT32_C( 196.362), SIMDE_FLOAT32_C( 9353.939), SIMDE_FLOAT32_C( 2474.339) }, + { SIMDE_FLOAT32_C( 8992.982), SIMDE_FLOAT32_C( 7814.275), -SIMDE_FLOAT32_C( 412.511), SIMDE_FLOAT32_C( 2474.339) }, + { SIMDE_FLOAT32_C( 8992.982), SIMDE_FLOAT32_C( 7814.275), SIMDE_FLOAT32_C( 9353.939), SIMDE_FLOAT32_C( 7863.002) } }, + { { -SIMDE_FLOAT32_C( 2949.494), SIMDE_FLOAT32_C( 2614.980), SIMDE_FLOAT32_C( 8780.445), SIMDE_FLOAT32_C( 3656.842) }, + { SIMDE_FLOAT32_C( 8306.215), SIMDE_FLOAT32_C( 4706.809), 
SIMDE_FLOAT32_C( 2470.985), SIMDE_FLOAT32_C( 2624.702) }, + { SIMDE_FLOAT32_C( 2624.702), SIMDE_FLOAT32_C( 2614.980), SIMDE_FLOAT32_C( 8780.445), SIMDE_FLOAT32_C( 3656.842) }, + { -SIMDE_FLOAT32_C( 2949.494), SIMDE_FLOAT32_C( 2470.985), SIMDE_FLOAT32_C( 8780.445), SIMDE_FLOAT32_C( 3656.842) }, + { -SIMDE_FLOAT32_C( 2949.494), SIMDE_FLOAT32_C( 2614.980), SIMDE_FLOAT32_C( 4706.809), SIMDE_FLOAT32_C( 3656.842) }, + { -SIMDE_FLOAT32_C( 2949.494), SIMDE_FLOAT32_C( 2614.980), SIMDE_FLOAT32_C( 8780.445), SIMDE_FLOAT32_C( 8306.215) } }, + { { SIMDE_FLOAT32_C( 3479.178), -SIMDE_FLOAT32_C( 9729.815), SIMDE_FLOAT32_C( 782.037), SIMDE_FLOAT32_C( 1451.528) }, + { -SIMDE_FLOAT32_C( 1571.802), -SIMDE_FLOAT32_C( 8661.118), -SIMDE_FLOAT32_C( 447.722), SIMDE_FLOAT32_C( 6319.575) }, + { SIMDE_FLOAT32_C( 6319.575), -SIMDE_FLOAT32_C( 9729.815), SIMDE_FLOAT32_C( 782.037), SIMDE_FLOAT32_C( 1451.528) }, + { SIMDE_FLOAT32_C( 3479.178), -SIMDE_FLOAT32_C( 447.722), SIMDE_FLOAT32_C( 782.037), SIMDE_FLOAT32_C( 1451.528) }, + { SIMDE_FLOAT32_C( 3479.178), -SIMDE_FLOAT32_C( 9729.815), -SIMDE_FLOAT32_C( 8661.118), SIMDE_FLOAT32_C( 1451.528) }, + { SIMDE_FLOAT32_C( 3479.178), -SIMDE_FLOAT32_C( 9729.815), SIMDE_FLOAT32_C( 782.037), -SIMDE_FLOAT32_C( 1571.802) } }, + { { -SIMDE_FLOAT32_C( 4819.511), -SIMDE_FLOAT32_C( 4667.547), -SIMDE_FLOAT32_C( 5410.779), -SIMDE_FLOAT32_C( 5298.958) }, + { SIMDE_FLOAT32_C( 6992.926), -SIMDE_FLOAT32_C( 2054.155), SIMDE_FLOAT32_C( 4552.382), SIMDE_FLOAT32_C( 6344.732) }, + { SIMDE_FLOAT32_C( 6344.732), -SIMDE_FLOAT32_C( 4667.547), -SIMDE_FLOAT32_C( 5410.779), -SIMDE_FLOAT32_C( 5298.958) }, + { -SIMDE_FLOAT32_C( 4819.511), SIMDE_FLOAT32_C( 4552.382), -SIMDE_FLOAT32_C( 5410.779), -SIMDE_FLOAT32_C( 5298.958) }, + { -SIMDE_FLOAT32_C( 4819.511), -SIMDE_FLOAT32_C( 4667.547), -SIMDE_FLOAT32_C( 2054.155), -SIMDE_FLOAT32_C( 5298.958) }, + { -SIMDE_FLOAT32_C( 4819.511), -SIMDE_FLOAT32_C( 4667.547), -SIMDE_FLOAT32_C( 5410.779), SIMDE_FLOAT32_C( 6992.926) } }, + { { 
-SIMDE_FLOAT32_C( 6145.318), SIMDE_FLOAT32_C( 5380.021), -SIMDE_FLOAT32_C( 6940.736), SIMDE_FLOAT32_C( 4362.208) }, + { -SIMDE_FLOAT32_C( 2727.117), SIMDE_FLOAT32_C( 7132.807), -SIMDE_FLOAT32_C( 7647.750), SIMDE_FLOAT32_C( 5286.702) }, + { SIMDE_FLOAT32_C( 5286.702), SIMDE_FLOAT32_C( 5380.021), -SIMDE_FLOAT32_C( 6940.736), SIMDE_FLOAT32_C( 4362.208) }, + { -SIMDE_FLOAT32_C( 6145.318), -SIMDE_FLOAT32_C( 7647.750), -SIMDE_FLOAT32_C( 6940.736), SIMDE_FLOAT32_C( 4362.208) }, + { -SIMDE_FLOAT32_C( 6145.318), SIMDE_FLOAT32_C( 5380.021), SIMDE_FLOAT32_C( 7132.807), SIMDE_FLOAT32_C( 4362.208) }, + { -SIMDE_FLOAT32_C( 6145.318), SIMDE_FLOAT32_C( 5380.021), -SIMDE_FLOAT32_C( 6940.736), -SIMDE_FLOAT32_C( 2727.117) } }, + { { SIMDE_FLOAT32_C( 5009.768), SIMDE_FLOAT32_C( 5052.221), SIMDE_FLOAT32_C( 1795.097), SIMDE_FLOAT32_C( 9838.342) }, + { -SIMDE_FLOAT32_C( 6042.081), SIMDE_FLOAT32_C( 645.963), SIMDE_FLOAT32_C( 1858.851), -SIMDE_FLOAT32_C( 8743.057) }, + { -SIMDE_FLOAT32_C( 8743.057), SIMDE_FLOAT32_C( 5052.221), SIMDE_FLOAT32_C( 1795.097), SIMDE_FLOAT32_C( 9838.342) }, + { SIMDE_FLOAT32_C( 5009.768), SIMDE_FLOAT32_C( 1858.851), SIMDE_FLOAT32_C( 1795.097), SIMDE_FLOAT32_C( 9838.342) }, + { SIMDE_FLOAT32_C( 5009.768), SIMDE_FLOAT32_C( 5052.221), SIMDE_FLOAT32_C( 645.963), SIMDE_FLOAT32_C( 9838.342) }, + { SIMDE_FLOAT32_C( 5009.768), SIMDE_FLOAT32_C( 5052.221), SIMDE_FLOAT32_C( 1795.097), -SIMDE_FLOAT32_C( 6042.081) } }, + { { SIMDE_FLOAT32_C( 4248.810), -SIMDE_FLOAT32_C( 8662.274), -SIMDE_FLOAT32_C( 3328.998), -SIMDE_FLOAT32_C( 2036.577) }, + { SIMDE_FLOAT32_C( 9358.539), SIMDE_FLOAT32_C( 8204.385), SIMDE_FLOAT32_C( 4985.117), SIMDE_FLOAT32_C( 697.916) }, + { SIMDE_FLOAT32_C( 697.916), -SIMDE_FLOAT32_C( 8662.274), -SIMDE_FLOAT32_C( 3328.998), -SIMDE_FLOAT32_C( 2036.577) }, + { SIMDE_FLOAT32_C( 4248.810), SIMDE_FLOAT32_C( 4985.117), -SIMDE_FLOAT32_C( 3328.998), -SIMDE_FLOAT32_C( 2036.577) }, + { SIMDE_FLOAT32_C( 4248.810), -SIMDE_FLOAT32_C( 8662.274), SIMDE_FLOAT32_C( 
8204.385), -SIMDE_FLOAT32_C( 2036.577) }, + { SIMDE_FLOAT32_C( 4248.810), -SIMDE_FLOAT32_C( 8662.274), -SIMDE_FLOAT32_C( 3328.998), SIMDE_FLOAT32_C( 9358.539) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float32x4_t b = simde_vld1q_f32(test_vec[i].b); + + simde_float32x4_t r0_3 = simde_vcopyq_laneq_f32(a, 0, b, 3); + simde_float32x4_t r1_2 = simde_vcopyq_laneq_f32(a, 1, b, 2); + simde_float32x4_t r2_1 = simde_vcopyq_laneq_f32(a, 2, b, 1); + simde_float32x4_t r3_0 = simde_vcopyq_laneq_f32(a, 3, b, 0); + + simde_test_arm_neon_assert_equal_f32x4(r0_3, simde_vld1q_f32(test_vec[i].r0_3), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r1_2, simde_vld1q_f32(test_vec[i].r1_2), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r2_1, simde_vld1q_f32(test_vec[i].r2_1), INT_MAX); + simde_test_arm_neon_assert_equal_f32x4(r3_0, simde_vld1q_f32(test_vec[i].r3_0), INT_MAX); + } + + return 0; +} + +static int +test_simde_vcopyq_laneq_f64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + simde_float64_t a[2]; + simde_float64_t b[2]; + simde_float64_t r0_0[2]; + simde_float64_t r0_1[2]; + simde_float64_t r1_0[2]; + simde_float64_t r1_1[2]; + } test_vec[] = { + { { -SIMDE_FLOAT64_C( 678563.125), SIMDE_FLOAT64_C( 194228.625) }, + { SIMDE_FLOAT64_C( 42214.062), SIMDE_FLOAT64_C( 747572.625) }, + { SIMDE_FLOAT64_C( 42214.062), SIMDE_FLOAT64_C( 194228.625) }, + { SIMDE_FLOAT64_C( 747572.625), SIMDE_FLOAT64_C( 194228.625) }, + { -SIMDE_FLOAT64_C( 678563.125), SIMDE_FLOAT64_C( 42214.062) }, + { -SIMDE_FLOAT64_C( 678563.125), SIMDE_FLOAT64_C( 747572.625) } }, + { { -SIMDE_FLOAT64_C( 672868.625), SIMDE_FLOAT64_C( 823476.500) }, + { -SIMDE_FLOAT64_C( 249558.438), SIMDE_FLOAT64_C( 445662.250) }, + { -SIMDE_FLOAT64_C( 249558.438), SIMDE_FLOAT64_C( 823476.500) }, + { SIMDE_FLOAT64_C( 445662.250), SIMDE_FLOAT64_C( 823476.500) }, + { -SIMDE_FLOAT64_C( 672868.625), -SIMDE_FLOAT64_C( 
249558.438) }, + { -SIMDE_FLOAT64_C( 672868.625), SIMDE_FLOAT64_C( 445662.250) } }, + { { -SIMDE_FLOAT64_C( 760755.875), SIMDE_FLOAT64_C( 382520.375) }, + { SIMDE_FLOAT64_C( 767680.250), -SIMDE_FLOAT64_C( 74684.625) }, + { SIMDE_FLOAT64_C( 767680.250), SIMDE_FLOAT64_C( 382520.375) }, + { -SIMDE_FLOAT64_C( 74684.625), SIMDE_FLOAT64_C( 382520.375) }, + { -SIMDE_FLOAT64_C( 760755.875), SIMDE_FLOAT64_C( 767680.250) }, + { -SIMDE_FLOAT64_C( 760755.875), -SIMDE_FLOAT64_C( 74684.625) } }, + { { -SIMDE_FLOAT64_C( 34493.875), SIMDE_FLOAT64_C( 196752.750) }, + { -SIMDE_FLOAT64_C( 164549.625), SIMDE_FLOAT64_C( 930720.250) }, + { -SIMDE_FLOAT64_C( 164549.625), SIMDE_FLOAT64_C( 196752.750) }, + { SIMDE_FLOAT64_C( 930720.250), SIMDE_FLOAT64_C( 196752.750) }, + { -SIMDE_FLOAT64_C( 34493.875), -SIMDE_FLOAT64_C( 164549.625) }, + { -SIMDE_FLOAT64_C( 34493.875), SIMDE_FLOAT64_C( 930720.250) } }, + { { -SIMDE_FLOAT64_C( 88352.375), SIMDE_FLOAT64_C( 286620.375) }, + { -SIMDE_FLOAT64_C( 353142.375), -SIMDE_FLOAT64_C( 187538.125) }, + { -SIMDE_FLOAT64_C( 353142.375), SIMDE_FLOAT64_C( 286620.375) }, + { -SIMDE_FLOAT64_C( 187538.125), SIMDE_FLOAT64_C( 286620.375) }, + { -SIMDE_FLOAT64_C( 88352.375), -SIMDE_FLOAT64_C( 353142.375) }, + { -SIMDE_FLOAT64_C( 88352.375), -SIMDE_FLOAT64_C( 187538.125) } }, + { { SIMDE_FLOAT64_C( 896252.125), -SIMDE_FLOAT64_C( 543789.250) }, + { -SIMDE_FLOAT64_C( 384486.062), SIMDE_FLOAT64_C( 366117.500) }, + { -SIMDE_FLOAT64_C( 384486.062), -SIMDE_FLOAT64_C( 543789.250) }, + { SIMDE_FLOAT64_C( 366117.500), -SIMDE_FLOAT64_C( 543789.250) }, + { SIMDE_FLOAT64_C( 896252.125), -SIMDE_FLOAT64_C( 384486.062) }, + { SIMDE_FLOAT64_C( 896252.125), SIMDE_FLOAT64_C( 366117.500) } }, + { { SIMDE_FLOAT64_C( 921497.375), SIMDE_FLOAT64_C( 126141.125) }, + { -SIMDE_FLOAT64_C( 84883.125), SIMDE_FLOAT64_C( 833692.250) }, + { -SIMDE_FLOAT64_C( 84883.125), SIMDE_FLOAT64_C( 126141.125) }, + { SIMDE_FLOAT64_C( 833692.250), SIMDE_FLOAT64_C( 126141.125) }, + { SIMDE_FLOAT64_C( 
921497.375), -SIMDE_FLOAT64_C( 84883.125) }, + { SIMDE_FLOAT64_C( 921497.375), SIMDE_FLOAT64_C( 833692.250) } }, + { { SIMDE_FLOAT64_C( 63073.875), -SIMDE_FLOAT64_C( 637440.312) }, + { SIMDE_FLOAT64_C( 955680.125), SIMDE_FLOAT64_C( 696772.875) }, + { SIMDE_FLOAT64_C( 955680.125), -SIMDE_FLOAT64_C( 637440.312) }, + { SIMDE_FLOAT64_C( 696772.875), -SIMDE_FLOAT64_C( 637440.312) }, + { SIMDE_FLOAT64_C( 63073.875), SIMDE_FLOAT64_C( 955680.125) }, + { SIMDE_FLOAT64_C( 63073.875), SIMDE_FLOAT64_C( 696772.875) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float64x2_t b = simde_vld1q_f64(test_vec[i].b); + + simde_float64x2_t r0_0 = simde_vcopyq_laneq_f64(a, 0, b, 0); + simde_float64x2_t r0_1 = simde_vcopyq_laneq_f64(a, 0, b, 1); + simde_float64x2_t r1_0 = simde_vcopyq_laneq_f64(a, 1, b, 0); + simde_float64x2_t r1_1 = simde_vcopyq_laneq_f64(a, 1, b, 1); + + simde_test_arm_neon_assert_equal_f64x2(r0_0, simde_vld1q_f64(test_vec[i].r0_0), INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r0_1, simde_vld1q_f64(test_vec[i].r0_1), INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r1_0, simde_vld1q_f64(test_vec[i].r1_0), INT_MAX); + simde_test_arm_neon_assert_equal_f64x2(r1_1, simde_vld1q_f64(test_vec[i].r1_1), INT_MAX); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_lane_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_s32) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopy_laneq_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_lane_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcopyq_laneq_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/cvt.c b/test/arm/neon/cvt.c index 6b20d842d..4b83654c3 100644 --- a/test/arm/neon/cvt.c +++ b/test/arm/neon/cvt.c @@ -5,10 +5,23 @@ static int test_simde_vcvth_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a; int16_t r; } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MAX)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MIN)), + INT16_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MAX+1000)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, 
INT16_MIN-1000)), + INT16_MIN }, + #endif { SIMDE_FLOAT16_VALUE( -0.604), INT16_C( 0) }, { SIMDE_FLOAT16_VALUE( 24.671), @@ -35,28 +48,161 @@ test_simde_vcvth_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int16_t r = simde_vcvth_s16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MIN)), + INT32_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MAX+1000ll)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MIN-1000ll)), + INT32_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 20.055), + INT32_C( 20) }, + { SIMDE_FLOAT16_VALUE( 9.812), + INT32_C( 9) }, + { SIMDE_FLOAT16_VALUE( - 16.334), + -INT32_C( 16) }, + { SIMDE_FLOAT16_VALUE( 28.437), + INT32_C( 28) }, + { SIMDE_FLOAT16_VALUE( 27.096), + INT32_C( 27) }, + { SIMDE_FLOAT16_VALUE( - 3.232), + -INT32_C( 3) }, + { SIMDE_FLOAT16_VALUE( - 16.515), + -INT32_C( 16) }, + { SIMDE_FLOAT16_VALUE( - 18.901), + -INT32_C( 18) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int32_t r = simde_vcvth_s32_f16(a); + + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int32_t r = simde_vcvth_s32_f16(a); + + 
simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Compares simde_vcvth_s64_f16 results with a fixed float16 -> int64_t table; returns 0 when every row matches. */ +#if 1 + struct { + simde_float16 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) /* NaN and limit rows are compiled only when SIMDE_FAST_CONVERSION_RANGE is not defined */ + { SIMDE_NANHF, + INT64_C( 0) }, /* INT64_C keeps the literal in step with the int64_t field and the rows below */ + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT64_MAX)), + INT64_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT64_MIN)), + INT64_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, 0)), + INT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( - 17.280), + -INT64_C( 17) }, + { SIMDE_FLOAT16_VALUE( 20.237), + INT64_C( 20) }, + { SIMDE_FLOAT16_VALUE( 27.909), + INT64_C( 27) }, + { SIMDE_FLOAT16_VALUE( 28.646), + INT64_C( 28) }, + { SIMDE_FLOAT16_VALUE( - 17.404), + -INT64_C( 17) }, + { SIMDE_FLOAT16_VALUE( 19.894), + INT64_C( 19) }, + { SIMDE_FLOAT16_VALUE( - 3.624), + -INT64_C( 3) }, + { SIMDE_FLOAT16_VALUE( 11.438), + INT64_C( 11) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int64_t r = simde_vcvth_s64_f16(a); + + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else /* disabled by the "#if 1" above: generates eight random inputs, passes them to the write helpers, and returns 1 */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int64_t r = simde_vcvth_s64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvth_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a; uint16_t r; } test_vec[] = { - { SIMDE_FLOAT16_VALUE( 25.639), - UINT16_C( 25) }, - { SIMDE_FLOAT16_VALUE( -25.081), - UINT16_C( 0) }, - { SIMDE_FLOAT16_VALUE( 15.061), - UINT16_C( 15) }, - { SIMDE_FLOAT16_VALUE( -21.777), + #if !defined(SIMDE_FAST_CONVERSION_RANGE) 
+ { SIMDE_NANHF, UINT16_C( 0) }, - { SIMDE_FLOAT16_VALUE( -26.635), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT16_MAX)), + UINT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, 0)), UINT16_C( 0) }, - { SIMDE_FLOAT16_VALUE( -9.047), + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT16_MAX+1000)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( -25.081), UINT16_C( 0) }, { SIMDE_FLOAT16_VALUE( -27.803), UINT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 25.639), + UINT16_C( 25) }, + { SIMDE_FLOAT16_VALUE( 15.061), + UINT16_C( 15) }, { SIMDE_FLOAT16_VALUE( 3.276), UINT16_C( 3) }, }; @@ -69,6 +215,302 @@ test_simde_vcvth_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcvth_u16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Compares simde_vcvth_u32_f16 results with a fixed float16 -> uint32_t table; returns 0 when every row matches. */ +#if 1 + struct { + simde_float16 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) /* NaN, limit and negative-input rows are compiled only when SIMDE_FAST_CONVERSION_RANGE is not defined */ + { SIMDE_NANHF, + UINT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT32_MAX)), + UINT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, 0)), + UINT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT32_MAX+1000ll)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( -25.081), + UINT32_C( 0) }, /* UINT32_C keeps the literal in step with the uint32_t field and the other rows */ + #endif + { SIMDE_FLOAT16_VALUE( 21.502), + UINT32_C( 21) }, + { SIMDE_FLOAT16_VALUE( 26.560), + UINT32_C( 26) }, + { SIMDE_FLOAT16_VALUE( 28.676), + UINT32_C( 28) }, + { SIMDE_FLOAT16_VALUE( 0.122), + UINT32_C( 0) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint32_t r = 
simde_vcvth_u32_f16(a); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else /* disabled by the "#if 1" above: generates eight random inputs, passes them to the write helpers, and returns 1 */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint32_t r = simde_vcvth_u32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT64_MAX)), + UINT64_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, 0)), + UINT64_C( 0) }, + { SIMDE_FLOAT16_VALUE( -16.558), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 11.916), + UINT64_C( 11) }, + { SIMDE_FLOAT16_VALUE( 2.866), + UINT64_C( 2) }, + { SIMDE_FLOAT16_VALUE( 21.522), + UINT64_C( 21) }, + { SIMDE_FLOAT16_VALUE( 10.554), + UINT64_C( 10) }, + { SIMDE_FLOAT16_VALUE( 4.087), + UINT64_C( 4) }, + { SIMDE_FLOAT16_VALUE( 19.017), + UINT64_C( 19) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint64_t r = simde_vcvth_u64_f16(a); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint64_t r = simde_vcvth_u64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_f16_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int32_t a; + simde_float16 r; + } test_vec[] = { + { -INT32_C( 1589103087), + SIMDE_NINFINITYHF }, + { -INT32_C( 1592118057), + 
SIMDE_NINFINITYHF }, + { INT32_C( 895663592), + SIMDE_INFINITYHF }, + { -INT32_C( 320293893), + SIMDE_NINFINITYHF }, + { -INT32_C( 2108080077), + SIMDE_NINFINITYHF }, + { -INT32_C( 1415311873), + SIMDE_NINFINITYHF }, + { INT32_C( 1265253428), + SIMDE_INFINITYHF }, + { INT32_C( 587118609), + SIMDE_INFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 r = simde_vcvth_f16_s32(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int32_t a = simde_test_codegen_random_i32(); + simde_float16 r = simde_vcvth_f16_s32(a); + + simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_f16_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int64_t a; + simde_float16 r; + } test_vec[] = { + { INT64_C( 7162937251746498287), + SIMDE_INFINITYHF }, + { -INT64_C( 5728155978464441958), + SIMDE_NINFINITYHF }, + { INT64_C( 2047685275721849536), + SIMDE_INFINITYHF }, + { INT64_C( 8954675090954900754), + SIMDE_INFINITYHF }, + { INT64_C( 9190064415792367788), + SIMDE_INFINITYHF }, + { INT64_C( 7034787140702839156), + SIMDE_INFINITYHF }, + { -INT64_C( 4867111737836383376), + SIMDE_NINFINITYHF }, + { -INT64_C( 1339216974095328416), + SIMDE_NINFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 r = simde_vcvth_f16_s64(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int64_t a = simde_test_codegen_random_i64(); + simde_float16 r = simde_vcvth_f16_s64(a); + + simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_f16_u32 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint32_t a; + simde_float16 r; + } test_vec[] = { + { UINT32_C( 320056621), + SIMDE_INFINITYHF }, + { UINT32_C(3415383383), + SIMDE_INFINITYHF }, + { UINT32_C(1622976446), + SIMDE_INFINITYHF }, + { UINT32_C(2898353212), + SIMDE_INFINITYHF }, + { UINT32_C(3640975919), + SIMDE_INFINITYHF }, + { UINT32_C(3449124973), + SIMDE_INFINITYHF }, + { UINT32_C(2512024660), + SIMDE_INFINITYHF }, + { UINT32_C(3347195033), + SIMDE_INFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 r = simde_vcvth_f16_u32(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + uint32_t a = simde_test_codegen_random_u32(); + simde_float16 r = simde_vcvth_f16_u32(a); + + simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_f16_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint64_t a; + simde_float16 r; + } test_vec[] = { + { UINT64_C(17434679020164388305), + SIMDE_INFINITYHF }, + { UINT64_C( 4394407184798167059), + SIMDE_INFINITYHF }, + { UINT64_C(14784942065000448500), + SIMDE_INFINITYHF }, + { UINT64_C(16104841540079380767), + SIMDE_INFINITYHF }, + { UINT64_C(15680967407793887610), + SIMDE_INFINITYHF }, + { UINT64_C(17723395172423298733), + SIMDE_INFINITYHF }, + { UINT64_C(15379376796937423911), + SIMDE_INFINITYHF }, + { UINT64_C( 3412336215793258606), + SIMDE_INFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 r = simde_vcvth_f16_u64(test_vec[i].a); + + simde_assert_equal_f16(r, test_vec[i].r, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + uint64_t a = simde_test_codegen_random_u64(); + simde_float16 r = simde_vcvth_f16_u64(a); + + simde_test_codegen_write_u64(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int @@ -631,7 +1073,7 @@ test_simde_vcvtq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { { -INT16_C( 943), INT16_C( 423), -INT16_C( 567), -INT16_C( 790), -INT16_C( 972), INT16_C( 286), -INT16_C( 874), INT16_C( 903) } }, { { SIMDE_FLOAT16_VALUE( -818.00), SIMDE_FLOAT16_VALUE( 261.25), SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( 880.50), SIMDE_FLOAT16_VALUE( 100.44), SIMDE_FLOAT16_VALUE( 66.19), SIMDE_FLOAT16_VALUE( -111.75), SIMDE_FLOAT16_VALUE( -173.88) }, - { -INT16_C( 818), INT16_C( 261), INT16_C( 630), INT16_C( 880), INT16_C( 100), INT16_C( 66), -INT16_C( 111), -INT16_C( 173) } } + { -INT16_C( 818), INT16_C( 261), INT16_C( 630), INT16_C( 880), INT16_C( 100), INT16_C( 66), -INT16_C( 111), -INT16_C( 173) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1253,22 +1695,22 @@ test_simde_vcvtq_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { int32_t a[4]; simde_float32 r[4]; } test_vec[] = { - { { -INT32_C( 932389316), -INT32_C( 1088008327), -INT32_C( 1969616841), -INT32_C( 1510957523) }, - { SIMDE_FLOAT32_C(-932389312.00), SIMDE_FLOAT32_C(-1088008320.00), SIMDE_FLOAT32_C(-1969616896.00), SIMDE_FLOAT32_C(-1510957568.00) } }, - { { -INT32_C( 55262820), INT32_C( 574170805), INT32_C( 1250738434), INT32_C( 1268545039) }, - { SIMDE_FLOAT32_C(-55262820.00), SIMDE_FLOAT32_C(574170816.00), SIMDE_FLOAT32_C(1250738432.00), SIMDE_FLOAT32_C(1268545024.00) } }, - { { -INT32_C( 854325164), -INT32_C( 1886569896), INT32_C( 1880762178), INT32_C( 1645546181) }, - { SIMDE_FLOAT32_C(-854325184.00), SIMDE_FLOAT32_C(-1886569856.00), SIMDE_FLOAT32_C(1880762240.00), SIMDE_FLOAT32_C(1645546240.00) } }, - { { -INT32_C( 2141271605), -INT32_C( 224225040), INT32_C( 1748774489), -INT32_C( 122431324) }, - { SIMDE_FLOAT32_C(-2141271552.00), SIMDE_FLOAT32_C(-224225040.00), SIMDE_FLOAT32_C(1748774528.00), SIMDE_FLOAT32_C(-122431328.00) } }, - { { INT32_C( 969328609), 
INT32_C( 1153979138), INT32_C( 1085596282), -INT32_C( 1214068244) }, - { SIMDE_FLOAT32_C(969328640.00), SIMDE_FLOAT32_C(1153979136.00), SIMDE_FLOAT32_C(1085596288.00), SIMDE_FLOAT32_C(-1214068224.00) } }, - { { -INT32_C( 2093481837), -INT32_C( 243934568), -INT32_C( 1386630647), INT32_C( 1806044554) }, - { SIMDE_FLOAT32_C(-2093481856.00), SIMDE_FLOAT32_C(-243934560.00), SIMDE_FLOAT32_C(-1386630656.00), SIMDE_FLOAT32_C(1806044544.00) } }, - { { -INT32_C( 693867308), INT32_C( 958098879), INT32_C( 1014615887), INT32_C( 754129817) }, - { SIMDE_FLOAT32_C(-693867328.00), SIMDE_FLOAT32_C(958098880.00), SIMDE_FLOAT32_C(1014615872.00), SIMDE_FLOAT32_C(754129792.00) } }, - { { -INT32_C( 1263588580), INT32_C( 262546438), INT32_C( 1639776214), -INT32_C( 506699252) }, - { SIMDE_FLOAT32_C(-1263588608.00), SIMDE_FLOAT32_C(262546432.00), SIMDE_FLOAT32_C(1639776256.00), SIMDE_FLOAT32_C(-506699264.00) } } + { { INT32_C( 582805677), INT32_C( 1143681869), INT32_C( 1367472729), -INT32_C( 1370995331) }, + { SIMDE_FLOAT32_C(582805696.00), SIMDE_FLOAT32_C(1143681920.00), SIMDE_FLOAT32_C(1367472768.00), SIMDE_FLOAT32_C(-1370995328.00) } }, + { { INT32_C( 1359924217), -INT32_C( 1396830435), INT32_C( 904922231), INT32_C( 168727901) }, + { SIMDE_FLOAT32_C(1359924224.00), SIMDE_FLOAT32_C(-1396830464.00), SIMDE_FLOAT32_C(904922240.00), SIMDE_FLOAT32_C(168727904.00) } }, + { { -INT32_C( 886257026), INT32_C( 1443846141), -INT32_C( 811101870), -INT32_C( 797052970) }, + { SIMDE_FLOAT32_C(-886257024.00), SIMDE_FLOAT32_C(1443846144.00), SIMDE_FLOAT32_C(-811101888.00), SIMDE_FLOAT32_C(-797052992.00) } }, + { { -INT32_C( 685667398), INT32_C( 293920666), INT32_C( 1078359267), -INT32_C( 2008328950) }, + { SIMDE_FLOAT32_C(-685667392.00), SIMDE_FLOAT32_C(293920672.00), SIMDE_FLOAT32_C(1078359296.00), SIMDE_FLOAT32_C(-2008328960.00) } }, + { { INT32_C( 492009247), INT32_C( 561210063), -INT32_C( 890233869), -INT32_C( 979735286) }, + { SIMDE_FLOAT32_C(492009248.00), SIMDE_FLOAT32_C(561210048.00), 
SIMDE_FLOAT32_C(-890233856.00), SIMDE_FLOAT32_C(-979735296.00) } }, + { { -INT32_C( 1835222024), INT32_C( 2107908250), -INT32_C( 1614878059), INT32_C( 1596393791) }, + { SIMDE_FLOAT32_C(-1835222016.00), SIMDE_FLOAT32_C(2107908224.00), SIMDE_FLOAT32_C(-1614878080.00), SIMDE_FLOAT32_C(1596393728.00) } }, + { { INT32_C( 1333557888), -INT32_C( 797904932), INT32_C( 362438666), -INT32_C( 975555379) }, + { SIMDE_FLOAT32_C(1333557888.00), SIMDE_FLOAT32_C(-797904960.00), SIMDE_FLOAT32_C(362438656.00), SIMDE_FLOAT32_C(-975555392.00) } }, + { { -INT32_C( 1990691089), INT32_C( 738655383), INT32_C( 650888422), INT32_C( 1317401293) }, + { SIMDE_FLOAT32_C(-1990691072.00), SIMDE_FLOAT32_C(738655360.00), SIMDE_FLOAT32_C(650888448.00), SIMDE_FLOAT32_C(1317401344.00) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1506,7 +1948,7 @@ test_simde_vcvt_f16_f32 (SIMDE_MUNIT_TEST_ARGS) { { { SIMDE_FLOAT32_C( -829.48), SIMDE_FLOAT32_C( -168.37), SIMDE_FLOAT32_C( -205.50), SIMDE_FLOAT32_C( -615.00) }, { SIMDE_FLOAT16_VALUE( -829.50), SIMDE_FLOAT16_VALUE( -168.38), SIMDE_FLOAT16_VALUE( -205.50), SIMDE_FLOAT16_VALUE( -615.00) } }, { { SIMDE_FLOAT32_C( -491.32), SIMDE_FLOAT32_C( 717.47), SIMDE_FLOAT32_C( -341.53), SIMDE_FLOAT32_C( -237.33) }, - { SIMDE_FLOAT16_VALUE( -491.25), SIMDE_FLOAT16_VALUE( 717.50), SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -237.38) } } + { SIMDE_FLOAT16_VALUE( -491.25), SIMDE_FLOAT16_VALUE( 717.50), SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -237.38) } } }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { @@ -1621,252 +2063,1569 @@ test_simde_vcvt_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { } static int -test_simde_vcvtas_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { - simde_float32 a; - int32_t r; +test_simde_vcvtah_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int16_t r; } test_vec[] = { #if !defined(SIMDE_FAST_CONVERSION_RANGE) - { SIMDE_MATH_NANF, - INT32_C( 
0) }, - { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), - INT32_MAX }, - { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(1000.0), - INT32_MIN }, + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MAX)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MIN)), + INT16_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MAX+1000)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT16_MIN-1000)), + INT16_MIN }, #endif - { SIMDE_FLOAT32_C(-55.5), - -INT32_C(56) }, - { SIMDE_FLOAT32_C(55.5), - INT32_C(56) }, - { SIMDE_FLOAT32_C(-755.699707), - -INT32_C(756) }, - { SIMDE_FLOAT32_C(-479.408081), - -INT32_C(479) }, - { SIMDE_FLOAT32_C(-192.237427), - -INT32_C(192) }, - { SIMDE_FLOAT32_C(92.246948), - INT32_C(92) }, - { SIMDE_FLOAT32_C(-620.131226), - -INT32_C(620) }, - { SIMDE_FLOAT32_C(658.543213), - INT32_C(659) }, - { SIMDE_FLOAT32_C(-58.790283), + { SIMDE_FLOAT16_VALUE( - 2.161), + -INT16_C( 2) }, + { SIMDE_FLOAT16_VALUE( 6.927), + INT16_C( 7) }, + { SIMDE_FLOAT16_VALUE( - 22.166), + -INT16_C( 22) }, + { SIMDE_FLOAT16_VALUE( 24.050), + INT16_C( 24) }, + { SIMDE_FLOAT16_VALUE( 3.665), + INT16_C( 4) }, + { SIMDE_FLOAT16_VALUE( - 16.715), + -INT16_C( 17) }, + { SIMDE_FLOAT16_VALUE( 0.250), + INT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( - 3.303), + -INT16_C( 3) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int16_t r = simde_vcvtah_s16_f16(a); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int16_t r = simde_vcvtah_s16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, r, 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtah_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT16_MAX)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT16_MAX+1000)), + UINT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, -1000)), + UINT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 19.373), + UINT16_C( 19) }, + { SIMDE_FLOAT16_VALUE( 1.450), + UINT16_C( 1) }, + { SIMDE_FLOAT16_VALUE( 13.179), + UINT16_C( 13) }, + { SIMDE_FLOAT16_VALUE( 11.179), + UINT16_C( 11) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint16_t r = simde_vcvtah_u16_f16(a); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcvtah_u16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtah_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MAX+1000ll)), + INT32_MAX }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MIN)), + INT32_MIN }, + { 
simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT32_MIN-1000ll)), + INT32_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 18.628), + INT32_C( 19) }, + { SIMDE_FLOAT16_VALUE( - 22.354), + -INT32_C( 22) }, + { SIMDE_FLOAT16_VALUE( - 24.547), + -INT32_C( 25) }, + { SIMDE_FLOAT16_VALUE( 1.754), + INT32_C( 2) }, + { SIMDE_FLOAT16_VALUE( 17.010), + INT32_C( 17) }, + { SIMDE_FLOAT16_VALUE( 10.181), + INT32_C( 10) }, + { SIMDE_FLOAT16_VALUE( 4.337), + INT32_C( 4) }, + { SIMDE_FLOAT16_VALUE( 15.753), + INT32_C( 16) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int32_t r = simde_vcvtah_s32_f16(a); + + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int32_t r = simde_vcvtah_s32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtah_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT32_MAX)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT32_MAX+1000ll)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( - 24.202), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 8.537), + UINT32_C( 9) }, + { SIMDE_FLOAT16_VALUE( 25.260), + UINT32_C( 25) }, + { SIMDE_FLOAT16_VALUE( 29.793), + UINT32_C( 30) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint32_t r = simde_vcvtah_u32_f16(a); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + 
+#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint32_t r = simde_vcvtah_u32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtah_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT64_MAX)), + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, INT64_MIN)), + INT64_MIN }, + #endif + { SIMDE_FLOAT16_VALUE( 12.535), + INT64_C( 13) }, + { SIMDE_FLOAT16_VALUE( - 6.257), + -INT64_C( 6) }, + { SIMDE_FLOAT16_VALUE( 10.120), + INT64_C( 10) }, + { SIMDE_FLOAT16_VALUE( 10.746), + INT64_C( 11) }, + { SIMDE_FLOAT16_VALUE( 25.689), + INT64_C( 26) }, + { SIMDE_FLOAT16_VALUE( 7.095), + INT64_C( 7) }, + { SIMDE_FLOAT16_VALUE( 9.180), + INT64_C( 9) }, + { SIMDE_FLOAT16_VALUE( - 10.937), + -INT64_C( 11) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int64_t r = simde_vcvtah_s64_f16(a); + + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int64_t r = simde_vcvtah_s64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtah_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT64_C( 0) }, + { 
simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32_t, UINT64_MAX)), + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT64_C( 0) }, + { SIMDE_FLOAT16_VALUE( -24.202), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 4.300), + UINT64_C( 4) }, + { SIMDE_FLOAT16_VALUE( 11.343), + UINT64_C( 11) }, + { SIMDE_FLOAT16_VALUE( 26.432), + UINT64_C( 26) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint64_t r = simde_vcvtah_u64_f16(a); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint64_t r = simde_vcvtah_u64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtas_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), + INT32_MIN }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(1000.0), + INT32_MIN }, + #endif + { SIMDE_FLOAT32_C(-55.5), + -INT32_C(56) }, + { SIMDE_FLOAT32_C(55.5), + INT32_C(56) }, + { SIMDE_FLOAT32_C(-755.699707), + -INT32_C(756) }, + { SIMDE_FLOAT32_C(-479.408081), + -INT32_C(479) }, + { SIMDE_FLOAT32_C(-192.237427), + -INT32_C(192) }, + { SIMDE_FLOAT32_C(92.246948), + INT32_C(92) }, + { SIMDE_FLOAT32_C(-620.131226), + -INT32_C(620) }, + { SIMDE_FLOAT32_C(658.543213), + INT32_C(659) }, + { SIMDE_FLOAT32_C(-58.790283), -INT32_C(59) }, { SIMDE_FLOAT32_C(-777.055359), -INT32_C(777) }, }; for (size_t i = 0 ; 
i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32 a = test_vec[i].a; - int32_t r = simde_vcvtas_s32_f32(a); + simde_float32 a = test_vec[i].a; + int32_t r = simde_vcvtas_s32_f32(a); + + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_int32_t r = simde_vcvtas_s32_f32(a); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtas_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + UINT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), + UINT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX), + UINT32_MAX }, + { SIMDE_MATH_INFINITYF, + UINT32_MAX }, + #endif + { SIMDE_FLOAT32_C(238.269043), + UINT32_C(238) }, + { SIMDE_FLOAT32_C(884.073364), + UINT32_C(884) }, + { SIMDE_FLOAT32_C(517.341492), + UINT32_C(517) }, + { SIMDE_FLOAT32_C(161.270676), + UINT32_C(161) }, + { SIMDE_FLOAT32_C(302.139801), + UINT32_C(302) }, + { SIMDE_FLOAT32_C(949.265381), + UINT32_C(949) }, + { SIMDE_FLOAT32_C(586.265320), + UINT32_C(586) }, + { SIMDE_FLOAT32_C(230.019547), + UINT32_C(230) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + uint32_t r = simde_vcvtas_u32_f32(a); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_uint32_t r = simde_vcvtas_u32_f32(a); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtad_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX) + SIMDE_FLOAT64_C(1000.0), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN), + INT64_MIN }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) - SIMDE_FLOAT64_C(1000.0), + INT64_MIN }, + #endif + { SIMDE_FLOAT64_C( -19.13), + -INT64_C( 19) }, + { SIMDE_FLOAT64_C( -897.18), + -INT64_C( 897) }, + { SIMDE_FLOAT64_C( -126.06), + -INT64_C( 126) }, + { SIMDE_FLOAT64_C( -662.96), + -INT64_C( 663) }, + { SIMDE_FLOAT64_C( -999.91), + -INT64_C( 1000) }, + { SIMDE_FLOAT64_C( -845.48), + -INT64_C( 845) }, + { SIMDE_FLOAT64_C( -431.61), + -INT64_C( 432) }, + { SIMDE_FLOAT64_C( -165.42), + -INT64_C( 165) }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + int64_t r = simde_vcvtad_s64_f64(a); + + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64 a = simde_test_codegen_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + int64_t r = simde_vcvtad_s64_f64(a); + + simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtad_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + UINT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(1000.0), + UINT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX), + UINT64_MAX }, + { 
SIMDE_FLOAT64_C( -463.65), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -607.03), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT64_C( 522.61), + UINT64_C( 523) }, + { SIMDE_FLOAT64_C( 960.37), + UINT64_C( 960) }, + { SIMDE_FLOAT64_C( 66.87), + UINT64_C( 67) }, + { SIMDE_FLOAT64_C( 176.41), + UINT64_C( 176) }, + { SIMDE_FLOAT64_C( 0.33), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 450.07), + UINT64_C( 450) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + uint64_t r = simde_vcvtad_u64_f64(a); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64 a = simde_test_codegen_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + uint64_t r = simde_vcvtad_u64_f64(a); + + simde_test_codegen_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + int16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 5.484), SIMDE_FLOAT16_VALUE( - 6.295), SIMDE_FLOAT16_VALUE( - 5.132), SIMDE_FLOAT16_VALUE( 12.566) }, + { INT16_C( 5), -INT16_C( 6), -INT16_C( 5), INT16_C( 13) } }, + { { SIMDE_FLOAT16_VALUE( - 25.918), SIMDE_FLOAT16_VALUE( 12.330), SIMDE_FLOAT16_VALUE( - 10.464), SIMDE_FLOAT16_VALUE( 9.083) }, + { -INT16_C( 26), INT16_C( 12), -INT16_C( 10), INT16_C( 9) } }, + { { SIMDE_FLOAT16_VALUE( - 1.292), SIMDE_FLOAT16_VALUE( - 11.738), SIMDE_FLOAT16_VALUE( 6.931), SIMDE_FLOAT16_VALUE( 29.815) }, + { -INT16_C( 1), -INT16_C( 12), INT16_C( 7), INT16_C( 30) } }, + { { SIMDE_FLOAT16_VALUE( - 6.023), SIMDE_FLOAT16_VALUE( - 16.490), SIMDE_FLOAT16_VALUE( 13.782), SIMDE_FLOAT16_VALUE( 3.109) }, + { -INT16_C( 6), -INT16_C( 16), INT16_C( 14), INT16_C( 3) } }, + { { SIMDE_FLOAT16_VALUE( 21.674), SIMDE_FLOAT16_VALUE( - 27.968), SIMDE_FLOAT16_VALUE( - 13.322), 
SIMDE_FLOAT16_VALUE( - 0.172) }, + { INT16_C( 22), -INT16_C( 28), -INT16_C( 13), INT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( - 10.360), SIMDE_FLOAT16_VALUE( 15.085), SIMDE_FLOAT16_VALUE( 26.131), SIMDE_FLOAT16_VALUE( 18.421) }, + { -INT16_C( 10), INT16_C( 15), INT16_C( 26), INT16_C( 18) } }, + { { SIMDE_FLOAT16_VALUE( - 20.906), SIMDE_FLOAT16_VALUE( 12.372), SIMDE_FLOAT16_VALUE( 11.952), SIMDE_FLOAT16_VALUE( 14.232) }, + { -INT16_C( 21), INT16_C( 12), INT16_C( 12), INT16_C( 14) } }, + { { SIMDE_FLOAT16_VALUE( 4.188), SIMDE_FLOAT16_VALUE( 3.209), SIMDE_FLOAT16_VALUE( - 3.902), SIMDE_FLOAT16_VALUE( - 4.983) }, + { INT16_C( 4), INT16_C( 3), -INT16_C( 4), -INT16_C( 5) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_int16x4_t r = simde_vcvta_s16_f16(a); + + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_int16x4_t r = simde_vcvta_s16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 91.88), SIMDE_FLOAT16_VALUE( 32.12), SIMDE_FLOAT16_VALUE( 15.08), SIMDE_FLOAT16_VALUE( 33.19) }, + { UINT16_C( 92), UINT16_C( 32), UINT16_C( 15), UINT16_C( 33) } }, + { { SIMDE_FLOAT16_VALUE( 76.44), SIMDE_FLOAT16_VALUE( 43.81), SIMDE_FLOAT16_VALUE( 91.31), SIMDE_FLOAT16_VALUE( 41.16) }, + { UINT16_C( 76), UINT16_C( 44), UINT16_C( 91), UINT16_C( 41) } }, + { { SIMDE_FLOAT16_VALUE( 69.56), SIMDE_FLOAT16_VALUE( 98.00), SIMDE_FLOAT16_VALUE( 93.50), SIMDE_FLOAT16_VALUE( 29.94) }, + { UINT16_C( 70), UINT16_C( 98), 
UINT16_C( 94), UINT16_C( 30) } }, + { { SIMDE_FLOAT16_VALUE( 47.88), SIMDE_FLOAT16_VALUE( 20.88), SIMDE_FLOAT16_VALUE( 43.38), SIMDE_FLOAT16_VALUE( 30.53) }, + { UINT16_C( 48), UINT16_C( 21), UINT16_C( 43), UINT16_C( 31) } }, + { { SIMDE_FLOAT16_VALUE( 51.06), SIMDE_FLOAT16_VALUE( 29.81), SIMDE_FLOAT16_VALUE( 36.16), SIMDE_FLOAT16_VALUE( 52.94) }, + { UINT16_C( 51), UINT16_C( 30), UINT16_C( 36), UINT16_C( 53) } }, + { { SIMDE_FLOAT16_VALUE( 77.94), SIMDE_FLOAT16_VALUE( 76.12), SIMDE_FLOAT16_VALUE( 36.69), SIMDE_FLOAT16_VALUE( 46.81) }, + { UINT16_C( 78), UINT16_C( 76), UINT16_C( 37), UINT16_C( 47) } }, + { { SIMDE_FLOAT16_VALUE( 94.00), SIMDE_FLOAT16_VALUE( 85.12), SIMDE_FLOAT16_VALUE( 91.81), SIMDE_FLOAT16_VALUE( 98.12) }, + { UINT16_C( 94), UINT16_C( 85), UINT16_C( 92), UINT16_C( 98) } }, + { { SIMDE_FLOAT16_VALUE( 93.12), SIMDE_FLOAT16_VALUE( 2.32), SIMDE_FLOAT16_VALUE( 40.25), SIMDE_FLOAT16_VALUE( 85.00) }, + { UINT16_C( 93), UINT16_C( 2), UINT16_C( 40), UINT16_C( 85) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcvta_u16_f16(a); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(0.0f, 100.0f); + simde_uint16x4_t r = simde_vcvta_u16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[2]; + int32_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), + HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(1000.0) }, + { INT32_MAX, INT32_MIN } 
}, + { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, + { INT32_C( 0), INT32_MAX } }, + #endif + { { SIMDE_FLOAT32_C(-137.097046), SIMDE_FLOAT32_C(632.638672) }, + { -INT32_C(137), INT32_C(633) } }, + { { SIMDE_FLOAT32_C(135.947388), SIMDE_FLOAT32_C(-204.564087) }, + { INT32_C(136), -INT32_C(205) } }, + { { SIMDE_FLOAT32_C(422.245239), SIMDE_FLOAT32_C(972.902710) }, + { INT32_C(422), INT32_C(973) } }, + { { SIMDE_FLOAT32_C(-291.536621), SIMDE_FLOAT32_C(-849.554077) }, + { -INT32_C(292), -INT32_C(850) } }, + { { SIMDE_FLOAT32_C(-9.575623), SIMDE_FLOAT32_C(318.716919) }, + { -INT32_C(10), INT32_C(319) } }, + { { SIMDE_FLOAT32_C(-734.776367), SIMDE_FLOAT32_C(-510.679810) }, + { -INT32_C(735), -INT32_C(511) } }, + { { SIMDE_FLOAT32_C(-457.886719), SIMDE_FLOAT32_C(655.444580) }, + { -INT32_C(458), INT32_C(655) } }, + { { SIMDE_FLOAT32_C(847.546021), SIMDE_FLOAT32_C(849.980591) }, + { INT32_C(848), INT32_C(850) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_int32x2_t r = simde_vcvta_s32_f32(a); + + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_int32x2_t r = simde_vcvta_s32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), + SIMDE_FLOAT32_C(-1000.0) }, + { UINT32_MAX, 0 } }, + { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, + { UINT32_C( 0), UINT32_MAX } }, + #endif + { { 
SIMDE_FLOAT32_C(518.760376), SIMDE_FLOAT32_C(796.769409) }, + { UINT32_C(519), UINT32_C(797) } }, + { { SIMDE_FLOAT32_C(161.204361), SIMDE_FLOAT32_C(381.395020) }, + { UINT32_C(161), UINT32_C(381) } }, + { { SIMDE_FLOAT32_C(803.856689), SIMDE_FLOAT32_C(971.859131) }, + { UINT32_C(804), UINT32_C(972) } }, + { { SIMDE_FLOAT32_C(445.868378), SIMDE_FLOAT32_C(558.828979) }, + { UINT32_C(446), UINT32_C(559) } }, + { { SIMDE_FLOAT32_C(83.968452), SIMDE_FLOAT32_C(140.023712) }, + { UINT32_C(84), UINT32_C(140) } }, + { { SIMDE_FLOAT32_C(230.921921), SIMDE_FLOAT32_C(235.137802) }, + { UINT32_C(231), UINT32_C(235) } }, + { { SIMDE_FLOAT32_C(367.292725), SIMDE_FLOAT32_C(815.052429) }, + { UINT32_C(367), UINT32_C(815) } }, + { { SIMDE_FLOAT32_C(13.168660), SIMDE_FLOAT32_C(406.672668) }, + { UINT32_C(13), UINT32_C(407) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_uint32x2_t r = simde_vcvta_u32_f32(a); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_uint32x2_t r = simde_vcvta_u32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[1]; + int64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C(467711.875) }, + { INT64_C( 467712) } }, + { { -SIMDE_FLOAT64_C(975485.312) }, + { -INT64_C( 975485) } }, + { { SIMDE_FLOAT64_C(313635.750) }, + { INT64_C( 313636) } }, + { { SIMDE_FLOAT64_C(699213.000) }, + { INT64_C( 699213) } }, + { { -SIMDE_FLOAT64_C(798464.875) }, + { -INT64_C( 798465) } }, + { { SIMDE_FLOAT64_C(501944.750) }, + { INT64_C( 501945) } }, + { { 
-SIMDE_FLOAT64_C(986927.375) }, + { -INT64_C( 986927) } }, + { { -SIMDE_FLOAT64_C(797904.438) }, + { -INT64_C( 797904) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_int64x1_t r = simde_vcvta_s64_f64(a); + + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x1_t r = simde_vcvta_s64_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvta_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { -SIMDE_FLOAT64_C(866879.562) }, + { UINT64_C( 0) } }, + { { -SIMDE_FLOAT64_C(144314.438) }, + { UINT64_C( 0) } }, + { { -SIMDE_FLOAT64_C(515307.688) }, + { UINT64_C( 0) } }, + { { -SIMDE_FLOAT64_C(833382.875) }, + { UINT64_C( 0) } }, + { { -SIMDE_FLOAT64_C(579680.125) }, + { UINT64_C( 0) } }, + #endif + { { SIMDE_FLOAT64_C(869455.375) }, + { UINT64_C( 869455) } }, + { { SIMDE_FLOAT64_C(372019.875) }, + { UINT64_C( 372020) } }, + { { SIMDE_FLOAT64_C(487323.250) }, + { UINT64_C( 487323) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_uint64x1_t r = simde_vcvta_u64_f64(a); + + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x1_t r = simde_vcvta_u64_f64(a); + + 
simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtaq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + int16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 26.678), SIMDE_FLOAT16_VALUE( 11.961), SIMDE_FLOAT16_VALUE( - 5.477), SIMDE_FLOAT16_VALUE( 2.192), + SIMDE_FLOAT16_VALUE( - 23.653), SIMDE_FLOAT16_VALUE( 24.091), SIMDE_FLOAT16_VALUE( 16.211), SIMDE_FLOAT16_VALUE( 13.136) }, + { INT16_C( 27), INT16_C( 12), -INT16_C( 5), INT16_C( 2), + -INT16_C( 24), INT16_C( 24), INT16_C( 16), INT16_C( 13) } }, + { { SIMDE_FLOAT16_VALUE( - 10.106), SIMDE_FLOAT16_VALUE( 2.807), SIMDE_FLOAT16_VALUE( 0.036), SIMDE_FLOAT16_VALUE( 16.987), + SIMDE_FLOAT16_VALUE( - 8.282), SIMDE_FLOAT16_VALUE( - 0.626), SIMDE_FLOAT16_VALUE( 5.032), SIMDE_FLOAT16_VALUE( - 0.023) }, + { -INT16_C( 10), INT16_C( 3), INT16_C( 0), INT16_C( 17), + -INT16_C( 8), -INT16_C( 1), INT16_C( 5), INT16_C( 0) } }, + { { SIMDE_FLOAT16_VALUE( 0.850), SIMDE_FLOAT16_VALUE( 28.954), SIMDE_FLOAT16_VALUE( - 11.152), SIMDE_FLOAT16_VALUE( - 1.218), + SIMDE_FLOAT16_VALUE( 4.105), SIMDE_FLOAT16_VALUE( 26.947), SIMDE_FLOAT16_VALUE( 29.786), SIMDE_FLOAT16_VALUE( - 20.433) }, + { INT16_C( 1), INT16_C( 29), -INT16_C( 11), -INT16_C( 1), + INT16_C( 4), INT16_C( 27), INT16_C( 30), -INT16_C( 20) } }, + { { SIMDE_FLOAT16_VALUE( 9.218), SIMDE_FLOAT16_VALUE( 27.413), SIMDE_FLOAT16_VALUE( 29.857), SIMDE_FLOAT16_VALUE( 25.421), + SIMDE_FLOAT16_VALUE( 20.930), SIMDE_FLOAT16_VALUE( 4.858), SIMDE_FLOAT16_VALUE( 1.136), SIMDE_FLOAT16_VALUE( - 9.511) }, + { INT16_C( 9), INT16_C( 27), INT16_C( 30), INT16_C( 25), + INT16_C( 21), INT16_C( 5), INT16_C( 1), -INT16_C( 10) } }, + { { SIMDE_FLOAT16_VALUE( - 5.330), SIMDE_FLOAT16_VALUE( - 28.994), SIMDE_FLOAT16_VALUE( 6.794), SIMDE_FLOAT16_VALUE( 12.383), + SIMDE_FLOAT16_VALUE( - 27.061), SIMDE_FLOAT16_VALUE( - 14.157), 
SIMDE_FLOAT16_VALUE( 19.177), SIMDE_FLOAT16_VALUE( 24.741) }, + { -INT16_C( 5), -INT16_C( 29), INT16_C( 7), INT16_C( 12), + -INT16_C( 27), -INT16_C( 14), INT16_C( 19), INT16_C( 25) } }, + { { SIMDE_FLOAT16_VALUE( - 10.145), SIMDE_FLOAT16_VALUE( 14.145), SIMDE_FLOAT16_VALUE( 10.567), SIMDE_FLOAT16_VALUE( - 18.585), + SIMDE_FLOAT16_VALUE( - 24.925), SIMDE_FLOAT16_VALUE( - 27.646), SIMDE_FLOAT16_VALUE( 26.222), SIMDE_FLOAT16_VALUE( 4.194) }, + { -INT16_C( 10), INT16_C( 14), INT16_C( 11), -INT16_C( 19), + -INT16_C( 25), -INT16_C( 28), INT16_C( 26), INT16_C( 4) } }, + { { SIMDE_FLOAT16_VALUE( 10.447), SIMDE_FLOAT16_VALUE( - 19.586), SIMDE_FLOAT16_VALUE( - 6.815), SIMDE_FLOAT16_VALUE( - 16.894), + SIMDE_FLOAT16_VALUE( - 10.842), SIMDE_FLOAT16_VALUE( 23.604), SIMDE_FLOAT16_VALUE( - 11.436), SIMDE_FLOAT16_VALUE( - 10.067) }, + { INT16_C( 10), -INT16_C( 20), -INT16_C( 7), -INT16_C( 17), + -INT16_C( 11), INT16_C( 24), -INT16_C( 11), -INT16_C( 10) } }, + { { SIMDE_FLOAT16_VALUE( - 16.757), SIMDE_FLOAT16_VALUE( - 21.348), SIMDE_FLOAT16_VALUE( - 25.380), SIMDE_FLOAT16_VALUE( 26.218), + SIMDE_FLOAT16_VALUE( - 10.699), SIMDE_FLOAT16_VALUE( 27.303), SIMDE_FLOAT16_VALUE( 23.601), SIMDE_FLOAT16_VALUE( 9.547) }, + { -INT16_C( 17), -INT16_C( 21), -INT16_C( 25), INT16_C( 26), + -INT16_C( 11), INT16_C( 27), INT16_C( 24), INT16_C( 10) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_int16x8_t r = simde_vcvtaq_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_int16x8_t r = simde_vcvtaq_s16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int 
+test_simde_vcvtaq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 34.44), SIMDE_FLOAT16_VALUE( 55.34), SIMDE_FLOAT16_VALUE( 18.20), SIMDE_FLOAT16_VALUE( 10.84), + SIMDE_FLOAT16_VALUE( 99.12), SIMDE_FLOAT16_VALUE( 9.53), SIMDE_FLOAT16_VALUE( 52.00), SIMDE_FLOAT16_VALUE( 68.75) }, + { UINT16_C( 34), UINT16_C( 55), UINT16_C( 18), UINT16_C( 11), UINT16_C( 99), UINT16_C( 10), UINT16_C( 52), UINT16_C( 69) } }, + { { SIMDE_FLOAT16_VALUE( 7.50), SIMDE_FLOAT16_VALUE( 45.50), SIMDE_FLOAT16_VALUE( 98.69), SIMDE_FLOAT16_VALUE( 55.38), + SIMDE_FLOAT16_VALUE( 66.38), SIMDE_FLOAT16_VALUE( 42.06), SIMDE_FLOAT16_VALUE( 85.94), SIMDE_FLOAT16_VALUE( 17.45) }, + { UINT16_C( 8), UINT16_C( 46), UINT16_C( 99), UINT16_C( 55), UINT16_C( 66), UINT16_C( 42), UINT16_C( 86), UINT16_C( 17) } }, + { { SIMDE_FLOAT16_VALUE( 71.88), SIMDE_FLOAT16_VALUE( 22.06), SIMDE_FLOAT16_VALUE( 70.38), SIMDE_FLOAT16_VALUE( 49.84), + SIMDE_FLOAT16_VALUE( 98.25), SIMDE_FLOAT16_VALUE( 7.09), SIMDE_FLOAT16_VALUE( 96.62), SIMDE_FLOAT16_VALUE( 92.19) }, + { UINT16_C( 72), UINT16_C( 22), UINT16_C( 70), UINT16_C( 50), UINT16_C( 98), UINT16_C( 7), UINT16_C( 97), UINT16_C( 92) } }, + { { SIMDE_FLOAT16_VALUE( 92.19), SIMDE_FLOAT16_VALUE( 88.44), SIMDE_FLOAT16_VALUE( 90.31), SIMDE_FLOAT16_VALUE( 85.31), + SIMDE_FLOAT16_VALUE( 90.75), SIMDE_FLOAT16_VALUE( 30.58), SIMDE_FLOAT16_VALUE( 70.38), SIMDE_FLOAT16_VALUE( 25.20) }, + { UINT16_C( 92), UINT16_C( 88), UINT16_C( 90), UINT16_C( 85), UINT16_C( 91), UINT16_C( 31), UINT16_C( 70), UINT16_C( 25) } }, + { { SIMDE_FLOAT16_VALUE( 85.94), SIMDE_FLOAT16_VALUE( 88.56), SIMDE_FLOAT16_VALUE( 36.06), SIMDE_FLOAT16_VALUE( 85.06), + SIMDE_FLOAT16_VALUE( 98.06), SIMDE_FLOAT16_VALUE( 88.06), SIMDE_FLOAT16_VALUE( 53.81), SIMDE_FLOAT16_VALUE( 5.58) }, + { UINT16_C( 86), UINT16_C( 89), UINT16_C( 36), UINT16_C( 85), UINT16_C( 98), UINT16_C( 88), UINT16_C( 54), UINT16_C( 6) } }, + { { SIMDE_FLOAT16_VALUE( 
33.56), SIMDE_FLOAT16_VALUE( 52.50), SIMDE_FLOAT16_VALUE( 60.97), SIMDE_FLOAT16_VALUE( 99.94), + SIMDE_FLOAT16_VALUE( 94.56), SIMDE_FLOAT16_VALUE( 46.88), SIMDE_FLOAT16_VALUE( 17.39), SIMDE_FLOAT16_VALUE( 66.44) }, + { UINT16_C( 34), UINT16_C( 53), UINT16_C( 61), UINT16_C( 100), UINT16_C( 95), UINT16_C( 47), UINT16_C( 17), UINT16_C( 66) } }, + { { SIMDE_FLOAT16_VALUE( 68.94), SIMDE_FLOAT16_VALUE( 87.75), SIMDE_FLOAT16_VALUE( 16.28), SIMDE_FLOAT16_VALUE( 67.19), + SIMDE_FLOAT16_VALUE( 94.88), SIMDE_FLOAT16_VALUE( 12.93), SIMDE_FLOAT16_VALUE( 59.38), SIMDE_FLOAT16_VALUE( 87.06) }, + { UINT16_C( 69), UINT16_C( 88), UINT16_C( 16), UINT16_C( 67), UINT16_C( 95), UINT16_C( 13), UINT16_C( 59), UINT16_C( 87) } }, + { { SIMDE_FLOAT16_VALUE( 1.39), SIMDE_FLOAT16_VALUE( 49.69), SIMDE_FLOAT16_VALUE( 72.44), SIMDE_FLOAT16_VALUE( 92.19), + SIMDE_FLOAT16_VALUE( 80.25), SIMDE_FLOAT16_VALUE( 42.75), SIMDE_FLOAT16_VALUE( 17.38), SIMDE_FLOAT16_VALUE( 66.19) }, + { UINT16_C( 1), UINT16_C( 50), UINT16_C( 72), UINT16_C( 92), UINT16_C( 80), UINT16_C( 43), UINT16_C( 17), UINT16_C( 66) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcvtaq_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(0.0f, 100.0f); + simde_uint16x8_t r = simde_vcvtaq_u16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtaq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + int32_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( -91.12), SIMDE_FLOAT32_C( 276.21), SIMDE_FLOAT32_C( 314.70), SIMDE_FLOAT32_C( -855.95) }, + { -INT32_C( 91), 
INT32_C( 276), INT32_C( 315), -INT32_C( 856) } }, + { { SIMDE_FLOAT32_C( -996.84), SIMDE_FLOAT32_C( 540.73), SIMDE_FLOAT32_C( 797.87), SIMDE_FLOAT32_C( 73.20) }, + { -INT32_C( 997), INT32_C( 541), INT32_C( 798), INT32_C( 73) } }, + { { SIMDE_FLOAT32_C( 982.29), SIMDE_FLOAT32_C( 254.11), SIMDE_FLOAT32_C( -220.70), SIMDE_FLOAT32_C( -655.84) }, + { INT32_C( 982), INT32_C( 254), -INT32_C( 221), -INT32_C( 656) } }, + { { SIMDE_FLOAT32_C( 775.61), SIMDE_FLOAT32_C( 688.80), SIMDE_FLOAT32_C( -639.40), SIMDE_FLOAT32_C( 666.72) }, + { INT32_C( 776), INT32_C( 689), -INT32_C( 639), INT32_C( 667) } }, + { { SIMDE_FLOAT32_C( 587.36), SIMDE_FLOAT32_C( -397.45), SIMDE_FLOAT32_C( -684.16), SIMDE_FLOAT32_C( -689.59) }, + { INT32_C( 587), -INT32_C( 397), -INT32_C( 684), -INT32_C( 690) } }, + { { SIMDE_FLOAT32_C( -723.69), SIMDE_FLOAT32_C( 904.22), SIMDE_FLOAT32_C( 91.42), SIMDE_FLOAT32_C( -642.96) }, + { -INT32_C( 724), INT32_C( 904), INT32_C( 91), -INT32_C( 643) } }, + { { SIMDE_FLOAT32_C( 357.06), SIMDE_FLOAT32_C( -318.89), SIMDE_FLOAT32_C( -860.25), SIMDE_FLOAT32_C( -108.96) }, + { INT32_C( 357), -INT32_C( 319), -INT32_C( 860), -INT32_C( 109) } }, + { { SIMDE_FLOAT32_C( 333.78), SIMDE_FLOAT32_C( -14.76), SIMDE_FLOAT32_C( -212.33), SIMDE_FLOAT32_C( -757.34) }, + { INT32_C( 334), -INT32_C( 15), -INT32_C( 212), -INT32_C( 757) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_int32x4_t r = simde_vcvtaq_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_int32x4_t r = simde_vcvtaq_s32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int 
+test_simde_vcvtaq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + uint32_t r[4]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(10000.0), SIMDE_FLOAT32_C(-10000.0), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, + { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + #endif + { { SIMDE_FLOAT32_C(99.796890), SIMDE_FLOAT32_C(640.625061), SIMDE_FLOAT32_C(761.249390), SIMDE_FLOAT32_C(134.496353) }, + { UINT32_C(100), UINT32_C(641), UINT32_C(761), UINT32_C(134) } }, + { { SIMDE_FLOAT32_C(740.153748), SIMDE_FLOAT32_C(226.072403), SIMDE_FLOAT32_C(458.142426), SIMDE_FLOAT32_C(312.975708) }, + { UINT32_C(740), UINT32_C(226), UINT32_C(458), UINT32_C(313) } }, + { { SIMDE_FLOAT32_C(881.748596), SIMDE_FLOAT32_C(315.416504), SIMDE_FLOAT32_C(657.340698), SIMDE_FLOAT32_C(492.805298) }, + { UINT32_C(882), UINT32_C(315), UINT32_C(657), UINT32_C(493) } }, + { { SIMDE_FLOAT32_C(27.446901), SIMDE_FLOAT32_C(904.086670), SIMDE_FLOAT32_C(857.025085), SIMDE_FLOAT32_C(677.571045) }, + { UINT32_C(27), UINT32_C(904), UINT32_C(857), UINT32_C(678) } }, + { { SIMDE_FLOAT32_C(666.073059), SIMDE_FLOAT32_C(988.718506), SIMDE_FLOAT32_C(51.321510), SIMDE_FLOAT32_C(353.845490) }, + { UINT32_C(666), UINT32_C(989), UINT32_C(51), UINT32_C(354) } }, + { { SIMDE_FLOAT32_C(307.715729), SIMDE_FLOAT32_C(75.778244), SIMDE_FLOAT32_C(748.057373), SIMDE_FLOAT32_C(533.695679) }, + { UINT32_C(308), UINT32_C(76), UINT32_C(748), UINT32_C(534) } }, + { { SIMDE_FLOAT32_C(949.232422), SIMDE_FLOAT32_C(163.359085), SIMDE_FLOAT32_C(946.573120), SIMDE_FLOAT32_C(713.519104) }, + { UINT32_C(949), UINT32_C(163), UINT32_C(947), UINT32_C(714) } }, + { { SIMDE_FLOAT32_C(592.152954), SIMDE_FLOAT32_C(751.258545), SIMDE_FLOAT32_C(645.332520), SIMDE_FLOAT32_C(894.986938) }, + { UINT32_C(592), UINT32_C(751), UINT32_C(645), UINT32_C(895) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_uint32x4_t r = simde_vcvtaq_u32_f32(a); + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_uint32x4_t r = simde_vcvtaq_u32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtaq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + int64_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX) + SIMDE_FLOAT64_C(10000.0), HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) - SIMDE_FLOAT64_C(10000.0) }, + { INT64_MAX, INT64_MIN } }, + { { SIMDE_MATH_NAN, SIMDE_MATH_INFINITY }, + { INT64_C( 0), INT64_MAX } }, + #endif + { { SIMDE_FLOAT64_C(649473.000), -SIMDE_FLOAT64_C(977642.625) }, + { INT64_C( 649473), -INT64_C( 977643) } }, + { { -SIMDE_FLOAT64_C(179199.750), SIMDE_FLOAT64_C(179289.625) }, + { -INT64_C( 179200), INT64_C( 179290) } }, + { { -SIMDE_FLOAT64_C(454067.688), -SIMDE_FLOAT64_C(293304.875) }, + { -INT64_C( 454068), -INT64_C( 293305) } }, + { { -SIMDE_FLOAT64_C(158054.938), SIMDE_FLOAT64_C(237602.250) }, + { -INT64_C( 158055), INT64_C( 237602) } }, + { { SIMDE_FLOAT64_C(919757.375), SIMDE_FLOAT64_C(229895.000) }, + { INT64_C( 919757), INT64_C( 229895) } }, + { { -SIMDE_FLOAT64_C(977677.688), -SIMDE_FLOAT64_C(625679.750) }, + { -INT64_C( 977678), -INT64_C( 625680) } }, + { { SIMDE_FLOAT64_C(589580.875), SIMDE_FLOAT64_C(107357.750) }, + { INT64_C( 589581), INT64_C( 107358) } }, + { { -SIMDE_FLOAT64_C(409446.562), -SIMDE_FLOAT64_C(469972.188) }, + { -INT64_C( 409447), -INT64_C( 469972) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) 
/ sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_int64x2_t r = simde_vcvtaq_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); /* two-lane generator: 'a' is float64x2 and is written with write_f64x2 below */ + simde_int64x2_t r = simde_vcvtaq_s64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtaq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + uint64_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(10000.0), SIMDE_FLOAT64_C(-10000.0) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { SIMDE_MATH_NAN, SIMDE_MATH_INFINITY }, + { UINT64_C( 0), UINT64_MAX } }, + #endif + { { SIMDE_FLOAT64_C( 633.17), SIMDE_FLOAT64_C( 804.93) }, + { UINT64_C( 633), UINT64_C( 805) } }, + { { SIMDE_FLOAT64_C( 703.41), SIMDE_FLOAT64_C( 627.44) }, + { UINT64_C( 703), UINT64_C( 627) } }, + { { SIMDE_FLOAT64_C( 462.56), SIMDE_FLOAT64_C( 955.00) }, + { UINT64_C( 463), UINT64_C( 955) } }, + { { SIMDE_FLOAT64_C( 21.07), SIMDE_FLOAT64_C( 229.08) }, + { UINT64_C( 21), UINT64_C( 229) } }, + { { SIMDE_FLOAT64_C( 979.06), SIMDE_FLOAT64_C( 321.00) }, + { UINT64_C( 979), UINT64_C( 321) } }, + { { SIMDE_FLOAT64_C( 153.68), SIMDE_FLOAT64_C( 825.89) }, + { UINT64_C( 154), UINT64_C( 826) } }, + { { SIMDE_FLOAT64_C( 206.68), SIMDE_FLOAT64_C( 853.41) }, + { UINT64_C( 207), UINT64_C( 853) } }, + { { SIMDE_FLOAT64_C( 159.03), SIMDE_FLOAT64_C( 851.04) }, + { UINT64_C( 159), UINT64_C( 851) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + 
simde_uint64x2_t r = simde_vcvtaq_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(0.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x2_t r = simde_vcvtaq_u64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvt_high_f16_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 buf[4]; + simde_float32 a[4]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 24.372), SIMDE_FLOAT16_VALUE( 23.118), SIMDE_FLOAT16_VALUE( - 23.874), SIMDE_FLOAT16_VALUE( - 17.046) }, + { -SIMDE_FLOAT32_C(7209.161), -SIMDE_FLOAT32_C(3427.483), SIMDE_FLOAT32_C(4040.883), -SIMDE_FLOAT32_C(2098.786) }, + { SIMDE_FLOAT16_VALUE( - 24.372), SIMDE_FLOAT16_VALUE( 23.118), SIMDE_FLOAT16_VALUE( - 23.874), SIMDE_FLOAT16_VALUE( - 17.046), + SIMDE_FLOAT16_VALUE( -7209.161), SIMDE_FLOAT16_VALUE( -3427.483), SIMDE_FLOAT16_VALUE(4040.883), SIMDE_FLOAT16_VALUE( -2098.786) } }, + { { SIMDE_FLOAT16_VALUE( - 12.176), SIMDE_FLOAT16_VALUE( 8.252), SIMDE_FLOAT16_VALUE( 8.152), SIMDE_FLOAT16_VALUE( - 27.851) }, + { -SIMDE_FLOAT32_C(6963.870), -SIMDE_FLOAT32_C(5850.501), SIMDE_FLOAT32_C(3869.630), SIMDE_FLOAT32_C(3814.924) }, + { SIMDE_FLOAT16_VALUE( - 12.176), SIMDE_FLOAT16_VALUE( 8.252), SIMDE_FLOAT16_VALUE( 8.152), SIMDE_FLOAT16_VALUE( - 27.851), + SIMDE_FLOAT16_VALUE( -6963.870), SIMDE_FLOAT16_VALUE( -5850.501), SIMDE_FLOAT16_VALUE(3869.630), SIMDE_FLOAT16_VALUE(3814.924) } }, + { { SIMDE_FLOAT16_VALUE( - 14.831), SIMDE_FLOAT16_VALUE( - 2.611), SIMDE_FLOAT16_VALUE( 28.160), SIMDE_FLOAT16_VALUE( - 2.806) }, + { SIMDE_FLOAT32_C(8370.572), -SIMDE_FLOAT32_C(9646.048), -SIMDE_FLOAT32_C( 914.674), -SIMDE_FLOAT32_C( 96.147) }, + { SIMDE_FLOAT16_VALUE( - 
14.831), SIMDE_FLOAT16_VALUE( - 2.611), SIMDE_FLOAT16_VALUE( 28.160), SIMDE_FLOAT16_VALUE( - 2.806), + SIMDE_FLOAT16_VALUE(8370.572), SIMDE_FLOAT16_VALUE( -9646.048), SIMDE_FLOAT16_VALUE( - 914.674), SIMDE_FLOAT16_VALUE( - 96.147) } }, + { { SIMDE_FLOAT16_VALUE( - 20.091), SIMDE_FLOAT16_VALUE( 18.641), SIMDE_FLOAT16_VALUE( - 4.159), SIMDE_FLOAT16_VALUE( 26.287) }, + { -SIMDE_FLOAT32_C(6804.170), SIMDE_FLOAT32_C(6809.797), -SIMDE_FLOAT32_C(6641.329), SIMDE_FLOAT32_C(5256.305) }, + { SIMDE_FLOAT16_VALUE( - 20.091), SIMDE_FLOAT16_VALUE( 18.641), SIMDE_FLOAT16_VALUE( - 4.159), SIMDE_FLOAT16_VALUE( 26.287), + SIMDE_FLOAT16_VALUE( -6804.170), SIMDE_FLOAT16_VALUE(6809.797), SIMDE_FLOAT16_VALUE( -6641.329), SIMDE_FLOAT16_VALUE(5256.305) } }, + { { SIMDE_FLOAT16_VALUE( 11.268), SIMDE_FLOAT16_VALUE( 24.385), SIMDE_FLOAT16_VALUE( - 11.918), SIMDE_FLOAT16_VALUE( - 20.926) }, + { -SIMDE_FLOAT32_C(9859.181), SIMDE_FLOAT32_C(7990.297), -SIMDE_FLOAT32_C(1745.461), -SIMDE_FLOAT32_C(6267.518) }, + { SIMDE_FLOAT16_VALUE( 11.268), SIMDE_FLOAT16_VALUE( 24.385), SIMDE_FLOAT16_VALUE( - 11.918), SIMDE_FLOAT16_VALUE( - 20.926), + SIMDE_FLOAT16_VALUE( -9859.181), SIMDE_FLOAT16_VALUE(7990.297), SIMDE_FLOAT16_VALUE( -1745.461), SIMDE_FLOAT16_VALUE( -6267.518) } }, + { { SIMDE_FLOAT16_VALUE( - 13.564), SIMDE_FLOAT16_VALUE( - 1.015), SIMDE_FLOAT16_VALUE( 28.694), SIMDE_FLOAT16_VALUE( - 22.499) }, + { -SIMDE_FLOAT32_C(6059.556), SIMDE_FLOAT32_C(6039.471), SIMDE_FLOAT32_C(9932.301), SIMDE_FLOAT32_C(7266.508) }, + { SIMDE_FLOAT16_VALUE( - 13.564), SIMDE_FLOAT16_VALUE( - 1.015), SIMDE_FLOAT16_VALUE( 28.694), SIMDE_FLOAT16_VALUE( - 22.499), + SIMDE_FLOAT16_VALUE( -6059.556), SIMDE_FLOAT16_VALUE(6039.471), SIMDE_FLOAT16_VALUE(9932.301), SIMDE_FLOAT16_VALUE(7266.508) } }, + { { SIMDE_FLOAT16_VALUE( - 26.110), SIMDE_FLOAT16_VALUE( 28.117), SIMDE_FLOAT16_VALUE( - 26.928), SIMDE_FLOAT16_VALUE( 2.198) }, + { -SIMDE_FLOAT32_C(4140.171), -SIMDE_FLOAT32_C(5252.628), SIMDE_FLOAT32_C(5685.721), 
-SIMDE_FLOAT32_C(3753.093) }, + { SIMDE_FLOAT16_VALUE( - 26.110), SIMDE_FLOAT16_VALUE( 28.117), SIMDE_FLOAT16_VALUE( - 26.928), SIMDE_FLOAT16_VALUE( 2.198), + SIMDE_FLOAT16_VALUE( -4140.171), SIMDE_FLOAT16_VALUE( -5252.628), SIMDE_FLOAT16_VALUE(5685.721), SIMDE_FLOAT16_VALUE( -3753.093) } }, + { { SIMDE_FLOAT16_VALUE( - 3.040), SIMDE_FLOAT16_VALUE( 10.693), SIMDE_FLOAT16_VALUE( 19.060), SIMDE_FLOAT16_VALUE( - 29.067) }, + { SIMDE_FLOAT32_C( 553.772), SIMDE_FLOAT32_C(3391.303), SIMDE_FLOAT32_C(3482.779), -SIMDE_FLOAT32_C( 673.666) }, + { SIMDE_FLOAT16_VALUE( - 3.040), SIMDE_FLOAT16_VALUE( 10.693), SIMDE_FLOAT16_VALUE( 19.060), SIMDE_FLOAT16_VALUE( - 29.067), + SIMDE_FLOAT16_VALUE( 553.772), SIMDE_FLOAT16_VALUE(3391.303), SIMDE_FLOAT16_VALUE(3482.779), SIMDE_FLOAT16_VALUE( - 673.666) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float16x4_t buf = simde_vld1_f16(test_vec[i].buf); + simde_float16x8_t r = simde_vcvt_high_f16_f32(buf, a); - simde_assert_equal_i32(r, test_vec[i].r); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t buf = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float16x8_t r = simde_vcvt_high_f16_f32(buf, a); + + simde_test_arm_neon_write_f16x4(2, buf, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int -test_simde_vcvtas_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { - simde_float32 a; - uint32_t r; +test_simde_vcvt_high_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 buf[2]; + simde_float64 a[2]; + simde_float32 r[4]; } test_vec[] = { - #if 
!defined(SIMDE_FAST_CONVERSION_RANGE) - { SIMDE_MATH_NANF, - UINT32_C( 0) }, - { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), - UINT32_MAX }, - { SIMDE_MATH_INFINITYF, - UINT32_MAX }, - #endif - { SIMDE_FLOAT32_C(238.269043), - UINT32_C(238) }, - { SIMDE_FLOAT32_C(884.073364), - UINT32_C(884) }, - { SIMDE_FLOAT32_C(517.341492), - UINT32_C(517) }, - { SIMDE_FLOAT32_C(161.270676), - UINT32_C(161) }, - { SIMDE_FLOAT32_C(302.139801), - UINT32_C(302) }, - { SIMDE_FLOAT32_C(949.265381), - UINT32_C(949) }, - { SIMDE_FLOAT32_C(586.265320), - UINT32_C(586) }, - { SIMDE_FLOAT32_C(230.019547), - UINT32_C(230) }, + { { SIMDE_FLOAT32_C( 3151.790), SIMDE_FLOAT32_C( 1569.615) }, + { -SIMDE_FLOAT64_C(330922.500), SIMDE_FLOAT64_C( 86650.750) }, + { SIMDE_FLOAT32_C( 3151.790), SIMDE_FLOAT32_C( 1569.615), -SIMDE_FLOAT32_C(330922.500), SIMDE_FLOAT32_C( 86650.750) } }, + { { SIMDE_FLOAT32_C( 2740.920), -SIMDE_FLOAT32_C( 6163.193) }, + { SIMDE_FLOAT64_C(258245.250), SIMDE_FLOAT64_C(192464.250) }, + { SIMDE_FLOAT32_C( 2740.920), -SIMDE_FLOAT32_C( 6163.193), SIMDE_FLOAT32_C(258245.250), SIMDE_FLOAT32_C(192464.250) } }, + { { SIMDE_FLOAT32_C( 5576.555), SIMDE_FLOAT32_C( 6417.400) }, + { -SIMDE_FLOAT64_C(145156.250), -SIMDE_FLOAT64_C(412130.250) }, + { SIMDE_FLOAT32_C( 5576.555), SIMDE_FLOAT32_C( 6417.400), -SIMDE_FLOAT32_C(145156.250), -SIMDE_FLOAT32_C(412130.250) } }, + { { SIMDE_FLOAT32_C( 2466.857), SIMDE_FLOAT32_C( 3059.207) }, + { -SIMDE_FLOAT64_C(930083.625), -SIMDE_FLOAT64_C(235856.375) }, + { SIMDE_FLOAT32_C( 2466.857), SIMDE_FLOAT32_C( 3059.207), -SIMDE_FLOAT32_C(930083.625), -SIMDE_FLOAT32_C(235856.375) } }, + { { SIMDE_FLOAT32_C( 2648.531), -SIMDE_FLOAT32_C( 9455.132) }, + { -SIMDE_FLOAT64_C(646168.250), SIMDE_FLOAT64_C(652389.375) }, + { SIMDE_FLOAT32_C( 2648.531), -SIMDE_FLOAT32_C( 9455.132), -SIMDE_FLOAT32_C(646168.250), SIMDE_FLOAT32_C(652389.375) } }, + { { -SIMDE_FLOAT32_C( 6091.833), -SIMDE_FLOAT32_C( 7970.317) }, + { 
-SIMDE_FLOAT64_C(928208.875), -SIMDE_FLOAT64_C(391840.250) }, + { -SIMDE_FLOAT32_C( 6091.833), -SIMDE_FLOAT32_C( 7970.317), -SIMDE_FLOAT32_C(928208.875), -SIMDE_FLOAT32_C(391840.250) } }, + { { -SIMDE_FLOAT32_C( 4632.915), SIMDE_FLOAT32_C( 1000.022) }, + { -SIMDE_FLOAT64_C(783316.375), SIMDE_FLOAT64_C( 2008.688) }, + { -SIMDE_FLOAT32_C( 4632.915), SIMDE_FLOAT32_C( 1000.022), -SIMDE_FLOAT32_C(783316.375), SIMDE_FLOAT32_C( 2008.688) } }, + { { -SIMDE_FLOAT32_C( 4256.610), -SIMDE_FLOAT32_C( 8440.278) }, + { SIMDE_FLOAT64_C(522630.125), -SIMDE_FLOAT64_C(222893.500) }, + { -SIMDE_FLOAT32_C( 4256.610), -SIMDE_FLOAT32_C( 8440.278), SIMDE_FLOAT32_C(522630.125), -SIMDE_FLOAT32_C(222893.500) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32 a = test_vec[i].a; - uint32_t r = simde_vcvtas_u32_f32(a); + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float32x2_t buf = simde_vld1_f32(test_vec[i].buf); + simde_float32x4_t r = simde_vcvt_high_f32_f64(buf, a); - simde_assert_equal_u32(r, test_vec[i].r); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t buf = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float32x4_t r = simde_vcvt_high_f32_f64(buf, a); + + simde_test_arm_neon_write_f32x2(2, buf, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int -test_simde_vcvta_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { - static const struct { - simde_float32 a[2]; - int32_t r[2]; +test_simde_vcvt_high_f32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float32 r[4]; } test_vec[] = { - #if 
!defined(SIMDE_FAST_CONVERSION_RANGE) - { { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), - HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(1000.0) }, - { INT32_MAX, INT32_MIN } }, - { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, - { INT32_C( 0), INT32_MAX } }, - #endif - { { SIMDE_FLOAT32_C(-137.097046), SIMDE_FLOAT32_C(632.638672) }, - { -INT32_C(137), INT32_C(633) } }, - { { SIMDE_FLOAT32_C(135.947388), SIMDE_FLOAT32_C(-204.564087) }, - { INT32_C(136), -INT32_C(205) } }, - { { SIMDE_FLOAT32_C(422.245239), SIMDE_FLOAT32_C(972.902710) }, - { INT32_C(422), INT32_C(973) } }, - { { SIMDE_FLOAT32_C(-291.536621), SIMDE_FLOAT32_C(-849.554077) }, - { -INT32_C(292), -INT32_C(850) } }, - { { SIMDE_FLOAT32_C(-9.575623), SIMDE_FLOAT32_C(318.716919) }, - { -INT32_C(10), INT32_C(319) } }, - { { SIMDE_FLOAT32_C(-734.776367), SIMDE_FLOAT32_C(-510.679810) }, - { -INT32_C(735), -INT32_C(511) } }, - { { SIMDE_FLOAT32_C(-457.886719), SIMDE_FLOAT32_C(655.444580) }, - { -INT32_C(458), INT32_C(655) } }, - { { SIMDE_FLOAT32_C(847.546021), SIMDE_FLOAT32_C(849.980591) }, - { INT32_C(848), INT32_C(850) } }, + { { SIMDE_FLOAT16_VALUE( - 3.844), SIMDE_FLOAT16_VALUE(22.261), SIMDE_FLOAT16_VALUE(21.633), SIMDE_FLOAT16_VALUE(21.815), + SIMDE_FLOAT16_VALUE( - 8.432), SIMDE_FLOAT16_VALUE( -19.472), SIMDE_FLOAT16_VALUE(13.622), SIMDE_FLOAT16_VALUE( - 8.106) }, + { -SIMDE_FLOAT32_C( 8.432), -SIMDE_FLOAT32_C(19.472), SIMDE_FLOAT32_C(13.622), -SIMDE_FLOAT32_C( 8.106) } }, + { { SIMDE_FLOAT16_VALUE(26.057), SIMDE_FLOAT16_VALUE( 0.355), SIMDE_FLOAT16_VALUE( -13.476), SIMDE_FLOAT16_VALUE( - 9.150), + SIMDE_FLOAT16_VALUE( - 3.837), SIMDE_FLOAT16_VALUE( - 8.472), SIMDE_FLOAT16_VALUE( -12.672), SIMDE_FLOAT16_VALUE(11.071) }, + { -SIMDE_FLOAT32_C( 3.837), -SIMDE_FLOAT32_C( 8.472), -SIMDE_FLOAT32_C(12.672), SIMDE_FLOAT32_C(11.071) } }, + { { SIMDE_FLOAT16_VALUE( - 6.540), SIMDE_FLOAT16_VALUE( - 1.356), SIMDE_FLOAT16_VALUE(22.256), SIMDE_FLOAT16_VALUE( -12.208), + 
SIMDE_FLOAT16_VALUE(20.480), SIMDE_FLOAT16_VALUE( - 4.999), SIMDE_FLOAT16_VALUE( 0.486), SIMDE_FLOAT16_VALUE( - 8.171) }, + { SIMDE_FLOAT32_C(20.480), -SIMDE_FLOAT32_C( 4.999), SIMDE_FLOAT32_C( 0.486), -SIMDE_FLOAT32_C( 8.171) } }, + { { SIMDE_FLOAT16_VALUE(12.817), SIMDE_FLOAT16_VALUE( 7.000), SIMDE_FLOAT16_VALUE( - 6.571), SIMDE_FLOAT16_VALUE( 9.247), + SIMDE_FLOAT16_VALUE(12.120), SIMDE_FLOAT16_VALUE( -10.026), SIMDE_FLOAT16_VALUE( - 5.410), SIMDE_FLOAT16_VALUE( - 4.789) }, + { SIMDE_FLOAT32_C(12.120), -SIMDE_FLOAT32_C(10.026), -SIMDE_FLOAT32_C( 5.410), -SIMDE_FLOAT32_C( 4.789) } }, + { { SIMDE_FLOAT16_VALUE(25.521), SIMDE_FLOAT16_VALUE( -18.717), SIMDE_FLOAT16_VALUE( -24.798), SIMDE_FLOAT16_VALUE(20.203), + SIMDE_FLOAT16_VALUE(20.893), SIMDE_FLOAT16_VALUE(22.676), SIMDE_FLOAT16_VALUE( -11.232), SIMDE_FLOAT16_VALUE( - 4.399) }, + { SIMDE_FLOAT32_C(20.893), SIMDE_FLOAT32_C(22.676), -SIMDE_FLOAT32_C(11.232), -SIMDE_FLOAT32_C( 4.399) } }, + { { SIMDE_FLOAT16_VALUE( 7.250), SIMDE_FLOAT16_VALUE( -29.188), SIMDE_FLOAT16_VALUE( -15.288), SIMDE_FLOAT16_VALUE(10.280), + SIMDE_FLOAT16_VALUE( 1.174), SIMDE_FLOAT16_VALUE(29.433), SIMDE_FLOAT16_VALUE( - 3.853), SIMDE_FLOAT16_VALUE( 3.367) }, + { SIMDE_FLOAT32_C( 1.174), SIMDE_FLOAT32_C(29.433), -SIMDE_FLOAT32_C( 3.853), SIMDE_FLOAT32_C( 3.367) } }, + { { SIMDE_FLOAT16_VALUE( -14.536), SIMDE_FLOAT16_VALUE(24.455), SIMDE_FLOAT16_VALUE( -12.233), SIMDE_FLOAT16_VALUE( - 7.986), + SIMDE_FLOAT16_VALUE( -11.418), SIMDE_FLOAT16_VALUE( 4.680), SIMDE_FLOAT16_VALUE( 9.822), SIMDE_FLOAT16_VALUE(16.191) }, + { -SIMDE_FLOAT32_C(11.418), SIMDE_FLOAT32_C( 4.680), SIMDE_FLOAT32_C( 9.822), SIMDE_FLOAT32_C(16.191) } }, + { { SIMDE_FLOAT16_VALUE( -11.395), SIMDE_FLOAT16_VALUE(11.561), SIMDE_FLOAT16_VALUE( - 5.833), SIMDE_FLOAT16_VALUE(22.281), + SIMDE_FLOAT16_VALUE( -27.638), SIMDE_FLOAT16_VALUE( -17.180), SIMDE_FLOAT16_VALUE(14.937), SIMDE_FLOAT16_VALUE( - 3.647) }, + { -SIMDE_FLOAT32_C(27.638), -SIMDE_FLOAT32_C(17.180), 
SIMDE_FLOAT32_C(14.937), -SIMDE_FLOAT32_C( 3.647) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); - simde_int32x2_t r = simde_vcvta_s32_f32(a); + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float32x4_t r = simde_vcvt_high_f32_f16(a); - simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float32x4_t r = simde_vcvt_high_f32_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int -test_simde_vcvta_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +test_simde_vcvt_high_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[4]; + simde_float64 r[2]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C( 2801.914), -SIMDE_FLOAT32_C( 1690.910), SIMDE_FLOAT32_C( 579.511), SIMDE_FLOAT32_C( 514.098) }, + { SIMDE_FLOAT64_C( 579.511), SIMDE_FLOAT64_C( 514.098) } }, + { { SIMDE_FLOAT32_C( 23.345), -SIMDE_FLOAT32_C( 8818.351), SIMDE_FLOAT32_C( 3019.613), SIMDE_FLOAT32_C( 3442.440) }, + { SIMDE_FLOAT64_C( 3019.613), SIMDE_FLOAT64_C( 3442.440) } }, + { { SIMDE_FLOAT32_C( 3413.646), -SIMDE_FLOAT32_C( 1012.018), -SIMDE_FLOAT32_C( 6549.712), -SIMDE_FLOAT32_C( 269.341) }, + { -SIMDE_FLOAT64_C( 6549.712), -SIMDE_FLOAT64_C( 269.341) } }, + { { -SIMDE_FLOAT32_C( 8209.152), -SIMDE_FLOAT32_C( 6540.035), SIMDE_FLOAT32_C( 5627.302), -SIMDE_FLOAT32_C( 1488.624) }, + { SIMDE_FLOAT64_C( 5627.302), -SIMDE_FLOAT64_C( 1488.624) } }, + { { SIMDE_FLOAT32_C( 444.442), SIMDE_FLOAT32_C( 8308.801), SIMDE_FLOAT32_C( 3283.538), SIMDE_FLOAT32_C( 8306.715) }, + { SIMDE_FLOAT64_C( 3283.538), SIMDE_FLOAT64_C( 8306.715) } }, + { { 
SIMDE_FLOAT32_C( 4692.329), SIMDE_FLOAT32_C( 692.840), -SIMDE_FLOAT32_C( 3238.872), -SIMDE_FLOAT32_C( 4908.950) }, + { -SIMDE_FLOAT64_C( 3238.872), -SIMDE_FLOAT64_C( 4908.950) } }, + { { SIMDE_FLOAT32_C( 1319.597), -SIMDE_FLOAT32_C( 4241.800), -SIMDE_FLOAT32_C( 8086.388), SIMDE_FLOAT32_C( 496.338) }, + { -SIMDE_FLOAT64_C( 8086.388), SIMDE_FLOAT64_C( 496.338) } }, + { { -SIMDE_FLOAT32_C( 3654.481), SIMDE_FLOAT32_C( 4723.808), SIMDE_FLOAT32_C( 8781.266), -SIMDE_FLOAT32_C( 4554.919) }, + { SIMDE_FLOAT64_C( 8781.266), -SIMDE_FLOAT64_C( 4554.919) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_float64x2_t r = simde_vcvt_high_f64_f32(a); + + simde_test_arm_neon_assert_equal_f64x2(r, simde_vld1q_f64(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float64x2_t r = simde_vcvt_high_f64_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtxd_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { - simde_float32 a[2]; - uint32_t r[2]; + simde_float64 a; + simde_float32 r; } test_vec[] = { - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), - SIMDE_FLOAT32_C(-1000.0) }, - { UINT32_MAX, 0 } }, - { { SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, - { UINT32_C( 0), UINT32_MAX } }, - #endif - { { SIMDE_FLOAT32_C(518.760376), SIMDE_FLOAT32_C(796.769409) }, - { UINT32_C(519), UINT32_C(797) } }, - { { SIMDE_FLOAT32_C(161.204361), SIMDE_FLOAT32_C(381.395020) }, - { UINT32_C(161), UINT32_C(381) } }, - { { SIMDE_FLOAT32_C(803.856689), SIMDE_FLOAT32_C(971.859131) }, - { UINT32_C(804), UINT32_C(972) } }, - { { 
SIMDE_FLOAT32_C(445.868378), SIMDE_FLOAT32_C(558.828979) }, - { UINT32_C(446), UINT32_C(559) } }, - { { SIMDE_FLOAT32_C(83.968452), SIMDE_FLOAT32_C(140.023712) }, - { UINT32_C(84), UINT32_C(140) } }, - { { SIMDE_FLOAT32_C(230.921921), SIMDE_FLOAT32_C(235.137802) }, - { UINT32_C(231), UINT32_C(235) } }, - { { SIMDE_FLOAT32_C(367.292725), SIMDE_FLOAT32_C(815.052429) }, - { UINT32_C(367), UINT32_C(815) } }, - { { SIMDE_FLOAT32_C(13.168660), SIMDE_FLOAT32_C(406.672668) }, - { UINT32_C(13), UINT32_C(407) } }, + { -SIMDE_FLOAT64_C( 97957.500), + -SIMDE_FLOAT32_C( 97957.500) }, + { SIMDE_FLOAT64_C( 70879.625), + SIMDE_FLOAT32_C( 70879.625) }, + { -SIMDE_FLOAT64_C( 65748.703), + -SIMDE_FLOAT32_C( 65748.703) }, + { SIMDE_FLOAT64_C( 31406.359), + SIMDE_FLOAT32_C( 31406.359) }, + { SIMDE_FLOAT64_C( 29286.383), + SIMDE_FLOAT32_C( 29286.383) }, + { SIMDE_FLOAT64_C( 13994.172), + SIMDE_FLOAT32_C( 13994.172) }, + { SIMDE_FLOAT64_C( 97855.250), + SIMDE_FLOAT32_C( 97855.250) }, + { SIMDE_FLOAT64_C( 53307.422), + SIMDE_FLOAT32_C( 53307.422) }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); - simde_uint32x2_t r = simde_vcvta_u32_f32(a); + simde_float64 a = test_vec[i].a; + simde_float32 r = simde_vcvtxd_f32_f64(a); - simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + simde_assert_equal_f32(r, test_vec[i].r, 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_arm_neon_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float32_t r = simde_vcvtxd_f32_f64(a); + + simde_test_arm_neon_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int -test_simde_vcvtaq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +test_simde_vcvtx_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { - simde_float32 a[4]; - int32_t r[4]; + 
simde_float64 a[2]; + simde_float32 r[2]; } test_vec[] = { - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - { { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(10000.0), HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) - SIMDE_FLOAT32_C(10000.0), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, - { INT32_MAX, INT32_MIN, INT32_C( 0), INT32_MAX } }, - #endif - { { SIMDE_FLOAT32_C(-100.235291), SIMDE_FLOAT32_C(963.126831), SIMDE_FLOAT32_C(-513.717896), SIMDE_FLOAT32_C(76.769287) }, - { -INT32_C(100), INT32_C(963), -INT32_C(514), INT32_C(77) } }, - { { SIMDE_FLOAT32_C(188.924072), SIMDE_FLOAT32_C(145.440186), SIMDE_FLOAT32_C(889.209717), SIMDE_FLOAT32_C(443.582153) }, - { INT32_C(189), INT32_C(145), INT32_C(889), INT32_C(444) } }, - { { SIMDE_FLOAT32_C(-530.063477), SIMDE_FLOAT32_C(316.464478), SIMDE_FLOAT32_C(-720.190491), SIMDE_FLOAT32_C(800.602661) }, - { -INT32_C(530), INT32_C(316), -INT32_C(720), INT32_C(801) } }, - { { SIMDE_FLOAT32_C(-236.361084), SIMDE_FLOAT32_C(499.799438), SIMDE_FLOAT32_C(686.013672), SIMDE_FLOAT32_C(172.975098) }, - { -INT32_C(236), INT32_C(500), INT32_C(686), INT32_C(173) } }, - { { SIMDE_FLOAT32_C(43.644165), SIMDE_FLOAT32_C(-327.279907), SIMDE_FLOAT32_C(-920.398865), SIMDE_FLOAT32_C(528.922852) }, - { INT32_C(44), -INT32_C(327), -INT32_C(920), INT32_C(529) } }, - { { SIMDE_FLOAT32_C(-676.315308), SIMDE_FLOAT32_C(-156.078674), SIMDE_FLOAT32_C(475.171509), SIMDE_FLOAT32_C(885.232666) }, - { -INT32_C(676), -INT32_C(156), INT32_C(475), INT32_C(885) } }, - { { SIMDE_FLOAT32_C(599.373413), SIMDE_FLOAT32_C(8.430664), SIMDE_FLOAT32_C(-142.695679), SIMDE_FLOAT32_C(235.751221) }, - { INT32_C(599), INT32_C(8), -INT32_C(143), INT32_C(236) } }, - { { SIMDE_FLOAT32_C(895.002075), SIMDE_FLOAT32_C(-806.336182), SIMDE_FLOAT32_C(-732.325745), SIMDE_FLOAT32_C(-389.401733) }, - { INT32_C(895), -INT32_C(806), -INT32_C(732), -INT32_C(389) } }, + { { SIMDE_FLOAT64_C( 65007.797), -SIMDE_FLOAT64_C( 80727.609) }, + { SIMDE_FLOAT32_C( 65007.797), -SIMDE_FLOAT32_C( 
80727.609) } }, + { { SIMDE_FLOAT64_C( 16635.250), SIMDE_FLOAT64_C( 34365.453) }, + { SIMDE_FLOAT32_C( 16635.250), SIMDE_FLOAT32_C( 34365.453) } }, + { { SIMDE_FLOAT64_C( 59608.250), -SIMDE_FLOAT64_C( 82840.891) }, + { SIMDE_FLOAT32_C( 59608.250), -SIMDE_FLOAT32_C( 82840.891) } }, + { { SIMDE_FLOAT64_C( 52610.078), SIMDE_FLOAT64_C( 7763.352) }, + { SIMDE_FLOAT32_C( 52610.078), SIMDE_FLOAT32_C( 7763.352) } }, + { { -SIMDE_FLOAT64_C( 32936.711), SIMDE_FLOAT64_C( 91758.141) }, + { -SIMDE_FLOAT32_C( 32936.711), SIMDE_FLOAT32_C( 91758.141) } }, + { { SIMDE_FLOAT64_C( 86913.797), -SIMDE_FLOAT64_C( 4983.180) }, + { SIMDE_FLOAT32_C( 86913.797), -SIMDE_FLOAT32_C( 4983.180) } }, + { { SIMDE_FLOAT64_C( 79903.813), -SIMDE_FLOAT64_C( 75940.688) }, + { SIMDE_FLOAT32_C( 79903.812), -SIMDE_FLOAT32_C( 75940.688) } }, + { { SIMDE_FLOAT64_C( 75752.375), SIMDE_FLOAT64_C( 72121.250) }, + { SIMDE_FLOAT32_C( 75752.375), SIMDE_FLOAT32_C( 72121.250) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); - simde_int32x4_t r = simde_vcvtaq_s32_f32(a); - simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float32x2_t r = simde_vcvtx_f32_f64(a); + + simde_test_arm_neon_assert_equal_f32x2(r, simde_vld1_f32(test_vec[i].r), 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float32x2_t r = simde_vcvtx_f32_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int -test_simde_vcvtaq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +test_simde_vcvtx_high_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { - simde_float32 a[4]; - uint32_t r[4]; + simde_float32 buf[2]; + 
simde_float64 a[2]; + simde_float32 r[4]; } test_vec[] = { - #if !defined(SIMDE_FAST_CONVERSION_RANGE) - { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(10000.0), SIMDE_FLOAT32_C(-10000.0), SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, - { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, - #endif - { { SIMDE_FLOAT32_C(99.796890), SIMDE_FLOAT32_C(640.625061), SIMDE_FLOAT32_C(761.249390), SIMDE_FLOAT32_C(134.496353) }, - { UINT32_C(100), UINT32_C(641), UINT32_C(761), UINT32_C(134) } }, - { { SIMDE_FLOAT32_C(740.153748), SIMDE_FLOAT32_C(226.072403), SIMDE_FLOAT32_C(458.142426), SIMDE_FLOAT32_C(312.975708) }, - { UINT32_C(740), UINT32_C(226), UINT32_C(458), UINT32_C(313) } }, - { { SIMDE_FLOAT32_C(881.748596), SIMDE_FLOAT32_C(315.416504), SIMDE_FLOAT32_C(657.340698), SIMDE_FLOAT32_C(492.805298) }, - { UINT32_C(882), UINT32_C(315), UINT32_C(657), UINT32_C(493) } }, - { { SIMDE_FLOAT32_C(27.446901), SIMDE_FLOAT32_C(904.086670), SIMDE_FLOAT32_C(857.025085), SIMDE_FLOAT32_C(677.571045) }, - { UINT32_C(27), UINT32_C(904), UINT32_C(857), UINT32_C(678) } }, - { { SIMDE_FLOAT32_C(666.073059), SIMDE_FLOAT32_C(988.718506), SIMDE_FLOAT32_C(51.321510), SIMDE_FLOAT32_C(353.845490) }, - { UINT32_C(666), UINT32_C(989), UINT32_C(51), UINT32_C(354) } }, - { { SIMDE_FLOAT32_C(307.715729), SIMDE_FLOAT32_C(75.778244), SIMDE_FLOAT32_C(748.057373), SIMDE_FLOAT32_C(533.695679) }, - { UINT32_C(308), UINT32_C(76), UINT32_C(748), UINT32_C(534) } }, - { { SIMDE_FLOAT32_C(949.232422), SIMDE_FLOAT32_C(163.359085), SIMDE_FLOAT32_C(946.573120), SIMDE_FLOAT32_C(713.519104) }, - { UINT32_C(949), UINT32_C(163), UINT32_C(947), UINT32_C(714) } }, - { { SIMDE_FLOAT32_C(592.152954), SIMDE_FLOAT32_C(751.258545), SIMDE_FLOAT32_C(645.332520), SIMDE_FLOAT32_C(894.986938) }, - { UINT32_C(592), UINT32_C(751), UINT32_C(645), UINT32_C(895) } }, + { { -SIMDE_FLOAT32_C( 71.413), -SIMDE_FLOAT32_C( 465.368) }, + { SIMDE_FLOAT64_C( 4100.359), SIMDE_FLOAT64_C( 33582.953) }, + { -SIMDE_FLOAT32_C( 
71.413), -SIMDE_FLOAT32_C( 465.368), SIMDE_FLOAT32_C( 4100.359), SIMDE_FLOAT32_C( 33582.953) } }, + { { -SIMDE_FLOAT32_C( 951.379), SIMDE_FLOAT32_C( 78.455) }, + { SIMDE_FLOAT64_C( 68132.203), SIMDE_FLOAT64_C( 48841.672) }, + { -SIMDE_FLOAT32_C( 951.379), SIMDE_FLOAT32_C( 78.455), SIMDE_FLOAT32_C( 68132.203), SIMDE_FLOAT32_C( 48841.672) } }, + { { SIMDE_FLOAT32_C( 667.952), -SIMDE_FLOAT32_C( 326.162) }, + { SIMDE_FLOAT64_C( 21689.508), SIMDE_FLOAT64_C( 74848.328) }, + { SIMDE_FLOAT32_C( 667.952), -SIMDE_FLOAT32_C( 326.162), SIMDE_FLOAT32_C( 21689.508), SIMDE_FLOAT32_C( 74848.328) } }, + { { -SIMDE_FLOAT32_C( 713.122), SIMDE_FLOAT32_C( 137.994) }, + { -SIMDE_FLOAT64_C( 49391.813), -SIMDE_FLOAT64_C( 75434.734) }, + { -SIMDE_FLOAT32_C( 713.122), SIMDE_FLOAT32_C( 137.994), -SIMDE_FLOAT32_C( 49391.812), -SIMDE_FLOAT32_C( 75434.734) } }, + { { -SIMDE_FLOAT32_C( 988.176), -SIMDE_FLOAT32_C( 780.086) }, + { -SIMDE_FLOAT64_C( 54026.855), -SIMDE_FLOAT64_C( 50128.336) }, + { -SIMDE_FLOAT32_C( 988.176), -SIMDE_FLOAT32_C( 780.086), -SIMDE_FLOAT32_C( 54026.855), -SIMDE_FLOAT32_C( 50128.336) } }, + { { -SIMDE_FLOAT32_C( 667.715), -SIMDE_FLOAT32_C( 129.354) }, + { -SIMDE_FLOAT64_C( 37504.672), -SIMDE_FLOAT64_C( 21201.570) }, + { -SIMDE_FLOAT32_C( 667.715), -SIMDE_FLOAT32_C( 129.354), -SIMDE_FLOAT32_C( 37504.672), -SIMDE_FLOAT32_C( 21201.570) } }, + { { SIMDE_FLOAT32_C( 981.659), -SIMDE_FLOAT32_C( 873.152) }, + { SIMDE_FLOAT64_C( 83367.734), -SIMDE_FLOAT64_C( 98388.281) }, + { SIMDE_FLOAT32_C( 981.659), -SIMDE_FLOAT32_C( 873.152), SIMDE_FLOAT32_C( 83367.734), -SIMDE_FLOAT32_C( 98388.281) } }, + { { SIMDE_FLOAT32_C( 450.735), SIMDE_FLOAT32_C( 560.921) }, + { -SIMDE_FLOAT64_C( 3083.563), SIMDE_FLOAT64_C( 98364.953) }, + { SIMDE_FLOAT32_C( 450.735), SIMDE_FLOAT32_C( 560.921), -SIMDE_FLOAT32_C( 3083.563), SIMDE_FLOAT32_C( 98364.953) } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); - simde_uint32x4_t 
r = simde_vcvtaq_u32_f32(a); - simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + simde_float32x2_t buf = simde_vld1_f32(test_vec[i].buf); + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_float32x4_t r = simde_vcvtx_high_f32_f64(buf, a); + + simde_test_arm_neon_assert_equal_f32x4(r, simde_vld1q_f32(test_vec[i].r), 1); } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t buf = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float32x4_t r = simde_vcvtx_high_f32_f64(buf, a); + + simde_test_arm_neon_write_f32x2(2, buf, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_s64_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_u64_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_f16_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_f16_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_f16_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_f16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_s64_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_u32_f32) @@ -1904,12 +3663,40 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_f64_f32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtah_u64_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtas_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtas_u32_f32) 
+SIMDE_TEST_FUNC_LIST_ENTRY(vcvtad_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtad_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_u16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvta_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_u16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtaq_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_high_f16_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_high_f32_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_high_f32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_high_f64_f32) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtxd_f32_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtx_f32_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtx_high_f32_f64) SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/cvt_n.c b/test/arm/neon/cvt_n.c index 683910bb3..386951660 100644 --- a/test/arm/neon/cvt_n.c +++ b/test/arm/neon/cvt_n.c @@ -4,8 +4,1758 @@ #include "../../../simde/arm/neon/cvt_n.h" #include "../../../simde/arm/neon/dup_n.h" +static int +test_simde_vcvth_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[1]; + int16_t r3[1]; + int16_t r6[1]; + int16_t r10[1]; + int16_t r13[1]; + int16_t r16[1]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 24.220) }, + { INT16_C( 193) }, + { INT16_C( 1550) }, + { INT16_C( 24800) }, + { INT16_MAX }, + { INT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 8.057) }, + { -INT16_C( 64) }, + { -INT16_C( 515) }, + { -INT16_C( 8248) }, + { INT16_MIN }, + { INT16_MIN } }, + { { SIMDE_FLOAT16_VALUE( - 1.310) }, + { -INT16_C( 10) }, + { -INT16_C( 83) }, + { -INT16_C( 1341) }, + { -INT16_C( 10728) }, + { INT16_MIN } }, + { { SIMDE_FLOAT16_VALUE( - 24.017) }, + { -INT16_C( 192) }, + { -INT16_C( 1537) }, + { -INT16_C( 24592) }, + { 
INT16_MIN }, + { INT16_MIN } }, + { { SIMDE_FLOAT16_VALUE( 23.037) }, + { INT16_C( 184) }, + { INT16_C( 1474) }, + { INT16_C( 23584) }, + { INT16_MAX }, + { INT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( 7.442) }, + { INT16_C( 59) }, + { INT16_C( 476) }, + { INT16_C( 7620) }, + { INT16_MAX }, + { INT16_MAX } }, + { { SIMDE_FLOAT16_VALUE( - 18.102) }, + { -INT16_C( 144) }, + { -INT16_C( 1159) }, + { -INT16_C( 18544) }, + { INT16_MIN }, + { INT16_MIN } }, + { { SIMDE_FLOAT16_VALUE( - 10.834) }, + { -INT16_C( 86) }, + { -INT16_C( 693) }, + { -INT16_C( 11096) }, + { INT16_MIN }, + { INT16_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a[0]; + int16_t r3 = simde_vcvth_n_s16_f16(a, 3); + int16_t r6 = simde_vcvth_n_s16_f16(a, 6); + int16_t r10 = simde_vcvth_n_s16_f16(a, 10); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + int16_t r13 = simde_vcvth_n_s16_f16(a, 13); + int16_t r16 = simde_vcvth_n_s16_f16(a, 16); + #endif + + simde_assert_equal_i16(r3, test_vec[i].r3[0]); + simde_assert_equal_i16(r6, test_vec[i].r6[0]); + simde_assert_equal_i16(r10, test_vec[i].r10[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_i16(r13, test_vec[i].r13[0]); + simde_assert_equal_i16(r16, test_vec[i].r16[0]); + #endif + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + int16_t r3 = simde_vcvth_n_s16_f16(a, 3); + int16_t r6 = simde_vcvth_n_s16_f16(a, 6); + int16_t r10 = simde_vcvth_n_s16_f16(a, 10); + int16_t r13 = simde_vcvth_n_s16_f16(a, 13); + int16_t r16 = simde_vcvth_n_s16_f16(a, 16); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16(2, r13, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int32_t r3; + int32_t r6; + int32_t r10; + int32_t r13; + int32_t r16; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 446.75), + INT32_C( 3574), + INT32_C( 28592), + INT32_MAX, + INT32_MAX, + INT32_MAX }, + { SIMDE_FLOAT16_VALUE( -87.31), + -INT32_C( 698), + -INT32_C( 5588), + INT32_MIN, + INT32_MIN, + INT32_MIN }, + { SIMDE_FLOAT16_VALUE( 826.00), + INT32_C( 6608), + INT32_C( 52864), + INT32_MAX, + INT32_MAX, + INT32_MAX }, + { SIMDE_FLOAT16_VALUE( -327.00), + -INT32_C( 2616), + -INT32_C( 20928), + INT32_MIN, + INT32_MIN, + INT32_MIN }, + { SIMDE_FLOAT16_VALUE( 852.00), + INT32_C( 6816), + INT32_C( 54528), + INT32_MAX, + INT32_MAX, + INT32_MAX }, + { SIMDE_FLOAT16_VALUE( -492.00), + -INT32_C( 3936), + -INT32_C( 31488), + INT32_MIN, + INT32_MIN, + INT32_MIN }, + { SIMDE_FLOAT16_VALUE( 332.25), + INT32_C( 2658), + INT32_C( 21264), + INT32_MAX, + INT32_MAX, + INT32_MAX }, + { SIMDE_FLOAT16_VALUE( 648.50), + INT32_C( 5188), + INT32_C( 41504), + INT32_MAX, + INT32_MAX, + INT32_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int32_t r3 = simde_vcvth_n_s32_f16(a, 3); + int32_t r6 = simde_vcvth_n_s32_f16(a, 6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + int32_t r10 = simde_vcvth_n_s32_f16(a, 10); + int32_t r13 = simde_vcvth_n_s32_f16(a, 13); + int32_t r16 = simde_vcvth_n_s32_f16(a, 16); + #endif + + simde_assert_equal_i32(r3, test_vec[i].r3); + simde_assert_equal_i32(r6, test_vec[i].r6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_i32(r10, test_vec[i].r10); + simde_assert_equal_i32(r13, test_vec[i].r13); + simde_assert_equal_i32(r16, test_vec[i].r16); + #endif + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + 
simde_float16_t a = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + int32_t r3 = simde_vcvth_n_s32_f16(a, 3); + int32_t r6 = simde_vcvth_n_s32_f16(a, 6); + int32_t r10 = simde_vcvth_n_s32_f16(a, 10); + int32_t r13 = simde_vcvth_n_s32_f16(a, 13); + int32_t r16 = simde_vcvth_n_s32_f16(a, 16); + + simde_test_codegen_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i32(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int64_t r3; + int64_t r6; + int64_t r10; + int64_t r13; + int64_t r16; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 27.27), + INT64_C( 218), + INT64_C( 1745), + INT64_C( 27920), + INT64_MAX, + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( 195.12), + INT64_C( 1561), + INT64_C( 12488), + INT64_MAX, + INT64_MAX, + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( 323.50), + INT64_C( 2588), + INT64_C( 20704), + INT64_MAX, + INT64_MAX, + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( -532.00), + -INT64_C( 4256), + -INT64_C( 34048), + INT64_MIN, + INT64_MIN, + INT64_MIN }, + { SIMDE_FLOAT16_VALUE( 264.50), + INT64_C( 2116), + INT64_C( 16928), + INT64_MAX, + INT64_MAX, + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( 741.50), + INT64_C( 5932), + INT64_C( 47456), + INT64_MAX, + INT64_MAX, + INT64_MAX }, + { SIMDE_FLOAT16_VALUE( -545.50), + -INT64_C( 4364), + -INT64_C( 34912), + INT64_MIN, + INT64_MIN, + INT64_MIN }, + { SIMDE_FLOAT16_VALUE( -404.25), + -INT64_C( 3234), + -INT64_C( 25872), + INT64_MIN, + INT64_MIN, + INT64_MIN }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + int64_t r3 = simde_vcvth_n_s64_f16(a, 3); + int64_t 
r6 = simde_vcvth_n_s64_f16(a, 6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + int64_t r10 = simde_vcvth_n_s64_f16(a, 10); + int64_t r13 = simde_vcvth_n_s64_f16(a, 13); + int64_t r16 = simde_vcvth_n_s64_f16(a, 16); + #endif + + simde_assert_equal_i64(r3, test_vec[i].r3); + simde_assert_equal_i64(r6, test_vec[i].r6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_i64(r10, test_vec[i].r10); + simde_assert_equal_i64(r13, test_vec[i].r13); + simde_assert_equal_i64(r16, test_vec[i].r16); + #endif + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + int64_t r3 = simde_vcvth_n_s64_f16(a, 3); + int64_t r6 = simde_vcvth_n_s64_f16(a, 6); + int64_t r10 = simde_vcvth_n_s64_f16(a, 10); + int64_t r13 = simde_vcvth_n_s64_f16(a, 13); + int64_t r16 = simde_vcvth_n_s64_f16(a, 16); + + simde_test_codegen_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_i64(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i64(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i64(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i64(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_i64(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[1]; + uint16_t r3[1]; + uint16_t r6[1]; + uint16_t r10[1]; + uint16_t r13[1]; + uint16_t r16[1]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 1.01) }, + { UINT16_C( 8) }, + { UINT16_C( 64) }, + { UINT16_C( 1034) }, + { UINT16_C( 8272) }, + { UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a[0]; + uint16_t r3 = simde_vcvth_n_u16_f16(a, 3); + uint16_t r6 = simde_vcvth_n_u16_f16(a, 6); + uint16_t r10 = simde_vcvth_n_u16_f16(a, 10); + uint16_t r13 = simde_vcvth_n_u16_f16(a, 13); + #if 
!defined(SIMDE_FAST_CONVERSION_RANGE) + uint16_t r16 = simde_vcvth_n_u16_f16(a, 16); + #endif + + simde_assert_equal_u16(r3, test_vec[i].r3[0]); + simde_assert_equal_u16(r6, test_vec[i].r6[0]); + simde_assert_equal_u16(r10, test_vec[i].r10[0]); + simde_assert_equal_u16(r13, test_vec[i].r13[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_u16(r16, test_vec[i].r16[0]); + #endif + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + uint16_t r3 = simde_vcvth_n_u16_f16(a, 3); + uint16_t r6 = simde_vcvth_n_u16_f16(a, 6); + uint16_t r10 = simde_vcvth_n_u16_f16(a, 10); + uint16_t r13 = simde_vcvth_n_u16_f16(a, 13); + uint16_t r16 = simde_vcvth_n_u16_f16(a, 16); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint32_t r3; + uint32_t r6; + uint32_t r10; + uint32_t r13; + uint32_t r16; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 146.50), + UINT32_C( 1172), + UINT32_C( 9376), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( 305.75), + UINT32_C( 2446), + UINT32_C( 19568), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( 475.75), + UINT32_C( 3806), + UINT32_C( 30448), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( 776.50), + UINT32_C( 6212), + UINT32_C( 49696), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE( 714.00), + UINT32_C( 5712), + UINT32_C( 45696), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX 
}, + { SIMDE_FLOAT16_VALUE( 907.00), + UINT32_C( 7256), + UINT32_C( 58048), + UINT32_MAX, + UINT32_MAX, + UINT32_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint32_t r3 = simde_vcvth_n_u32_f16(a, 3); + uint32_t r6 = simde_vcvth_n_u32_f16(a, 6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32_t r10 = simde_vcvth_n_u32_f16(a, 10); + uint32_t r13 = simde_vcvth_n_u32_f16(a, 13); + uint32_t r16 = simde_vcvth_n_u32_f16(a, 16); + #endif + + simde_assert_equal_u32(r3, test_vec[i].r3); + simde_assert_equal_u32(r6, test_vec[i].r6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_u32(r10, test_vec[i].r10); + simde_assert_equal_u32(r13, test_vec[i].r13); + simde_assert_equal_u32(r16, test_vec[i].r16); + #endif + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + uint32_t r3 = simde_vcvth_n_u32_f16(a, 3); + uint32_t r6 = simde_vcvth_n_u32_f16(a, 6); + uint32_t r10 = simde_vcvth_n_u32_f16(a, 10); + uint32_t r13 = simde_vcvth_n_u32_f16(a, 13); + uint32_t r16 = simde_vcvth_n_u32_f16(a, 16); + + simde_test_codegen_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u32(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint64_t r3; + uint64_t r6; + uint64_t r10; + uint64_t r13; + uint64_t r16; + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 215.88), + UINT64_C( 1727), + UINT64_C( 13816), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE( 420.75), + UINT64_C( 3366), + 
UINT64_C( 26928), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE( 603.50), + UINT64_C( 4828), + UINT64_C( 38624), + UINT64_MAX, + UINT64_MAX, + UINT64_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16_t a = test_vec[i].a; + uint64_t r3 = simde_vcvth_n_u64_f16(a, 3); + uint64_t r6 = simde_vcvth_n_u64_f16(a, 6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + uint64_t r10 = simde_vcvth_n_u64_f16(a, 10); + uint64_t r13 = simde_vcvth_n_u64_f16(a, 13); + uint64_t r16 = simde_vcvth_n_u64_f16(a, 16); + #endif + + simde_assert_equal_u64(r3, test_vec[i].r3); + simde_assert_equal_u64(r6, test_vec[i].r6); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_u64(r10, test_vec[i].r10); + simde_assert_equal_u64(r13, test_vec[i].r13); + simde_assert_equal_u64(r16, test_vec[i].r16); + #endif + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_codegen_random_f16(-1000.0f, 1000.0f); + uint64_t r3 = simde_vcvth_n_u64_f16(a, 3); + uint64_t r6 = simde_vcvth_n_u64_f16(a, 6); + uint64_t r10 = simde_vcvth_n_u64_f16(a, 10); + uint64_t r13 = simde_vcvth_n_u64_f16(a, 13); + uint64_t r16 = simde_vcvth_n_u64_f16(a, 16); + + simde_test_codegen_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_u64(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_u64(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int16_t a[1]; + simde_float16_t r3[1]; + simde_float16_t r6[1]; + simde_float16_t r10[1]; + simde_float16_t r13[1]; + simde_float16_t r16[1]; + } test_vec[] = { + { { INT16_C( 2586) }, + { SIMDE_FLOAT16_VALUE( 323.250) }, + { 
SIMDE_FLOAT16_VALUE( 40.406) }, + { SIMDE_FLOAT16_VALUE( 2.525) }, + { SIMDE_FLOAT16_VALUE( 0.316) }, + { SIMDE_FLOAT16_VALUE( 0.039) } }, + { { -INT16_C( 12099) }, + { SIMDE_FLOAT16_VALUE( - 1512.375) }, + { SIMDE_FLOAT16_VALUE( - 189.047) }, + { SIMDE_FLOAT16_VALUE( - 11.815) }, + { SIMDE_FLOAT16_VALUE( - 1.477) }, + { SIMDE_FLOAT16_VALUE( - 0.185) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t a = test_vec[i].a[0]; + simde_float16_t r3 = simde_vcvth_n_f16_s16(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s16(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s16(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s16(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s16(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3[0], 1); + simde_assert_equal_f16(r6, test_vec[i].r6[0], 1); + simde_assert_equal_f16(r10, test_vec[i].r10[0], 1); + simde_assert_equal_f16(r13, test_vec[i].r13[0], 1); + simde_assert_equal_f16(r16, test_vec[i].r16[0], 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int16_t a = simde_test_arm_neon_random_i16(); + simde_float16_t r3 = simde_vcvth_n_f16_s16(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s16(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s16(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s16(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s16(a, 16); + + simde_test_arm_neon_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int32_t a; + simde_float16_t r3; + simde_float16_t r6; + simde_float16_t 
r10; + simde_float16_t r13; + simde_float16_t r16; + } test_vec[] = { + { INT32_C( 1875020380), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 28608.00) }, + { INT32_C( 700277977), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 10688.00) }, + { INT32_C( 909565182), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 13880.00) }, + { INT32_C( 245822870), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 30000.00), + SIMDE_FLOAT16_VALUE( 3750.00) }, + { -INT32_C( 1253541882), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_FLOAT16_VALUE(-19120.00) }, + { INT32_C( 1238403304), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 18896.00) }, + { -INT32_C( 147081743), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_FLOAT16_VALUE(-17952.00), + SIMDE_FLOAT16_VALUE( -2244.00) }, + { -INT32_C( 1533160889), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_FLOAT16_VALUE(-23392.00) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t a = test_vec[i].a; + simde_float16_t r3 = simde_vcvth_n_f16_s32(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s32(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s32(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s32(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s32(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r10, test_vec[i].r10, 1); + simde_assert_equal_f16(r13, test_vec[i].r13, 1); + simde_assert_equal_f16(r16, test_vec[i].r16, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int32_t a = 
simde_test_codegen_random_i32(); + simde_float16_t r3 = simde_vcvth_n_f16_s32(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s32(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s32(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s32(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s32(a, 16); + + simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int64_t a; + simde_float16_t r3; + simde_float16_t r6; + simde_float16_t r10; + simde_float16_t r13; + simde_float16_t r16; + } test_vec[] = { + { -INT64_C( 4440601166441389988), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF }, + { -INT64_C( 7975419144118430292), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF }, + { INT64_C( 1389104449129453846), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { -INT64_C( 5568797330875120692), + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF, + SIMDE_NINFINITYHF }, + { INT64_C( 5156136258357414408), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { INT64_C( 4249388297338128092), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { INT64_C( 2498231527699660661), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { INT64_C( 7425007925136307715), + SIMDE_INFINITYHF, + 
SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t a = test_vec[i].a; + simde_float16_t r3 = simde_vcvth_n_f16_s64(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s64(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s64(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s64(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s64(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r10, test_vec[i].r10, 1); + simde_assert_equal_f16(r13, test_vec[i].r13, 1); + simde_assert_equal_f16(r16, test_vec[i].r16, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int64_t a = simde_test_codegen_random_i64(); + simde_float16_t r3 = simde_vcvth_n_f16_s64(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_s64(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_s64(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_s64(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_s64(a, 16); + + simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint16_t a[1]; + simde_float16_t r3[1]; + simde_float16_t r6[1]; + simde_float16_t r10[1]; + simde_float16_t r13[1]; + simde_float16_t r16[1]; + } test_vec[] = { + { { UINT16_C( 195) }, + { SIMDE_FLOAT16_VALUE( 24.375) }, + { SIMDE_FLOAT16_VALUE( 3.047) }, + { SIMDE_FLOAT16_VALUE( 0.190) }, + { SIMDE_FLOAT16_VALUE( 0.024) }, + { SIMDE_FLOAT16_VALUE( 0.003) } }, + { { UINT16_C( 
47989) }, + { SIMDE_FLOAT16_VALUE( 5998.625) }, + { SIMDE_FLOAT16_VALUE( 749.828) }, + { SIMDE_FLOAT16_VALUE( 46.864) }, + { SIMDE_FLOAT16_VALUE( 5.858) }, + { SIMDE_FLOAT16_VALUE( 0.732) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t a = test_vec[i].a[0]; + simde_float16_t r3 = simde_vcvth_n_f16_u16(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u16(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_u16(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u16(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u16(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3[0], 1); + simde_assert_equal_f16(r6, test_vec[i].r6[0], 1); + simde_assert_equal_f16(r10, test_vec[i].r10[0], 1); + simde_assert_equal_f16(r13, test_vec[i].r13[0], 1); + simde_assert_equal_f16(r16, test_vec[i].r16[0], 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x1_t a = simde_test_arm_neon_random_u16x1(); + simde_float16_t r3 = simde_vcvth_n_f16_u16(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u16(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_u16(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u16(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u16(a, 16); + + simde_test_arm_neon_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint32_t a; + simde_float16_t r3; + simde_float16_t r6; + simde_float16_t r10; + simde_float16_t r13; + simde_float16_t r16; + } test_vec[] = { + { UINT32_C(1379136183), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, 
+ SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 21040.00) }, + { UINT32_C( 345686584), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 42208.00), + SIMDE_FLOAT16_VALUE( 5276.00) }, + { UINT32_C(1836786760), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 28032.00) }, + { UINT32_C(1990685696), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 30368.00) }, + { UINT32_C( 151904435), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 18544.00), + SIMDE_FLOAT16_VALUE( 2318.00) }, + { UINT32_C(1663809632), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 25392.00) }, + { UINT32_C(3451609198), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 52672.00) }, + { UINT32_C( 87344717), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_FLOAT16_VALUE( 10664.00), + SIMDE_FLOAT16_VALUE( 1333.00) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t a = test_vec[i].a; + simde_float16_t r3 = simde_vcvth_n_f16_u32(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u32(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_u32(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u32(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u32(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r10, test_vec[i].r10, 1); + simde_assert_equal_f16(r13, test_vec[i].r13, 1); + simde_assert_equal_f16(r16, test_vec[i].r16, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + uint32_t a = simde_test_codegen_random_u32(); + simde_float16_t r3 = simde_vcvth_n_f16_u32(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u32(a, 6); + simde_float16_t r10 = 
simde_vcvth_n_f16_u32(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u32(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u32(a, 16); + + simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvth_n_f16_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint64_t a; + simde_float16_t r3; + simde_float16_t r6; + simde_float16_t r10; + simde_float16_t r13; + simde_float16_t r16; + } test_vec[] = { + { UINT64_C( 8216238257635551160), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C(12073173083987345430), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C(13918897271932492390), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C( 7173885298450818364), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C(17133198039240140329), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C(10481442221277357273), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C( 4413106528267478314), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + { UINT64_C(13219284874911206502), + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF, + SIMDE_INFINITYHF }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t a = 
test_vec[i].a; + simde_float16_t r3 = simde_vcvth_n_f16_u64(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u64(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_u64(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u64(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u64(a, 16); + + simde_assert_equal_f16(r3, test_vec[i].r3, 1); + simde_assert_equal_f16(r6, test_vec[i].r6, 1); + simde_assert_equal_f16(r10, test_vec[i].r10, 1); + simde_assert_equal_f16(r13, test_vec[i].r13, 1); + simde_assert_equal_f16(r16, test_vec[i].r16, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + uint64_t a = simde_test_codegen_random_u64(); + simde_float16_t r3 = simde_vcvth_n_f16_u64(a, 3); + simde_float16_t r6 = simde_vcvth_n_f16_u64(a, 6); + simde_float16_t r10 = simde_vcvth_n_f16_u64(a, 10); + simde_float16_t r13 = simde_vcvth_n_f16_u64(a, 13); + simde_float16_t r16 = simde_vcvth_n_f16_u64(a, 16); + + simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f16(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f16(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvts_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[1]; + int32_t r3[1]; + int32_t r10[1]; + int32_t r16[1]; + int32_t r23[1]; + int32_t r32[1]; + } test_vec[] = { + { { SIMDE_FLOAT32_C(-733.4) }, + { -INT32_C(5867) }, + { -INT32_C(751001) }, + { -INT32_C(48064104) }, + { INT32_MIN }, + { INT32_MIN } }, + { { SIMDE_FLOAT32_C( 883.313) }, + { INT32_C( 7066) }, + { INT32_C( 904512) }, + { INT32_C(57888800) }, + { INT32_MAX }, + { INT32_MAX } }, + { { SIMDE_FLOAT32_C( 750.328) }, + { INT32_C( 6002) }, + { INT32_C( 768335) }, + { INT32_C(49173496) }, + { INT32_MAX }, + { INT32_MAX } }, + { 
{ -SIMDE_FLOAT32_C( 171.275) }, + { -INT32_C( 1370) }, + { -INT32_C( 175385) }, + { -INT32_C(11224678) }, + { -INT32_C(1436758784) }, + { INT32_MIN } }, + { { -SIMDE_FLOAT32_C( 425.134) }, + { -INT32_C( 3401) }, + { -INT32_C( 435337) }, + { -INT32_C(27861582) }, + { INT32_MIN }, + { INT32_MIN } }, + { { SIMDE_FLOAT32_C( 532.107) }, + { INT32_C( 4256) }, + { INT32_C( 544877) }, + { INT32_C(34872164) }, + { INT32_MAX }, + { INT32_MAX } }, + { { -SIMDE_FLOAT32_C( 413.820) }, + { -INT32_C( 3310) }, + { -INT32_C( 423751) }, + { -INT32_C(27120108) }, + { INT32_MIN }, + { INT32_MIN } }, + { { SIMDE_FLOAT32_C( 221.030) }, + { INT32_C( 1768) }, + { INT32_C( 226334) }, + { INT32_C(14485422) }, + { INT32_C(1854134016) }, + { INT32_MAX } }, + { { -SIMDE_FLOAT32_C( 711.342) }, + { -INT32_C( 5690) }, + { -INT32_C( 728414) }, + { -INT32_C(46618508) }, + { INT32_MIN }, + { INT32_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32_t a = test_vec[i].a[0]; + int32_t r3 = simde_vcvts_n_s32_f32(a, 3); + int32_t r10 = simde_vcvts_n_s32_f32(a, 10); + int32_t r16 = simde_vcvts_n_s32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + int32_t r23 = simde_vcvts_n_s32_f32(a, 23); + int32_t r32 = simde_vcvts_n_s32_f32(a, 32); + #endif + + simde_assert_equal_i32(r3, test_vec[i].r3[0]); + simde_assert_equal_i32(r10, test_vec[i].r10[0]); + simde_assert_equal_i32(r16, test_vec[i].r16[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_i32(r23, test_vec[i].r23[0]); + simde_assert_equal_i32(r32, test_vec[i].r32[0]); + #endif + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + int32_t r3 = simde_vcvts_n_s32_f32(a, 3); + int32_t r10 = simde_vcvts_n_s32_f32(a, 10); + int32_t r16 = simde_vcvts_n_s32_f32(a, 16); + int32_t r23 = simde_vcvts_n_s32_f32(a, 23); + int32_t r32 = simde_vcvts_n_s32_f32(a, 32); + + 
simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvts_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[1]; + uint32_t r3[1]; + uint32_t r10[1]; + uint32_t r16[1]; + uint32_t r23[1]; + uint32_t r32[1]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 312.384) }, + { UINT32_C( 2499) }, + { UINT32_C( 319881) }, + { UINT32_C(20472398) }, + { UINT32_C(2620466944) }, + { UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 403.436) }, + { UINT32_C( 3227) }, + { UINT32_C( 413118) }, + { UINT32_C(26439582) }, + { UINT32_C(3384266496) }, + { UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 982.191) }, + { UINT32_C( 7857) }, + { UINT32_C( 1005763) }, + { UINT32_C(64368868) }, + { UINT32_MAX }, + { UINT32_MAX } }, + { { SIMDE_FLOAT32_C( 584.450) }, + { UINT32_C( 4675) }, + { UINT32_C( 598476) }, + { UINT32_C(38302516) }, + { UINT32_MAX }, + { UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32_t a = test_vec[i].a[0]; + uint32_t r3 = simde_vcvts_n_u32_f32(a, 3); + uint32_t r10 = simde_vcvts_n_u32_f32(a, 10); + uint32_t r16 = simde_vcvts_n_u32_f32(a, 16); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + uint32_t r23 = simde_vcvts_n_u32_f32(a, 23); + uint32_t r32 = simde_vcvts_n_u32_f32(a, 32); + #endif + + simde_assert_equal_u32(r3, test_vec[i].r3[0]); + simde_assert_equal_u32(r10, test_vec[i].r10[0]); + simde_assert_equal_u32(r16, test_vec[i].r16[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_u32(r23, test_vec[i].r23[0]); + simde_assert_equal_u32(r32, test_vec[i].r32[0]); + #endif + } + + return 0; + 
+#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x1_t a = simde_test_arm_neon_random_f32x1(-1000.0f, 1000.0f); + uint32_t r3 = simde_vcvts_n_u32_f32(a, 3); + uint32_t r10 = simde_vcvts_n_u32_f32(a, 10); + uint32_t r16 = simde_vcvts_n_u32_f32(a, 16); + uint32_t r23 = simde_vcvts_n_u32_f32(a, 23); + uint32_t r32 = simde_vcvts_n_u32_f32(a, 32); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvts_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int32_t a; + simde_float32_t r3; + simde_float32_t r10; + simde_float32_t r16; + simde_float32_t r23; + simde_float32_t r32; + } test_vec[] = { + { INT32_C( 382290179), + SIMDE_FLOAT32_C(47786272.00), + SIMDE_FLOAT32_C(373330.25), + SIMDE_FLOAT32_C( 5833.29), + SIMDE_FLOAT32_C( 45.57), + SIMDE_FLOAT32_C( 0.09) }, + { -INT32_C( 1375582254), + SIMDE_FLOAT32_C(-171947776.00), + SIMDE_FLOAT32_C(-1343342.00), + SIMDE_FLOAT32_C(-20989.72), + SIMDE_FLOAT32_C( -163.98), + SIMDE_FLOAT32_C( -0.32) }, + { -INT32_C( 176355251), + SIMDE_FLOAT32_C(-22044406.00), + SIMDE_FLOAT32_C(-172221.92), + SIMDE_FLOAT32_C( -2690.97), + SIMDE_FLOAT32_C( -21.02), + SIMDE_FLOAT32_C( -0.04) }, + { -INT32_C( 1699124069), + SIMDE_FLOAT32_C(-212390512.00), + SIMDE_FLOAT32_C(-1659300.88), + SIMDE_FLOAT32_C(-25926.58), + SIMDE_FLOAT32_C( -202.55), + SIMDE_FLOAT32_C( -0.40) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t a = test_vec[i].a; + simde_float32_t r3 = simde_vcvts_n_f32_s32(a, 3); + simde_float32_t r10 = simde_vcvts_n_f32_s32(a, 10); + simde_float32_t r16 = 
simde_vcvts_n_f32_s32(a, 16); + simde_float32_t r23 = simde_vcvts_n_f32_s32(a, 23); + simde_float32_t r32 = simde_vcvts_n_f32_s32(a, 32); + + simde_assert_equal_f32(r3, test_vec[i].r3, 1); + simde_assert_equal_f32(r10, test_vec[i].r10, 1); + simde_assert_equal_f32(r16, test_vec[i].r16, 1); + simde_assert_equal_f32(r23, test_vec[i].r23, 1); + simde_assert_equal_f32(r32, test_vec[i].r32, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 4 ; i++) { + int32_t a = simde_test_codegen_random_i32(); + simde_float32_t r3 = simde_vcvts_n_f32_s32(a, 3); + simde_float32_t r10 = simde_vcvts_n_f32_s32(a, 10); + simde_float32_t r16 = simde_vcvts_n_f32_s32(a, 16); + simde_float32_t r23 = simde_vcvts_n_f32_s32(a, 23); + simde_float32_t r32 = simde_vcvts_n_f32_s32(a, 32); + + simde_test_codegen_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvts_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint32_t a; + simde_float32_t r3; + simde_float32_t r10; + simde_float32_t r16; + simde_float32_t r23; + simde_float32_t r32; + } test_vec[] = { + { UINT32_C(1716442256), + SIMDE_FLOAT32_C(214555280.00), + SIMDE_FLOAT32_C(1676213.12), + SIMDE_FLOAT32_C( 26190.83), + SIMDE_FLOAT32_C( 204.62), + SIMDE_FLOAT32_C( 0.40) }, + { UINT32_C(1980926086), + SIMDE_FLOAT32_C(247615760.00), + SIMDE_FLOAT32_C(1934498.12), + SIMDE_FLOAT32_C( 30226.53), + SIMDE_FLOAT32_C( 236.14), + SIMDE_FLOAT32_C( 0.46) }, + { UINT32_C( 767096392), + SIMDE_FLOAT32_C(95887048.00), + SIMDE_FLOAT32_C(749117.56), + SIMDE_FLOAT32_C( 11704.96), + SIMDE_FLOAT32_C( 91.45), + SIMDE_FLOAT32_C( 0.18) }, + { UINT32_C(2969434285), 
+ SIMDE_FLOAT32_C(371179296.00), + SIMDE_FLOAT32_C(2899838.25), + SIMDE_FLOAT32_C( 45309.97), + SIMDE_FLOAT32_C( 353.98), + SIMDE_FLOAT32_C( 0.69) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t a = test_vec[i].a; + simde_float32_t r3 = simde_vcvts_n_f32_u32(a, 3); + simde_float32_t r10 = simde_vcvts_n_f32_u32(a, 10); + simde_float32_t r16 = simde_vcvts_n_f32_u32(a, 16); + simde_float32_t r23 = simde_vcvts_n_f32_u32(a, 23); + simde_float32_t r32 = simde_vcvts_n_f32_u32(a, 32); + + simde_assert_equal_f32(r3, test_vec[i].r3, 1); + simde_assert_equal_f32(r10, test_vec[i].r10, 1); + simde_assert_equal_f32(r16, test_vec[i].r16, 1); + simde_assert_equal_f32(r23, test_vec[i].r23, 1); + simde_assert_equal_f32(r32, test_vec[i].r32, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 4 ; i++) { + uint32_t a = simde_test_codegen_random_u32(); + simde_float32_t r3 = simde_vcvts_n_f32_u32(a, 3); + simde_float32_t r10 = simde_vcvts_n_f32_u32(a, 10); + simde_float32_t r16 = simde_vcvts_n_f32_u32(a, 16); + simde_float32_t r23 = simde_vcvts_n_f32_u32(a, 23); + simde_float32_t r32 = simde_vcvts_n_f32_u32(a, 32); + + simde_test_codegen_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f32(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f32(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtd_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64_t a[1]; + int64_t r3[1]; + int64_t r17[1]; + int64_t r38[1]; + int64_t r55[1]; + int64_t r64[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 98809.484) }, + { INT64_C( 790475) }, + { INT64_C( 12951156686) }, + { INT64_C( 27160544148136656) }, + { INT64_MAX }, + { INT64_MAX } }, + { { 
SIMDE_FLOAT64_C( 8345.477) }, + { INT64_C( 66763) }, + { INT64_C( 1093858361) }, + { INT64_C( 2293987250209292) }, + { INT64_MAX }, + { INT64_MAX } }, + { { SIMDE_FLOAT64_C( 69039.125) }, + { INT64_C( 552313) }, + { INT64_C( 9049096192) }, + { INT64_C( 18977330177245184) }, + { INT64_MAX }, + { INT64_MAX } }, + { { -SIMDE_FLOAT64_C( 44042.309) }, + { -INT64_C( 352338) }, + { -INT64_C( 5772713525) }, + { -INT64_C( 12106257714900894) }, + { INT64_MIN }, + { INT64_MIN } }, + { { -SIMDE_FLOAT64_C( 19111.727) }, + { -INT64_C( 152893) }, + { -INT64_C( 2505012281) }, + { -INT64_C( 5253391515845132) }, + { INT64_MIN }, + { INT64_MIN } }, + { { SIMDE_FLOAT64_C( 39608.250) }, + { INT64_C( 316866) }, + { INT64_C( 5191532544) }, + { INT64_C( 10887432857714688) }, + { INT64_MAX }, + { INT64_MAX } }, + { { -SIMDE_FLOAT64_C( 39541.906) }, + { -INT64_C( 316335) }, + { -INT64_C( 5182836703) }, + { -INT64_C( 10869196357856396) }, + { INT64_MIN }, + { INT64_MIN } }, + { { SIMDE_FLOAT64_C( 93824.031) }, + { INT64_C( 750592) }, + { INT64_C( 12297703391) }, + { INT64_C( 25790153262328972) }, + { INT64_MAX }, + { INT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64_t a = test_vec[i].a[0]; + int64_t r3 = simde_vcvtd_n_s64_f64(a, 3); + int64_t r17 = simde_vcvtd_n_s64_f64(a, 17); + int64_t r38 = simde_vcvtd_n_s64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + int64_t r55 = simde_vcvtd_n_s64_f64(a, 55); + int64_t r64 = simde_vcvtd_n_s64_f64(a, 64); + #endif + + simde_assert_equal_i64(r3, test_vec[i].r3[0]); + simde_assert_equal_i64(r17, test_vec[i].r17[0]); + simde_assert_equal_i64(r38, test_vec[i].r38[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_i64(r55, test_vec[i].r55[0]); + simde_assert_equal_i64(r64, test_vec[i].r64[0]); + #endif + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = 
simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + int64_t r3 = simde_vcvtd_n_s64_f64(a, 3); + int64_t r17 = simde_vcvtd_n_s64_f64(a, 17); + int64_t r38 = simde_vcvtd_n_s64_f64(a, 38); + int64_t r55 = simde_vcvtd_n_s64_f64(a, 55); + int64_t r64 = simde_vcvtd_n_s64_f64(a, 64); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtd_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64_t a[1]; + uint64_t r3[1]; + uint64_t r17[1]; + uint64_t r38[1]; + uint64_t r55[1]; + uint64_t r64[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 84182.672) }, + { UINT64_C( 673461) }, + { UINT64_C( 11033991184) }, + { UINT64_C( 23139956680313276) }, + { UINT64_MAX }, + { UINT64_MAX } }, + { { SIMDE_FLOAT64_C( 71694.797) }, + { UINT64_C( 573558) }, + { UINT64_C( 9397180432) }, + { UINT64_C( 19707315738134972) }, + { UINT64_MAX }, + { UINT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64_t a = test_vec[i].a[0]; + uint64_t r3 = simde_vcvtd_n_u64_f64(a, 3); + uint64_t r17 = simde_vcvtd_n_u64_f64(a, 17); + uint64_t r38 = simde_vcvtd_n_u64_f64(a, 38); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + uint64_t r55 = simde_vcvtd_n_u64_f64(a, 55); + uint64_t r64 = simde_vcvtd_n_u64_f64(a, 64); + #endif + + simde_assert_equal_u64(r3, test_vec[i].r3[0]); + simde_assert_equal_u64(r17, test_vec[i].r17[0]); + simde_assert_equal_u64(r38, test_vec[i].r38[0]); + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + simde_assert_equal_u64(r55, test_vec[i].r55[0]); + simde_assert_equal_u64(r64, 
test_vec[i].r64[0]); + #endif + } + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + uint64_t r3 = simde_vcvtd_n_u64_f64(a, 3); + uint64_t r17 = simde_vcvtd_n_u64_f64(a, 17); + uint64_t r38 = simde_vcvtd_n_u64_f64(a, 38); + uint64_t r55 = simde_vcvtd_n_u64_f64(a, 55); + uint64_t r64 = simde_vcvtd_n_u64_f64(a, 64); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtd_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int64_t a; + simde_float64_t r3; + simde_float64_t r17; + simde_float64_t r38; + simde_float64_t r55; + simde_float64_t r64; + } test_vec[] = { + { INT64_C( 1802271432678331317), + SIMDE_FLOAT64_C(225283929084791424.00), + SIMDE_FLOAT64_C(13750239812304.16), + SIMDE_FLOAT64_C(6556625.28), + SIMDE_FLOAT64_C( 50.02), + SIMDE_FLOAT64_C( 0.10) }, + { -INT64_C( 900386704286887549), + SIMDE_FLOAT64_C(-112548338035860944.00), + SIMDE_FLOAT64_C(-6869405397696.59), + SIMDE_FLOAT64_C(-3275587.75), + SIMDE_FLOAT64_C( -24.99), + SIMDE_FLOAT64_C( -0.05) }, + { INT64_C( 7487442315758603864), + SIMDE_FLOAT64_C(935930289469825536.00), + SIMDE_FLOAT64_C(57124651456898.53), + SIMDE_FLOAT64_C(27239156.46), + SIMDE_FLOAT64_C( 207.82), + SIMDE_FLOAT64_C( 0.41) }, + { INT64_C( 8649336546515843284), + SIMDE_FLOAT64_C(1081167068314480384.00), + SIMDE_FLOAT64_C(65989200946928.73), + SIMDE_FLOAT64_C(31466103.05), + SIMDE_FLOAT64_C( 240.07), + SIMDE_FLOAT64_C( 0.47) }, + { INT64_C( 2331510679777683696), + 
SIMDE_FLOAT64_C(291438834972210432.00), + SIMDE_FLOAT64_C(17788014829846.83), + SIMDE_FLOAT64_C(8481986.44), + SIMDE_FLOAT64_C( 64.71), + SIMDE_FLOAT64_C( 0.13) }, + { INT64_C( 6991199644197509633), + SIMDE_FLOAT64_C(873899955524688768.00), + SIMDE_FLOAT64_C(53338620332317.43), + SIMDE_FLOAT64_C(25433836.14), + SIMDE_FLOAT64_C( 194.04), + SIMDE_FLOAT64_C( 0.38) }, + { -INT64_C( 7255582914630055223), + SIMDE_FLOAT64_C(-906947864328756864.00), + SIMDE_FLOAT64_C(-55355704609909.48), + SIMDE_FLOAT64_C(-26395656.88), + SIMDE_FLOAT64_C( -201.38), + SIMDE_FLOAT64_C( -0.39) }, + { -INT64_C( 4777772969744650379), + SIMDE_FLOAT64_C(-597221621218081280.00), + SIMDE_FLOAT64_C(-36451514966923.91), + SIMDE_FLOAT64_C(-17381436.81), + SIMDE_FLOAT64_C( -132.61), + SIMDE_FLOAT64_C( -0.26) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t a = test_vec[i].a; + simde_float64_t r3 = simde_vcvtd_n_f64_s64(a, 3); + simde_float64_t r17 = simde_vcvtd_n_f64_s64(a, 17); + simde_float64_t r38 = simde_vcvtd_n_f64_s64(a, 38); + simde_float64_t r55 = simde_vcvtd_n_f64_s64(a, 55); + simde_float64_t r64 = simde_vcvtd_n_f64_s64(a, 64); + + simde_assert_equal_f64(r3, test_vec[i].r3, 1); + simde_assert_equal_f64(r17, test_vec[i].r17, 1); + simde_assert_equal_f64(r38, test_vec[i].r38, 1); + simde_assert_equal_f64(r55, test_vec[i].r55, 1); + simde_assert_equal_f64(r64, test_vec[i].r64, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + int64_t a = simde_test_codegen_random_i64(); + simde_float64_t r3 = simde_vcvtd_n_f64_s64(a, 3); + simde_float64_t r17 = simde_vcvtd_n_f64_s64(a, 17); + simde_float64_t r38 = simde_vcvtd_n_f64_s64(a, 38); + simde_float64_t r55 = simde_vcvtd_n_f64_s64(a, 55); + simde_float64_t r64 = simde_vcvtd_n_f64_s64(a, 64); + + simde_test_codegen_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f64(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r17, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtd_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint64_t a; + simde_float64_t r3; + simde_float64_t r17; + simde_float64_t r38; + simde_float64_t r55; + simde_float64_t r64; + } test_vec[] = { + { UINT64_C( 1176945894709255833), + SIMDE_FLOAT64_C(147118236838656992.00), + SIMDE_FLOAT64_C(8979384572671.94), + SIMDE_FLOAT64_C(4281704.22), + SIMDE_FLOAT64_C( 32.67), + SIMDE_FLOAT64_C( 0.06) }, + { UINT64_C( 1485949606174035764), + SIMDE_FLOAT64_C(185743700771754464.00), + SIMDE_FLOAT64_C(11336895799057.28), + SIMDE_FLOAT64_C(5405853.18), + SIMDE_FLOAT64_C( 41.24), + SIMDE_FLOAT64_C( 0.08) }, + { UINT64_C( 2779646423753012273), + SIMDE_FLOAT64_C(347455802969126528.00), + SIMDE_FLOAT64_C(21207019224189.85), + SIMDE_FLOAT64_C(10112294.78), + SIMDE_FLOAT64_C( 77.15), + SIMDE_FLOAT64_C( 0.15) }, + { UINT64_C( 360868977349458808), + SIMDE_FLOAT64_C(45108622168682352.00), + SIMDE_FLOAT64_C(2753211802287.74), + SIMDE_FLOAT64_C(1312833.69), + SIMDE_FLOAT64_C( 10.02), + SIMDE_FLOAT64_C( 0.02) }, + { UINT64_C(10380111440923722222), + SIMDE_FLOAT64_C(1297513930115465216.00), + SIMDE_FLOAT64_C(79193965461149.00), + SIMDE_FLOAT64_C(37762625.44), + SIMDE_FLOAT64_C( 288.11), + SIMDE_FLOAT64_C( 0.56) }, + { UINT64_C( 9763642735089037087), + SIMDE_FLOAT64_C(1220455341886129664.00), + SIMDE_FLOAT64_C(74490682488167.09), + SIMDE_FLOAT64_C(35519925.35), + SIMDE_FLOAT64_C( 271.00), + SIMDE_FLOAT64_C( 0.53) }, + { UINT64_C( 4393845023024577058), + SIMDE_FLOAT64_C(549230627878072128.00), + SIMDE_FLOAT64_C(33522377189823.74), + SIMDE_FLOAT64_C(15984715.08), + SIMDE_FLOAT64_C( 121.95), + SIMDE_FLOAT64_C( 0.24) }, + { UINT64_C( 6340649816565045645), + SIMDE_FLOAT64_C(792581227070630656.00), + 
SIMDE_FLOAT64_C(48375319035072.67), + SIMDE_FLOAT64_C(23067149.66), + SIMDE_FLOAT64_C( 175.99), + SIMDE_FLOAT64_C( 0.34) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t a = test_vec[i].a; + simde_float64_t r3 = simde_vcvtd_n_f64_u64(a, 3); + simde_float64_t r17 = simde_vcvtd_n_f64_u64(a, 17); + simde_float64_t r38 = simde_vcvtd_n_f64_u64(a, 38); + simde_float64_t r55 = simde_vcvtd_n_f64_u64(a, 55); + simde_float64_t r64 = simde_vcvtd_n_f64_u64(a, 64); + + simde_assert_equal_f64(r3, test_vec[i].r3, 1); + simde_assert_equal_f64(r17, test_vec[i].r17, 1); + simde_assert_equal_f64(r38, test_vec[i].r38, 1); + simde_assert_equal_f64(r55, test_vec[i].r55, 1); + simde_assert_equal_f64(r64, test_vec[i].r64, 1); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + uint64_t a = simde_test_codegen_random_u64(); + simde_float64_t r3 = simde_vcvtd_n_f64_u64(a, 3); + simde_float64_t r17 = simde_vcvtd_n_f64_u64(a, 17); + simde_float64_t r38 = simde_vcvtd_n_f64_u64(a, 38); + simde_float64_t r55 = simde_vcvtd_n_f64_u64(a, 55); + simde_float64_t r64 = simde_vcvtd_n_f64_u64(a, 64); + + simde_test_codegen_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_codegen_write_f64(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_codegen_write_f64(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[4]; int16_t r3[4]; @@ -42,10 +1792,31 @@ test_simde_vcvt_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_int16x4_t r3 = simde_vcvt_n_s16_f16(a, 3); + simde_int16x4_t r6 = 
simde_vcvt_n_s16_f16(a, 6); + simde_int16x4_t r10 = simde_vcvt_n_s16_f16(a, 10); + simde_int16x4_t r13 = simde_vcvt_n_s16_f16(a, 13); + simde_int16x4_t r16 = simde_vcvt_n_s16_f16(a, 16); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[2]; int32_t r3[2]; @@ -82,10 +1853,31 @@ test_simde_vcvt_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_int32x2_t r3 = simde_vcvt_n_s32_f32(a, 3); + simde_int32x2_t r10 = simde_vcvt_n_s32_f32(a, 10); + simde_int32x2_t r16 = simde_vcvt_n_s32_f32(a, 16); + simde_int32x2_t r23 = simde_vcvt_n_s32_f32(a, 23); + simde_int32x2_t r32 = simde_vcvt_n_s32_f32(a, 32); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[1]; int64_t r3[1]; @@ -133,10 +1925,33 @@ test_simde_vcvt_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = 
simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x1_t r3 = simde_vcvt_n_s64_f64(a, 3); + simde_int64x1_t r17 = simde_vcvt_n_s64_f64(a, 17); + simde_int64x1_t r23 = simde_vcvt_n_s64_f64(a, 23); + simde_int64x1_t r38 = simde_vcvt_n_s64_f64(a, 38); + simde_int64x1_t r55 = simde_vcvt_n_s64_f64(a, 55); + simde_int64x1_t r64 = simde_vcvt_n_s64_f64(a, 64); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[4]; uint16_t r3[4]; @@ -173,10 +1988,31 @@ test_simde_vcvt_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_uint16x4_t r3 = simde_vcvt_n_u16_f16(a, 3); + simde_uint16x4_t r6 = simde_vcvt_n_u16_f16(a, 6); + simde_uint16x4_t r10 = simde_vcvt_n_u16_f16(a, 10); + simde_uint16x4_t r13 = simde_vcvt_n_u16_f16(a, 13); + simde_uint16x4_t r16 = simde_vcvt_n_u16_f16(a, 16); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_u32_f32 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[2]; uint32_t r3[2]; @@ -213,10 +2049,31 @@ test_simde_vcvt_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_uint32x2_t r3 = simde_vcvt_n_u32_f32(a, 3); + simde_uint32x2_t r10 = simde_vcvt_n_u32_f32(a, 10); + simde_uint32x2_t r16 = simde_vcvt_n_u32_f32(a, 16); + simde_uint32x2_t r23 = simde_vcvt_n_u32_f32(a, 23); + simde_uint32x2_t r32 = simde_vcvt_n_u32_f32(a, 32); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[1]; uint64_t r3[1]; @@ -264,10 +2121,33 @@ test_simde_vcvt_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x1_t r3 = simde_vcvt_n_u64_f64(a, 3); + simde_uint64x1_t r17 = simde_vcvt_n_u64_f64(a, 17); + simde_uint64x1_t r23 = simde_vcvt_n_u64_f64(a, 23); + simde_uint64x1_t r38 = simde_vcvt_n_u64_f64(a, 38); + simde_uint64x1_t r55 = simde_vcvt_n_u64_f64(a, 55); + simde_uint64x1_t r64 = simde_vcvt_n_u64_f64(a, 64); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r23, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[8]; int16_t r3[8]; @@ -304,10 +2184,31 @@ test_simde_vcvtq_n_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_int16x8_t r3 = simde_vcvtq_n_s16_f16(a, 3); + simde_int16x8_t r6 = simde_vcvtq_n_s16_f16(a, 6); + simde_int16x8_t r10 = simde_vcvtq_n_s16_f16(a, 10); + simde_int16x8_t r13 = simde_vcvtq_n_s16_f16(a, 13); + simde_int16x8_t r16 = simde_vcvtq_n_s16_f16(a, 16); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[4]; int32_t r3[4]; @@ -344,10 +2245,31 @@ test_simde_vcvtq_n_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_int32x4_t r3 = simde_vcvtq_n_s32_f32(a, 3); + simde_int32x4_t r10 = simde_vcvtq_n_s32_f32(a, 10); + simde_int32x4_t r16 = simde_vcvtq_n_s32_f32(a, 16); + simde_int32x4_t r23 = simde_vcvtq_n_s32_f32(a, 23); + simde_int32x4_t r32 = simde_vcvtq_n_s32_f32(a, 32); + + simde_test_arm_neon_write_f32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[2]; int64_t r3[2]; @@ -388,10 +2310,33 @@ test_simde_vcvtq_n_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x2_t r3 = simde_vcvtq_n_s64_f64(a, 3); + simde_int64x2_t r17 = simde_vcvtq_n_s64_f64(a, 17); + simde_int64x2_t r23 = simde_vcvtq_n_s64_f64(a, 23); + simde_int64x2_t r38 = simde_vcvtq_n_s64_f64(a, 38); + simde_int64x2_t r55 = simde_vcvtq_n_s64_f64(a, 55); + simde_int64x2_t r64 = simde_vcvtq_n_s64_f64(a, 64); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[8]; uint16_t r3[8]; @@ -428,10 +2373,31 @@ test_simde_vcvtq_n_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_uint16x8_t r3 = 
simde_vcvtq_n_u16_f16(a, 3); + simde_uint16x8_t r6 = simde_vcvtq_n_u16_f16(a, 6); + simde_uint16x8_t r10 = simde_vcvtq_n_u16_f16(a, 10); + simde_uint16x8_t r13 = simde_vcvtq_n_u16_f16(a, 13); + simde_uint16x8_t r16 = simde_vcvtq_n_u16_f16(a, 16); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float32 a[4]; uint32_t r3[4]; @@ -468,10 +2434,31 @@ test_simde_vcvtq_n_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_uint32x4_t r3 = simde_vcvtq_n_u32_f32(a, 3); + simde_uint32x4_t r10 = simde_vcvtq_n_u32_f32(a, 10); + simde_uint32x4_t r16 = simde_vcvtq_n_u32_f32(a, 16); + simde_uint32x4_t r23 = simde_vcvtq_n_u32_f32(a, 23); + simde_uint32x4_t r32 = simde_vcvtq_n_u32_f32(a, 32); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { simde_float64 a[2]; uint64_t r3[2]; @@ -512,10 +2499,33 @@ test_simde_vcvtq_n_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + 
fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x2_t r3 = simde_vcvtq_n_u64_f64(a, 3); + simde_uint64x2_t r17 = simde_vcvtq_n_u64_f64(a, 17); + simde_uint64x2_t r23 = simde_vcvtq_n_u64_f64(a, 23); + simde_uint64x2_t r38 = simde_vcvtq_n_u64_f64(a, 38); + simde_uint64x2_t r55 = simde_vcvtq_n_u64_f64(a, 55); + simde_uint64x2_t r64 = simde_vcvtq_n_u64_f64(a, 64); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; simde_float16_t r3[4]; @@ -554,10 +2564,31 @@ test_simde_vcvt_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_float16x4_t r3 = simde_vcvt_n_f16_u16(a, 3); + simde_float16x4_t r6 = simde_vcvt_n_f16_u16(a, 6); + simde_float16x4_t r10 = simde_vcvt_n_f16_u16(a, 10); + simde_float16x4_t r13 = simde_vcvt_n_f16_u16(a, 13); + simde_float16x4_t r16 = simde_vcvt_n_f16_u16(a, 16); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r16, 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; simde_float16_t r3[4]; @@ -596,10 +2627,31 @@ test_simde_vcvt_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_float16x4_t r3 = simde_vcvt_n_f16_s16(a, 3); + simde_float16x4_t r6 = simde_vcvt_n_f16_s16(a, 6); + simde_float16x4_t r10 = simde_vcvt_n_f16_s16(a, 10); + simde_float16x4_t r13 = simde_vcvt_n_f16_s16(a, 13); + simde_float16x4_t r16 = simde_vcvt_n_f16_s16(a, 16); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; simde_float16_t r3[8]; @@ -632,10 +2684,31 @@ test_simde_vcvtq_n_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_float16x8_t r3 = simde_vcvtq_n_f16_u16(a, 3); + simde_float16x8_t r6 = simde_vcvtq_n_f16_u16(a, 6); + simde_float16x8_t r10 = simde_vcvtq_n_f16_u16(a, 10); + simde_float16x8_t r13 = simde_vcvtq_n_f16_u16(a, 13); + simde_float16x8_t r16 = simde_vcvtq_n_f16_u16(a, 16); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + 
simde_test_arm_neon_write_f16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; simde_float16_t r3[8]; @@ -668,10 +2741,31 @@ test_simde_vcvtq_n_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_float16x8_t r3 = simde_vcvtq_n_f16_s16(a, 3); + simde_float16x8_t r6 = simde_vcvtq_n_f16_s16(a, 6); + simde_float16x8_t r10 = simde_vcvtq_n_f16_s16(a, 10); + simde_float16x8_t r13 = simde_vcvtq_n_f16_s16(a, 13); + simde_float16x8_t r16 = simde_vcvtq_n_f16_s16(a, 16); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r6, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r13, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r16, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; simde_float32 r3[4]; @@ -704,10 +2798,31 @@ test_simde_vcvtq_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_float32x4_t r3 = simde_vcvtq_n_f32_s32(a, 3); + simde_float32x4_t r10 = simde_vcvtq_n_f32_s32(a, 10); + simde_float32x4_t r16 = simde_vcvtq_n_f32_s32(a, 16); + simde_float32x4_t r23 = simde_vcvtq_n_f32_s32(a, 23); + simde_float32x4_t r32 = simde_vcvtq_n_f32_s32(a, 32); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r10, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; simde_float32 r3[2]; @@ -746,10 +2861,31 @@ test_simde_vcvt_n_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_float32x2_t r3 = simde_vcvt_n_f32_s32(a, 3); + simde_float32x2_t r10 = simde_vcvt_n_f32_s32(a, 10); + simde_float32x2_t r16 = simde_vcvt_n_f32_s32(a, 16); + simde_float32x2_t r23 = simde_vcvt_n_f32_s32(a, 23); + simde_float32x2_t r32 = simde_vcvt_n_f32_s32(a, 32); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[1]; simde_float64 r3[1]; @@ -793,11 +2929,34 @@ test_simde_vcvt_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); + simde_float64x1_t r3 = simde_vcvt_n_f64_u64(a, 3); + simde_float64x1_t r17 = simde_vcvt_n_f64_u64(a, 17); + simde_float64x1_t r23 = simde_vcvt_n_f64_u64(a, 23); + simde_float64x1_t r38 = simde_vcvt_n_f64_u64(a, 38); + simde_float64x1_t r55 = simde_vcvt_n_f64_u64(a, 55); + simde_float64x1_t r64 = simde_vcvt_n_f64_u64(a, 64); + + simde_test_arm_neon_write_u64x1(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } /* Eric: Skip this function since it will trigger a compiler error when using i686-linux-gnu-g++-11. static int test_simde_vcvtq_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[2]; simde_float64 r3[2]; @@ -841,11 +3000,34 @@ test_simde_vcvtq_n_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_float64x2_t r3 = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r17 = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r23 = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r38 = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r55 = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r64 = simde_vcvtq_n_f64_u64(a, r3, r17, r23, r38, r55); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r64, 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } */ static int test_simde_vcvt_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[1]; simde_float64 r3[1]; @@ -889,10 +3071,33 @@ test_simde_vcvt_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); + simde_float64x1_t r3 = simde_vcvt_n_f64_s64(a, 3); + simde_float64x1_t r17 = simde_vcvt_n_f64_s64(a, 17); + simde_float64x1_t r23 = simde_vcvt_n_f64_s64(a, 23); + simde_float64x1_t r38 = simde_vcvt_n_f64_s64(a, 38); + simde_float64x1_t r55 = simde_vcvt_n_f64_s64(a, 55); + simde_float64x1_t r64 = simde_vcvt_n_f64_s64(a, 64); + + simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x1(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x1(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[2]; simde_float64 r3[2]; @@ -929,10 +3134,33 @@ test_simde_vcvtq_n_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_float64x2_t r3 = simde_vcvtq_n_f64_s64(a, 3); + simde_float64x2_t r17 = simde_vcvtq_n_f64_s64(a, 17); + simde_float64x2_t r23 = simde_vcvtq_n_f64_s64(a, 23); + simde_float64x2_t r38 = simde_vcvtq_n_f64_s64(a, 38); + simde_float64x2_t r55 = simde_vcvtq_n_f64_s64(a, 55); + simde_float64x2_t r64 = simde_vcvtq_n_f64_s64(a, 64); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, r3, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r17, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r38, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r55, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r64, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvtq_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; simde_float32 r3[4]; @@ -965,10 +3193,31 @@ test_simde_vcvtq_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_float32x4_t r3 = simde_vcvtq_n_f32_u32(a, 3); + simde_float32x4_t r10 = simde_vcvtq_n_f32_u32(a, 10); + simde_float32x4_t r16 = simde_vcvtq_n_f32_u32(a, 16); + simde_float32x4_t r23 = simde_vcvtq_n_f32_u32(a, 23); + simde_float32x4_t r32 = simde_vcvtq_n_f32_u32(a, 32); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vcvt_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; simde_float32 r3[2]; @@ -1007,10 +3256,53 @@ test_simde_vcvt_n_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_float32x2_t r3 = simde_vcvt_n_f32_u32(a, 3); + simde_float32x2_t r10 = simde_vcvt_n_f32_u32(a, 10); + simde_float32x2_t r16 = simde_vcvt_n_f32_u32(a, 16); + simde_float32x2_t r23 = simde_vcvt_n_f32_u32(a, 23); + 
simde_float32x2_t r32 = simde_vcvt_n_f32_u32(a, 32); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r10, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r16, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r23, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r32, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_u64_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvth_n_f16_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_n_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_n_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_n_f32_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvts_n_f32_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_n_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_n_u64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_n_f64_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtd_n_f64_u64) + SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s16_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s32_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vcvt_n_s64_f64) diff --git a/test/arm/neon/cvtm.c b/test/arm/neon/cvtm.c new file mode 100644 index 000000000..58bf5c524 --- /dev/null +++ b/test/arm/neon/cvtm.c @@ -0,0 +1,1158 @@ +#define SIMDE_TEST_ARM_NEON_INSN cvtm + +#include "test-neon.h" +#include "../../../simde/arm/neon/cvtm.h" + +static int +test_simde_vcvtmq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + int32_t r[4]; + } test_vec[] = { + { { 
SIMDE_FLOAT32_C( 404.944), SIMDE_FLOAT32_C( 424.355), SIMDE_FLOAT32_C( 955.709), -SIMDE_FLOAT32_C( 841.813) }, + { INT32_C( 404), INT32_C( 424), INT32_C( 955), -INT32_C( 842) } }, + { { SIMDE_FLOAT32_C( 34.258), -SIMDE_FLOAT32_C( 239.332), -SIMDE_FLOAT32_C( 971.203), -SIMDE_FLOAT32_C( 672.986) }, + { INT32_C( 34), -INT32_C( 240), -INT32_C( 972), -INT32_C( 673) } }, + { { SIMDE_FLOAT32_C( 604.428), SIMDE_FLOAT32_C( 472.493), SIMDE_FLOAT32_C( 491.446), SIMDE_FLOAT32_C( 510.090) }, + { INT32_C( 604), INT32_C( 472), INT32_C( 491), INT32_C( 510) } }, + { { SIMDE_FLOAT32_C( 883.445), -SIMDE_FLOAT32_C( 876.070), SIMDE_FLOAT32_C( 620.121), -SIMDE_FLOAT32_C( 97.364) }, + { INT32_C( 883), -INT32_C( 877), INT32_C( 620), -INT32_C( 98) } }, + { { -SIMDE_FLOAT32_C( 696.852), SIMDE_FLOAT32_C( 176.132), -SIMDE_FLOAT32_C( 84.301), SIMDE_FLOAT32_C( 639.894) }, + { -INT32_C( 697), INT32_C( 176), -INT32_C( 85), INT32_C( 639) } }, + { { -SIMDE_FLOAT32_C( 630.062), SIMDE_FLOAT32_C( 393.803), SIMDE_FLOAT32_C( 853.109), SIMDE_FLOAT32_C( 346.168) }, + { -INT32_C( 631), INT32_C( 393), INT32_C( 853), INT32_C( 346) } }, + { { SIMDE_FLOAT32_C( 720.840), -SIMDE_FLOAT32_C( 629.362), -SIMDE_FLOAT32_C( 168.845), -SIMDE_FLOAT32_C( 790.098) }, + { INT32_C( 720), -INT32_C( 630), -INT32_C( 169), -INT32_C( 791) } }, + { { -SIMDE_FLOAT32_C( 617.821), -SIMDE_FLOAT32_C( 162.686), SIMDE_FLOAT32_C( 19.353), SIMDE_FLOAT32_C( 212.584) }, + { -INT32_C( 618), -INT32_C( 163), INT32_C( 19), INT32_C( 212) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_int32x4_t r = simde_vcvtmq_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_int32x4_t r = simde_vcvtmq_s32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + int64_t r[2]; + } test_vec[] = { + { { -SIMDE_FLOAT64_C( 94430.055), -SIMDE_FLOAT64_C( 3762.398) }, + { -INT64_C( 94431), -INT64_C( 3763) } }, + { { SIMDE_FLOAT64_C( 6248.047), SIMDE_FLOAT64_C( 7172.813) }, + { INT64_C( 6248), INT64_C( 7172) } }, + { { SIMDE_FLOAT64_C( 27462.281), SIMDE_FLOAT64_C( 80763.266) }, + { INT64_C( 27462), INT64_C( 80763) } }, + { { SIMDE_FLOAT64_C( 43674.109), SIMDE_FLOAT64_C( 69458.109) }, + { INT64_C( 43674), INT64_C( 69458) } }, + { { SIMDE_FLOAT64_C( 90122.016), -SIMDE_FLOAT64_C( 67679.383) }, + { INT64_C( 90122), -INT64_C( 67680) } }, + { { -SIMDE_FLOAT64_C( 65745.375), -SIMDE_FLOAT64_C( 79947.688) }, + { -INT64_C( 65746), -INT64_C( 79948) } }, + { { SIMDE_FLOAT64_C( 93151.797), -SIMDE_FLOAT64_C( 13928.320) }, + { INT64_C( 93151), -INT64_C( 13929) } }, + { { SIMDE_FLOAT64_C( 35406.250), -SIMDE_FLOAT64_C( 43005.527) }, + { INT64_C( 35406), -INT64_C( 43006) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_int64x2_t r = simde_vcvtmq_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x2_t r = simde_vcvtmq_s64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { 
SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MAX)), + INT64_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MIN)), + INT64_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( - 29.102), + -INT64_C( 30) }, + { SIMDE_FLOAT16_VALUE( - 15.400), + -INT64_C( 16) }, + { SIMDE_FLOAT16_VALUE( - 5.887), + -INT64_C( 6) }, + { SIMDE_FLOAT16_VALUE( 16.529), + INT64_C( 16) }, + { SIMDE_FLOAT16_VALUE( 17.641), + INT64_C( 17) }, + { SIMDE_FLOAT16_VALUE( 16.624), + INT64_C( 16) }, + { SIMDE_FLOAT16_VALUE( 14.373), + INT64_C( 14) }, + { SIMDE_FLOAT16_VALUE( - 16.813), + -INT64_C( 17) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int64_t r = simde_vcvtmh_s64_f16(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int64_t r = simde_vcvtmh_s64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX+1000ll)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)), + INT32_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN-1000ll)), + INT32_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 12.44), + INT32_C( 12) }, + { SIMDE_FLOAT16_VALUE( 30.46), 
+ INT32_C( 30) }, + { SIMDE_FLOAT16_VALUE( 16.51), + INT32_C( 16) }, + { SIMDE_FLOAT16_VALUE( 74.89), + INT32_C( 74) }, + { SIMDE_FLOAT16_VALUE( -24.05), + -INT32_C( 25) }, + { SIMDE_FLOAT16_VALUE( -7.75), + -INT32_C( 8) }, + { SIMDE_FLOAT16_VALUE( -57.31), + -INT32_C( 58) }, + { SIMDE_FLOAT16_VALUE( -14.65), + -INT32_C( 15) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int32_t r = simde_vcvtmh_s32_f16(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int32_t r = simde_vcvtmh_s32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX) + SIMDE_FLOAT32_C(100.0)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN)), + INT16_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN) + SIMDE_FLOAT32_C(-100.0)), + INT16_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( - 15.279), + -INT16_C( 16) }, + { SIMDE_FLOAT16_VALUE( - 15.541), + -INT16_C( 16) }, + { SIMDE_FLOAT16_VALUE( 24.266), + INT16_C( 24) }, + { SIMDE_FLOAT16_VALUE( 28.626), + INT16_C( 28) }, + { SIMDE_FLOAT16_VALUE( - 17.731), + -INT16_C( 18) }, + { SIMDE_FLOAT16_VALUE( - 14.985), + -INT16_C( 15) }, + { SIMDE_FLOAT16_VALUE( - 26.154), + -INT16_C( 27) }, + { SIMDE_FLOAT16_VALUE( 5.330), + 
INT16_C( 5) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int16_t r = simde_vcvtmh_s16_f16(a); + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int16_t r = simde_vcvtmh_s16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtms_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), + INT32_MIN }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) + SIMDE_FLOAT32_C(-1000.0), + INT32_MIN }, + { SIMDE_FLOAT32_C( 0.0), + INT32_C( 0) }, + #endif + { -SIMDE_FLOAT32_C( 842.300), + -INT32_C( 843) }, + { -SIMDE_FLOAT32_C( 649.091), + -INT32_C( 650) }, + { SIMDE_FLOAT32_C( 310.002), + INT32_C( 310) }, + { -SIMDE_FLOAT32_C( 803.887), + -INT32_C( 804) }, + { -SIMDE_FLOAT32_C( 527.652), + -INT32_C( 528) }, + { -SIMDE_FLOAT32_C( 327.117), + -INT32_C( 328) }, + { SIMDE_FLOAT32_C( 198.410), + INT32_C( 198) }, + { -SIMDE_FLOAT32_C( 75.376), + -INT32_C( 76) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + int32_t r = simde_vcvtms_s32_f32(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_int32_t r = simde_vcvtms_s32_f32(a); + + 
simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX)), + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT64_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 10.401), + UINT64_C( 10) }, + { SIMDE_FLOAT16_VALUE( 11.014), + UINT64_C( 11) }, + { SIMDE_FLOAT16_VALUE( 20.952), + UINT64_C( 20) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint64_t r = simde_vcvtmh_u64_f16(a); + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint64_t r = simde_vcvtmh_u64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + UINT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)), + UINT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX+1000ll)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT32_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 0.691), + UINT32_C( 0) }, + { SIMDE_FLOAT16_VALUE( 24.104), + UINT32_C( 24) }, + { SIMDE_FLOAT16_VALUE( 5.841), + UINT32_C( 5) }, + { SIMDE_FLOAT16_VALUE( 
23.950), + UINT32_C( 23) }, + { SIMDE_FLOAT16_VALUE( 1.348), + UINT32_C( 1) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint32_t r = simde_vcvtmh_u32_f16(a); + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint32_t r = simde_vcvtmh_u32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmh_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX)), + UINT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX+1000)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.0), + UINT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 28.461), + UINT16_C( 28) }, + { SIMDE_FLOAT16_VALUE( 16.044), + UINT16_C( 16) }, + { SIMDE_FLOAT16_VALUE( 5.757), + UINT16_C( 5) }, + { SIMDE_FLOAT16_VALUE( 4.509), + UINT16_C( 4) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint16_t r = simde_vcvtmh_u16_f16(a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcvtmh_u16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtms_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { 
+#if 1 + static const struct { + simde_float32 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX), + UINT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), + UINT32_MAX }, + { SIMDE_FLOAT32_C(-1000.0), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 0.0), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT32_C( 550.582), + UINT32_C( 550) }, + { SIMDE_FLOAT32_C( 378.414), + UINT32_C( 378) }, + { SIMDE_FLOAT32_C( 903.633), + UINT32_C( 903) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + uint32_t r = simde_vcvtms_u32_f32(a); + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_uint32_t r = simde_vcvtms_u32_f32(a); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + uint32_t r[4]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(10000.0), SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, + { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + #endif + { { SIMDE_FLOAT32_C( 683.92), SIMDE_FLOAT32_C( 226.88), SIMDE_FLOAT32_C( 659.62), SIMDE_FLOAT32_C( 439.27) }, + { UINT32_C( 683), UINT32_C( 226), UINT32_C( 659), UINT32_C( 439) } }, + { { SIMDE_FLOAT32_C( 917.53), SIMDE_FLOAT32_C( 947.14), SIMDE_FLOAT32_C( 341.17), SIMDE_FLOAT32_C( 418.62) }, + { UINT32_C( 917), UINT32_C( 947), UINT32_C( 341), UINT32_C( 418) } }, + { { SIMDE_FLOAT32_C( 513.08), SIMDE_FLOAT32_C( 844.72), SIMDE_FLOAT32_C( 
576.00), SIMDE_FLOAT32_C( 196.76) }, + { UINT32_C( 513), UINT32_C( 844), UINT32_C( 576), UINT32_C( 196) } }, + { { SIMDE_FLOAT32_C( 586.32), SIMDE_FLOAT32_C( 651.30), SIMDE_FLOAT32_C( 99.84), SIMDE_FLOAT32_C( 666.55) }, + { UINT32_C( 586), UINT32_C( 651), UINT32_C( 99), UINT32_C( 666) } }, + { { SIMDE_FLOAT32_C( 483.00), SIMDE_FLOAT32_C( 100.67), SIMDE_FLOAT32_C( 797.45), SIMDE_FLOAT32_C( 735.83) }, + { UINT32_C( 483), UINT32_C( 100), UINT32_C( 797), UINT32_C( 735) } }, + { { SIMDE_FLOAT32_C( 418.68), SIMDE_FLOAT32_C( 118.54), SIMDE_FLOAT32_C( 312.65), SIMDE_FLOAT32_C( 13.40) }, + { UINT32_C( 418), UINT32_C( 118), UINT32_C( 312), UINT32_C( 13) } }, + { { SIMDE_FLOAT32_C( 0.63), SIMDE_FLOAT32_C( 108.62), SIMDE_FLOAT32_C( 377.25), SIMDE_FLOAT32_C( 439.53) }, + { UINT32_C( 0), UINT32_C( 108), UINT32_C( 377), UINT32_C( 439) } }, + { { SIMDE_FLOAT32_C( 569.79), SIMDE_FLOAT32_C( 110.84), SIMDE_FLOAT32_C( 287.33), SIMDE_FLOAT32_C( 253.70) }, + { UINT32_C( 569), UINT32_C( 110), UINT32_C( 287), UINT32_C( 253) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_uint32x4_t r = simde_vcvtmq_u32_f32(a); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(SIMDE_FLOAT32_C(0.0), SIMDE_FLOAT32_C(1000.0)); + simde_uint32x4_t r = simde_vcvtmq_u32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmd_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX), + INT64_MAX }, + { 
HEDLEY_STATIC_CAST(simde_float64, INT64_MAX) + SIMDE_FLOAT64_C(10000.0), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN), + INT64_MIN }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) + SIMDE_FLOAT64_C(-10000.0), + INT64_MIN }, + { SIMDE_FLOAT64_C( 0.0), + INT64_C( 0) }, + #endif + { -SIMDE_FLOAT64_C( 70876.391), + -INT64_C( 70877) }, + { -SIMDE_FLOAT64_C( 75147.281), + -INT64_C( 75148) }, + { SIMDE_FLOAT64_C( 60409.406), + INT64_C( 60409) }, + { SIMDE_FLOAT64_C( 42836.578), + INT64_C( 42836) }, + { SIMDE_FLOAT64_C( 42453.047), + INT64_C( 42453) }, + { SIMDE_FLOAT64_C( 17095.617), + INT64_C( 17095) }, + { -SIMDE_FLOAT64_C( 65433.555), + -INT64_C( 65434) }, + { -SIMDE_FLOAT64_C( 72962.914), + -INT64_C( 72963) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + int64_t r = simde_vcvtmd_s64_f64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_arm_neon_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64_t r = simde_vcvtmd_s64_f64(a); + + simde_test_arm_neon_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmd_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX), + UINT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(10000.0), + UINT64_MAX }, + { SIMDE_FLOAT64_C(-1000.0), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 0.0), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT64_C( 24923.992), + UINT64_C( 24923) }, + { SIMDE_FLOAT64_C( 18610.977), + UINT64_C( 18610) }, + { SIMDE_FLOAT64_C( 31085.938), + UINT64_C( 31085) }, + { 
SIMDE_FLOAT64_C( 81679.125), + UINT64_C( 81679) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + uint64_t r = simde_vcvtmd_u64_f64(a); + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_arm_neon_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64_t r = simde_vcvtmd_u64_f64(a); + + simde_test_arm_neon_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + uint64_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(10000.0) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { -SIMDE_MATH_NAN, -SIMDE_FLOAT64_C(10000.0) }, + { UINT64_C( 0), UINT64_C( 0) } }, + #endif + { { SIMDE_FLOAT64_C( 337.71), SIMDE_FLOAT64_C( 946.94) }, + { UINT64_C( 337), UINT64_C( 946) } }, + { { SIMDE_FLOAT64_C( 692.98), SIMDE_FLOAT64_C( 255.25) }, + { UINT64_C( 692), UINT64_C( 255) } }, + { { SIMDE_FLOAT64_C( 894.09), SIMDE_FLOAT64_C( 34.15) }, + { UINT64_C( 894), UINT64_C( 34) } }, + { { SIMDE_FLOAT64_C( 673.86), SIMDE_FLOAT64_C( 407.16) }, + { UINT64_C( 673), UINT64_C( 407) } }, + { { SIMDE_FLOAT64_C( 878.87), SIMDE_FLOAT64_C( 249.86) }, + { UINT64_C( 878), UINT64_C( 249) } }, + { { SIMDE_FLOAT64_C( 603.92), SIMDE_FLOAT64_C( 465.18) }, + { UINT64_C( 603), UINT64_C( 465) } }, + { { SIMDE_FLOAT64_C( 901.16), SIMDE_FLOAT64_C( 703.76) }, + { UINT64_C( 901), UINT64_C( 703) } }, + { { SIMDE_FLOAT64_C( 131.73), SIMDE_FLOAT64_C( 384.16) }, + { UINT64_C( 131), UINT64_C( 384) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + 
simde_uint64x2_t r = simde_vcvtmq_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(0.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x2_t r = simde_vcvtmq_u64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + int16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 8.496), SIMDE_FLOAT16_VALUE( 18.975), SIMDE_FLOAT16_VALUE( - 2.199), SIMDE_FLOAT16_VALUE( - 20.493), + SIMDE_FLOAT16_VALUE( - 29.500), SIMDE_FLOAT16_VALUE( 4.372), SIMDE_FLOAT16_VALUE( - 0.571), SIMDE_FLOAT16_VALUE( 15.297) }, + { -INT16_C( 9), INT16_C( 18), -INT16_C( 3), -INT16_C( 21), + -INT16_C( 30), INT16_C( 4), -INT16_C( 1), INT16_C( 15) } }, + { { SIMDE_FLOAT16_VALUE( - 6.416), SIMDE_FLOAT16_VALUE( - 28.037), SIMDE_FLOAT16_VALUE( 0.233), SIMDE_FLOAT16_VALUE( - 2.340), + SIMDE_FLOAT16_VALUE( 21.686), SIMDE_FLOAT16_VALUE( 15.884), SIMDE_FLOAT16_VALUE( 5.613), SIMDE_FLOAT16_VALUE( - 3.479) }, + { -INT16_C( 7), -INT16_C( 29), INT16_C( 0), -INT16_C( 3), + INT16_C( 21), INT16_C( 15), INT16_C( 5), -INT16_C( 4) } }, + { { SIMDE_FLOAT16_VALUE( 29.801), SIMDE_FLOAT16_VALUE( 5.407), SIMDE_FLOAT16_VALUE( - 0.666), SIMDE_FLOAT16_VALUE( 24.410), + SIMDE_FLOAT16_VALUE( - 29.209), SIMDE_FLOAT16_VALUE( - 5.956), SIMDE_FLOAT16_VALUE( - 18.351), SIMDE_FLOAT16_VALUE( - 24.316) }, + { INT16_C( 29), INT16_C( 5), -INT16_C( 1), INT16_C( 24), + -INT16_C( 30), -INT16_C( 6), -INT16_C( 19), -INT16_C( 25) } }, + { { SIMDE_FLOAT16_VALUE( - 20.572), SIMDE_FLOAT16_VALUE( - 24.745), SIMDE_FLOAT16_VALUE( - 22.219), SIMDE_FLOAT16_VALUE( 20.375), + SIMDE_FLOAT16_VALUE( - 22.035), SIMDE_FLOAT16_VALUE( - 19.171), 
SIMDE_FLOAT16_VALUE( - 26.111), SIMDE_FLOAT16_VALUE( - 19.992) }, + { -INT16_C( 21), -INT16_C( 25), -INT16_C( 23), INT16_C( 20), + -INT16_C( 23), -INT16_C( 20), -INT16_C( 27), -INT16_C( 20) } }, + { { SIMDE_FLOAT16_VALUE( 17.446), SIMDE_FLOAT16_VALUE( - 21.229), SIMDE_FLOAT16_VALUE( - 23.525), SIMDE_FLOAT16_VALUE( 21.821), + SIMDE_FLOAT16_VALUE( 19.282), SIMDE_FLOAT16_VALUE( - 25.940), SIMDE_FLOAT16_VALUE( 17.691), SIMDE_FLOAT16_VALUE( 1.353) }, + { INT16_C( 17), -INT16_C( 22), -INT16_C( 24), INT16_C( 21), + INT16_C( 19), -INT16_C( 26), INT16_C( 17), INT16_C( 1) } }, + { { SIMDE_FLOAT16_VALUE( 17.782), SIMDE_FLOAT16_VALUE( 22.188), SIMDE_FLOAT16_VALUE( - 14.281), SIMDE_FLOAT16_VALUE( 10.450), + SIMDE_FLOAT16_VALUE( - 20.900), SIMDE_FLOAT16_VALUE( 7.321), SIMDE_FLOAT16_VALUE( - 3.231), SIMDE_FLOAT16_VALUE( 16.327) }, + { INT16_C( 17), INT16_C( 22), -INT16_C( 15), INT16_C( 10), + -INT16_C( 21), INT16_C( 7), -INT16_C( 4), INT16_C( 16) } }, + { { SIMDE_FLOAT16_VALUE( 24.787), SIMDE_FLOAT16_VALUE( - 17.192), SIMDE_FLOAT16_VALUE( - 22.870), SIMDE_FLOAT16_VALUE( - 24.778), + SIMDE_FLOAT16_VALUE( 4.885), SIMDE_FLOAT16_VALUE( - 23.686), SIMDE_FLOAT16_VALUE( - 5.354), SIMDE_FLOAT16_VALUE( - 25.350) }, + { INT16_C( 24), -INT16_C( 18), -INT16_C( 23), -INT16_C( 25), + INT16_C( 4), -INT16_C( 24), -INT16_C( 6), -INT16_C( 26) } }, + { { SIMDE_FLOAT16_VALUE( - 25.796), SIMDE_FLOAT16_VALUE( 0.055), SIMDE_FLOAT16_VALUE( - 23.812), SIMDE_FLOAT16_VALUE( - 7.248), + SIMDE_FLOAT16_VALUE( 2.304), SIMDE_FLOAT16_VALUE( 21.545), SIMDE_FLOAT16_VALUE( - 11.785), SIMDE_FLOAT16_VALUE( - 21.974) }, + { -INT16_C( 26), INT16_C( 0), -INT16_C( 24), -INT16_C( 8), + INT16_C( 2), INT16_C( 21), -INT16_C( 12), -INT16_C( 22) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_int16x8_t r = simde_vcvtmq_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; 
+ +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_int16x8_t r = simde_vcvtmq_s16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + int16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 14.252), SIMDE_FLOAT16_VALUE( - 8.932), SIMDE_FLOAT16_VALUE( - 10.075), SIMDE_FLOAT16_VALUE( 2.645) }, + { -INT16_C( 15), -INT16_C( 9), -INT16_C( 11), INT16_C( 2) } }, + { { SIMDE_FLOAT16_VALUE( - 27.688), SIMDE_FLOAT16_VALUE( 12.692), SIMDE_FLOAT16_VALUE( - 13.616), SIMDE_FLOAT16_VALUE( - 25.644) }, + { -INT16_C( 28), INT16_C( 12), -INT16_C( 14), -INT16_C( 26) } }, + { { SIMDE_FLOAT16_VALUE( - 21.882), SIMDE_FLOAT16_VALUE( 23.002), SIMDE_FLOAT16_VALUE( - 0.430), SIMDE_FLOAT16_VALUE( 26.610) }, + { -INT16_C( 22), INT16_C( 23), -INT16_C( 1), INT16_C( 26) } }, + { { SIMDE_FLOAT16_VALUE( - 6.937), SIMDE_FLOAT16_VALUE( - 11.037), SIMDE_FLOAT16_VALUE( - 19.775), SIMDE_FLOAT16_VALUE( 18.979) }, + { -INT16_C( 7), -INT16_C( 12), -INT16_C( 20), INT16_C( 18) } }, + { { SIMDE_FLOAT16_VALUE( 8.040), SIMDE_FLOAT16_VALUE( 25.591), SIMDE_FLOAT16_VALUE( 4.751), SIMDE_FLOAT16_VALUE( 24.284) }, + { INT16_C( 8), INT16_C( 25), INT16_C( 4), INT16_C( 24) } }, + { { SIMDE_FLOAT16_VALUE( 18.155), SIMDE_FLOAT16_VALUE( - 26.878), SIMDE_FLOAT16_VALUE( 18.196), SIMDE_FLOAT16_VALUE( 24.029) }, + { INT16_C( 18), -INT16_C( 27), INT16_C( 18), INT16_C( 24) } }, + { { SIMDE_FLOAT16_VALUE( - 16.286), SIMDE_FLOAT16_VALUE( 10.157), SIMDE_FLOAT16_VALUE( - 1.975), SIMDE_FLOAT16_VALUE( 8.093) }, + { -INT16_C( 17), INT16_C( 10), -INT16_C( 2), INT16_C( 8) } }, + { { SIMDE_FLOAT16_VALUE( - 26.246), SIMDE_FLOAT16_VALUE( - 10.909), SIMDE_FLOAT16_VALUE( - 28.955), SIMDE_FLOAT16_VALUE( 6.701) }, + { 
-INT16_C( 27), -INT16_C( 11), -INT16_C( 29), INT16_C( 6) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_int16x4_t r = simde_vcvtm_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_int16x4_t r = simde_vcvtm_s16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtmq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 348.25), SIMDE_FLOAT16_VALUE( 859.50), SIMDE_FLOAT16_VALUE( 629.50), SIMDE_FLOAT16_VALUE( 746.50), + SIMDE_FLOAT16_VALUE( 510.25), SIMDE_FLOAT16_VALUE( 957.00), SIMDE_FLOAT16_VALUE( 485.50), SIMDE_FLOAT16_VALUE( 650.50) }, + { UINT16_C( 348), UINT16_C( 859), UINT16_C( 629), UINT16_C( 746), UINT16_C( 510), UINT16_C( 957), UINT16_C( 485), UINT16_C( 650) } }, + { { SIMDE_FLOAT16_VALUE( 280.00), SIMDE_FLOAT16_VALUE( 624.00), SIMDE_FLOAT16_VALUE( 758.00), SIMDE_FLOAT16_VALUE( 938.50), + SIMDE_FLOAT16_VALUE( 605.00), SIMDE_FLOAT16_VALUE( 770.00), SIMDE_FLOAT16_VALUE( 547.00), SIMDE_FLOAT16_VALUE( 943.50) }, + { UINT16_C( 280), UINT16_C( 624), UINT16_C( 758), UINT16_C( 938), UINT16_C( 605), UINT16_C( 770), UINT16_C( 547), UINT16_C( 943) } }, + { { SIMDE_FLOAT16_VALUE( 321.25), SIMDE_FLOAT16_VALUE( 963.50), SIMDE_FLOAT16_VALUE( 557.50), SIMDE_FLOAT16_VALUE( 467.25), + SIMDE_FLOAT16_VALUE( 201.00), SIMDE_FLOAT16_VALUE( 597.00), SIMDE_FLOAT16_VALUE( 497.25), SIMDE_FLOAT16_VALUE( 937.00) }, + { UINT16_C( 321), UINT16_C( 963), UINT16_C( 557), UINT16_C( 467), UINT16_C( 201), UINT16_C( 597), UINT16_C( 497), UINT16_C( 937) } }, + { { 
SIMDE_FLOAT16_VALUE( 542.50), SIMDE_FLOAT16_VALUE( 71.12), SIMDE_FLOAT16_VALUE( 842.50), SIMDE_FLOAT16_VALUE( 218.75), + SIMDE_FLOAT16_VALUE( 507.25), SIMDE_FLOAT16_VALUE( 363.25), SIMDE_FLOAT16_VALUE( 462.50), SIMDE_FLOAT16_VALUE( 855.50) }, + { UINT16_C( 542), UINT16_C( 71), UINT16_C( 842), UINT16_C( 218), UINT16_C( 507), UINT16_C( 363), UINT16_C( 462), UINT16_C( 855) } }, + { { SIMDE_FLOAT16_VALUE( 222.38), SIMDE_FLOAT16_VALUE( 92.12), SIMDE_FLOAT16_VALUE( 602.00), SIMDE_FLOAT16_VALUE( 733.00), + SIMDE_FLOAT16_VALUE( 48.91), SIMDE_FLOAT16_VALUE( 87.31), SIMDE_FLOAT16_VALUE( 383.25), SIMDE_FLOAT16_VALUE( 329.00) }, + { UINT16_C( 222), UINT16_C( 92), UINT16_C( 602), UINT16_C( 733), UINT16_C( 48), UINT16_C( 87), UINT16_C( 383), UINT16_C( 329) } }, + { { SIMDE_FLOAT16_VALUE( 711.50), SIMDE_FLOAT16_VALUE( 141.12), SIMDE_FLOAT16_VALUE( 267.50), SIMDE_FLOAT16_VALUE( 316.50), + SIMDE_FLOAT16_VALUE( 911.00), SIMDE_FLOAT16_VALUE( 814.00), SIMDE_FLOAT16_VALUE( 260.25), SIMDE_FLOAT16_VALUE( 232.38) }, + { UINT16_C( 711), UINT16_C( 141), UINT16_C( 267), UINT16_C( 316), UINT16_C( 911), UINT16_C( 814), UINT16_C( 260), UINT16_C( 232) } }, + { { SIMDE_FLOAT16_VALUE( 778.00), SIMDE_FLOAT16_VALUE( 818.00), SIMDE_FLOAT16_VALUE( 699.50), SIMDE_FLOAT16_VALUE( 979.00), + SIMDE_FLOAT16_VALUE( 415.00), SIMDE_FLOAT16_VALUE( 196.88), SIMDE_FLOAT16_VALUE( 916.00), SIMDE_FLOAT16_VALUE( 957.50) }, + { UINT16_C( 778), UINT16_C( 818), UINT16_C( 699), UINT16_C( 979), UINT16_C( 415), UINT16_C( 196), UINT16_C( 916), UINT16_C( 957) } }, + { { SIMDE_FLOAT16_VALUE( 268.00), SIMDE_FLOAT16_VALUE( 758.50), SIMDE_FLOAT16_VALUE( 176.25), SIMDE_FLOAT16_VALUE( 775.00), + SIMDE_FLOAT16_VALUE( 121.62), SIMDE_FLOAT16_VALUE( 638.50), SIMDE_FLOAT16_VALUE( 630.50), SIMDE_FLOAT16_VALUE( 344.00) }, + { UINT16_C( 268), UINT16_C( 758), UINT16_C( 176), UINT16_C( 775), UINT16_C( 121), UINT16_C( 638), UINT16_C( 630), UINT16_C( 344) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { 
+ simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcvtmq_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(0.0f, 1000.0f); + simde_uint16x8_t r = simde_vcvtmq_u16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 340.75), SIMDE_FLOAT16_VALUE( 489.00), SIMDE_FLOAT16_VALUE( 996.00), SIMDE_FLOAT16_VALUE( 399.75) }, + { UINT16_C( 340), UINT16_C( 489), UINT16_C( 996), UINT16_C( 399) } }, + { { SIMDE_FLOAT16_VALUE( 967.00), SIMDE_FLOAT16_VALUE( 335.00), SIMDE_FLOAT16_VALUE( 842.00), SIMDE_FLOAT16_VALUE( 905.00) }, + { UINT16_C( 967), UINT16_C( 335), UINT16_C( 842), UINT16_C( 905) } }, + { { SIMDE_FLOAT16_VALUE( 950.00), SIMDE_FLOAT16_VALUE( 357.75), SIMDE_FLOAT16_VALUE( 539.00), SIMDE_FLOAT16_VALUE( 808.50) }, + { UINT16_C( 950), UINT16_C( 357), UINT16_C( 539), UINT16_C( 808) } }, + { { SIMDE_FLOAT16_VALUE( 275.25), SIMDE_FLOAT16_VALUE( 595.00), SIMDE_FLOAT16_VALUE( 820.50), SIMDE_FLOAT16_VALUE( 425.75) }, + { UINT16_C( 275), UINT16_C( 595), UINT16_C( 820), UINT16_C( 425) } }, + { { SIMDE_FLOAT16_VALUE( 884.00), SIMDE_FLOAT16_VALUE( 498.50), SIMDE_FLOAT16_VALUE( 906.00), SIMDE_FLOAT16_VALUE( 544.00) }, + { UINT16_C( 884), UINT16_C( 498), UINT16_C( 906), UINT16_C( 544) } }, + { { SIMDE_FLOAT16_VALUE( 169.62), SIMDE_FLOAT16_VALUE( 730.50), SIMDE_FLOAT16_VALUE( 862.50), SIMDE_FLOAT16_VALUE( 615.00) }, + { UINT16_C( 169), UINT16_C( 730), UINT16_C( 862), UINT16_C( 615) } }, + { { SIMDE_FLOAT16_VALUE( 567.50), SIMDE_FLOAT16_VALUE( 912.00), SIMDE_FLOAT16_VALUE( 338.75), 
SIMDE_FLOAT16_VALUE( 386.00) }, + { UINT16_C( 567), UINT16_C( 912), UINT16_C( 338), UINT16_C( 386) } }, + { { SIMDE_FLOAT16_VALUE( 398.25), SIMDE_FLOAT16_VALUE( 651.00), SIMDE_FLOAT16_VALUE( 327.25), SIMDE_FLOAT16_VALUE( 739.00) }, + { UINT16_C( 398), UINT16_C( 651), UINT16_C( 327), UINT16_C( 739) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcvtm_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(0.0f, 1000.0f); + simde_uint16x4_t r = simde_vcvtm_u16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 140.16), SIMDE_FLOAT32_C( 323.30) }, + { UINT32_C( 140), UINT32_C( 323) } }, + { { SIMDE_FLOAT32_C( 138.76), SIMDE_FLOAT32_C( 107.22) }, + { UINT32_C( 138), UINT32_C( 107) } }, + { { SIMDE_FLOAT32_C( 658.27), SIMDE_FLOAT32_C( 980.79) }, + { UINT32_C( 658), UINT32_C( 980) } }, + { { SIMDE_FLOAT32_C( 12.33), SIMDE_FLOAT32_C( 608.38) }, + { UINT32_C( 12), UINT32_C( 608) } }, + { { SIMDE_FLOAT32_C( 338.66), SIMDE_FLOAT32_C( 551.13) }, + { UINT32_C( 338), UINT32_C( 551) } }, + { { SIMDE_FLOAT32_C( 416.88), SIMDE_FLOAT32_C( 613.87) }, + { UINT32_C( 416), UINT32_C( 613) } }, + { { SIMDE_FLOAT32_C( 146.09), SIMDE_FLOAT32_C( 237.40) }, + { UINT32_C( 146), UINT32_C( 237) } }, + { { SIMDE_FLOAT32_C( 39.56), SIMDE_FLOAT32_C( 29.88) }, + { UINT32_C( 39), UINT32_C( 29) } }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = 
simde_vld1_f32(test_vec[i].a); + simde_uint32x2_t r = simde_vcvtm_u32_f32(a); + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(0.0, 1000.0); + simde_uint32x2_t r = simde_vcvtm_u32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[2]; + int32_t r[2]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C( 981.216), -SIMDE_FLOAT32_C( 118.443) }, + { -INT32_C( 982), -INT32_C( 119) } }, + { { SIMDE_FLOAT32_C( 870.692), -SIMDE_FLOAT32_C( 444.879) }, + { INT32_C( 870), -INT32_C( 445) } }, + { { SIMDE_FLOAT32_C( 872.875), SIMDE_FLOAT32_C( 78.257) }, + { INT32_C( 872), INT32_C( 78) } }, + { { -SIMDE_FLOAT32_C( 350.761), SIMDE_FLOAT32_C( 708.457) }, + { -INT32_C( 351), INT32_C( 708) } }, + { { -SIMDE_FLOAT32_C( 489.035), SIMDE_FLOAT32_C( 327.625) }, + { -INT32_C( 490), INT32_C( 327) } }, + { { -SIMDE_FLOAT32_C( 447.613), -SIMDE_FLOAT32_C( 519.467) }, + { -INT32_C( 448), -INT32_C( 520) } }, + { { -SIMDE_FLOAT32_C( 65.217), SIMDE_FLOAT32_C( 843.023) }, + { -INT32_C( 66), INT32_C( 843) } }, + { { -SIMDE_FLOAT32_C( 398.463), -SIMDE_FLOAT32_C( 388.678) }, + { -INT32_C( 399), -INT32_C( 389) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_int32x2_t r = simde_vcvtm_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_int32x2_t r = simde_vcvtm_s32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64 a[1]; + int64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 27073.289) }, + { INT64_C( 27073) } }, + { { SIMDE_FLOAT64_C( 26717.773) }, + { INT64_C( 26717) } }, + { { -SIMDE_FLOAT64_C( 1977.039) }, + { -INT64_C( 1978) } }, + { { -SIMDE_FLOAT64_C( 10158.797) }, + { -INT64_C( 10159) } }, + { { -SIMDE_FLOAT64_C( 14198.961) }, + { -INT64_C( 14199) } }, + { { SIMDE_FLOAT64_C( 526.211) }, + { INT64_C( 526) } }, + { { SIMDE_FLOAT64_C( 76952.719) }, + { INT64_C( 76952) } }, + { { SIMDE_FLOAT64_C( 11393.563) }, + { INT64_C( 11393) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_int64x1_t r = simde_vcvtm_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x1_t r = simde_vcvtm_s64_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtm_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 735.97) }, + { UINT64_C( 735) } }, + { { SIMDE_FLOAT64_C( 945.51) }, + { UINT64_C( 945) } }, + { { SIMDE_FLOAT64_C( 573.91) }, + { UINT64_C( 573) } }, + { { SIMDE_FLOAT64_C( 905.61) }, + { UINT64_C( 905) } }, + { { SIMDE_FLOAT64_C( 676.10) }, + { UINT64_C( 676) } }, + { { SIMDE_FLOAT64_C( 436.17) }, + { UINT64_C( 436) } }, + { { SIMDE_FLOAT64_C( 520.52) }, + { UINT64_C( 520) } }, + { { SIMDE_FLOAT64_C( 243.72) }, 
+ { UINT64_C( 243) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_uint64x1_t r = simde_vcvtm_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(0.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x1_t r = simde_vcvtm_u64_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtms_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmd_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtms_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmd_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmh_u64_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtm_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtmq_u64_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/cvtp.c b/test/arm/neon/cvtp.c new file mode 100644 index 000000000..576930776 --- /dev/null +++ b/test/arm/neon/cvtp.c @@ -0,0 +1,1147 @@ +#define SIMDE_TEST_ARM_NEON_INSN cvtp + +#include "test-neon.h" +#include 
"../../../simde/arm/neon/cvtp.h" + +static int +test_simde_vcvtpq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + int32_t r[4]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C( 607.741), SIMDE_FLOAT32_C( 403.453), SIMDE_FLOAT32_C( 97.739), -SIMDE_FLOAT32_C( 2.667) }, + { -INT32_C( 607), INT32_C( 404), INT32_C( 98), -INT32_C( 2) } }, + { { -SIMDE_FLOAT32_C( 438.220), -SIMDE_FLOAT32_C( 356.632), -SIMDE_FLOAT32_C( 355.499), SIMDE_FLOAT32_C( 12.256) }, + { -INT32_C( 438), -INT32_C( 356), -INT32_C( 355), INT32_C( 13) } }, + { { -SIMDE_FLOAT32_C( 712.986), SIMDE_FLOAT32_C( 505.464), -SIMDE_FLOAT32_C( 103.452), -SIMDE_FLOAT32_C( 725.220) }, + { -INT32_C( 712), INT32_C( 506), -INT32_C( 103), -INT32_C( 725) } }, + { { SIMDE_FLOAT32_C( 824.850), SIMDE_FLOAT32_C( 62.658), -SIMDE_FLOAT32_C( 644.643), SIMDE_FLOAT32_C( 510.005) }, + { INT32_C( 825), INT32_C( 63), -INT32_C( 644), INT32_C( 511) } }, + { { -SIMDE_FLOAT32_C( 2.843), -SIMDE_FLOAT32_C( 710.183), -SIMDE_FLOAT32_C( 382.143), -SIMDE_FLOAT32_C( 280.409) }, + { -INT32_C( 2), -INT32_C( 710), -INT32_C( 382), -INT32_C( 280) } }, + { { SIMDE_FLOAT32_C( 209.258), -SIMDE_FLOAT32_C( 68.390), SIMDE_FLOAT32_C( 737.373), SIMDE_FLOAT32_C( 383.386) }, + { INT32_C( 210), -INT32_C( 68), INT32_C( 738), INT32_C( 384) } }, + { { SIMDE_FLOAT32_C( 411.326), -SIMDE_FLOAT32_C( 427.964), -SIMDE_FLOAT32_C( 38.293), -SIMDE_FLOAT32_C( 18.446) }, + { INT32_C( 412), -INT32_C( 427), -INT32_C( 38), -INT32_C( 18) } }, + { { -SIMDE_FLOAT32_C( 375.151), -SIMDE_FLOAT32_C( 477.988), -SIMDE_FLOAT32_C( 879.299), SIMDE_FLOAT32_C( 967.790) }, + { -INT32_C( 375), -INT32_C( 477), -INT32_C( 879), INT32_C( 968) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_int32x4_t r = simde_vcvtpq_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for 
(int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_int32x4_t r = simde_vcvtpq_s32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + int64_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 99237.141), -SIMDE_FLOAT64_C( 50717.117) }, + { INT64_C( 99238), -INT64_C( 50717) } }, + { { -SIMDE_FLOAT64_C( 74116.766), SIMDE_FLOAT64_C( 66114.500) }, + { -INT64_C( 74116), INT64_C( 66115) } }, + { { -SIMDE_FLOAT64_C( 88666.844), SIMDE_FLOAT64_C( 79431.297) }, + { -INT64_C( 88666), INT64_C( 79432) } }, + { { SIMDE_FLOAT64_C( 18760.219), -SIMDE_FLOAT64_C( 76336.250) }, + { INT64_C( 18761), -INT64_C( 76336) } }, + { { -SIMDE_FLOAT64_C( 86460.719), SIMDE_FLOAT64_C( 56061.813) }, + { -INT64_C( 86460), INT64_C( 56062) } }, + { { -SIMDE_FLOAT64_C( 52624.641), SIMDE_FLOAT64_C( 76414.109) }, + { -INT64_C( 52624), INT64_C( 76415) } }, + { { SIMDE_FLOAT64_C( 87426.969), -SIMDE_FLOAT64_C( 65214.336) }, + { INT64_C( 87427), -INT64_C( 65214) } }, + { { SIMDE_FLOAT64_C( 94206.609), -SIMDE_FLOAT64_C( 63892.445) }, + { INT64_C( 94207), -INT64_C( 63892) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_int64x2_t r = simde_vcvtpq_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x2_t r = simde_vcvtpq_s64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 
1; +#endif +} + +static int +test_simde_vcvtph_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MAX)), + INT64_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT64_MIN)), + INT64_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 3.188), + INT64_C( 4) }, + { SIMDE_FLOAT16_VALUE( 6.484), + INT64_C( 7) }, + { SIMDE_FLOAT16_VALUE( - 18.336), + -INT64_C( 18) }, + { SIMDE_FLOAT16_VALUE( 27.028), + INT64_C( 28) }, + { SIMDE_FLOAT16_VALUE( - 22.919), + -INT64_C( 22) }, + { SIMDE_FLOAT16_VALUE( 13.598), + INT64_C( 14) }, + { SIMDE_FLOAT16_VALUE( 27.711), + INT64_C( 28) }, + { SIMDE_FLOAT16_VALUE( - 1.019), + -INT64_C( 1) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int64_t r = simde_vcvtph_s64_f16(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int64_t r = simde_vcvtph_s64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtph_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MAX+1000ll)), + INT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN)), + INT32_MIN }, + { 
simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT32_MIN-1000ll)), + INT32_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( - 13.547), + -INT32_C( 13) }, + { SIMDE_FLOAT16_VALUE( - 28.627), + -INT32_C( 28) }, + { SIMDE_FLOAT16_VALUE( - 26.702), + -INT32_C( 26) }, + { SIMDE_FLOAT16_VALUE( - 8.158), + -INT32_C( 8) }, + { SIMDE_FLOAT16_VALUE( - 10.661), + -INT32_C( 10) }, + { SIMDE_FLOAT16_VALUE( - 14.185), + -INT32_C( 14) }, + { SIMDE_FLOAT16_VALUE( 10.164), + INT32_C( 11) }, + { SIMDE_FLOAT16_VALUE( - 14.690), + -INT32_C( 14) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int32_t r = simde_vcvtph_s32_f16(a); + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int32_t r = simde_vcvtph_s32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtph_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + int16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MAX) + SIMDE_FLOAT32_C(100.0)), + INT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN)), + INT16_MIN }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, INT16_MIN) + SIMDE_FLOAT32_C(-100.0)), + INT16_MIN }, + { SIMDE_FLOAT16_VALUE( 0.0), + INT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 26.840), + INT16_C( 27) }, + { SIMDE_FLOAT16_VALUE( 28.316), + INT16_C( 29) }, + { SIMDE_FLOAT16_VALUE( 11.363), + INT16_C( 12) }, + 
{ SIMDE_FLOAT16_VALUE( - 9.731), + -INT16_C( 9) }, + { SIMDE_FLOAT16_VALUE( 7.723), + INT16_C( 8) }, + { SIMDE_FLOAT16_VALUE( - 22.898), + -INT16_C( 22) }, + { SIMDE_FLOAT16_VALUE( - 19.354), + -INT16_C( 19) }, + { SIMDE_FLOAT16_VALUE( 24.613), + INT16_C( 25) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + int16_t r = simde_vcvtph_s16_f16(a); + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_int16_t r = simde_vcvtph_s16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtps_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a; + int32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MAX) + SIMDE_FLOAT32_C(1000.0), + INT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN), + INT32_MIN }, + { HEDLEY_STATIC_CAST(simde_float32, INT32_MIN) + SIMDE_FLOAT32_C(-1000.0), + INT32_MIN }, + { SIMDE_FLOAT32_C( 0.000), + INT32_C( 0) }, + #endif + { SIMDE_FLOAT32_C( 14.178), + INT32_C( 15) }, + { -SIMDE_FLOAT32_C( 607.139), + -INT32_C( 607) }, + { -SIMDE_FLOAT32_C( 414.899), + -INT32_C( 414) }, + { -SIMDE_FLOAT32_C( 887.709), + -INT32_C( 887) }, + { -SIMDE_FLOAT32_C( 746.822), + -INT32_C( 746) }, + { SIMDE_FLOAT32_C( 170.845), + INT32_C( 171) }, + { SIMDE_FLOAT32_C( 991.922), + INT32_C( 992) }, + { -SIMDE_FLOAT32_C( 557.800), + -INT32_C( 557) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + int32_t r = simde_vcvtps_s32_f32(a); + 
simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_int32_t r = simde_vcvtps_s32_f32(a); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtph_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT64_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT64_MAX)), + UINT64_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT64_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.000), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 9.136), + UINT64_C( 10) }, + { SIMDE_FLOAT16_VALUE( 8.945), + UINT64_C( 9) }, + { SIMDE_FLOAT16_VALUE( 4.916), + UINT64_C( 5) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint64_t r = simde_vcvtph_u64_f16(a); + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint64_t r = simde_vcvtph_u64_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtph_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT32_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX)), + UINT32_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX+1000ll)), + UINT32_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT32_C( 0) 
}, + { SIMDE_FLOAT16_VALUE( 0.000), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 14.954), + UINT32_C( 15) }, + { SIMDE_FLOAT16_VALUE( 17.106), + UINT32_C( 18) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint32_t r = simde_vcvtph_u32_f16(a); + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint32_t r = simde_vcvtph_u32_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtph_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a; + uint16_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_NANHF, + INT16_C( 0) }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX)), + UINT16_MAX }, + { simde_float16_from_float32(HEDLEY_STATIC_CAST(simde_float32, UINT16_MAX+1000)), + UINT16_MAX }, + { SIMDE_FLOAT16_VALUE(-1000.0), + UINT16_C( 0) }, + { SIMDE_FLOAT16_VALUE( 0.000), + UINT16_C( 0) }, + #endif + { SIMDE_FLOAT16_VALUE( 17.766), + UINT16_C( 18) }, + { SIMDE_FLOAT16_VALUE( 22.378), + UINT16_C( 23) }, + { SIMDE_FLOAT16_VALUE( 2.456), + UINT16_C( 3) }, + { SIMDE_FLOAT16_VALUE( 25.255), + UINT16_C( 26) }, + { SIMDE_FLOAT16_VALUE( 5.504), + UINT16_C( 6) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16 a = test_vec[i].a; + uint16_t r = simde_vcvtph_u16_f16(a); + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16_t a = simde_test_arm_neon_random_f16(-100.0f, 100.0f); + simde_uint16_t r = simde_vcvtph_u16_f16(a); + + simde_test_arm_neon_write_f16(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtps_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a; + uint32_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NANF, + INT32_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX), + UINT32_MAX }, + { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(1000.0), + UINT32_MAX }, + { SIMDE_FLOAT32_C(-1000.0), + UINT32_C( 0) }, + { SIMDE_FLOAT32_C( 0.000), + UINT32_C( 0) }, + #endif + { SIMDE_FLOAT32_C( 517.893), + UINT32_C( 518) }, + { SIMDE_FLOAT32_C( 288.716), + UINT32_C( 289) }, + { SIMDE_FLOAT32_C( 197.506), + UINT32_C( 198) }, + { SIMDE_FLOAT32_C( 973.600), + UINT32_C( 974) }, + { SIMDE_FLOAT32_C( 976.840), + UINT32_C( 977) }, + { SIMDE_FLOAT32_C( 920.392), + UINT32_C( 921) }, + { SIMDE_FLOAT32_C( 70.544), + UINT32_C( 71) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32 a = test_vec[i].a; + uint32_t r = simde_vcvtps_u32_f32(a); + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32_t a = simde_test_arm_neon_random_f32(-1000.0f, 1000.0f); + simde_uint32_t r = simde_vcvtps_u32_f32(a); + + simde_test_arm_neon_write_f32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float32 a[4]; + uint32_t r[4]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float32, UINT32_MAX) + SIMDE_FLOAT32_C(10000.0), SIMDE_MATH_NANF, -SIMDE_MATH_NANF, SIMDE_MATH_INFINITYF }, + { UINT32_MAX, UINT32_C( 0), UINT32_C( 0), UINT32_MAX } }, + #endif + { { SIMDE_FLOAT32_C( 619.63), SIMDE_FLOAT32_C( 364.48), SIMDE_FLOAT32_C( 938.24), 
SIMDE_FLOAT32_C( 444.69) }, + { UINT32_C( 620), UINT32_C( 365), UINT32_C( 939), UINT32_C( 445) } }, + { { SIMDE_FLOAT32_C( 955.05), SIMDE_FLOAT32_C( 217.80), SIMDE_FLOAT32_C( 439.91), SIMDE_FLOAT32_C( 55.17) }, + { UINT32_C( 956), UINT32_C( 218), UINT32_C( 440), UINT32_C( 56) } }, + { { SIMDE_FLOAT32_C( 859.62), SIMDE_FLOAT32_C( 349.38), SIMDE_FLOAT32_C( 956.45), SIMDE_FLOAT32_C( 249.96) }, + { UINT32_C( 860), UINT32_C( 350), UINT32_C( 957), UINT32_C( 250) } }, + { { SIMDE_FLOAT32_C( 511.78), SIMDE_FLOAT32_C( 571.90), SIMDE_FLOAT32_C( 930.47), SIMDE_FLOAT32_C( 688.88) }, + { UINT32_C( 512), UINT32_C( 572), UINT32_C( 931), UINT32_C( 689) } }, + { { SIMDE_FLOAT32_C( 637.73), SIMDE_FLOAT32_C( 370.86), SIMDE_FLOAT32_C( 732.69), SIMDE_FLOAT32_C( 402.84) }, + { UINT32_C( 638), UINT32_C( 371), UINT32_C( 733), UINT32_C( 403) } }, + { { SIMDE_FLOAT32_C( 328.28), SIMDE_FLOAT32_C( 536.20), SIMDE_FLOAT32_C( 378.54), SIMDE_FLOAT32_C( 375.08) }, + { UINT32_C( 329), UINT32_C( 537), UINT32_C( 379), UINT32_C( 376) } }, + { { SIMDE_FLOAT32_C( 709.63), SIMDE_FLOAT32_C( 671.54), SIMDE_FLOAT32_C( 418.37), SIMDE_FLOAT32_C( 407.44) }, + { UINT32_C( 710), UINT32_C( 672), UINT32_C( 419), UINT32_C( 408) } }, + { { SIMDE_FLOAT32_C( 782.97), SIMDE_FLOAT32_C( 601.77), SIMDE_FLOAT32_C( 970.47), SIMDE_FLOAT32_C( 402.60) }, + { UINT32_C( 783), UINT32_C( 602), UINT32_C( 971), UINT32_C( 403) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x4_t a = simde_vld1q_f32(test_vec[i].a); + simde_uint32x4_t r = simde_vcvtpq_u32_f32(a); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(0.0f, 1000.0f); + simde_uint32x4_t r = simde_vcvtpq_u32_f32(a); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + 
return 1; +#endif +} + +static int +test_simde_vcvtpd_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + int64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MAX) + SIMDE_FLOAT64_C(10000.0), + INT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN), + INT64_MIN }, + { HEDLEY_STATIC_CAST(simde_float64, INT64_MIN) + SIMDE_FLOAT64_C(-10000.0), + INT64_MIN }, + #endif + { -SIMDE_FLOAT64_C( 79202.922), + -INT64_C( 79202) }, + { -SIMDE_FLOAT64_C( 89537.219), + -INT64_C( 89537) }, + { -SIMDE_FLOAT64_C( 12001.297), + -INT64_C( 12001) }, + { -SIMDE_FLOAT64_C( 9055.063), + -INT64_C( 9055) }, + { SIMDE_FLOAT64_C( 17832.305), + INT64_C( 17833) }, + { SIMDE_FLOAT64_C( 96832.719), + INT64_C( 96833) }, + { SIMDE_FLOAT64_C( 1258.672), + INT64_C( 1259) }, + { -SIMDE_FLOAT64_C( 68830.148), + -INT64_C( 68830) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + int64_t r = simde_vcvtpd_s64_f64(a); + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_arm_neon_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64_t r = simde_vcvtpd_s64_f64(a); + + simde_test_arm_neon_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpd_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a; + uint64_t r; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { SIMDE_MATH_NAN, + INT64_C( 0) }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX), + UINT64_MAX }, + { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(10000.0), + UINT64_MAX }, + 
{ SIMDE_FLOAT64_C(-1000.0), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( -84790.281), + UINT64_C( 0) }, + { SIMDE_FLOAT64_C( 0.0), + UINT64_C( 0) }, + #endif + { SIMDE_FLOAT64_C( 60286.391), + UINT64_C( 60287) }, + { SIMDE_FLOAT64_C( 75317.063), + UINT64_C( 75318) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64 a = test_vec[i].a; + uint64_t r = simde_vcvtpd_u64_f64(a); + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64_t a = simde_test_arm_neon_random_f64(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64_t r = simde_vcvtpd_u64_f64(a); + + simde_test_arm_neon_write_f64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float64 a[2]; + uint64_t r[2]; + } test_vec[] = { + #if !defined(SIMDE_FAST_CONVERSION_RANGE) + { { HEDLEY_STATIC_CAST(simde_float64, UINT64_MAX) + SIMDE_FLOAT64_C(10000.0) }, + { UINT64_MAX, UINT64_C( 0) } }, + { { -SIMDE_MATH_NAN, -SIMDE_FLOAT64_C(10000.0) }, + { UINT64_C( 0), UINT64_C( 0) } }, + #endif + { { SIMDE_FLOAT64_C( 966.26), SIMDE_FLOAT64_C( 908.71) }, + { UINT64_C( 967), UINT64_C( 909) } }, + { { SIMDE_FLOAT64_C( 847.29), SIMDE_FLOAT64_C( 921.31) }, + { UINT64_C( 848), UINT64_C( 922) } }, + { { SIMDE_FLOAT64_C( 126.50), SIMDE_FLOAT64_C( 287.19) }, + { UINT64_C( 127), UINT64_C( 288) } }, + { { SIMDE_FLOAT64_C( 976.48), SIMDE_FLOAT64_C( 986.12) }, + { UINT64_C( 977), UINT64_C( 987) } }, + { { SIMDE_FLOAT64_C( 636.57), SIMDE_FLOAT64_C( 932.93) }, + { UINT64_C( 637), UINT64_C( 933) } }, + { { SIMDE_FLOAT64_C( 236.08), SIMDE_FLOAT64_C( 148.35) }, + { UINT64_C( 237), UINT64_C( 149) } }, + { { SIMDE_FLOAT64_C( 504.84), SIMDE_FLOAT64_C( 166.55) }, + { UINT64_C( 505), UINT64_C( 167) } }, + { { SIMDE_FLOAT64_C( 
837.23), SIMDE_FLOAT64_C( 142.57) }, + { UINT64_C( 838), UINT64_C( 143) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = simde_vld1q_f64(test_vec[i].a); + simde_uint64x2_t r = simde_vcvtpq_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(0.0), SIMDE_FLOAT64_C(1000.0)); + simde_uint64x2_t r = simde_vcvtpq_u64_f64(a); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + int16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 17.482), SIMDE_FLOAT16_VALUE( 25.249), SIMDE_FLOAT16_VALUE( - 23.786), SIMDE_FLOAT16_VALUE( 26.713), + SIMDE_FLOAT16_VALUE( - 27.250), SIMDE_FLOAT16_VALUE( - 25.132), SIMDE_FLOAT16_VALUE( 16.970), SIMDE_FLOAT16_VALUE( 1.147) }, + { -INT16_C( 17), INT16_C( 26), -INT16_C( 23), INT16_C( 27), + -INT16_C( 27), -INT16_C( 25), INT16_C( 17), INT16_C( 2) } }, + { { SIMDE_FLOAT16_VALUE( - 22.099), SIMDE_FLOAT16_VALUE( - 21.667), SIMDE_FLOAT16_VALUE( - 8.017), SIMDE_FLOAT16_VALUE( - 2.271), + SIMDE_FLOAT16_VALUE( - 12.312), SIMDE_FLOAT16_VALUE( 22.849), SIMDE_FLOAT16_VALUE( 15.892), SIMDE_FLOAT16_VALUE( - 8.588) }, + { -INT16_C( 22), -INT16_C( 21), -INT16_C( 8), -INT16_C( 2), + -INT16_C( 12), INT16_C( 23), INT16_C( 16), -INT16_C( 8) } }, + { { SIMDE_FLOAT16_VALUE( 2.094), SIMDE_FLOAT16_VALUE( - 2.744), SIMDE_FLOAT16_VALUE( - 25.968), SIMDE_FLOAT16_VALUE( - 21.280), + SIMDE_FLOAT16_VALUE( 19.739), SIMDE_FLOAT16_VALUE( 20.780), SIMDE_FLOAT16_VALUE( - 3.155), SIMDE_FLOAT16_VALUE( 19.892) }, + { INT16_C( 3), -INT16_C( 2), -INT16_C( 25), -INT16_C( 21), + INT16_C( 20), INT16_C( 21), 
-INT16_C( 3), INT16_C( 20) } }, + { { SIMDE_FLOAT16_VALUE( - 17.802), SIMDE_FLOAT16_VALUE( - 7.956), SIMDE_FLOAT16_VALUE( - 25.547), SIMDE_FLOAT16_VALUE( 29.976), + SIMDE_FLOAT16_VALUE( - 16.109), SIMDE_FLOAT16_VALUE( - 0.241), SIMDE_FLOAT16_VALUE( - 17.325), SIMDE_FLOAT16_VALUE( 2.343) }, + { -INT16_C( 17), -INT16_C( 7), -INT16_C( 25), INT16_C( 30), + -INT16_C( 16), INT16_C( 0), -INT16_C( 17), INT16_C( 3) } }, + { { SIMDE_FLOAT16_VALUE( - 2.318), SIMDE_FLOAT16_VALUE( 27.885), SIMDE_FLOAT16_VALUE( 10.828), SIMDE_FLOAT16_VALUE( 16.530), + SIMDE_FLOAT16_VALUE( 19.659), SIMDE_FLOAT16_VALUE( - 11.861), SIMDE_FLOAT16_VALUE( - 15.435), SIMDE_FLOAT16_VALUE( - 22.972) }, + { -INT16_C( 2), INT16_C( 28), INT16_C( 11), INT16_C( 17), + INT16_C( 20), -INT16_C( 11), -INT16_C( 15), -INT16_C( 22) } }, + { { SIMDE_FLOAT16_VALUE( - 26.398), SIMDE_FLOAT16_VALUE( 12.636), SIMDE_FLOAT16_VALUE( 23.632), SIMDE_FLOAT16_VALUE( 22.209), + SIMDE_FLOAT16_VALUE( 16.480), SIMDE_FLOAT16_VALUE( - 8.479), SIMDE_FLOAT16_VALUE( - 11.944), SIMDE_FLOAT16_VALUE( - 10.576) }, + { -INT16_C( 26), INT16_C( 13), INT16_C( 24), INT16_C( 23), + INT16_C( 17), -INT16_C( 8), -INT16_C( 11), -INT16_C( 10) } }, + { { SIMDE_FLOAT16_VALUE( 10.556), SIMDE_FLOAT16_VALUE( 14.462), SIMDE_FLOAT16_VALUE( 4.323), SIMDE_FLOAT16_VALUE( 29.542), + SIMDE_FLOAT16_VALUE( - 13.708), SIMDE_FLOAT16_VALUE( - 11.548), SIMDE_FLOAT16_VALUE( 13.467), SIMDE_FLOAT16_VALUE( 4.887) }, + { INT16_C( 11), INT16_C( 15), INT16_C( 5), INT16_C( 30), + -INT16_C( 13), -INT16_C( 11), INT16_C( 14), INT16_C( 5) } }, + { { SIMDE_FLOAT16_VALUE( - 28.249), SIMDE_FLOAT16_VALUE( 10.669), SIMDE_FLOAT16_VALUE( 15.634), SIMDE_FLOAT16_VALUE( - 29.024), + SIMDE_FLOAT16_VALUE( - 5.544), SIMDE_FLOAT16_VALUE( 5.271), SIMDE_FLOAT16_VALUE( 7.326), SIMDE_FLOAT16_VALUE( 22.421) }, + { -INT16_C( 28), INT16_C( 11), INT16_C( 16), -INT16_C( 29), + -INT16_C( 5), INT16_C( 6), INT16_C( 8), INT16_C( 23) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / 
sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_int16x8_t r = simde_vcvtpq_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_int16x8_t r = simde_vcvtpq_s16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + int16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 24.451), SIMDE_FLOAT16_VALUE( - 26.951), SIMDE_FLOAT16_VALUE( 4.545), SIMDE_FLOAT16_VALUE( - 16.203) }, + { -INT16_C( 24), -INT16_C( 26), INT16_C( 5), -INT16_C( 16) } }, + { { SIMDE_FLOAT16_VALUE( - 6.971), SIMDE_FLOAT16_VALUE( - 0.372), SIMDE_FLOAT16_VALUE( - 23.985), SIMDE_FLOAT16_VALUE( - 5.566) }, + { -INT16_C( 6), INT16_C( 0), -INT16_C( 23), -INT16_C( 5) } }, + { { SIMDE_FLOAT16_VALUE( - 17.684), SIMDE_FLOAT16_VALUE( - 12.522), SIMDE_FLOAT16_VALUE( - 26.196), SIMDE_FLOAT16_VALUE( - 17.907) }, + { -INT16_C( 17), -INT16_C( 12), -INT16_C( 26), -INT16_C( 17) } }, + { { SIMDE_FLOAT16_VALUE( - 15.913), SIMDE_FLOAT16_VALUE( 11.327), SIMDE_FLOAT16_VALUE( - 20.331), SIMDE_FLOAT16_VALUE( 6.990) }, + { -INT16_C( 15), INT16_C( 12), -INT16_C( 20), INT16_C( 7) } }, + { { SIMDE_FLOAT16_VALUE( - 7.259), SIMDE_FLOAT16_VALUE( 10.857), SIMDE_FLOAT16_VALUE( - 14.002), SIMDE_FLOAT16_VALUE( 16.367) }, + { -INT16_C( 7), INT16_C( 11), -INT16_C( 14), INT16_C( 17) } }, + { { SIMDE_FLOAT16_VALUE( - 14.487), SIMDE_FLOAT16_VALUE( 25.818), SIMDE_FLOAT16_VALUE( 25.545), SIMDE_FLOAT16_VALUE( - 9.464) }, + { -INT16_C( 14), INT16_C( 26), INT16_C( 26), -INT16_C( 9) } }, + { { SIMDE_FLOAT16_VALUE( 10.801), SIMDE_FLOAT16_VALUE( - 16.889), SIMDE_FLOAT16_VALUE( 
15.712), SIMDE_FLOAT16_VALUE( - 26.668) }, + { INT16_C( 11), -INT16_C( 16), INT16_C( 16), -INT16_C( 26) } }, + { { SIMDE_FLOAT16_VALUE( 16.707), SIMDE_FLOAT16_VALUE( - 6.907), SIMDE_FLOAT16_VALUE( - 16.125), SIMDE_FLOAT16_VALUE( 16.581) }, + { INT16_C( 17), -INT16_C( 6), -INT16_C( 16), INT16_C( 17) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_int16x4_t r = simde_vcvtp_s16_f16(a); + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_int16x4_t r = simde_vcvtp_s16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtpq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + uint16_t r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 14.36), SIMDE_FLOAT16_VALUE( 34.09), SIMDE_FLOAT16_VALUE( 69.00), SIMDE_FLOAT16_VALUE( 4.10), + SIMDE_FLOAT16_VALUE( 51.00), SIMDE_FLOAT16_VALUE( 31.69), SIMDE_FLOAT16_VALUE( 40.94), SIMDE_FLOAT16_VALUE( 90.44) }, + { UINT16_C( 15), UINT16_C( 35), UINT16_C( 69), UINT16_C( 5), UINT16_C( 51), UINT16_C( 32), UINT16_C( 41), UINT16_C( 91) } }, + { { SIMDE_FLOAT16_VALUE( 17.02), SIMDE_FLOAT16_VALUE( 50.97), SIMDE_FLOAT16_VALUE( 36.22), SIMDE_FLOAT16_VALUE( 91.44), + SIMDE_FLOAT16_VALUE( 80.12), SIMDE_FLOAT16_VALUE( 74.88), SIMDE_FLOAT16_VALUE( 25.30), SIMDE_FLOAT16_VALUE( 71.44) }, + { UINT16_C( 18), UINT16_C( 51), UINT16_C( 37), UINT16_C( 92), UINT16_C( 81), UINT16_C( 75), UINT16_C( 26), UINT16_C( 72) } }, + { { SIMDE_FLOAT16_VALUE( 29.02), SIMDE_FLOAT16_VALUE( 1.47), SIMDE_FLOAT16_VALUE( 45.03), SIMDE_FLOAT16_VALUE( 76.00), + SIMDE_FLOAT16_VALUE( 86.69), SIMDE_FLOAT16_VALUE( 69.94), 
SIMDE_FLOAT16_VALUE( 51.69), SIMDE_FLOAT16_VALUE( 76.00) }, + { UINT16_C( 30), UINT16_C( 2), UINT16_C( 46), UINT16_C( 76), UINT16_C( 87), UINT16_C( 70), UINT16_C( 52), UINT16_C( 76) } }, + { { SIMDE_FLOAT16_VALUE( 21.06), SIMDE_FLOAT16_VALUE( 8.94), SIMDE_FLOAT16_VALUE( 37.00), SIMDE_FLOAT16_VALUE( 31.73), + SIMDE_FLOAT16_VALUE( 99.62), SIMDE_FLOAT16_VALUE( 52.44), SIMDE_FLOAT16_VALUE( 81.81), SIMDE_FLOAT16_VALUE( 14.01) }, + { UINT16_C( 22), UINT16_C( 9), UINT16_C( 37), UINT16_C( 32), UINT16_C( 100), UINT16_C( 53), UINT16_C( 82), UINT16_C( 15) } }, + { { SIMDE_FLOAT16_VALUE( 86.50), SIMDE_FLOAT16_VALUE( 50.81), SIMDE_FLOAT16_VALUE( 18.11), SIMDE_FLOAT16_VALUE( 37.53), + SIMDE_FLOAT16_VALUE( 82.50), SIMDE_FLOAT16_VALUE( 59.06), SIMDE_FLOAT16_VALUE( 27.95), SIMDE_FLOAT16_VALUE( 99.50) }, + { UINT16_C( 87), UINT16_C( 51), UINT16_C( 19), UINT16_C( 38), UINT16_C( 83), UINT16_C( 60), UINT16_C( 28), UINT16_C( 100) } }, + { { SIMDE_FLOAT16_VALUE( 10.01), SIMDE_FLOAT16_VALUE( 64.19), SIMDE_FLOAT16_VALUE( 90.94), SIMDE_FLOAT16_VALUE( 90.12), + SIMDE_FLOAT16_VALUE( 39.03), SIMDE_FLOAT16_VALUE( 16.23), SIMDE_FLOAT16_VALUE( 61.53), SIMDE_FLOAT16_VALUE( 68.06) }, + { UINT16_C( 11), UINT16_C( 65), UINT16_C( 91), UINT16_C( 91), UINT16_C( 40), UINT16_C( 17), UINT16_C( 62), UINT16_C( 69) } }, + { { SIMDE_FLOAT16_VALUE( 17.70), SIMDE_FLOAT16_VALUE( 6.58), SIMDE_FLOAT16_VALUE( 44.03), SIMDE_FLOAT16_VALUE( 4.38), + SIMDE_FLOAT16_VALUE( 76.50), SIMDE_FLOAT16_VALUE( 95.75), SIMDE_FLOAT16_VALUE( 80.38), SIMDE_FLOAT16_VALUE( 97.56) }, + { UINT16_C( 18), UINT16_C( 7), UINT16_C( 45), UINT16_C( 5), UINT16_C( 77), UINT16_C( 96), UINT16_C( 81), UINT16_C( 98) } }, + { { SIMDE_FLOAT16_VALUE( 4.68), SIMDE_FLOAT16_VALUE( 17.34), SIMDE_FLOAT16_VALUE( 29.30), SIMDE_FLOAT16_VALUE( 4.33), + SIMDE_FLOAT16_VALUE( 69.75), SIMDE_FLOAT16_VALUE( 11.14), SIMDE_FLOAT16_VALUE( 18.34), SIMDE_FLOAT16_VALUE( 56.31) }, + { UINT16_C( 5), UINT16_C( 18), UINT16_C( 30), UINT16_C( 5), UINT16_C( 70), UINT16_C( 12), 
UINT16_C( 19), UINT16_C( 57) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_uint16x8_t r = simde_vcvtpq_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(0.0f, 100.0f); + simde_uint16x8_t r = simde_vcvtpq_u16_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + uint16_t r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 92.38), SIMDE_FLOAT16_VALUE( 41.81), SIMDE_FLOAT16_VALUE( 12.00), SIMDE_FLOAT16_VALUE( 86.38) }, + { UINT16_C( 93), UINT16_C( 42), UINT16_C( 12), UINT16_C( 87) } }, + { { SIMDE_FLOAT16_VALUE( 83.69), SIMDE_FLOAT16_VALUE( 72.44), SIMDE_FLOAT16_VALUE( 21.73), SIMDE_FLOAT16_VALUE( 51.94) }, + { UINT16_C( 84), UINT16_C( 73), UINT16_C( 22), UINT16_C( 52) } }, + { { SIMDE_FLOAT16_VALUE( 73.50), SIMDE_FLOAT16_VALUE( 84.75), SIMDE_FLOAT16_VALUE( 27.58), SIMDE_FLOAT16_VALUE( 35.84) }, + { UINT16_C( 74), UINT16_C( 85), UINT16_C( 28), UINT16_C( 36) } }, + { { SIMDE_FLOAT16_VALUE( 77.38), SIMDE_FLOAT16_VALUE( 63.91), SIMDE_FLOAT16_VALUE( 92.06), SIMDE_FLOAT16_VALUE( 92.00) }, + { UINT16_C( 78), UINT16_C( 64), UINT16_C( 93), UINT16_C( 92) } }, + { { SIMDE_FLOAT16_VALUE( 60.22), SIMDE_FLOAT16_VALUE( 3.73), SIMDE_FLOAT16_VALUE( 84.62), SIMDE_FLOAT16_VALUE( 88.56) }, + { UINT16_C( 61), UINT16_C( 4), UINT16_C( 85), UINT16_C( 89) } }, + { { SIMDE_FLOAT16_VALUE( 5.28), SIMDE_FLOAT16_VALUE( 54.75), SIMDE_FLOAT16_VALUE( 2.63), SIMDE_FLOAT16_VALUE( 30.70) }, + { UINT16_C( 6), UINT16_C( 55), UINT16_C( 3), UINT16_C( 31) } }, + { { SIMDE_FLOAT16_VALUE( 41.84), 
SIMDE_FLOAT16_VALUE( 2.31), SIMDE_FLOAT16_VALUE( 86.88), SIMDE_FLOAT16_VALUE( 82.25) }, + { UINT16_C( 42), UINT16_C( 3), UINT16_C( 87), UINT16_C( 83) } }, + { { SIMDE_FLOAT16_VALUE( 83.00), SIMDE_FLOAT16_VALUE( 78.25), SIMDE_FLOAT16_VALUE( 9.41), SIMDE_FLOAT16_VALUE( 75.31) }, + { UINT16_C( 83), UINT16_C( 79), UINT16_C( 10), UINT16_C( 76) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_uint16x4_t r = simde_vcvtp_u16_f16(a); + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(0.0f, 100.0f); + simde_uint16x4_t r = simde_vcvtp_u16_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[2]; + uint32_t r[2]; + } test_vec[] = { + { { SIMDE_FLOAT32_C( 610.058), SIMDE_FLOAT32_C( 408.327) }, + { UINT32_C( 611), UINT32_C( 409) } }, + { { SIMDE_FLOAT32_C( 683.218), SIMDE_FLOAT32_C( 523.587) }, + { UINT32_C( 684), UINT32_C( 524) } }, + { { SIMDE_FLOAT32_C( 840.155), SIMDE_FLOAT32_C( 535.544) }, + { UINT32_C( 841), UINT32_C( 536) } }, + { { SIMDE_FLOAT32_C( 878.489), SIMDE_FLOAT32_C( 177.802) }, + { UINT32_C( 879), UINT32_C( 178) } }, + { { SIMDE_FLOAT32_C( 160.684), SIMDE_FLOAT32_C( 430.784) }, + { UINT32_C( 161), UINT32_C( 431) } }, + { { SIMDE_FLOAT32_C( 816.827), SIMDE_FLOAT32_C( 582.145) }, + { UINT32_C( 817), UINT32_C( 583) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_uint32x2_t r = simde_vcvtp_u32_f32(a); + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + 
return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_uint32x2_t r = simde_vcvtp_u32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float32 a[2]; + int32_t r[2]; + } test_vec[] = { + { { -SIMDE_FLOAT32_C( 308.681), SIMDE_FLOAT32_C( 253.856) }, + { -INT32_C( 308), INT32_C( 254) } }, + { { SIMDE_FLOAT32_C( 415.847), -SIMDE_FLOAT32_C( 667.662) }, + { INT32_C( 416), -INT32_C( 667) } }, + { { SIMDE_FLOAT32_C( 225.278), -SIMDE_FLOAT32_C( 604.949) }, + { INT32_C( 226), -INT32_C( 604) } }, + { { SIMDE_FLOAT32_C( 924.365), SIMDE_FLOAT32_C( 945.477) }, + { INT32_C( 925), INT32_C( 946) } }, + { { SIMDE_FLOAT32_C( 895.563), SIMDE_FLOAT32_C( 45.311) }, + { INT32_C( 896), INT32_C( 46) } }, + { { -SIMDE_FLOAT32_C( 317.703), -SIMDE_FLOAT32_C( 868.310) }, + { -INT32_C( 317), -INT32_C( 868) } }, + { { SIMDE_FLOAT32_C( 541.485), -SIMDE_FLOAT32_C( 504.698) }, + { INT32_C( 542), -INT32_C( 504) } }, + { { -SIMDE_FLOAT32_C( 653.810), -SIMDE_FLOAT32_C( 265.026) }, + { -INT32_C( 653), -INT32_C( 265) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float32x2_t a = simde_vld1_f32(test_vec[i].a); + simde_int32x2_t r = simde_vcvtp_s32_f32(a); + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_int32x2_t r = simde_vcvtp_s32_f32(a); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_s64_f64 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64 a[1]; + int64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 24965.766) }, + { INT64_C( 24966) } }, + { { -SIMDE_FLOAT64_C( 30984.422) }, + { -INT64_C( 30984) } }, + { { -SIMDE_FLOAT64_C( 16724.391) }, + { -INT64_C( 16724) } }, + { { SIMDE_FLOAT64_C( 31802.109) }, + { INT64_C( 31803) } }, + { { SIMDE_FLOAT64_C( 46616.438) }, + { INT64_C( 46617) } }, + { { SIMDE_FLOAT64_C( 57103.266) }, + { INT64_C( 57104) } }, + { { -SIMDE_FLOAT64_C( 21279.477) }, + { -INT64_C( 21279) } }, + { { -SIMDE_FLOAT64_C( 18106.797) }, + { -INT64_C( 18106) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_int64x1_t r = simde_vcvtp_s64_f64(a); + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_int64x1_t r = simde_vcvtp_s64_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vcvtp_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float64 a[1]; + uint64_t r[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 20147.570) }, + { UINT64_C( 20148) } }, + { { SIMDE_FLOAT64_C( 83020.297) }, + { UINT64_C( 83021) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_uint64x1_t r = simde_vcvtp_u64_f64(a); + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x1_t a = simde_test_arm_neon_random_f64x1(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + 
simde_uint64x1_t r = simde_vcvtp_u64_f64(a); + + simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtps_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpd_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtps_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpd_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtph_u64_f16) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtp_u64_f64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_s32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_s64_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_u16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_u32_f32) +SIMDE_TEST_FUNC_LIST_ENTRY(vcvtpq_u64_f64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/ld3.c b/test/arm/neon/ld3.c index d6287174a..e112b0d60 100644 --- a/test/arm/neon/ld3.c +++ b/test/arm/neon/ld3.c @@ -7,6 +7,7 @@ static int test_simde_vld3_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 static const struct { int8_t a[24]; int8_t r[3][8]; @@ -56,10 +57,23 @@ test_simde_vld3_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x24_t a = simde_test_arm_neon_random_i8x24(); + simde_int8x3_t r[3] = simde_vld3_s8(a); + + simde_test_arm_neon_write_i8x24(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x3(2, r[3], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vld3_f16 (SIMDE_MUNIT_TEST_ARGS) { 
+#if 1 struct { simde_float16_t a[12]; simde_float16_t r[3][4]; @@ -114,1564 +128,161 @@ test_simde_vld3_f16 (SIMDE_MUNIT_TEST_ARGS) { simde_test_arm_neon_assert_equal_f16x4(r.val[0], expected.val[0], INT_MAX); simde_test_arm_neon_assert_equal_f16x4(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x4(r.val[2], expected.val[2], INT_MAX); } return 0; -} -/* -static int -test_simde_vld2_s16 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int16_t a[8]; - int16_t r[2][4]; - } test_vec[] = { - { { INT16_C( 26434), -INT16_C( 7742), -INT16_C( 24667), INT16_C( 2612), - -INT16_C( 16418), -INT16_C( 32141), -INT16_C( 30519), INT16_C( 21039) }, - { { INT16_C( 26434), -INT16_C( 24667), -INT16_C( 16418), -INT16_C( 30519) }, - { -INT16_C( 7742), INT16_C( 2612), -INT16_C( 32141), INT16_C( 21039) } }, - }, - { { INT16_C( 12584), INT16_C( 13680), INT16_C( 9409), INT16_C( 17421), - INT16_C( 27590), -INT16_C( 17399), -INT16_C( 23807), INT16_C( 17632) }, - { { INT16_C( 12584), INT16_C( 9409), INT16_C( 27590), -INT16_C( 23807) }, - { INT16_C( 13680), INT16_C( 17421), -INT16_C( 17399), INT16_C( 17632) } }, - }, - { { -INT16_C( 24054), -INT16_C( 20443), INT16_C( 22849), INT16_C( 8122), - INT16_C( 11544), -INT16_C( 7519), -INT16_C( 11851), -INT16_C( 8652) }, - { { -INT16_C( 24054), INT16_C( 22849), INT16_C( 11544), -INT16_C( 11851) }, - { -INT16_C( 20443), INT16_C( 8122), -INT16_C( 7519), -INT16_C( 8652) } }, - }, - { { -INT16_C( 23294), -INT16_C( 15597), INT16_C( 8649), -INT16_C( 28921), - INT16_C( 4236), -INT16_C( 29365), INT16_C( 11188), -INT16_C( 16687) }, - { { -INT16_C( 23294), INT16_C( 8649), INT16_C( 4236), INT16_C( 11188) }, - { -INT16_C( 15597), -INT16_C( 28921), -INT16_C( 29365), -INT16_C( 16687) } } - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int16x4x2_t r = simde_vld2_s16(test_vec[i].a); - - simde_int16x4x2_t expected = { - {simde_vld1_s16(test_vec[i].r[0]), 
simde_vld1_s16(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i16x4(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i16x4(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 4 ; i++) { - simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); - simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); - simde_int16x4x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int16_t buf[8]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int16x4x2_t r = simde_vld2_s16(buf); - - simde_test_arm_neon_write_i16x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_s32 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int32_t a[4]; - int32_t r[2][2]; - } test_vec[] = { - { { INT32_C( 886724837), INT32_C( 903508407), - -INT32_C( 977712366), INT32_C( 1183767792) }, - { { INT32_C( 886724837), -INT32_C( 977712366) }, - { INT32_C( 903508407), INT32_C( 1183767792) } }, - }, - { { INT32_C( 1474850969), INT32_C( 975881925), - INT32_C( 1191817794), -INT32_C( 1270968626) }, - { { INT32_C( 1474850969), INT32_C( 1191817794) }, - { INT32_C( 975881925), -INT32_C( 1270968626) } }, - }, - { { -INT32_C( 1628956186), -INT32_C( 1663843702), - -INT32_C( 43938803), INT32_C( 21229672) }, - { { -INT32_C( 1628956186), -INT32_C( 43938803) }, - { -INT32_C( 1663843702), INT32_C( 21229672) } }, - }, - { { INT32_C( 643312736), INT32_C( 878740466), - INT32_C( 58419765), -INT32_C( 558384392) }, - { { INT32_C( 643312736), INT32_C( 58419765) }, - { INT32_C( 878740466), -INT32_C( 558384392) } }, - }, - { { INT32_C( 1568448467), INT32_C( 1861832801), - INT32_C( 1164729308), -INT32_C( 1421430965) }, - { { INT32_C( 1568448467), INT32_C( 1164729308) }, - { INT32_C( 1861832801), -INT32_C( 1421430965) } }, - }, - { { -INT32_C( 841900069), INT32_C( 1459761698), - -INT32_C( 1806008932), INT32_C( 175247927) }, - { { 
-INT32_C( 841900069), -INT32_C( 1806008932) }, - { INT32_C( 1459761698), INT32_C( 175247927) } }, - }, - { { INT32_C( 325578673), INT32_C( 461463871), - INT32_C( 123792828), INT32_C( 2024974237) }, - { { INT32_C( 325578673), INT32_C( 123792828) }, - { INT32_C( 461463871), INT32_C( 2024974237) } }, - }, - { { INT32_C( 1749451846), INT32_C( 1388267702), - -INT32_C( 35251771), -INT32_C( 586721237) }, - { { INT32_C( 1749451846), -INT32_C( 35251771) }, - { INT32_C( 1388267702), -INT32_C( 586721237) } }} - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int32x2x2_t r = simde_vld2_s32(test_vec[i].a); - - simde_int32x2x2_t expected = { - {simde_vld1_s32(test_vec[i].r[0]), simde_vld1_s32(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i32x2(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i32x2(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); - simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); - simde_int32x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int32_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int32x2x2_t r = simde_vld2_s32(buf); - - simde_test_arm_neon_write_i32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_s64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int64_t a[2]; - int64_t r[2][1]; - } test_vec[] = { - { { INT64_C( 2491657136620446655), - -INT64_C( 8008712209217472471) }, - { { INT64_C( 2491657136620446655) }, - { -INT64_C( 8008712209217472471) } } - }, - { { INT64_C( 5905840427281538397), - -INT64_C( 256047405469913514) }, - { { INT64_C( 5905840427281538397) }, - { -INT64_C( 256047405469913514) } }, - }, - { { INT64_C( 1410598559050352250), - -INT64_C( 8581208681535646293) }, - { { INT64_C( 
1410598559050352250) }, - { -INT64_C( 8581208681535646293) } }, - }, - { { INT64_C( 4768888611226069577), - INT64_C( 5613591610635419053) }, - { { INT64_C( 4768888611226069577) }, - { INT64_C( 5613591610635419053) } }, - }, - { { -INT64_C( 1999508928546814398), - -INT64_C( 8729622882906418906) }, - { { -INT64_C( 1999508928546814398) }, - { -INT64_C( 8729622882906418906) } }, - }, - { { -INT64_C( 8178376526721227951), - INT64_C( 5129424078989003022) }, - { { -INT64_C( 8178376526721227951) }, - { INT64_C( 5129424078989003022) } }, - }, - { { INT64_C( 7224815307703184678), - -INT64_C( 556316978773065340) }, - { { INT64_C( 7224815307703184678) }, - { -INT64_C( 556316978773065340) } }, - }, - { { INT64_C( 5360328353875667699), - INT64_C( 7100870651391302719) }, - { { INT64_C( 5360328353875667699) }, - { INT64_C( 7100870651391302719) } }} - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int64x1x2_t r = simde_vld2_s64(test_vec[i].a); - - simde_int64x1x2_t expected = { - {simde_vld1_s64(test_vec[i].r[0]), simde_vld1_s64(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i64x1(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i64x1(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); - simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); - simde_int64x1x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int64x1x2_t r = simde_vld2_s64(buf); - - simde_test_arm_neon_write_i64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_u8 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint8_t a[16]; - uint8_t r[2][8]; - } test_vec[] = { - - { { UINT8_C(144), UINT8_C(234), UINT8_C(130), UINT8_C(145), UINT8_C( 
26), UINT8_C(241), UINT8_C( 35), UINT8_C( 43), - UINT8_C( 76), UINT8_C(223), UINT8_C(152), UINT8_C(128), UINT8_C(203), UINT8_C( 66), UINT8_C( 17), UINT8_C(218) }, - { { UINT8_C(144), UINT8_C(130), UINT8_C( 26), UINT8_C( 35), UINT8_C( 76), UINT8_C(152), UINT8_C(203), UINT8_C( 17) }, - { UINT8_C(234), UINT8_C(145), UINT8_C(241), UINT8_C( 43), UINT8_C(223), UINT8_C(128), UINT8_C( 66), UINT8_C(218) } }, - }, - { { UINT8_C(196), UINT8_C( 14), UINT8_C( 36), UINT8_C( 59), UINT8_C(230), UINT8_C(253), UINT8_C(216), UINT8_C( 14), - UINT8_C( 31), UINT8_C( 73), UINT8_C( 48), UINT8_C( 55), UINT8_C(200), UINT8_C( 71), UINT8_C(176), UINT8_C( 88) }, - { { UINT8_C(196), UINT8_C( 36), UINT8_C(230), UINT8_C(216), UINT8_C( 31), UINT8_C( 48), UINT8_C(200), UINT8_C(176) }, - { UINT8_C( 14), UINT8_C( 59), UINT8_C(253), UINT8_C( 14), UINT8_C( 73), UINT8_C( 55), UINT8_C( 71), UINT8_C( 88) } }, - }, - { { UINT8_C( 49), UINT8_C( 50), UINT8_C(233), UINT8_C( 76), UINT8_C( 35), UINT8_C( 13), UINT8_C(119), UINT8_C(111), - UINT8_C(236), UINT8_C( 15), UINT8_C(240), UINT8_C(184), UINT8_C( 81), UINT8_C( 1), UINT8_C(146), UINT8_C( 22) }, - { { UINT8_C( 49), UINT8_C(233), UINT8_C( 35), UINT8_C(119), UINT8_C(236), UINT8_C(240), UINT8_C( 81), UINT8_C(146) }, - { UINT8_C( 50), UINT8_C( 76), UINT8_C( 13), UINT8_C(111), UINT8_C( 15), UINT8_C(184), UINT8_C( 1), UINT8_C( 22) } }, - }, - { { UINT8_C( 15), UINT8_C(182), UINT8_C( 81), UINT8_C(245), UINT8_C(179), UINT8_C( 41), UINT8_C( 4), UINT8_C(211), - UINT8_C(115), UINT8_C( 52), UINT8_C( 10), UINT8_C( 59), UINT8_C(123), UINT8_C(187), UINT8_C(147), UINT8_C(173) }, - { { UINT8_C( 15), UINT8_C( 81), UINT8_C(179), UINT8_C( 4), UINT8_C(115), UINT8_C( 10), UINT8_C(123), UINT8_C(147) }, - { UINT8_C(182), UINT8_C(245), UINT8_C( 41), UINT8_C(211), UINT8_C( 52), UINT8_C( 59), UINT8_C(187), UINT8_C(173) } }, - }, - { { UINT8_C(237), UINT8_C(125), UINT8_C(249), UINT8_C( 17), UINT8_C(138), UINT8_C(112), UINT8_C(128), UINT8_C(118), - UINT8_C(127), UINT8_C(112), UINT8_C( 
46), UINT8_C(208), UINT8_C(113), UINT8_C(193), UINT8_C(230), UINT8_C(129) }, - { { UINT8_C(237), UINT8_C(249), UINT8_C(138), UINT8_C(128), UINT8_C(127), UINT8_C( 46), UINT8_C(113), UINT8_C(230) }, - { UINT8_C(125), UINT8_C( 17), UINT8_C(112), UINT8_C(118), UINT8_C(112), UINT8_C(208), UINT8_C(193), UINT8_C(129) } }, - }, - { { UINT8_C(119), UINT8_C( 56), UINT8_C(118), UINT8_C( 43), UINT8_C( 97), UINT8_C(122), UINT8_C(254), UINT8_C(212), - UINT8_C(175), UINT8_C( 8), UINT8_C( 15), UINT8_C( 42), UINT8_C(195), UINT8_C(163), UINT8_C(215), UINT8_C(177) }, - { { UINT8_C(119), UINT8_C(118), UINT8_C( 97), UINT8_C(254), UINT8_C(175), UINT8_C( 15), UINT8_C(195), UINT8_C(215) }, - { UINT8_C( 56), UINT8_C( 43), UINT8_C(122), UINT8_C(212), UINT8_C( 8), UINT8_C( 42), UINT8_C(163), UINT8_C(177) } }, - }, - { { UINT8_C( 32), UINT8_C(208), UINT8_C(194), UINT8_C(170), UINT8_C( 64), UINT8_C( 66), UINT8_C( 32), UINT8_C(191), - UINT8_C(179), UINT8_C( 79), UINT8_C(144), UINT8_C( 36), UINT8_C( 16), UINT8_C(118), UINT8_C(165), UINT8_C(135) }, - { { UINT8_C( 32), UINT8_C(194), UINT8_C( 64), UINT8_C( 32), UINT8_C(179), UINT8_C(144), UINT8_C( 16), UINT8_C(165) }, - { UINT8_C(208), UINT8_C(170), UINT8_C( 66), UINT8_C(191), UINT8_C( 79), UINT8_C( 36), UINT8_C(118), UINT8_C(135) } }, - }, - { { UINT8_C(174), UINT8_C( 28), UINT8_C(178), UINT8_C( 16), UINT8_C(150), UINT8_C(176), UINT8_C(228), UINT8_C( 69), - UINT8_C(185), UINT8_C(244), UINT8_C(112), UINT8_C(124), UINT8_C(151), UINT8_C( 71), UINT8_C( 45), UINT8_C(183) }, - { { UINT8_C(174), UINT8_C(178), UINT8_C(150), UINT8_C(228), UINT8_C(185), UINT8_C(112), UINT8_C(151), UINT8_C( 45) }, - { UINT8_C( 28), UINT8_C( 16), UINT8_C(176), UINT8_C( 69), UINT8_C(244), UINT8_C(124), UINT8_C( 71), UINT8_C(183) } }, - }, - - - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint8x8x2_t r = simde_vld2_u8(test_vec[i].a); - - simde_uint8x8x2_t expected = { - {simde_vld1_u8(test_vec[i].r[0]), 
simde_vld1_u8(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u8x8(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u8x8(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); - simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); - simde_uint8x8x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint8_t buf[16]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint8x8x2_t r = simde_vld2_u8(buf); - - simde_test_arm_neon_write_u8x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_u16 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint16_t a[8]; - uint16_t r[2][4]; - } test_vec[] = { - { { UINT16_C(42664), UINT16_C(53887), UINT16_C(10958), UINT16_C(61123), - UINT16_C( 7800), UINT16_C(15834), UINT16_C(36089), UINT16_C(22799) }, - { { UINT16_C(42664), UINT16_C(10958), UINT16_C( 7800), UINT16_C(36089) }, - { UINT16_C(53887), UINT16_C(61123), UINT16_C(15834), UINT16_C(22799) } }, - }, - { { UINT16_C(62795), UINT16_C(22857), UINT16_C(11632), UINT16_C(39934), - UINT16_C(56742), UINT16_C(64150), UINT16_C( 8095), UINT16_C(18414) }, - { { UINT16_C(62795), UINT16_C(11632), UINT16_C(56742), UINT16_C( 8095) }, - { UINT16_C(22857), UINT16_C(39934), UINT16_C(64150), UINT16_C(18414) } }, - }, - { { UINT16_C(28101), UINT16_C(37914), UINT16_C(56727), UINT16_C( 3970), - UINT16_C(23803), UINT16_C(62797), UINT16_C(23784), UINT16_C(13390) }, - { { UINT16_C(28101), UINT16_C(56727), UINT16_C(23803), UINT16_C(23784) }, - { UINT16_C(37914), UINT16_C( 3970), UINT16_C(62797), UINT16_C(13390) } }, - }, - { { UINT16_C(38737), UINT16_C(49549), UINT16_C(36037), UINT16_C(27485), - UINT16_C(62313), UINT16_C( 2149), UINT16_C(21522), UINT16_C(55375) }, - { { UINT16_C(38737), UINT16_C(36037), UINT16_C(62313), UINT16_C(21522) }, - { 
UINT16_C(49549), UINT16_C(27485), UINT16_C( 2149), UINT16_C(55375) } }, - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint16x4x2_t r = simde_vld2_u16(test_vec[i].a); - - simde_uint16x4x2_t expected = { - {simde_vld1_u16(test_vec[i].r[0]), simde_vld1_u16(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u16x4(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u16x4(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 4 ; i++) { - simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); - simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); - simde_uint16x4x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint16_t buf[8]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint16x4x2_t r = simde_vld2_u16(buf); - - simde_test_arm_neon_write_u16x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_u32 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint32_t a[4]; - uint32_t r[2][2]; - } test_vec[] = { - { { UINT32_C(1500277185), UINT32_C(1114172999), - UINT32_C( 859288906), UINT32_C(1650951697) }, - { { UINT32_C(1500277185), UINT32_C( 859288906) }, - { UINT32_C(1114172999), UINT32_C(1650951697) } }, - }, - { { UINT32_C(3794072605), UINT32_C(3914236288), - UINT32_C(2280764276), UINT32_C(3378462983) }, - { { UINT32_C(3794072605), UINT32_C(2280764276) }, - { UINT32_C(3914236288), UINT32_C(3378462983) } }, - }, - { { UINT32_C(4045589418), UINT32_C( 53775033), - UINT32_C(1362520896), UINT32_C( 263495153) }, - { { UINT32_C(4045589418), UINT32_C(1362520896) }, - { UINT32_C( 53775033), UINT32_C( 263495153) } }, - }, - { { UINT32_C( 317839506), UINT32_C(3455860569), - UINT32_C(4199869939), UINT32_C(3653481262) }, - { { UINT32_C( 317839506), UINT32_C(4199869939) }, - { UINT32_C(3455860569), UINT32_C(3653481262) } }, - }, - { { 
UINT32_C( 936043902), UINT32_C(2956721776), - UINT32_C(1526821226), UINT32_C(2708124943) }, - { { UINT32_C( 936043902), UINT32_C(1526821226) }, - { UINT32_C(2956721776), UINT32_C(2708124943) } }, - }, - { { UINT32_C(3870514317), UINT32_C(2394206107), - UINT32_C(3414755485), UINT32_C( 983846076) }, - { { UINT32_C(3870514317), UINT32_C(3414755485) }, - { UINT32_C(2394206107), UINT32_C( 983846076) } }, - }, - { { UINT32_C(2725408562), UINT32_C(3612519789), - UINT32_C( 758338334), UINT32_C(2530123017) }, - { { UINT32_C(2725408562), UINT32_C( 758338334) }, - { UINT32_C(3612519789), UINT32_C(2530123017) } }, - }, - { { UINT32_C(2508030713), UINT32_C(3458412849), - UINT32_C(4120554553), UINT32_C( 724582137) }, - { { UINT32_C(2508030713), UINT32_C(4120554553) }, - { UINT32_C(3458412849), UINT32_C( 724582137) } }, - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint32x2x2_t r = simde_vld2_u32(test_vec[i].a); - - simde_uint32x2x2_t expected = { - {simde_vld1_u32(test_vec[i].r[0]), simde_vld1_u32(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u32x2(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u32x2(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); - simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); - simde_uint32x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint32_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint32x2x2_t r = simde_vld2_u32(buf); - - simde_test_arm_neon_write_u32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_u64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint64_t a[2]; - uint64_t r[2][1]; - } test_vec[] = { - { { UINT64_C( 9544671133075875798), - UINT64_C(10026771010818587806) }, - { { 
UINT64_C( 9544671133075875798) }, - { UINT64_C(10026771010818587806) } }, - }, - { { UINT64_C(11716502022888129015), - UINT64_C( 9207447440231071203) }, - { { UINT64_C(11716502022888129015) }, - { UINT64_C( 9207447440231071203) } }, - }, - { { UINT64_C(14585844370014973971), - UINT64_C(12085455436694909200) }, - { { UINT64_C(14585844370014973971) }, - { UINT64_C(12085455436694909200) } }, - }, - { { UINT64_C( 6976248983748549802), - UINT64_C( 6648178682459053338) }, - { { UINT64_C( 6976248983748549802) }, - { UINT64_C( 6648178682459053338) } }, - }, - { { UINT64_C( 9780321370926949059), - UINT64_C(10409627327296486687) }, - { { UINT64_C( 9780321370926949059) }, - { UINT64_C(10409627327296486687) } }, - }, - { { UINT64_C( 3559687685519800969), - UINT64_C( 9804398810564506218) }, - { { UINT64_C( 3559687685519800969) }, - { UINT64_C( 9804398810564506218) } }, - }, - { { UINT64_C( 2403006863864517466), - UINT64_C( 244715532034575855) }, - { { UINT64_C( 2403006863864517466) }, - { UINT64_C( 244715532034575855) } }, - }, - { { UINT64_C( 1885442199874249441), - UINT64_C(12785682336243916577) }, - { { UINT64_C( 1885442199874249441) }, - { UINT64_C(12785682336243916577) } }, - }, - - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint64x1x2_t r = simde_vld2_u64(test_vec[i].a); - - simde_uint64x1x2_t expected = { - {simde_vld1_u64(test_vec[i].r[0]), simde_vld1_u64(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u64x1(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u64x1(r.val[1], expected.val[1]); - } - return 0; #else + fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { - simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); - simde_uint64x1_t b = simde_test_arm_neon_random_u64x1(); - simde_uint64x1x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_float16x12_t a = 
simde_test_arm_neon_random_f16x12(-100.0f, 100.0f); + simde_float16x3_t r[3] = simde_vld3_f16(a); - uint64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint64x1x2_t r = simde_vld2_u64(buf); - - simde_test_arm_neon_write_u64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_write_f16x12(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x3(2, r[3], SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } static int -test_simde_vld2_f32 (SIMDE_MUNIT_TEST_ARGS) { +test_simde_vld3q_f16 (SIMDE_MUNIT_TEST_ARGS) { #if 1 static const struct { - simde_float32_t a[4]; - simde_float32_t r[2][2]; + simde_float16_t a[24]; + simde_float16_t r[3][8]; } test_vec[] = { - { { SIMDE_FLOAT32_C( -22.58), SIMDE_FLOAT32_C( -87.59), - SIMDE_FLOAT32_C( 1.12), SIMDE_FLOAT32_C( -68.18) }, - { { SIMDE_FLOAT32_C( -22.58), SIMDE_FLOAT32_C( 1.12) }, - { SIMDE_FLOAT32_C( -87.59), SIMDE_FLOAT32_C( -68.18) } }, - }, - { { SIMDE_FLOAT32_C( -19.64), SIMDE_FLOAT32_C( -79.11), - SIMDE_FLOAT32_C( -13.56), SIMDE_FLOAT32_C( 59.24) }, - { { SIMDE_FLOAT32_C( -19.64), SIMDE_FLOAT32_C( -13.56) }, - { SIMDE_FLOAT32_C( -79.11), SIMDE_FLOAT32_C( 59.24) } }, - }, - { { SIMDE_FLOAT32_C( 84.01), SIMDE_FLOAT32_C( -49.79), - SIMDE_FLOAT32_C( 69.20), SIMDE_FLOAT32_C( 82.05) }, - { { SIMDE_FLOAT32_C( 84.01), SIMDE_FLOAT32_C( 69.20) }, - { SIMDE_FLOAT32_C( -49.79), SIMDE_FLOAT32_C( 82.05) } }, - }, - { { SIMDE_FLOAT32_C( -12.11), SIMDE_FLOAT32_C( 93.29), - SIMDE_FLOAT32_C( 70.29), SIMDE_FLOAT32_C( 70.67) }, - { { SIMDE_FLOAT32_C( -12.11), SIMDE_FLOAT32_C( 70.29) }, - { SIMDE_FLOAT32_C( 93.29), SIMDE_FLOAT32_C( 70.67) } }, - }, - { { SIMDE_FLOAT32_C( 14.62), SIMDE_FLOAT32_C( 51.53), - SIMDE_FLOAT32_C( 11.77), SIMDE_FLOAT32_C( 81.69) }, - { { SIMDE_FLOAT32_C( 14.62), SIMDE_FLOAT32_C( 11.77) }, - { SIMDE_FLOAT32_C( 51.53), SIMDE_FLOAT32_C( 81.69) } }, - }, - { { SIMDE_FLOAT32_C( -19.28), SIMDE_FLOAT32_C( 59.50), - SIMDE_FLOAT32_C( -77.17), SIMDE_FLOAT32_C( -41.73) }, - { { SIMDE_FLOAT32_C( -19.28), 
SIMDE_FLOAT32_C( -77.17) }, - { SIMDE_FLOAT32_C( 59.50), SIMDE_FLOAT32_C( -41.73) } }, - }, - { { SIMDE_FLOAT32_C( -85.96), SIMDE_FLOAT32_C( 68.07), - SIMDE_FLOAT32_C( -11.65), SIMDE_FLOAT32_C( 31.99) }, - { { SIMDE_FLOAT32_C( -85.96), SIMDE_FLOAT32_C( -11.65) }, - { SIMDE_FLOAT32_C( 68.07), SIMDE_FLOAT32_C( 31.99) } }, - }, - { { SIMDE_FLOAT32_C( -11.72), SIMDE_FLOAT32_C( -86.96), - SIMDE_FLOAT32_C( 52.84), SIMDE_FLOAT32_C( 65.70) }, - { { SIMDE_FLOAT32_C( -11.72), SIMDE_FLOAT32_C( 52.84) }, - { SIMDE_FLOAT32_C( -86.96), SIMDE_FLOAT32_C( 65.70) } } - }, + { { SIMDE_FLOAT16_VALUE( 50.511), SIMDE_FLOAT16_VALUE( 86.684), SIMDE_FLOAT16_VALUE( 96.211), SIMDE_FLOAT16_VALUE( 21.009), + SIMDE_FLOAT16_VALUE( - 58.967), SIMDE_FLOAT16_VALUE( - 95.046), SIMDE_FLOAT16_VALUE( 3.355), SIMDE_FLOAT16_VALUE( - 32.048), + SIMDE_FLOAT16_VALUE( - 72.615), SIMDE_FLOAT16_VALUE( 13.174), SIMDE_FLOAT16_VALUE( 15.374), SIMDE_FLOAT16_VALUE( - 72.543), + SIMDE_FLOAT16_VALUE( - 68.833), SIMDE_FLOAT16_VALUE( 53.897), SIMDE_FLOAT16_VALUE( 93.769), SIMDE_FLOAT16_VALUE( 0.685), + SIMDE_FLOAT16_VALUE( - 70.586), SIMDE_FLOAT16_VALUE( 38.881), SIMDE_FLOAT16_VALUE( - 14.775), SIMDE_FLOAT16_VALUE( - 3.719), + SIMDE_FLOAT16_VALUE( 20.072), SIMDE_FLOAT16_VALUE( - 2.675), SIMDE_FLOAT16_VALUE( - 36.488), SIMDE_FLOAT16_VALUE( 30.856) }, + { { SIMDE_FLOAT16_VALUE( 50.511), SIMDE_FLOAT16_VALUE( 21.009), SIMDE_FLOAT16_VALUE( 3.355), SIMDE_FLOAT16_VALUE( 13.174), + SIMDE_FLOAT16_VALUE( - 68.833), SIMDE_FLOAT16_VALUE( 0.685), SIMDE_FLOAT16_VALUE( - 14.775), SIMDE_FLOAT16_VALUE( - 2.675) }, + { SIMDE_FLOAT16_VALUE( 86.684), SIMDE_FLOAT16_VALUE( - 58.967), SIMDE_FLOAT16_VALUE( - 32.048), SIMDE_FLOAT16_VALUE( 15.374), + SIMDE_FLOAT16_VALUE( 53.897), SIMDE_FLOAT16_VALUE( - 70.586), SIMDE_FLOAT16_VALUE( - 3.719), SIMDE_FLOAT16_VALUE( - 36.488) }, + { SIMDE_FLOAT16_VALUE( 96.211), SIMDE_FLOAT16_VALUE( - 95.046), SIMDE_FLOAT16_VALUE( - 72.615), SIMDE_FLOAT16_VALUE( - 72.543), + SIMDE_FLOAT16_VALUE( 93.769), 
SIMDE_FLOAT16_VALUE( 38.881), SIMDE_FLOAT16_VALUE( 20.072), SIMDE_FLOAT16_VALUE( 30.856) } } }, + { { SIMDE_FLOAT16_VALUE( 27.169), SIMDE_FLOAT16_VALUE( 97.696), SIMDE_FLOAT16_VALUE( 6.429), SIMDE_FLOAT16_VALUE( 1.851), + SIMDE_FLOAT16_VALUE( 57.529), SIMDE_FLOAT16_VALUE( - 8.606), SIMDE_FLOAT16_VALUE( - 73.783), SIMDE_FLOAT16_VALUE( 38.167), + SIMDE_FLOAT16_VALUE( 66.160), SIMDE_FLOAT16_VALUE( - 43.612), SIMDE_FLOAT16_VALUE( 7.569), SIMDE_FLOAT16_VALUE( 30.400), + SIMDE_FLOAT16_VALUE( 73.369), SIMDE_FLOAT16_VALUE( - 37.197), SIMDE_FLOAT16_VALUE( 44.001), SIMDE_FLOAT16_VALUE( - 61.104), + SIMDE_FLOAT16_VALUE( - 91.738), SIMDE_FLOAT16_VALUE( - 76.714), SIMDE_FLOAT16_VALUE( 62.278), SIMDE_FLOAT16_VALUE( 64.843), + SIMDE_FLOAT16_VALUE( - 31.622), SIMDE_FLOAT16_VALUE( 39.644), SIMDE_FLOAT16_VALUE( - 50.048), SIMDE_FLOAT16_VALUE( 16.733) }, + { { SIMDE_FLOAT16_VALUE( 27.169), SIMDE_FLOAT16_VALUE( 1.851), SIMDE_FLOAT16_VALUE( - 73.783), SIMDE_FLOAT16_VALUE( - 43.612), + SIMDE_FLOAT16_VALUE( 73.369), SIMDE_FLOAT16_VALUE( - 61.104), SIMDE_FLOAT16_VALUE( 62.278), SIMDE_FLOAT16_VALUE( 39.644) }, + { SIMDE_FLOAT16_VALUE( 97.696), SIMDE_FLOAT16_VALUE( 57.529), SIMDE_FLOAT16_VALUE( 38.167), SIMDE_FLOAT16_VALUE( 7.569), + SIMDE_FLOAT16_VALUE( - 37.197), SIMDE_FLOAT16_VALUE( - 91.738), SIMDE_FLOAT16_VALUE( 64.843), SIMDE_FLOAT16_VALUE( - 50.048) }, + { SIMDE_FLOAT16_VALUE( 6.429), SIMDE_FLOAT16_VALUE( - 8.606), SIMDE_FLOAT16_VALUE( 66.160), SIMDE_FLOAT16_VALUE( 30.400), + SIMDE_FLOAT16_VALUE( 44.001), SIMDE_FLOAT16_VALUE( - 76.714), SIMDE_FLOAT16_VALUE( - 31.622), SIMDE_FLOAT16_VALUE( 16.733) } } }, + { { SIMDE_FLOAT16_VALUE( 79.887), SIMDE_FLOAT16_VALUE( 75.064), SIMDE_FLOAT16_VALUE( 97.298), SIMDE_FLOAT16_VALUE( - 36.532), + SIMDE_FLOAT16_VALUE( 31.519), SIMDE_FLOAT16_VALUE( - 21.137), SIMDE_FLOAT16_VALUE( 37.884), SIMDE_FLOAT16_VALUE( 75.605), + SIMDE_FLOAT16_VALUE( - 7.086), SIMDE_FLOAT16_VALUE( - 61.433), SIMDE_FLOAT16_VALUE( 4.750), SIMDE_FLOAT16_VALUE( - 68.334), + 
SIMDE_FLOAT16_VALUE( - 27.825), SIMDE_FLOAT16_VALUE( - 95.446), SIMDE_FLOAT16_VALUE( - 57.864), SIMDE_FLOAT16_VALUE( 35.983), + SIMDE_FLOAT16_VALUE( 47.271), SIMDE_FLOAT16_VALUE( 89.486), SIMDE_FLOAT16_VALUE( 44.198), SIMDE_FLOAT16_VALUE( - 43.979), + SIMDE_FLOAT16_VALUE( 98.973), SIMDE_FLOAT16_VALUE( 96.892), SIMDE_FLOAT16_VALUE( 83.644), SIMDE_FLOAT16_VALUE( 1.933) }, + { { SIMDE_FLOAT16_VALUE( 79.887), SIMDE_FLOAT16_VALUE( - 36.532), SIMDE_FLOAT16_VALUE( 37.884), SIMDE_FLOAT16_VALUE( - 61.433), + SIMDE_FLOAT16_VALUE( - 27.825), SIMDE_FLOAT16_VALUE( 35.983), SIMDE_FLOAT16_VALUE( 44.198), SIMDE_FLOAT16_VALUE( 96.892) }, + { SIMDE_FLOAT16_VALUE( 75.064), SIMDE_FLOAT16_VALUE( 31.519), SIMDE_FLOAT16_VALUE( 75.605), SIMDE_FLOAT16_VALUE( 4.750), + SIMDE_FLOAT16_VALUE( - 95.446), SIMDE_FLOAT16_VALUE( 47.271), SIMDE_FLOAT16_VALUE( - 43.979), SIMDE_FLOAT16_VALUE( 83.644) }, + { SIMDE_FLOAT16_VALUE( 97.298), SIMDE_FLOAT16_VALUE( - 21.137), SIMDE_FLOAT16_VALUE( - 7.086), SIMDE_FLOAT16_VALUE( - 68.334), + SIMDE_FLOAT16_VALUE( - 57.864), SIMDE_FLOAT16_VALUE( 89.486), SIMDE_FLOAT16_VALUE( 98.973), SIMDE_FLOAT16_VALUE( 1.933) } } }, + { { SIMDE_FLOAT16_VALUE( 45.623), SIMDE_FLOAT16_VALUE( 40.930), SIMDE_FLOAT16_VALUE( - 18.856), SIMDE_FLOAT16_VALUE( - 35.474), + SIMDE_FLOAT16_VALUE( - 25.714), SIMDE_FLOAT16_VALUE( 28.034), SIMDE_FLOAT16_VALUE( 86.809), SIMDE_FLOAT16_VALUE( 96.976), + SIMDE_FLOAT16_VALUE( - 81.890), SIMDE_FLOAT16_VALUE( - 7.184), SIMDE_FLOAT16_VALUE( - 66.904), SIMDE_FLOAT16_VALUE( - 32.987), + SIMDE_FLOAT16_VALUE( - 21.348), SIMDE_FLOAT16_VALUE( - 74.742), SIMDE_FLOAT16_VALUE( - 34.888), SIMDE_FLOAT16_VALUE( 18.741), + SIMDE_FLOAT16_VALUE( - 15.061), SIMDE_FLOAT16_VALUE( 38.458), SIMDE_FLOAT16_VALUE( - 14.151), SIMDE_FLOAT16_VALUE( - 48.572), + SIMDE_FLOAT16_VALUE( 57.645), SIMDE_FLOAT16_VALUE( - 40.036), SIMDE_FLOAT16_VALUE( - 33.994), SIMDE_FLOAT16_VALUE( 52.148) }, + { { SIMDE_FLOAT16_VALUE( 45.623), SIMDE_FLOAT16_VALUE( - 35.474), SIMDE_FLOAT16_VALUE( 
86.809), SIMDE_FLOAT16_VALUE( - 7.184), + SIMDE_FLOAT16_VALUE( - 21.348), SIMDE_FLOAT16_VALUE( 18.741), SIMDE_FLOAT16_VALUE( - 14.151), SIMDE_FLOAT16_VALUE( - 40.036) }, + { SIMDE_FLOAT16_VALUE( 40.930), SIMDE_FLOAT16_VALUE( - 25.714), SIMDE_FLOAT16_VALUE( 96.976), SIMDE_FLOAT16_VALUE( - 66.904), + SIMDE_FLOAT16_VALUE( - 74.742), SIMDE_FLOAT16_VALUE( - 15.061), SIMDE_FLOAT16_VALUE( - 48.572), SIMDE_FLOAT16_VALUE( - 33.994) }, + { SIMDE_FLOAT16_VALUE( - 18.856), SIMDE_FLOAT16_VALUE( 28.034), SIMDE_FLOAT16_VALUE( - 81.890), SIMDE_FLOAT16_VALUE( - 32.987), + SIMDE_FLOAT16_VALUE( - 34.888), SIMDE_FLOAT16_VALUE( 38.458), SIMDE_FLOAT16_VALUE( 57.645), SIMDE_FLOAT16_VALUE( 52.148) } } }, + { { SIMDE_FLOAT16_VALUE( 77.853), SIMDE_FLOAT16_VALUE( - 5.293), SIMDE_FLOAT16_VALUE( 65.177), SIMDE_FLOAT16_VALUE( - 50.710), + SIMDE_FLOAT16_VALUE( 22.953), SIMDE_FLOAT16_VALUE( - 6.422), SIMDE_FLOAT16_VALUE( 85.417), SIMDE_FLOAT16_VALUE( 1.387), + SIMDE_FLOAT16_VALUE( 65.246), SIMDE_FLOAT16_VALUE( - 24.277), SIMDE_FLOAT16_VALUE( - 46.158), SIMDE_FLOAT16_VALUE( - 67.905), + SIMDE_FLOAT16_VALUE( 43.158), SIMDE_FLOAT16_VALUE( - 62.899), SIMDE_FLOAT16_VALUE( - 27.784), SIMDE_FLOAT16_VALUE( 56.200), + SIMDE_FLOAT16_VALUE( 82.898), SIMDE_FLOAT16_VALUE( - 71.415), SIMDE_FLOAT16_VALUE( - 29.585), SIMDE_FLOAT16_VALUE( - 28.012), + SIMDE_FLOAT16_VALUE( 52.765), SIMDE_FLOAT16_VALUE( 82.867), SIMDE_FLOAT16_VALUE( 22.238), SIMDE_FLOAT16_VALUE( - 37.496) }, + { { SIMDE_FLOAT16_VALUE( 77.853), SIMDE_FLOAT16_VALUE( - 50.710), SIMDE_FLOAT16_VALUE( 85.417), SIMDE_FLOAT16_VALUE( - 24.277), + SIMDE_FLOAT16_VALUE( 43.158), SIMDE_FLOAT16_VALUE( 56.200), SIMDE_FLOAT16_VALUE( - 29.585), SIMDE_FLOAT16_VALUE( 82.867) }, + { SIMDE_FLOAT16_VALUE( - 5.293), SIMDE_FLOAT16_VALUE( 22.953), SIMDE_FLOAT16_VALUE( 1.387), SIMDE_FLOAT16_VALUE( - 46.158), + SIMDE_FLOAT16_VALUE( - 62.899), SIMDE_FLOAT16_VALUE( 82.898), SIMDE_FLOAT16_VALUE( - 28.012), SIMDE_FLOAT16_VALUE( 22.238) }, + { SIMDE_FLOAT16_VALUE( 65.177), 
SIMDE_FLOAT16_VALUE( - 6.422), SIMDE_FLOAT16_VALUE( 65.246), SIMDE_FLOAT16_VALUE( - 67.905), + SIMDE_FLOAT16_VALUE( - 27.784), SIMDE_FLOAT16_VALUE( - 71.415), SIMDE_FLOAT16_VALUE( 52.765), SIMDE_FLOAT16_VALUE( - 37.496) } } }, + { { SIMDE_FLOAT16_VALUE( - 2.331), SIMDE_FLOAT16_VALUE( 29.415), SIMDE_FLOAT16_VALUE( 35.770), SIMDE_FLOAT16_VALUE( 16.302), + SIMDE_FLOAT16_VALUE( - 91.643), SIMDE_FLOAT16_VALUE( - 70.642), SIMDE_FLOAT16_VALUE( - 22.834), SIMDE_FLOAT16_VALUE( - 16.944), + SIMDE_FLOAT16_VALUE( 84.108), SIMDE_FLOAT16_VALUE( 1.977), SIMDE_FLOAT16_VALUE( - 9.416), SIMDE_FLOAT16_VALUE( 74.766), + SIMDE_FLOAT16_VALUE( - 30.242), SIMDE_FLOAT16_VALUE( 20.937), SIMDE_FLOAT16_VALUE( - 2.896), SIMDE_FLOAT16_VALUE( - 6.387), + SIMDE_FLOAT16_VALUE( 3.481), SIMDE_FLOAT16_VALUE( 35.087), SIMDE_FLOAT16_VALUE( - 51.719), SIMDE_FLOAT16_VALUE( 63.190), + SIMDE_FLOAT16_VALUE( 69.433), SIMDE_FLOAT16_VALUE( 25.431), SIMDE_FLOAT16_VALUE( 62.916), SIMDE_FLOAT16_VALUE( 65.064) }, + { { SIMDE_FLOAT16_VALUE( - 2.331), SIMDE_FLOAT16_VALUE( 16.302), SIMDE_FLOAT16_VALUE( - 22.834), SIMDE_FLOAT16_VALUE( 1.977), + SIMDE_FLOAT16_VALUE( - 30.242), SIMDE_FLOAT16_VALUE( - 6.387), SIMDE_FLOAT16_VALUE( - 51.719), SIMDE_FLOAT16_VALUE( 25.431) }, + { SIMDE_FLOAT16_VALUE( 29.415), SIMDE_FLOAT16_VALUE( - 91.643), SIMDE_FLOAT16_VALUE( - 16.944), SIMDE_FLOAT16_VALUE( - 9.416), + SIMDE_FLOAT16_VALUE( 20.937), SIMDE_FLOAT16_VALUE( 3.481), SIMDE_FLOAT16_VALUE( 63.190), SIMDE_FLOAT16_VALUE( 62.916) }, + { SIMDE_FLOAT16_VALUE( 35.770), SIMDE_FLOAT16_VALUE( - 70.642), SIMDE_FLOAT16_VALUE( 84.108), SIMDE_FLOAT16_VALUE( 74.766), + SIMDE_FLOAT16_VALUE( - 2.896), SIMDE_FLOAT16_VALUE( 35.087), SIMDE_FLOAT16_VALUE( 69.433), SIMDE_FLOAT16_VALUE( 65.064) } } }, + { { SIMDE_FLOAT16_VALUE( 98.895), SIMDE_FLOAT16_VALUE( 72.717), SIMDE_FLOAT16_VALUE( - 6.353), SIMDE_FLOAT16_VALUE( 70.032), + SIMDE_FLOAT16_VALUE( 3.465), SIMDE_FLOAT16_VALUE( - 59.832), SIMDE_FLOAT16_VALUE( - 45.154), SIMDE_FLOAT16_VALUE( 21.203), + 
SIMDE_FLOAT16_VALUE( 58.153), SIMDE_FLOAT16_VALUE( - 22.957), SIMDE_FLOAT16_VALUE( - 85.381), SIMDE_FLOAT16_VALUE( 81.511), + SIMDE_FLOAT16_VALUE( 14.659), SIMDE_FLOAT16_VALUE( 98.757), SIMDE_FLOAT16_VALUE( 99.317), SIMDE_FLOAT16_VALUE( - 83.947), + SIMDE_FLOAT16_VALUE( 38.963), SIMDE_FLOAT16_VALUE( - 44.639), SIMDE_FLOAT16_VALUE( - 34.702), SIMDE_FLOAT16_VALUE( - 64.943), + SIMDE_FLOAT16_VALUE( - 90.372), SIMDE_FLOAT16_VALUE( - 85.000), SIMDE_FLOAT16_VALUE( 24.672), SIMDE_FLOAT16_VALUE( 12.816) }, + { { SIMDE_FLOAT16_VALUE( 98.895), SIMDE_FLOAT16_VALUE( 70.032), SIMDE_FLOAT16_VALUE( - 45.154), SIMDE_FLOAT16_VALUE( - 22.957), + SIMDE_FLOAT16_VALUE( 14.659), SIMDE_FLOAT16_VALUE( - 83.947), SIMDE_FLOAT16_VALUE( - 34.702), SIMDE_FLOAT16_VALUE( - 85.000) }, + { SIMDE_FLOAT16_VALUE( 72.717), SIMDE_FLOAT16_VALUE( 3.465), SIMDE_FLOAT16_VALUE( 21.203), SIMDE_FLOAT16_VALUE( - 85.381), + SIMDE_FLOAT16_VALUE( 98.757), SIMDE_FLOAT16_VALUE( 38.963), SIMDE_FLOAT16_VALUE( - 64.943), SIMDE_FLOAT16_VALUE( 24.672) }, + { SIMDE_FLOAT16_VALUE( - 6.353), SIMDE_FLOAT16_VALUE( - 59.832), SIMDE_FLOAT16_VALUE( 58.153), SIMDE_FLOAT16_VALUE( 81.511), + SIMDE_FLOAT16_VALUE( 99.317), SIMDE_FLOAT16_VALUE( - 44.639), SIMDE_FLOAT16_VALUE( - 90.372), SIMDE_FLOAT16_VALUE( 12.816) } } }, + { { SIMDE_FLOAT16_VALUE( 67.964), SIMDE_FLOAT16_VALUE( - 49.709), SIMDE_FLOAT16_VALUE( 91.615), SIMDE_FLOAT16_VALUE( - 45.904), + SIMDE_FLOAT16_VALUE( 25.435), SIMDE_FLOAT16_VALUE( 79.259), SIMDE_FLOAT16_VALUE( 12.338), SIMDE_FLOAT16_VALUE( - 54.866), + SIMDE_FLOAT16_VALUE( - 4.301), SIMDE_FLOAT16_VALUE( - 20.530), SIMDE_FLOAT16_VALUE( 71.469), SIMDE_FLOAT16_VALUE( 9.646), + SIMDE_FLOAT16_VALUE( 27.156), SIMDE_FLOAT16_VALUE( 4.728), SIMDE_FLOAT16_VALUE( 9.490), SIMDE_FLOAT16_VALUE( 82.699), + SIMDE_FLOAT16_VALUE( - 27.427), SIMDE_FLOAT16_VALUE( 57.932), SIMDE_FLOAT16_VALUE( 66.510), SIMDE_FLOAT16_VALUE( - 27.262), + SIMDE_FLOAT16_VALUE( 54.618), SIMDE_FLOAT16_VALUE( 46.855), SIMDE_FLOAT16_VALUE( 62.478), 
SIMDE_FLOAT16_VALUE( 61.709) }, + { { SIMDE_FLOAT16_VALUE( 67.964), SIMDE_FLOAT16_VALUE( - 45.904), SIMDE_FLOAT16_VALUE( 12.338), SIMDE_FLOAT16_VALUE( - 20.530), + SIMDE_FLOAT16_VALUE( 27.156), SIMDE_FLOAT16_VALUE( 82.699), SIMDE_FLOAT16_VALUE( 66.510), SIMDE_FLOAT16_VALUE( 46.855) }, + { SIMDE_FLOAT16_VALUE( - 49.709), SIMDE_FLOAT16_VALUE( 25.435), SIMDE_FLOAT16_VALUE( - 54.866), SIMDE_FLOAT16_VALUE( 71.469), + SIMDE_FLOAT16_VALUE( 4.728), SIMDE_FLOAT16_VALUE( - 27.427), SIMDE_FLOAT16_VALUE( - 27.262), SIMDE_FLOAT16_VALUE( 62.478) }, + { SIMDE_FLOAT16_VALUE( 91.615), SIMDE_FLOAT16_VALUE( 79.259), SIMDE_FLOAT16_VALUE( - 4.301), SIMDE_FLOAT16_VALUE( 9.646), + SIMDE_FLOAT16_VALUE( 9.490), SIMDE_FLOAT16_VALUE( 57.932), SIMDE_FLOAT16_VALUE( 54.618), SIMDE_FLOAT16_VALUE( 61.709) } } }, }; for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x2x2_t r = simde_vld2_f32(test_vec[i].a); + simde_float16x8x3_t r = simde_vld3q_f16(test_vec[i].a); - simde_float32x2x2_t expected = { - {simde_vld1_f32(test_vec[i].r[0]), simde_vld1_f32(test_vec[i].r[1])}}; + simde_float16x8x3_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1]), simde_vld1q_f16(test_vec[i].r[2])}}; - simde_test_arm_neon_assert_equal_f32x2(r.val[0], expected.val[0], 1); - simde_test_arm_neon_assert_equal_f32x2(r.val[1], expected.val[1], 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], INT_MAX); } return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-100.0f, 100.0f); - simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-100.0f, 100.0f); - simde_float32x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - 
simde_float32_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_float32x2x2_t r = simde_vld2_f32(buf); - - simde_test_arm_neon_write_f32x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2_f64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - simde_float64_t a[2]; - simde_float64_t r[2][1]; - } test_vec[] = { - { { SIMDE_FLOAT64_C( -60.52), - SIMDE_FLOAT64_C( -27.97) }, - { { SIMDE_FLOAT64_C( -60.52) }, - { SIMDE_FLOAT64_C( -27.97) } }, - }, - { { SIMDE_FLOAT64_C( 41.23), - SIMDE_FLOAT64_C( -82.15) }, - { { SIMDE_FLOAT64_C( 41.23) }, - { SIMDE_FLOAT64_C( -82.15) } }, - }, - { { SIMDE_FLOAT64_C( -45.22), - SIMDE_FLOAT64_C( -82.20) }, - { { SIMDE_FLOAT64_C( -45.22) }, - { SIMDE_FLOAT64_C( -82.20) } }, - }, - { { SIMDE_FLOAT64_C( 20.47), - SIMDE_FLOAT64_C( -91.57) }, - { { SIMDE_FLOAT64_C( 20.47) }, - { SIMDE_FLOAT64_C( -91.57) } }, - }, - { { SIMDE_FLOAT64_C( 87.79), - SIMDE_FLOAT64_C( 27.03) }, - { { SIMDE_FLOAT64_C( 87.79) }, - { SIMDE_FLOAT64_C( 27.03) } }, - }, - { { SIMDE_FLOAT64_C( -13.17), - SIMDE_FLOAT64_C( 89.71) }, - { { SIMDE_FLOAT64_C( -13.17) }, - { SIMDE_FLOAT64_C( 89.71) } }, - }, - { { SIMDE_FLOAT64_C( -91.04), - SIMDE_FLOAT64_C( 54.16) }, - { { SIMDE_FLOAT64_C( -91.04) }, - { SIMDE_FLOAT64_C( 54.16) } }, - }, - { { SIMDE_FLOAT64_C( 49.63), - SIMDE_FLOAT64_C( 71.75) }, - { { SIMDE_FLOAT64_C( 49.63) }, - { SIMDE_FLOAT64_C( 71.75) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float64x1x2_t r = simde_vld2_f64(test_vec[i].a); - - simde_float64x1x2_t expected = { - {simde_vld1_f64(test_vec[i].r[0]), simde_vld1_f64(test_vec[i].r[1])}}; - simde_test_arm_neon_assert_equal_f64x1(r.val[0], expected.val[0], 1); - simde_test_arm_neon_assert_equal_f64x1(r.val[1], expected.val[1], 1); - } - - return 0; #else + fputc('\n', stdout); for (int i = 0 ; i < 8 ; i++) { - simde_float64x1_t a = simde_test_arm_neon_random_f64x1(-100.0, 100.0); - simde_float64x1_t 
b = simde_test_arm_neon_random_f64x1(-100.0, 100.0); - simde_float64x1x2_t c = {{a, b}}; - - simde_test_arm_neon_write_f64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_f64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - simde_float64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_float64x1x2_t r = simde_vld2_f64(buf); - - simde_test_arm_neon_write_f64x1x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_s8 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int8_t a[32]; - int8_t r[2][16]; - } test_vec[] = { - { { -INT8_C( 25), INT8_C( 77), INT8_C( 76), INT8_C( 77), INT8_C( 68), INT8_C( 84), INT8_C( 81), INT8_C( 59), - INT8_C( 69), INT8_C( 8), -INT8_C( 108), -INT8_C( 45), -INT8_C( 85), INT8_C( 73), -INT8_C( 110), INT8_C( 69), - -INT8_C( 13), INT8_C( 23), -INT8_C( 80), -INT8_C( 93), -INT8_C( 102), INT8_C( 80), -INT8_C( 63), INT8_C( 67), - INT8_C( 11), -INT8_C( 75), INT8_C( 9), INT8_C( 98), INT8_C( 19), INT8_C( 38), INT8_C( 41), -INT8_C( 6) }, - { { -INT8_C( 25), INT8_C( 76), INT8_C( 68), INT8_C( 81), INT8_C( 69), -INT8_C( 108), -INT8_C( 85), -INT8_C( 110), - -INT8_C( 13), -INT8_C( 80), -INT8_C( 102), -INT8_C( 63), INT8_C( 11), INT8_C( 9), INT8_C( 19), INT8_C( 41) }, - { INT8_C( 77), INT8_C( 77), INT8_C( 84), INT8_C( 59), INT8_C( 8), -INT8_C( 45), INT8_C( 73), INT8_C( 69), - INT8_C( 23), -INT8_C( 93), INT8_C( 80), INT8_C( 67), -INT8_C( 75), INT8_C( 98), INT8_C( 38), -INT8_C( 6) } }, - }, - { { INT8_C( 116), INT8_C( 117), INT8_C( 71), -INT8_C( 72), -INT8_C( 55), -INT8_C( 104), -INT8_C( 13), INT8_C( 15), - -INT8_C( 96), -INT8_C( 121), -INT8_C( 30), INT8_C( 76), -INT8_C( 48), INT8_C( 116), -INT8_C( 111), -INT8_C( 60), - -INT8_C( 117), INT8_C( 65), INT8_C( 103), INT8_C( 38), -INT8_C( 111), INT8_C( 40), INT8_C( 105), -INT8_C( 100), - -INT8_C( 35), INT8_C( 115), -INT8_C( 2), -INT8_C( 15), -INT8_C( 103), INT8_C( 39), -INT8_C( 21), INT8_C( 13) }, - { { INT8_C( 116), INT8_C( 71), -INT8_C( 55), -INT8_C( 
13), -INT8_C( 96), -INT8_C( 30), -INT8_C( 48), -INT8_C( 111), - -INT8_C( 117), INT8_C( 103), -INT8_C( 111), INT8_C( 105), -INT8_C( 35), -INT8_C( 2), -INT8_C( 103), -INT8_C( 21) }, - { INT8_C( 117), -INT8_C( 72), -INT8_C( 104), INT8_C( 15), -INT8_C( 121), INT8_C( 76), INT8_C( 116), -INT8_C( 60), - INT8_C( 65), INT8_C( 38), INT8_C( 40), -INT8_C( 100), INT8_C( 115), -INT8_C( 15), INT8_C( 39), INT8_C( 13) } }, - }, - { { -INT8_C( 100), INT8_C( 51), -INT8_C( 59), INT8_C( 102), -INT8_C( 53), -INT8_C( 71), INT8_C( 117), INT8_C( 108), - INT8_C( 64), INT8_C( 87), -INT8_C( 72), INT8_C( 17), -INT8_C( 52), INT8_C( 73), -INT8_C( 43), INT8_C( 87), - -INT8_C( 117), INT8_C( 60), INT8_C( 125), INT8_C( 28), INT8_C( 101), -INT8_C( 25), -INT8_C( 71), INT8_C( 66), - INT8_C( 90), -INT8_C( 73), INT8_C( 51), -INT8_C( 13), -INT8_C( 33), INT8_C( 31), INT8_C( 1), INT8_C( 123) }, - { { -INT8_C( 100), -INT8_C( 59), -INT8_C( 53), INT8_C( 117), INT8_C( 64), -INT8_C( 72), -INT8_C( 52), -INT8_C( 43), - -INT8_C( 117), INT8_C( 125), INT8_C( 101), -INT8_C( 71), INT8_C( 90), INT8_C( 51), -INT8_C( 33), INT8_C( 1) }, - { INT8_C( 51), INT8_C( 102), -INT8_C( 71), INT8_C( 108), INT8_C( 87), INT8_C( 17), INT8_C( 73), INT8_C( 87), - INT8_C( 60), INT8_C( 28), -INT8_C( 25), INT8_C( 66), -INT8_C( 73), -INT8_C( 13), INT8_C( 31), INT8_C( 123) } }, - }, - { { INT8_C( 82), -INT8_C( 58), -INT8_C( 31), INT8_C( 29), INT8_MAX, INT8_C( 86), -INT8_C( 119), -INT8_C( 64), - -INT8_C( 82), INT8_C( 65), -INT8_C( 47), INT8_C( 122), -INT8_C( 117), -INT8_C( 90), -INT8_C( 47), INT8_C( 22), - -INT8_C( 30), INT8_C( 79), INT8_C( 50), INT8_C( 71), INT8_C( 54), -INT8_C( 21), -INT8_C( 118), -INT8_C( 112), - -INT8_C( 93), -INT8_C( 67), -INT8_C( 125), -INT8_C( 126), -INT8_C( 36), -INT8_C( 124), -INT8_C( 3), INT8_C( 46) }, - { { INT8_C( 82), -INT8_C( 31), INT8_MAX, -INT8_C( 119), -INT8_C( 82), -INT8_C( 47), -INT8_C( 117), -INT8_C( 47), - -INT8_C( 30), INT8_C( 50), INT8_C( 54), -INT8_C( 118), -INT8_C( 93), -INT8_C( 125), -INT8_C( 36), 
-INT8_C( 3) }, - { -INT8_C( 58), INT8_C( 29), INT8_C( 86), -INT8_C( 64), INT8_C( 65), INT8_C( 122), -INT8_C( 90), INT8_C( 22), - INT8_C( 79), INT8_C( 71), -INT8_C( 21), -INT8_C( 112), -INT8_C( 67), -INT8_C( 126), -INT8_C( 124), INT8_C( 46) } }, - }, - { { INT8_C( 75), -INT8_C( 33), INT8_C( 76), -INT8_C( 54), INT8_C( 53), -INT8_C( 43), -INT8_C( 118), -INT8_C( 29), - INT8_C( 23), INT8_C( 91), INT8_C( 93), -INT8_C( 94), INT8_C( 1), INT8_C( 47), -INT8_C( 72), -INT8_C( 28), - INT8_C( 126), -INT8_C( 22), INT8_C( 43), -INT8_C( 76), -INT8_C( 42), -INT8_C( 75), INT8_C( 68), INT8_C( 121), - INT8_C( 115), -INT8_C( 57), -INT8_C( 5), INT8_C( 79), INT8_C( 76), -INT8_C( 8), INT8_C( 126), -INT8_C( 105) }, - { { INT8_C( 75), INT8_C( 76), INT8_C( 53), -INT8_C( 118), INT8_C( 23), INT8_C( 93), INT8_C( 1), -INT8_C( 72), - INT8_C( 126), INT8_C( 43), -INT8_C( 42), INT8_C( 68), INT8_C( 115), -INT8_C( 5), INT8_C( 76), INT8_C( 126) }, - { -INT8_C( 33), -INT8_C( 54), -INT8_C( 43), -INT8_C( 29), INT8_C( 91), -INT8_C( 94), INT8_C( 47), -INT8_C( 28), - -INT8_C( 22), -INT8_C( 76), -INT8_C( 75), INT8_C( 121), -INT8_C( 57), INT8_C( 79), -INT8_C( 8), -INT8_C( 105) } }, - }, - { { -INT8_C( 41), -INT8_C( 54), INT8_C( 97), INT8_C( 13), -INT8_C( 97), -INT8_C( 20), -INT8_C( 16), -INT8_C( 74), - INT8_C( 71), INT8_C( 78), INT8_C( 88), INT8_C( 73), INT8_C( 125), INT8_C( 16), INT8_C( 45), -INT8_C( 5), - -INT8_C( 5), INT8_C( 88), -INT8_C( 81), -INT8_C( 47), INT8_C( 14), -INT8_C( 13), INT8_C( 74), -INT8_C( 127), - -INT8_C( 70), INT8_C( 69), -INT8_C( 48), INT8_C( 6), INT8_C( 61), INT8_C( 78), -INT8_C( 99), INT8_C( 21) }, - { { -INT8_C( 41), INT8_C( 97), -INT8_C( 97), -INT8_C( 16), INT8_C( 71), INT8_C( 88), INT8_C( 125), INT8_C( 45), - -INT8_C( 5), -INT8_C( 81), INT8_C( 14), INT8_C( 74), -INT8_C( 70), -INT8_C( 48), INT8_C( 61), -INT8_C( 99) }, - { -INT8_C( 54), INT8_C( 13), -INT8_C( 20), -INT8_C( 74), INT8_C( 78), INT8_C( 73), INT8_C( 16), -INT8_C( 5), - INT8_C( 88), -INT8_C( 47), -INT8_C( 13), -INT8_C( 127), 
INT8_C( 69), INT8_C( 6), INT8_C( 78), INT8_C( 21) } }, - }, - { { INT8_C( 24), -INT8_C( 1), INT8_C( 34), -INT8_C( 72), -INT8_C( 21), INT8_C( 18), INT8_C( 110), INT8_C( 50), - INT8_C( 96), -INT8_C( 57), INT8_C( 123), -INT8_C( 35), -INT8_C( 41), -INT8_C( 88), -INT8_C( 40), -INT8_C( 46), - INT8_C( 1), -INT8_C( 121), -INT8_C( 93), INT8_C( 15), INT8_C( 122), -INT8_C( 19), -INT8_C( 112), INT8_C( 53), - INT8_C( 50), INT8_C( 96), INT8_C( 59), INT8_C( 112), -INT8_C( 81), -INT8_C( 39), -INT8_C( 123), -INT8_C( 57) }, - { { INT8_C( 24), INT8_C( 34), -INT8_C( 21), INT8_C( 110), INT8_C( 96), INT8_C( 123), -INT8_C( 41), -INT8_C( 40), - INT8_C( 1), -INT8_C( 93), INT8_C( 122), -INT8_C( 112), INT8_C( 50), INT8_C( 59), -INT8_C( 81), -INT8_C( 123) }, - { -INT8_C( 1), -INT8_C( 72), INT8_C( 18), INT8_C( 50), -INT8_C( 57), -INT8_C( 35), -INT8_C( 88), -INT8_C( 46), - -INT8_C( 121), INT8_C( 15), -INT8_C( 19), INT8_C( 53), INT8_C( 96), INT8_C( 112), -INT8_C( 39), -INT8_C( 57) } }, - }, - { { -INT8_C( 40), -INT8_C( 89), INT8_MAX, -INT8_C( 61), -INT8_C( 71), -INT8_C( 18), -INT8_C( 11), INT8_C( 26), - -INT8_C( 75), INT8_C( 113), -INT8_C( 9), -INT8_C( 116), INT8_C( 25), -INT8_C( 48), INT8_C( 95), INT8_C( 26), - INT8_C( 87), INT8_C( 2), INT8_C( 41), -INT8_C( 46), -INT8_C( 16), -INT8_C( 71), INT8_C( 7), INT8_C( 34), - INT8_C( 26), INT8_C( 66), -INT8_C( 110), -INT8_C( 55), INT8_C( 27), INT8_C( 23), -INT8_C( 112), -INT8_C( 13) }, - { { -INT8_C( 40), INT8_MAX, -INT8_C( 71), -INT8_C( 11), -INT8_C( 75), -INT8_C( 9), INT8_C( 25), INT8_C( 95), - INT8_C( 87), INT8_C( 41), -INT8_C( 16), INT8_C( 7), INT8_C( 26), -INT8_C( 110), INT8_C( 27), -INT8_C( 112) }, - { -INT8_C( 89), -INT8_C( 61), -INT8_C( 18), INT8_C( 26), INT8_C( 113), -INT8_C( 116), -INT8_C( 48), INT8_C( 26), - INT8_C( 2), -INT8_C( 46), -INT8_C( 71), INT8_C( 34), INT8_C( 66), -INT8_C( 55), INT8_C( 23), -INT8_C( 13) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int8x16x2_t r = 
simde_vld2q_s8(test_vec[i].a); - - simde_int8x16x2_t expected = { - {simde_vld1q_s8(test_vec[i].r[0]), simde_vld1q_s8(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i8x16(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i8x16(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); - simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); - simde_int8x16x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int8_t buf[32]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int8x16x2_t r = simde_vld2q_s8(buf); - - simde_test_arm_neon_write_i8x16x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_s16 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int16_t a[16]; - int16_t r[2][8]; - } test_vec[] = { - { { INT16_C( 11850), INT16_C( 12679), INT16_C( 1668), -INT16_C( 2829), INT16_C( 2417), INT16_C( 6613), -INT16_C( 26382), -INT16_C( 6963), - INT16_C( 11746), -INT16_C( 19992), INT16_C( 261), -INT16_C( 26272), -INT16_C( 27245), INT16_C( 20274), -INT16_C( 24440), -INT16_C( 11702) }, - { { INT16_C( 11850), INT16_C( 1668), INT16_C( 2417), -INT16_C( 26382), INT16_C( 11746), INT16_C( 261), -INT16_C( 27245), -INT16_C( 24440) }, - { INT16_C( 12679), -INT16_C( 2829), INT16_C( 6613), -INT16_C( 6963), -INT16_C( 19992), -INT16_C( 26272), INT16_C( 20274), -INT16_C( 11702) } }, - }, - { { -INT16_C( 11826), INT16_C( 21252), -INT16_C( 2089), INT16_C( 18503), INT16_C( 7168), -INT16_C( 3231), INT16_C( 11956), -INT16_C( 26921), - -INT16_C( 16548), INT16_C( 24903), -INT16_C( 22335), INT16_C( 21754), INT16_C( 11325), -INT16_C( 14941), -INT16_C( 4659), -INT16_C( 25704) }, - { { -INT16_C( 11826), -INT16_C( 2089), INT16_C( 7168), INT16_C( 11956), -INT16_C( 16548), -INT16_C( 22335), INT16_C( 11325), -INT16_C( 4659) }, - { INT16_C( 
21252), INT16_C( 18503), -INT16_C( 3231), -INT16_C( 26921), INT16_C( 24903), INT16_C( 21754), -INT16_C( 14941), -INT16_C( 25704) } }, - }, - { { -INT16_C( 25410), -INT16_C( 27154), INT16_C( 13715), -INT16_C( 27427), INT16_C( 15953), INT16_C( 1415), INT16_C( 24172), -INT16_C( 14181), - -INT16_C( 7395), -INT16_C( 8663), INT16_C( 9355), -INT16_C( 14286), -INT16_C( 10928), INT16_C( 7566), INT16_C( 9922), -INT16_C( 32583) }, - { { -INT16_C( 25410), INT16_C( 13715), INT16_C( 15953), INT16_C( 24172), -INT16_C( 7395), INT16_C( 9355), -INT16_C( 10928), INT16_C( 9922) }, - { -INT16_C( 27154), -INT16_C( 27427), INT16_C( 1415), -INT16_C( 14181), -INT16_C( 8663), -INT16_C( 14286), INT16_C( 7566), -INT16_C( 32583) } }, - }, - { { -INT16_C( 22590), INT16_C( 21781), -INT16_C( 3363), INT16_C( 12009), INT16_C( 28720), -INT16_C( 25292), -INT16_C( 12338), -INT16_C( 5019), - -INT16_C( 28750), INT16_C( 15818), -INT16_C( 589), INT16_C( 774), -INT16_C( 27438), -INT16_C( 27359), -INT16_C( 9542), INT16_C( 31765) }, - { { -INT16_C( 22590), -INT16_C( 3363), INT16_C( 28720), -INT16_C( 12338), -INT16_C( 28750), -INT16_C( 589), -INT16_C( 27438), -INT16_C( 9542) }, - { INT16_C( 21781), INT16_C( 12009), -INT16_C( 25292), -INT16_C( 5019), INT16_C( 15818), INT16_C( 774), -INT16_C( 27359), INT16_C( 31765) } }, - }, - { { INT16_C( 11137), INT16_C( 24273), -INT16_C( 17635), INT16_C( 20109), -INT16_C( 16085), -INT16_C( 1301), INT16_C( 20624), INT16_C( 17382), - -INT16_C( 20257), -INT16_C( 28032), -INT16_C( 31059), -INT16_C( 32618), -INT16_C( 18662), -INT16_C( 11243), INT16_C( 10897), INT16_C( 4688) }, - { { INT16_C( 11137), -INT16_C( 17635), -INT16_C( 16085), INT16_C( 20624), -INT16_C( 20257), -INT16_C( 31059), -INT16_C( 18662), INT16_C( 10897) }, - { INT16_C( 24273), INT16_C( 20109), -INT16_C( 1301), INT16_C( 17382), -INT16_C( 28032), -INT16_C( 32618), -INT16_C( 11243), INT16_C( 4688) } }, - }, - { { INT16_C( 8789), INT16_C( 29553), -INT16_C( 291), INT16_C( 2241), -INT16_C( 21313), INT16_C( 20226), 
-INT16_C( 5892), -INT16_C( 9070), - INT16_C( 5017), INT16_C( 18030), INT16_C( 1177), -INT16_C( 19258), -INT16_C( 9285), INT16_C( 19592), -INT16_C( 9978), INT16_C( 23391) }, - { { INT16_C( 8789), -INT16_C( 291), -INT16_C( 21313), -INT16_C( 5892), INT16_C( 5017), INT16_C( 1177), -INT16_C( 9285), -INT16_C( 9978) }, - { INT16_C( 29553), INT16_C( 2241), INT16_C( 20226), -INT16_C( 9070), INT16_C( 18030), -INT16_C( 19258), INT16_C( 19592), INT16_C( 23391) } }, - }, - { { -INT16_C( 12037), -INT16_C( 10034), -INT16_C( 28722), -INT16_C( 29216), -INT16_C( 7365), INT16_C( 14556), INT16_C( 28619), INT16_C( 25620), - -INT16_C( 32126), INT16_C( 7083), INT16_C( 29063), INT16_C( 17103), INT16_C( 22605), INT16_C( 21391), -INT16_C( 4559), INT16_C( 11438) }, - { { -INT16_C( 12037), -INT16_C( 28722), -INT16_C( 7365), INT16_C( 28619), -INT16_C( 32126), INT16_C( 29063), INT16_C( 22605), -INT16_C( 4559) }, - { -INT16_C( 10034), -INT16_C( 29216), INT16_C( 14556), INT16_C( 25620), INT16_C( 7083), INT16_C( 17103), INT16_C( 21391), INT16_C( 11438) } }, - }, - { { INT16_C( 32190), -INT16_C( 29692), -INT16_C( 7156), INT16_C( 18457), -INT16_C( 2617), -INT16_C( 27776), -INT16_C( 27548), -INT16_C( 6409), - -INT16_C( 24042), -INT16_C( 25342), -INT16_C( 12012), INT16_C( 25056), INT16_C( 28457), INT16_C( 23220), INT16_C( 25181), INT16_C( 7046) }, - { { INT16_C( 32190), -INT16_C( 7156), -INT16_C( 2617), -INT16_C( 27548), -INT16_C( 24042), -INT16_C( 12012), INT16_C( 28457), INT16_C( 25181) }, - { -INT16_C( 29692), INT16_C( 18457), -INT16_C( 27776), -INT16_C( 6409), -INT16_C( 25342), INT16_C( 25056), INT16_C( 23220), INT16_C( 7046) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int16x8x2_t r = simde_vld2q_s16(test_vec[i].a); - - simde_int16x8x2_t expected = { - {simde_vld1q_s16(test_vec[i].r[0]), simde_vld1q_s16(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i16x8(r.val[0], expected.val[0]); - 
simde_test_arm_neon_assert_equal_i16x8(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); - simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); - simde_int16x8x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int16_t buf[16]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int16x8x2_t r = simde_vld2q_s16(buf); - - simde_test_arm_neon_write_i16x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_s32 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int32_t a[8]; - int32_t r[2][4]; - } test_vec[] = { - { { -INT32_C( 1652910308), INT32_C( 1276922200), -INT32_C( 1246624074), INT32_C( 345995066), - -INT32_C( 1909421954), INT32_C( 1484737180), -INT32_C( 1927907536), INT32_C( 1716163914) }, - { { -INT32_C( 1652910308), -INT32_C( 1246624074), -INT32_C( 1909421954), -INT32_C( 1927907536) }, - { INT32_C( 1276922200), INT32_C( 345995066), INT32_C( 1484737180), INT32_C( 1716163914) } }, - }, - { { -INT32_C( 2063350484), -INT32_C( 992927986), INT32_C( 1601798949), INT32_C( 2037651963), - INT32_C( 1023976609), INT32_C( 513116142), INT32_C( 1236052991), INT32_C( 1840248385) }, - { { -INT32_C( 2063350484), INT32_C( 1601798949), INT32_C( 1023976609), INT32_C( 1236052991) }, - { -INT32_C( 992927986), INT32_C( 2037651963), INT32_C( 513116142), INT32_C( 1840248385) } }, - }, - { { -INT32_C( 890064197), -INT32_C( 141638702), INT32_C( 1113000007), -INT32_C( 1027880159), - INT32_C( 1593820015), INT32_C( 1249678667), -INT32_C( 2121062336), -INT32_C( 621854177) }, - { { -INT32_C( 890064197), INT32_C( 1113000007), INT32_C( 1593820015), -INT32_C( 2121062336) }, - { -INT32_C( 141638702), -INT32_C( 1027880159), INT32_C( 1249678667), -INT32_C( 621854177) } }, - }, - { { -INT32_C( 962272780), -INT32_C( 323079259), INT32_C( 1546523963), 
INT32_C( 1327426016), - -INT32_C( 122872403), -INT32_C( 213767502), INT32_C( 1903482194), INT32_C( 189555479) }, - { { -INT32_C( 962272780), INT32_C( 1546523963), -INT32_C( 122872403), INT32_C( 1903482194) }, - { -INT32_C( 323079259), INT32_C( 1327426016), -INT32_C( 213767502), INT32_C( 189555479) } }, - }, - { { -INT32_C( 355274683), INT32_C( 1591119907), -INT32_C( 2051406683), -INT32_C( 1697326867), - -INT32_C( 1466793226), -INT32_C( 23341908), -INT32_C( 1066397527), -INT32_C( 1194541965) }, - { { -INT32_C( 355274683), -INT32_C( 2051406683), -INT32_C( 1466793226), -INT32_C( 1066397527) }, - { INT32_C( 1591119907), -INT32_C( 1697326867), -INT32_C( 23341908), -INT32_C( 1194541965) } }, - }, - { { -INT32_C( 794648916), -INT32_C( 751929298), INT32_C( 1784211836), -INT32_C( 1224463167), - INT32_C( 1516279726), INT32_C( 358218603), INT32_C( 2144717067), INT32_C( 825729413) }, - { { -INT32_C( 794648916), INT32_C( 1784211836), INT32_C( 1516279726), INT32_C( 2144717067) }, - { -INT32_C( 751929298), -INT32_C( 1224463167), INT32_C( 358218603), INT32_C( 825729413) } }, - }, - { { INT32_C( 1828837951), -INT32_C( 817876910), -INT32_C( 633759719), INT32_C( 1938963909), - INT32_C( 1087304404), -INT32_C( 111859731), INT32_C( 1970809584), INT32_C( 195473356) }, - { { INT32_C( 1828837951), -INT32_C( 633759719), INT32_C( 1087304404), INT32_C( 1970809584) }, - { -INT32_C( 817876910), INT32_C( 1938963909), -INT32_C( 111859731), INT32_C( 195473356) } }, - }, - { { -INT32_C( 596006775), -INT32_C( 240404008), INT32_C( 399238226), -INT32_C( 158704351), - INT32_C( 1026971727), INT32_C( 1865845631), -INT32_C( 2115719499), -INT32_C( 410154147) }, - { { -INT32_C( 596006775), INT32_C( 399238226), INT32_C( 1026971727), -INT32_C( 2115719499) }, - { -INT32_C( 240404008), -INT32_C( 158704351), INT32_C( 1865845631), -INT32_C( 410154147) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int32x4x2_t r = simde_vld2q_s32(test_vec[i].a); - - 
simde_int32x4x2_t expected = { - {simde_vld1q_s32(test_vec[i].r[0]), simde_vld1q_s32(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i32x4(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i32x4(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); - simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); - simde_int32x4x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int32_t buf[8]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_int32x4x2_t r = simde_vld2q_s32(buf); - - simde_test_arm_neon_write_i32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_s64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - int64_t a[4]; - int64_t r[2][2]; - } test_vec[] = { - { { INT64_C( 761675503690497765), -INT64_C( 3317868187565492127), - INT64_C( 6422292174704708047), -INT64_C( 4276746329755678056) }, - { { INT64_C( 761675503690497765), INT64_C( 6422292174704708047) }, - { -INT64_C( 3317868187565492127), -INT64_C( 4276746329755678056) } }, - }, - { { -INT64_C( 6949583921125508177), -INT64_C( 8908427146440727274), - -INT64_C( 7890751878801495695), -INT64_C( 7775319739955481739) }, - { { -INT64_C( 6949583921125508177), -INT64_C( 7890751878801495695) }, - { -INT64_C( 8908427146440727274), -INT64_C( 7775319739955481739) } }, - }, - { { -INT64_C( 2838209209255287618), -INT64_C( 1655702365654380340), - INT64_C( 3438371571634163500), -INT64_C( 2812327433930041729) }, - { { -INT64_C( 2838209209255287618), INT64_C( 3438371571634163500) }, - { -INT64_C( 1655702365654380340), -INT64_C( 2812327433930041729) } }, - }, - { { -INT64_C( 8945432303512580032), INT64_C( 3414021002538270720), - INT64_C( 8881675687628664665), -INT64_C( 6803857797262883422) }, - { { -INT64_C( 8945432303512580032), INT64_C( 8881675687628664665) 
}, - { INT64_C( 3414021002538270720), -INT64_C( 6803857797262883422) } }, - }, - { { -INT64_C( 2800536549414704921), -INT64_C( 8435045075786848322), - -INT64_C( 893436412045671134), INT64_C( 3938470781334223014) }, - { { -INT64_C( 2800536549414704921), -INT64_C( 893436412045671134) }, - { -INT64_C( 8435045075786848322), INT64_C( 3938470781334223014) } }, - }, - { { -INT64_C( 8621023513008583766), -INT64_C( 9017831118517654241), - -INT64_C( 17106915664299905), INT64_C( 6789087051402080945) }, - { { -INT64_C( 8621023513008583766), -INT64_C( 17106915664299905) }, - { -INT64_C( 9017831118517654241), INT64_C( 6789087051402080945) } }, - }, - { { INT64_C( 2543473880721263865), INT64_C( 5821833599798974185), - INT64_C( 6277706328444005574), -INT64_C( 3717564459626585002) }, - { { INT64_C( 2543473880721263865), INT64_C( 6277706328444005574) }, - { INT64_C( 5821833599798974185), -INT64_C( 3717564459626585002) } }, - }, - { { INT64_C( 7909406733251856539), INT64_C( 5445558404010882673), - -INT64_C( 1375135456396922130), INT64_C( 7731658909898130093) }, - { { INT64_C( 7909406733251856539), -INT64_C( 1375135456396922130) }, - { INT64_C( 5445558404010882673), INT64_C( 7731658909898130093) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_int64x2x2_t r = simde_vld2q_s64(test_vec[i].a); - - simde_int64x2x2_t expected = { - {simde_vld1q_s64(test_vec[i].r[0]), simde_vld1q_s64(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_i64x2(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_i64x2(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); - simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); - simde_int64x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - int64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); 
- simde_int64x2x2_t r = simde_vld2q_s64(buf); - - simde_test_arm_neon_write_i64x2x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_u8 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint8_t a[32]; - uint8_t r[2][16]; - } test_vec[] = { - { { UINT8_C( 56), UINT8_C(119), UINT8_C(167), UINT8_C( 99), UINT8_C(221), UINT8_C(126), UINT8_C(139), UINT8_C( 94), - UINT8_C( 0), UINT8_C(203), UINT8_C(216), UINT8_C( 77), UINT8_C( 79), UINT8_C(152), UINT8_C( 2), UINT8_C(187), - UINT8_C(173), UINT8_C(251), UINT8_C( 63), UINT8_C( 57), UINT8_C(133), UINT8_C( 35), UINT8_C(243), UINT8_C(166), - UINT8_C( 51), UINT8_C(149), UINT8_C(128), UINT8_C( 99), UINT8_C(176), UINT8_C(195), UINT8_C( 30), UINT8_C(232) }, - { { UINT8_C( 56), UINT8_C(167), UINT8_C(221), UINT8_C(139), UINT8_C( 0), UINT8_C(216), UINT8_C( 79), UINT8_C( 2), - UINT8_C(173), UINT8_C( 63), UINT8_C(133), UINT8_C(243), UINT8_C( 51), UINT8_C(128), UINT8_C(176), UINT8_C( 30) }, - { UINT8_C(119), UINT8_C( 99), UINT8_C(126), UINT8_C( 94), UINT8_C(203), UINT8_C( 77), UINT8_C(152), UINT8_C(187), - UINT8_C(251), UINT8_C( 57), UINT8_C( 35), UINT8_C(166), UINT8_C(149), UINT8_C( 99), UINT8_C(195), UINT8_C(232) } }, - }, - { { UINT8_C( 58), UINT8_C(198), UINT8_C( 75), UINT8_C( 24), UINT8_C( 68), UINT8_C(214), UINT8_C(118), UINT8_C( 68), - UINT8_C(161), UINT8_C( 78), UINT8_C(145), UINT8_C(240), UINT8_C(231), UINT8_C(148), UINT8_C(172), UINT8_C(148), - UINT8_C(143), UINT8_C(235), UINT8_C(205), UINT8_C( 20), UINT8_C( 14), UINT8_C(192), UINT8_C(186), UINT8_C( 65), - UINT8_C( 85), UINT8_C( 58), UINT8_C(164), UINT8_C( 5), UINT8_C(254), UINT8_C(195), UINT8_C(237), UINT8_C( 56) }, - { { UINT8_C( 58), UINT8_C( 75), UINT8_C( 68), UINT8_C(118), UINT8_C(161), UINT8_C(145), UINT8_C(231), UINT8_C(172), - UINT8_C(143), UINT8_C(205), UINT8_C( 14), UINT8_C(186), UINT8_C( 85), UINT8_C(164), UINT8_C(254), UINT8_C(237) }, - { UINT8_C(198), UINT8_C( 24), UINT8_C(214), UINT8_C( 68), UINT8_C( 78), 
UINT8_C(240), UINT8_C(148), UINT8_C(148), - UINT8_C(235), UINT8_C( 20), UINT8_C(192), UINT8_C( 65), UINT8_C( 58), UINT8_C( 5), UINT8_C(195), UINT8_C( 56) } }, - }, - { { UINT8_C(137), UINT8_C( 56), UINT8_C( 80), UINT8_C(205), UINT8_C( 15), UINT8_C(199), UINT8_C( 18), UINT8_C(176), - UINT8_C( 21), UINT8_C(163), UINT8_C(161), UINT8_C(252), UINT8_C( 55), UINT8_C( 77), UINT8_C(144), UINT8_C(198), - UINT8_C( 56), UINT8_C( 93), UINT8_C(219), UINT8_C( 70), UINT8_C( 29), UINT8_C(149), UINT8_C(135), UINT8_C(115), - UINT8_C(208), UINT8_C( 43), UINT8_C(120), UINT8_C(206), UINT8_C(238), UINT8_C(102), UINT8_C( 6), UINT8_C(119) }, - { { UINT8_C(137), UINT8_C( 80), UINT8_C( 15), UINT8_C( 18), UINT8_C( 21), UINT8_C(161), UINT8_C( 55), UINT8_C(144), - UINT8_C( 56), UINT8_C(219), UINT8_C( 29), UINT8_C(135), UINT8_C(208), UINT8_C(120), UINT8_C(238), UINT8_C( 6) }, - { UINT8_C( 56), UINT8_C(205), UINT8_C(199), UINT8_C(176), UINT8_C(163), UINT8_C(252), UINT8_C( 77), UINT8_C(198), - UINT8_C( 93), UINT8_C( 70), UINT8_C(149), UINT8_C(115), UINT8_C( 43), UINT8_C(206), UINT8_C(102), UINT8_C(119) } }, - }, - { { UINT8_C(158), UINT8_C( 87), UINT8_C( 69), UINT8_C(173), UINT8_C( 30), UINT8_C( 87), UINT8_C( 94), UINT8_C( 51), - UINT8_C(250), UINT8_MAX, UINT8_C( 48), UINT8_C( 50), UINT8_C( 76), UINT8_C(192), UINT8_C(248), UINT8_C(132), - UINT8_C( 30), UINT8_C(211), UINT8_C(202), UINT8_C( 59), UINT8_C(105), UINT8_C( 81), UINT8_C(174), UINT8_C( 57), - UINT8_C(124), UINT8_C( 39), UINT8_C( 7), UINT8_C(107), UINT8_C(141), UINT8_C( 13), UINT8_C(226), UINT8_C( 43) }, - { { UINT8_C(158), UINT8_C( 69), UINT8_C( 30), UINT8_C( 94), UINT8_C(250), UINT8_C( 48), UINT8_C( 76), UINT8_C(248), - UINT8_C( 30), UINT8_C(202), UINT8_C(105), UINT8_C(174), UINT8_C(124), UINT8_C( 7), UINT8_C(141), UINT8_C(226) }, - { UINT8_C( 87), UINT8_C(173), UINT8_C( 87), UINT8_C( 51), UINT8_MAX, UINT8_C( 50), UINT8_C(192), UINT8_C(132), - UINT8_C(211), UINT8_C( 59), UINT8_C( 81), UINT8_C( 57), UINT8_C( 39), UINT8_C(107), UINT8_C( 
13), UINT8_C( 43) } }, - }, - { { UINT8_C(100), UINT8_C( 39), UINT8_C(217), UINT8_C(130), UINT8_C(126), UINT8_C( 55), UINT8_C(182), UINT8_C(121), - UINT8_C( 54), UINT8_C(230), UINT8_C(171), UINT8_C(130), UINT8_C(166), UINT8_C(163), UINT8_C( 6), UINT8_C(196), - UINT8_C(119), UINT8_C(208), UINT8_C( 0), UINT8_C(224), UINT8_C( 33), UINT8_C(174), UINT8_C( 25), UINT8_C(157), - UINT8_C(213), UINT8_C( 32), UINT8_C( 8), UINT8_C( 98), UINT8_C( 45), UINT8_C(235), UINT8_C(142), UINT8_C(146) }, - { { UINT8_C(100), UINT8_C(217), UINT8_C(126), UINT8_C(182), UINT8_C( 54), UINT8_C(171), UINT8_C(166), UINT8_C( 6), - UINT8_C(119), UINT8_C( 0), UINT8_C( 33), UINT8_C( 25), UINT8_C(213), UINT8_C( 8), UINT8_C( 45), UINT8_C(142) }, - { UINT8_C( 39), UINT8_C(130), UINT8_C( 55), UINT8_C(121), UINT8_C(230), UINT8_C(130), UINT8_C(163), UINT8_C(196), - UINT8_C(208), UINT8_C(224), UINT8_C(174), UINT8_C(157), UINT8_C( 32), UINT8_C( 98), UINT8_C(235), UINT8_C(146) } }, - }, - { { UINT8_C( 18), UINT8_C(103), UINT8_C( 20), UINT8_C(145), UINT8_C(158), UINT8_C(202), UINT8_C( 10), UINT8_C(212), - UINT8_C(176), UINT8_C(181), UINT8_C( 86), UINT8_C( 87), UINT8_C( 88), UINT8_C( 92), UINT8_C( 27), UINT8_C(207), - UINT8_C( 44), UINT8_C( 27), UINT8_C(175), UINT8_C( 77), UINT8_C(202), UINT8_C(200), UINT8_C(234), UINT8_C(159), - UINT8_C(232), UINT8_C(243), UINT8_C( 2), UINT8_C( 22), UINT8_C(222), UINT8_C(144), UINT8_C(168), UINT8_C(240) }, - { { UINT8_C( 18), UINT8_C( 20), UINT8_C(158), UINT8_C( 10), UINT8_C(176), UINT8_C( 86), UINT8_C( 88), UINT8_C( 27), - UINT8_C( 44), UINT8_C(175), UINT8_C(202), UINT8_C(234), UINT8_C(232), UINT8_C( 2), UINT8_C(222), UINT8_C(168) }, - { UINT8_C(103), UINT8_C(145), UINT8_C(202), UINT8_C(212), UINT8_C(181), UINT8_C( 87), UINT8_C( 92), UINT8_C(207), - UINT8_C( 27), UINT8_C( 77), UINT8_C(200), UINT8_C(159), UINT8_C(243), UINT8_C( 22), UINT8_C(144), UINT8_C(240) } }, - }, - { { UINT8_C(247), UINT8_C(188), UINT8_C(129), UINT8_C(149), UINT8_C(135), UINT8_C(139), UINT8_C(105), 
UINT8_C( 55), - UINT8_C( 64), UINT8_C(191), UINT8_C(142), UINT8_C(153), UINT8_C( 27), UINT8_C(170), UINT8_C(104), UINT8_C( 71), - UINT8_C(197), UINT8_C( 24), UINT8_C(148), UINT8_C(143), UINT8_C(224), UINT8_C(126), UINT8_C( 47), UINT8_C(201), - UINT8_C(113), UINT8_C( 49), UINT8_C(223), UINT8_C( 79), UINT8_C(193), UINT8_C(135), UINT8_C( 64), UINT8_C(184) }, - { { UINT8_C(247), UINT8_C(129), UINT8_C(135), UINT8_C(105), UINT8_C( 64), UINT8_C(142), UINT8_C( 27), UINT8_C(104), - UINT8_C(197), UINT8_C(148), UINT8_C(224), UINT8_C( 47), UINT8_C(113), UINT8_C(223), UINT8_C(193), UINT8_C( 64) }, - { UINT8_C(188), UINT8_C(149), UINT8_C(139), UINT8_C( 55), UINT8_C(191), UINT8_C(153), UINT8_C(170), UINT8_C( 71), - UINT8_C( 24), UINT8_C(143), UINT8_C(126), UINT8_C(201), UINT8_C( 49), UINT8_C( 79), UINT8_C(135), UINT8_C(184) } }, - }, - { { UINT8_C( 67), UINT8_C(193), UINT8_C( 77), UINT8_C(202), UINT8_C( 77), UINT8_C(182), UINT8_C( 2), UINT8_C(141), - UINT8_C(117), UINT8_C(144), UINT8_C( 38), UINT8_C(144), UINT8_C( 58), UINT8_C(143), UINT8_C(215), UINT8_C( 0), - UINT8_C(167), UINT8_C(107), UINT8_C(143), UINT8_C(135), UINT8_C(233), UINT8_C(190), UINT8_C( 80), UINT8_C( 91), - UINT8_C(239), UINT8_C( 47), UINT8_C(170), UINT8_C(176), UINT8_C(182), UINT8_C(234), UINT8_C(104), UINT8_C(250) }, - { { UINT8_C( 67), UINT8_C( 77), UINT8_C( 77), UINT8_C( 2), UINT8_C(117), UINT8_C( 38), UINT8_C( 58), UINT8_C(215), - UINT8_C(167), UINT8_C(143), UINT8_C(233), UINT8_C( 80), UINT8_C(239), UINT8_C(170), UINT8_C(182), UINT8_C(104) }, - { UINT8_C(193), UINT8_C(202), UINT8_C(182), UINT8_C(141), UINT8_C(144), UINT8_C(144), UINT8_C(143), UINT8_C( 0), - UINT8_C(107), UINT8_C(135), UINT8_C(190), UINT8_C( 91), UINT8_C( 47), UINT8_C(176), UINT8_C(234), UINT8_C(250) } }, - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint8x16x2_t r = simde_vld2q_u8(test_vec[i].a); - - simde_uint8x16x2_t expected = { - {simde_vld1q_u8(test_vec[i].r[0]), 
simde_vld1q_u8(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u8x16(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u8x16(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); - simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); - simde_uint8x16x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint8_t buf[32]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint8x16x2_t r = simde_vld2q_u8(buf); - - simde_test_arm_neon_write_u8x16x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_u16 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint16_t a[16]; - uint16_t r[2][8]; - } test_vec[] = { - { { UINT16_C( 5263), UINT16_C(46704), UINT16_C(17324), UINT16_C( 435), UINT16_C(22826), UINT16_C(35226), UINT16_C( 4289), UINT16_C(14289), - UINT16_C(15842), UINT16_C(32624), UINT16_C( 9166), UINT16_C(50530), UINT16_C(15251), UINT16_C(37458), UINT16_C(64003), UINT16_C(37377) }, - { { UINT16_C( 5263), UINT16_C(17324), UINT16_C(22826), UINT16_C( 4289), UINT16_C(15842), UINT16_C( 9166), UINT16_C(15251), UINT16_C(64003) }, - { UINT16_C(46704), UINT16_C( 435), UINT16_C(35226), UINT16_C(14289), UINT16_C(32624), UINT16_C(50530), UINT16_C(37458), UINT16_C(37377) } }, - }, - { { UINT16_C(28943), UINT16_C(47944), UINT16_C(64436), UINT16_C(57020), UINT16_C(22100), UINT16_C( 5480), UINT16_C(14695), UINT16_C(18765), - UINT16_C(48502), UINT16_C(17865), UINT16_C(11232), UINT16_C(29450), UINT16_C(23654), UINT16_C(26886), UINT16_C( 1879), UINT16_C(26363) }, - { { UINT16_C(28943), UINT16_C(64436), UINT16_C(22100), UINT16_C(14695), UINT16_C(48502), UINT16_C(11232), UINT16_C(23654), UINT16_C( 1879) }, - { UINT16_C(47944), UINT16_C(57020), UINT16_C( 5480), UINT16_C(18765), UINT16_C(17865), UINT16_C(29450), 
UINT16_C(26886), UINT16_C(26363) } }, - }, - { { UINT16_C(17528), UINT16_C(11297), UINT16_C(56639), UINT16_C(37899), UINT16_C(29491), UINT16_C(39593), UINT16_C(63148), UINT16_C( 8932), - UINT16_C(44468), UINT16_C(37991), UINT16_C(29144), UINT16_C(16136), UINT16_C( 3790), UINT16_C( 9640), UINT16_C(42005), UINT16_C(36235) }, - { { UINT16_C(17528), UINT16_C(56639), UINT16_C(29491), UINT16_C(63148), UINT16_C(44468), UINT16_C(29144), UINT16_C( 3790), UINT16_C(42005) }, - { UINT16_C(11297), UINT16_C(37899), UINT16_C(39593), UINT16_C( 8932), UINT16_C(37991), UINT16_C(16136), UINT16_C( 9640), UINT16_C(36235) } }, - }, - { { UINT16_C(44264), UINT16_C(10169), UINT16_C(50313), UINT16_C(48315), UINT16_C(25911), UINT16_C(58199), UINT16_C(15195), UINT16_C( 3846), - UINT16_C(28136), UINT16_C(49316), UINT16_C(44255), UINT16_C(44543), UINT16_C(43194), UINT16_C(53202), UINT16_C(23884), UINT16_C(13404) }, - { { UINT16_C(44264), UINT16_C(50313), UINT16_C(25911), UINT16_C(15195), UINT16_C(28136), UINT16_C(44255), UINT16_C(43194), UINT16_C(23884) }, - { UINT16_C(10169), UINT16_C(48315), UINT16_C(58199), UINT16_C( 3846), UINT16_C(49316), UINT16_C(44543), UINT16_C(53202), UINT16_C(13404) } }, - }, - { { UINT16_C( 5385), UINT16_C(37467), UINT16_C( 6106), UINT16_C( 4430), UINT16_C(42364), UINT16_C(55285), UINT16_C(64480), UINT16_C(51431), - UINT16_C(35688), UINT16_C(18313), UINT16_C(34871), UINT16_C(61940), UINT16_C(50736), UINT16_C(31936), UINT16_C( 7203), UINT16_C(11440) }, - { { UINT16_C( 5385), UINT16_C( 6106), UINT16_C(42364), UINT16_C(64480), UINT16_C(35688), UINT16_C(34871), UINT16_C(50736), UINT16_C( 7203) }, - { UINT16_C(37467), UINT16_C( 4430), UINT16_C(55285), UINT16_C(51431), UINT16_C(18313), UINT16_C(61940), UINT16_C(31936), UINT16_C(11440) } }, - }, - { { UINT16_C( 3121), UINT16_C( 3006), UINT16_C( 3363), UINT16_C(40733), UINT16_C( 4786), UINT16_C(37750), UINT16_C(23821), UINT16_C(30043), - UINT16_C(58600), UINT16_C( 8125), UINT16_C(45421), UINT16_C(40208), UINT16_C(53368), 
UINT16_C(39706), UINT16_C(51948), UINT16_C( 7880) }, - { { UINT16_C( 3121), UINT16_C( 3363), UINT16_C( 4786), UINT16_C(23821), UINT16_C(58600), UINT16_C(45421), UINT16_C(53368), UINT16_C(51948) }, - { UINT16_C( 3006), UINT16_C(40733), UINT16_C(37750), UINT16_C(30043), UINT16_C( 8125), UINT16_C(40208), UINT16_C(39706), UINT16_C( 7880) } }, - }, - { { UINT16_C(34518), UINT16_C(63785), UINT16_C(18067), UINT16_C(18072), UINT16_C( 3928), UINT16_C(26073), UINT16_C(13420), UINT16_C(21979), - UINT16_C(38937), UINT16_C(34420), UINT16_C(34121), UINT16_C(49443), UINT16_C(15701), UINT16_C(16989), UINT16_C( 9480), UINT16_C(56928) }, - { { UINT16_C(34518), UINT16_C(18067), UINT16_C( 3928), UINT16_C(13420), UINT16_C(38937), UINT16_C(34121), UINT16_C(15701), UINT16_C( 9480) }, - { UINT16_C(63785), UINT16_C(18072), UINT16_C(26073), UINT16_C(21979), UINT16_C(34420), UINT16_C(49443), UINT16_C(16989), UINT16_C(56928) } }, - }, - { { UINT16_C(35243), UINT16_C(16344), UINT16_C(28880), UINT16_C(10373), UINT16_C(24191), UINT16_C(60558), UINT16_C(27026), UINT16_C(43841), - UINT16_C(46337), UINT16_C(18993), UINT16_C(21818), UINT16_C(36876), UINT16_C(27026), UINT16_C(39634), UINT16_C(12942), UINT16_C(14713) }, - { { UINT16_C(35243), UINT16_C(28880), UINT16_C(24191), UINT16_C(27026), UINT16_C(46337), UINT16_C(21818), UINT16_C(27026), UINT16_C(12942) }, - { UINT16_C(16344), UINT16_C(10373), UINT16_C(60558), UINT16_C(43841), UINT16_C(18993), UINT16_C(36876), UINT16_C(39634), UINT16_C(14713) } }, - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint16x8x2_t r = simde_vld2q_u16(test_vec[i].a); - - simde_uint16x8x2_t expected = { - {simde_vld1q_u16(test_vec[i].r[0]), simde_vld1q_u16(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u16x8(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u16x8(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint16x8_t a = 
simde_test_arm_neon_random_u16x8(); - simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); - simde_uint16x8x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint16_t buf[16]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint16x8x2_t r = simde_vld2q_u16(buf); - - simde_test_arm_neon_write_u16x8x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_u32 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint32_t a[8]; - uint32_t r[2][4]; - } test_vec[] = { - { { UINT32_C(2339918267), UINT32_C(1102380481), UINT32_C(3995943515), UINT32_C(2895736491), - UINT32_C(1593232163), UINT32_C(3001942560), UINT32_C(4182622315), UINT32_C(2905851634) }, - { { UINT32_C(2339918267), UINT32_C(3995943515), UINT32_C(1593232163), UINT32_C(4182622315) }, - { UINT32_C(1102380481), UINT32_C(2895736491), UINT32_C(3001942560), UINT32_C(2905851634) } }, - }, - { { UINT32_C(3627658007), UINT32_C( 68808105), UINT32_C(3673310767), UINT32_C(3632696500), - UINT32_C(2000059479), UINT32_C(3928564863), UINT32_C(3605296868), UINT32_C(1401100092) }, - { { UINT32_C(3627658007), UINT32_C(3673310767), UINT32_C(2000059479), UINT32_C(3605296868) }, - { UINT32_C( 68808105), UINT32_C(3632696500), UINT32_C(3928564863), UINT32_C(1401100092) } }, - }, - { { UINT32_C(1798094018), UINT32_C(3631236521), UINT32_C(1085432460), UINT32_C(1159215342), - UINT32_C( 884756149), UINT32_C(1444865650), UINT32_C(2569798236), UINT32_C(3706499097) }, - { { UINT32_C(1798094018), UINT32_C(1085432460), UINT32_C( 884756149), UINT32_C(2569798236) }, - { UINT32_C(3631236521), UINT32_C(1159215342), UINT32_C(1444865650), UINT32_C(3706499097) } }, - }, - { { UINT32_C( 373758060), UINT32_C(3941513054), UINT32_C( 137011482), UINT32_C(2387493849), - UINT32_C( 79825553), UINT32_C(1297801712), UINT32_C(4259743715), UINT32_C(2748961335) }, - { { UINT32_C( 373758060), UINT32_C( 
137011482), UINT32_C( 79825553), UINT32_C(4259743715) }, - { UINT32_C(3941513054), UINT32_C(2387493849), UINT32_C(1297801712), UINT32_C(2748961335) } }, - }, - { { UINT32_C(1236869355), UINT32_C(4063471832), UINT32_C( 586833225), UINT32_C( 850479264), - UINT32_C(1127641939), UINT32_C( 948998228), UINT32_C(1312126487), UINT32_C( 888278601) }, - { { UINT32_C(1236869355), UINT32_C( 586833225), UINT32_C(1127641939), UINT32_C(1312126487) }, - { UINT32_C(4063471832), UINT32_C( 850479264), UINT32_C( 948998228), UINT32_C( 888278601) } }, - }, - { { UINT32_C( 108899118), UINT32_C(2633543763), UINT32_C(2931815181), UINT32_C(2397073467), - UINT32_C( 953292515), UINT32_C(3195036326), UINT32_C( 571254233), UINT32_C(3780574899) }, - { { UINT32_C( 108899118), UINT32_C(2931815181), UINT32_C( 953292515), UINT32_C( 571254233) }, - { UINT32_C(2633543763), UINT32_C(2397073467), UINT32_C(3195036326), UINT32_C(3780574899) } }, - }, - { { UINT32_C(4259894186), UINT32_C(2426069123), UINT32_C( 255744467), UINT32_C(2895978185), - UINT32_C(3689180980), UINT32_C(2878952658), UINT32_C(2899158521), UINT32_C(1317938084) }, - { { UINT32_C(4259894186), UINT32_C( 255744467), UINT32_C(3689180980), UINT32_C(2899158521) }, - { UINT32_C(2426069123), UINT32_C(2895978185), UINT32_C(2878952658), UINT32_C(1317938084) } }, - }, - { { UINT32_C(2034988790), UINT32_C( 705291606), UINT32_C( 121194558), UINT32_C(2612319846), - UINT32_C( 410425414), UINT32_C(3871543277), UINT32_C(1486065844), UINT32_C(2846237107) }, - { { UINT32_C(2034988790), UINT32_C( 121194558), UINT32_C( 410425414), UINT32_C(1486065844) }, - { UINT32_C( 705291606), UINT32_C(2612319846), UINT32_C(3871543277), UINT32_C(2846237107) } }, - }, - - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint32x4x2_t r = simde_vld2q_u32(test_vec[i].a); - - simde_uint32x4x2_t expected = { - {simde_vld1q_u32(test_vec[i].r[0]), simde_vld1q_u32(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u32x4(r.val[0], 
expected.val[0]); - simde_test_arm_neon_assert_equal_u32x4(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); - simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); - simde_uint32x4x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint32_t buf[8]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint32x4x2_t r = simde_vld2q_u32(buf); - - simde_test_arm_neon_write_u32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_u64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - uint64_t a[4]; - uint64_t r[2][2]; - } test_vec[] = { - { { UINT64_C( 3020762151839533812), UINT64_C(17954495856467081562), - UINT64_C(14288482954718013222), UINT64_C( 3819020876812341264) }, - { { UINT64_C( 3020762151839533812), UINT64_C(14288482954718013222) }, - { UINT64_C(17954495856467081562), UINT64_C( 3819020876812341264) } }, - }, - { { UINT64_C(10899347977887241965), UINT64_C( 9840223772233446588), - UINT64_C(12950102532167286886), UINT64_C(13530663546384542545) }, - { { UINT64_C(10899347977887241965), UINT64_C(12950102532167286886) }, - { UINT64_C( 9840223772233446588), UINT64_C(13530663546384542545) } }, - }, - { { UINT64_C( 2070555630402543080), UINT64_C(17741159496252854347), - UINT64_C( 4492799045846756354), UINT64_C( 7500346603649101196) }, - { { UINT64_C( 2070555630402543080), UINT64_C( 4492799045846756354) }, - { UINT64_C(17741159496252854347), UINT64_C( 7500346603649101196) } }, - }, - { { UINT64_C( 5489969314248125107), UINT64_C( 2945686371667927898), - UINT64_C(17745633243074316570), UINT64_C( 6772400822477133076) }, - { { UINT64_C( 5489969314248125107), UINT64_C(17745633243074316570) }, - { UINT64_C( 2945686371667927898), UINT64_C( 6772400822477133076) } }, - }, - { { UINT64_C( 3359581776035023185), 
UINT64_C(16272061564597082244), - UINT64_C(15790516151494746051), UINT64_C( 3119705754931524419) }, - { { UINT64_C( 3359581776035023185), UINT64_C(15790516151494746051) }, - { UINT64_C(16272061564597082244), UINT64_C( 3119705754931524419) } }, - }, - { { UINT64_C(16282232691925826805), UINT64_C(14653969954052444085), - UINT64_C(13303100541911975676), UINT64_C( 7968260244002705039) }, - { { UINT64_C(16282232691925826805), UINT64_C(13303100541911975676) }, - { UINT64_C(14653969954052444085), UINT64_C( 7968260244002705039) } }, - }, - { { UINT64_C(16423833091886748078), UINT64_C(15064748377732249660), - UINT64_C(12911875263894391043), UINT64_C( 8586307070039217984) }, - { { UINT64_C(16423833091886748078), UINT64_C(12911875263894391043) }, - { UINT64_C(15064748377732249660), UINT64_C( 8586307070039217984) } }, - }, - { { UINT64_C(14543092476148932546), UINT64_C(17406166195061535465), - UINT64_C(10808808801129014056), UINT64_C(10354190940655066225) }, - { { UINT64_C(14543092476148932546), UINT64_C(10808808801129014056) }, - { UINT64_C(17406166195061535465), UINT64_C(10354190940655066225) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_uint64x2x2_t r = simde_vld2q_u64(test_vec[i].a); - - simde_uint64x2x2_t expected = { - {simde_vld1q_u64(test_vec[i].r[0]), simde_vld1q_u64(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_u64x2(r.val[0], expected.val[0]); - simde_test_arm_neon_assert_equal_u64x2(r.val[1], expected.val[1]); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); - simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); - simde_uint64x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - uint64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_uint64x2x2_t r = simde_vld2q_u64(buf); - - simde_test_arm_neon_write_u64x2x2(2, 
r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_f32 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - simde_float32_t a[8]; - simde_float32_t r[2][4]; - } test_vec[] = { - { { SIMDE_FLOAT32_C( 793.71), SIMDE_FLOAT32_C( 221.62), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( -525.52), - SIMDE_FLOAT32_C( 800.99), SIMDE_FLOAT32_C( -411.87), SIMDE_FLOAT32_C( 483.99), SIMDE_FLOAT32_C( 852.73) }, - { { SIMDE_FLOAT32_C( 793.71), SIMDE_FLOAT32_C( 781.57), SIMDE_FLOAT32_C( 800.99), SIMDE_FLOAT32_C( 483.99) }, - { SIMDE_FLOAT32_C( 221.62), SIMDE_FLOAT32_C( -525.52), SIMDE_FLOAT32_C( -411.87), SIMDE_FLOAT32_C( 852.73) } }, - }, - { { SIMDE_FLOAT32_C( -433.25), SIMDE_FLOAT32_C( -811.76), SIMDE_FLOAT32_C( -463.04), SIMDE_FLOAT32_C( 671.94), - SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( -434.20), SIMDE_FLOAT32_C( -263.89), SIMDE_FLOAT32_C( 754.63) }, - { { SIMDE_FLOAT32_C( -433.25), SIMDE_FLOAT32_C( -463.04), SIMDE_FLOAT32_C( -60.94), SIMDE_FLOAT32_C( -263.89) }, - { SIMDE_FLOAT32_C( -811.76), SIMDE_FLOAT32_C( 671.94), SIMDE_FLOAT32_C( -434.20), SIMDE_FLOAT32_C( 754.63) } }, - }, - { { SIMDE_FLOAT32_C( -877.85), SIMDE_FLOAT32_C( -225.98), SIMDE_FLOAT32_C( -292.03), SIMDE_FLOAT32_C( 932.32), - SIMDE_FLOAT32_C( -463.10), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( -115.23), SIMDE_FLOAT32_C( -867.52) }, - { { SIMDE_FLOAT32_C( -877.85), SIMDE_FLOAT32_C( -292.03), SIMDE_FLOAT32_C( -463.10), SIMDE_FLOAT32_C( -115.23) }, - { SIMDE_FLOAT32_C( -225.98), SIMDE_FLOAT32_C( 932.32), SIMDE_FLOAT32_C( 171.18), SIMDE_FLOAT32_C( -867.52) } }, - }, - { { SIMDE_FLOAT32_C( -182.14), SIMDE_FLOAT32_C( 874.57), SIMDE_FLOAT32_C( -306.44), SIMDE_FLOAT32_C( 180.87), - SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -449.53), SIMDE_FLOAT32_C( -593.39), SIMDE_FLOAT32_C( 966.11) }, - { { SIMDE_FLOAT32_C( -182.14), SIMDE_FLOAT32_C( -306.44), SIMDE_FLOAT32_C( -827.59), SIMDE_FLOAT32_C( -593.39) }, - { SIMDE_FLOAT32_C( 874.57), SIMDE_FLOAT32_C( 180.87), SIMDE_FLOAT32_C( 
-449.53), SIMDE_FLOAT32_C( 966.11) } }, - }, - { { SIMDE_FLOAT32_C( 772.09), SIMDE_FLOAT32_C( -811.82), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( 573.08), - SIMDE_FLOAT32_C( -223.69), SIMDE_FLOAT32_C( 924.59), SIMDE_FLOAT32_C( 425.81), SIMDE_FLOAT32_C( 343.06) }, - { { SIMDE_FLOAT32_C( 772.09), SIMDE_FLOAT32_C( -559.40), SIMDE_FLOAT32_C( -223.69), SIMDE_FLOAT32_C( 425.81) }, - { SIMDE_FLOAT32_C( -811.82), SIMDE_FLOAT32_C( 573.08), SIMDE_FLOAT32_C( 924.59), SIMDE_FLOAT32_C( 343.06) } }, - }, - { { SIMDE_FLOAT32_C( -887.17), SIMDE_FLOAT32_C( 962.76), SIMDE_FLOAT32_C( 15.01), SIMDE_FLOAT32_C( 51.89), - SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C( 751.11), SIMDE_FLOAT32_C( -193.48), SIMDE_FLOAT32_C( -349.29) }, - { { SIMDE_FLOAT32_C( -887.17), SIMDE_FLOAT32_C( 15.01), SIMDE_FLOAT32_C( -471.44), SIMDE_FLOAT32_C( -193.48) }, - { SIMDE_FLOAT32_C( 962.76), SIMDE_FLOAT32_C( 51.89), SIMDE_FLOAT32_C( 751.11), SIMDE_FLOAT32_C( -349.29) } }, - }, - { { SIMDE_FLOAT32_C( -474.87), SIMDE_FLOAT32_C( 514.49), SIMDE_FLOAT32_C( -416.97), SIMDE_FLOAT32_C( 62.03), - SIMDE_FLOAT32_C( -314.33), SIMDE_FLOAT32_C( 467.80), SIMDE_FLOAT32_C( 194.51), SIMDE_FLOAT32_C( 503.53) }, - { { SIMDE_FLOAT32_C( -474.87), SIMDE_FLOAT32_C( -416.97), SIMDE_FLOAT32_C( -314.33), SIMDE_FLOAT32_C( 194.51) }, - { SIMDE_FLOAT32_C( 514.49), SIMDE_FLOAT32_C( 62.03), SIMDE_FLOAT32_C( 467.80), SIMDE_FLOAT32_C( 503.53) } }, - }, - { { SIMDE_FLOAT32_C( 342.37), SIMDE_FLOAT32_C( 888.07), SIMDE_FLOAT32_C( -315.60), SIMDE_FLOAT32_C( 514.77), - SIMDE_FLOAT32_C( -561.46), SIMDE_FLOAT32_C( 91.01), SIMDE_FLOAT32_C( 480.89), SIMDE_FLOAT32_C( -789.37) }, - { { SIMDE_FLOAT32_C( 342.37), SIMDE_FLOAT32_C( -315.60), SIMDE_FLOAT32_C( -561.46), SIMDE_FLOAT32_C( 480.89) }, - { SIMDE_FLOAT32_C( 888.07), SIMDE_FLOAT32_C( 514.77), SIMDE_FLOAT32_C( 91.01), SIMDE_FLOAT32_C( -789.37) } }, - }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float32x4x2_t r = simde_vld2q_f32(test_vec[i].a); - - 
simde_float32x4x2_t expected = { - {simde_vld1q_f32(test_vec[i].r[0]), simde_vld1q_f32(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_f32x4(r.val[0], expected.val[0], INT_MAX); - simde_test_arm_neon_assert_equal_f32x4(r.val[1], expected.val[1], INT_MAX); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); - simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); - simde_float32x4x2_t c = {{a, b}}; - - simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - simde_float32_t buf[8]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_float32x4x2_t r = simde_vld2q_f32(buf); - - simde_test_arm_neon_write_f32x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); - } - return 1; -#endif -} - -static int -test_simde_vld2q_f64 (SIMDE_MUNIT_TEST_ARGS) { -#if 1 - static const struct { - simde_float64_t a[4]; - simde_float64_t r[2][2]; - } test_vec[] = { - { { SIMDE_FLOAT64_C( -81.12), SIMDE_FLOAT64_C( -90.46), - SIMDE_FLOAT64_C( -83.90), SIMDE_FLOAT64_C( 20.75) }, - { { SIMDE_FLOAT64_C( -81.12), SIMDE_FLOAT64_C( -83.90) }, - { SIMDE_FLOAT64_C( -90.46), SIMDE_FLOAT64_C( 20.75) } }, - }, - { { SIMDE_FLOAT64_C( -91.92), SIMDE_FLOAT64_C( 5.15), - SIMDE_FLOAT64_C( -58.53), SIMDE_FLOAT64_C( -40.61) }, - { { SIMDE_FLOAT64_C( -91.92), SIMDE_FLOAT64_C( -58.53) }, - { SIMDE_FLOAT64_C( 5.15), SIMDE_FLOAT64_C( -40.61) } }, - }, - { { SIMDE_FLOAT64_C( 92.56), SIMDE_FLOAT64_C( 91.44), - SIMDE_FLOAT64_C( 67.84), SIMDE_FLOAT64_C( -58.14) }, - { { SIMDE_FLOAT64_C( 92.56), SIMDE_FLOAT64_C( 67.84) }, - { SIMDE_FLOAT64_C( 91.44), SIMDE_FLOAT64_C( -58.14) } }, - }, - { { SIMDE_FLOAT64_C( 63.47), SIMDE_FLOAT64_C( 42.43), - SIMDE_FLOAT64_C( 29.58), SIMDE_FLOAT64_C( 4.03) }, - { { SIMDE_FLOAT64_C( 63.47), SIMDE_FLOAT64_C( 29.58) }, - { SIMDE_FLOAT64_C( 42.43), SIMDE_FLOAT64_C( 4.03) } }, - }, - { { SIMDE_FLOAT64_C( 0.29), 
SIMDE_FLOAT64_C( -69.19), - SIMDE_FLOAT64_C( -73.56), SIMDE_FLOAT64_C( -97.91) }, - { { SIMDE_FLOAT64_C( 0.29), SIMDE_FLOAT64_C( -73.56) }, - { SIMDE_FLOAT64_C( -69.19), SIMDE_FLOAT64_C( -97.91) } }, - }, - { { SIMDE_FLOAT64_C( -62.67), SIMDE_FLOAT64_C( -66.03), - SIMDE_FLOAT64_C( -17.85), SIMDE_FLOAT64_C( -62.88) }, - { { SIMDE_FLOAT64_C( -62.67), SIMDE_FLOAT64_C( -17.85) }, - { SIMDE_FLOAT64_C( -66.03), SIMDE_FLOAT64_C( -62.88) } }, - }, - { { SIMDE_FLOAT64_C( -24.49), SIMDE_FLOAT64_C( -93.09), - SIMDE_FLOAT64_C( 12.23), SIMDE_FLOAT64_C( 80.71) }, - { { SIMDE_FLOAT64_C( -24.49), SIMDE_FLOAT64_C( 12.23) }, - { SIMDE_FLOAT64_C( -93.09), SIMDE_FLOAT64_C( 80.71) } }, - }, - { { SIMDE_FLOAT64_C( -90.54), SIMDE_FLOAT64_C( 20.34), - SIMDE_FLOAT64_C( -11.66), SIMDE_FLOAT64_C( -71.66) }, - { { SIMDE_FLOAT64_C( -90.54), SIMDE_FLOAT64_C( -11.66) }, - { SIMDE_FLOAT64_C( 20.34), SIMDE_FLOAT64_C( -71.66) } }, - } - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float64x2x2_t r = simde_vld2q_f64(test_vec[i].a); - - simde_float64x2x2_t expected = { - {simde_vld1q_f64(test_vec[i].r[0]), simde_vld1q_f64(test_vec[i].r[1])}}; - - simde_test_arm_neon_assert_equal_f64x2(r.val[0], expected.val[0], 1); - simde_test_arm_neon_assert_equal_f64x2(r.val[1], expected.val[1], 1); - } - - return 0; -#else - for (int i = 0 ; i < 8 ; i++) { - simde_float64x2_t a = simde_test_arm_neon_random_f64x2(-100.0, 100.0); - simde_float64x2_t b = simde_test_arm_neon_random_f64x2(-100.0, 100.0); - simde_float64x2x2_t c = {{a, b}}; - - simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); - simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); - - simde_float64_t buf[4]; - simde_memcpy(buf, c.val, sizeof(buf)); - simde_float64x2x2_t r = simde_vld2q_f64(buf); + simde_float16x24_t a = simde_test_arm_neon_random_f16x24(-100.0f, 100.0f); + simde_float16x3_t r[3] = simde_vld3q_f16(a); - simde_test_arm_neon_write_f64x2x2(2, r, 
SIMDE_TEST_VEC_POS_LAST); + simde_test_arm_neon_write_f16x24(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x3(2, r[3], SIMDE_TEST_VEC_POS_LAST); } return 1; #endif } -*/ #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vld3_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vld3_f16) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s16) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_s64) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u8) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u16) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_u64) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2_f64) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u8) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s8) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s16) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_s64) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u16) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_u64) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_f32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vld2q_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vld3q_f16) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/ld4.c b/test/arm/neon/ld4.c index b96995627..2a302636c 100644 --- a/test/arm/neon/ld4.c +++ b/test/arm/neon/ld4.c @@ -8,6 +8,7 @@ static int test_simde_vld4_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16_t a[16]; simde_float16_t r[4][4]; @@ -76,12 +77,190 @@ test_simde_vld4_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x16_t a = simde_test_arm_neon_random_f16x16(-100.0f, 100.0f); + simde_float16x4_t r[4] = simde_vld4_f16(a); + + simde_test_arm_neon_write_f16x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r[4], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vld4q_f16 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + simde_float16_t a[32]; + simde_float16_t r[4][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 5.490), SIMDE_FLOAT16_VALUE( 56.363), SIMDE_FLOAT16_VALUE( 50.265), SIMDE_FLOAT16_VALUE( - 62.541), + SIMDE_FLOAT16_VALUE( 32.240), SIMDE_FLOAT16_VALUE( 80.441), SIMDE_FLOAT16_VALUE( 9.478), SIMDE_FLOAT16_VALUE( - 73.979), + SIMDE_FLOAT16_VALUE( 72.907), SIMDE_FLOAT16_VALUE( - 37.547), SIMDE_FLOAT16_VALUE( - 98.897), SIMDE_FLOAT16_VALUE( - 96.841), + SIMDE_FLOAT16_VALUE( - 64.286), SIMDE_FLOAT16_VALUE( - 46.003), SIMDE_FLOAT16_VALUE( 60.223), SIMDE_FLOAT16_VALUE( 83.652), + SIMDE_FLOAT16_VALUE( - 51.410), SIMDE_FLOAT16_VALUE( 60.687), SIMDE_FLOAT16_VALUE( 76.661), SIMDE_FLOAT16_VALUE( 87.481), + SIMDE_FLOAT16_VALUE( - 58.057), SIMDE_FLOAT16_VALUE( 80.682), SIMDE_FLOAT16_VALUE( - 45.694), SIMDE_FLOAT16_VALUE( - 18.375), + SIMDE_FLOAT16_VALUE( 4.987), SIMDE_FLOAT16_VALUE( 72.799), SIMDE_FLOAT16_VALUE( - 32.887), SIMDE_FLOAT16_VALUE( - 91.185), + SIMDE_FLOAT16_VALUE( - 5.996), SIMDE_FLOAT16_VALUE( - 80.690), SIMDE_FLOAT16_VALUE( - 87.372), SIMDE_FLOAT16_VALUE( 74.876) }, + { { SIMDE_FLOAT16_VALUE( 5.490), SIMDE_FLOAT16_VALUE( 32.240), SIMDE_FLOAT16_VALUE( 72.907), SIMDE_FLOAT16_VALUE( - 64.286), + SIMDE_FLOAT16_VALUE( - 51.410), SIMDE_FLOAT16_VALUE( - 58.057), SIMDE_FLOAT16_VALUE( 4.987), SIMDE_FLOAT16_VALUE( - 5.996) }, + { SIMDE_FLOAT16_VALUE( 56.363), SIMDE_FLOAT16_VALUE( 80.441), SIMDE_FLOAT16_VALUE( - 37.547), SIMDE_FLOAT16_VALUE( - 46.003), + SIMDE_FLOAT16_VALUE( 60.687), SIMDE_FLOAT16_VALUE( 80.682), SIMDE_FLOAT16_VALUE( 72.799), SIMDE_FLOAT16_VALUE( - 80.690) }, + { SIMDE_FLOAT16_VALUE( 50.265), SIMDE_FLOAT16_VALUE( 9.478), SIMDE_FLOAT16_VALUE( - 98.897), SIMDE_FLOAT16_VALUE( 60.223), + SIMDE_FLOAT16_VALUE( 76.661), SIMDE_FLOAT16_VALUE( - 45.694), SIMDE_FLOAT16_VALUE( - 32.887), SIMDE_FLOAT16_VALUE( - 87.372) }, + { SIMDE_FLOAT16_VALUE( - 62.541), SIMDE_FLOAT16_VALUE( - 73.979), SIMDE_FLOAT16_VALUE( - 
96.841), SIMDE_FLOAT16_VALUE( 83.652), + SIMDE_FLOAT16_VALUE( 87.481), SIMDE_FLOAT16_VALUE( - 18.375), SIMDE_FLOAT16_VALUE( - 91.185), SIMDE_FLOAT16_VALUE( 74.876) } } }, + { { SIMDE_FLOAT16_VALUE( 98.260), SIMDE_FLOAT16_VALUE( - 53.720), SIMDE_FLOAT16_VALUE( 64.055), SIMDE_FLOAT16_VALUE( - 42.434), + SIMDE_FLOAT16_VALUE( - 30.691), SIMDE_FLOAT16_VALUE( - 38.678), SIMDE_FLOAT16_VALUE( 54.052), SIMDE_FLOAT16_VALUE( - 8.191), + SIMDE_FLOAT16_VALUE( 46.474), SIMDE_FLOAT16_VALUE( - 27.690), SIMDE_FLOAT16_VALUE( - 52.522), SIMDE_FLOAT16_VALUE( 18.409), + SIMDE_FLOAT16_VALUE( 43.536), SIMDE_FLOAT16_VALUE( 22.115), SIMDE_FLOAT16_VALUE( - 28.459), SIMDE_FLOAT16_VALUE( 5.606), + SIMDE_FLOAT16_VALUE( 61.671), SIMDE_FLOAT16_VALUE( - 91.137), SIMDE_FLOAT16_VALUE( - 60.090), SIMDE_FLOAT16_VALUE( 52.864), + SIMDE_FLOAT16_VALUE( - 91.079), SIMDE_FLOAT16_VALUE( 12.797), SIMDE_FLOAT16_VALUE( - 0.876), SIMDE_FLOAT16_VALUE( 60.271), + SIMDE_FLOAT16_VALUE( - 31.039), SIMDE_FLOAT16_VALUE( - 8.129), SIMDE_FLOAT16_VALUE( 80.874), SIMDE_FLOAT16_VALUE( - 18.920), + SIMDE_FLOAT16_VALUE( 66.029), SIMDE_FLOAT16_VALUE( 53.380), SIMDE_FLOAT16_VALUE( 52.293), SIMDE_FLOAT16_VALUE( 88.604) }, + { { SIMDE_FLOAT16_VALUE( 98.260), SIMDE_FLOAT16_VALUE( - 30.691), SIMDE_FLOAT16_VALUE( 46.474), SIMDE_FLOAT16_VALUE( 43.536), + SIMDE_FLOAT16_VALUE( 61.671), SIMDE_FLOAT16_VALUE( - 91.079), SIMDE_FLOAT16_VALUE( - 31.039), SIMDE_FLOAT16_VALUE( 66.029) }, + { SIMDE_FLOAT16_VALUE( - 53.720), SIMDE_FLOAT16_VALUE( - 38.678), SIMDE_FLOAT16_VALUE( - 27.690), SIMDE_FLOAT16_VALUE( 22.115), + SIMDE_FLOAT16_VALUE( - 91.137), SIMDE_FLOAT16_VALUE( 12.797), SIMDE_FLOAT16_VALUE( - 8.129), SIMDE_FLOAT16_VALUE( 53.380) }, + { SIMDE_FLOAT16_VALUE( 64.055), SIMDE_FLOAT16_VALUE( 54.052), SIMDE_FLOAT16_VALUE( - 52.522), SIMDE_FLOAT16_VALUE( - 28.459), + SIMDE_FLOAT16_VALUE( - 60.090), SIMDE_FLOAT16_VALUE( - 0.876), SIMDE_FLOAT16_VALUE( 80.874), SIMDE_FLOAT16_VALUE( 52.293) }, + { SIMDE_FLOAT16_VALUE( - 42.434), 
SIMDE_FLOAT16_VALUE( - 8.191), SIMDE_FLOAT16_VALUE( 18.409), SIMDE_FLOAT16_VALUE( 5.606), + SIMDE_FLOAT16_VALUE( 52.864), SIMDE_FLOAT16_VALUE( 60.271), SIMDE_FLOAT16_VALUE( - 18.920), SIMDE_FLOAT16_VALUE( 88.604) } } }, + { { SIMDE_FLOAT16_VALUE( - 77.566), SIMDE_FLOAT16_VALUE( 49.732), SIMDE_FLOAT16_VALUE( 13.953), SIMDE_FLOAT16_VALUE( 73.486), + SIMDE_FLOAT16_VALUE( - 49.632), SIMDE_FLOAT16_VALUE( 90.775), SIMDE_FLOAT16_VALUE( 99.231), SIMDE_FLOAT16_VALUE( - 60.897), + SIMDE_FLOAT16_VALUE( - 70.236), SIMDE_FLOAT16_VALUE( 41.518), SIMDE_FLOAT16_VALUE( 63.677), SIMDE_FLOAT16_VALUE( - 42.192), + SIMDE_FLOAT16_VALUE( - 77.678), SIMDE_FLOAT16_VALUE( - 20.756), SIMDE_FLOAT16_VALUE( - 63.219), SIMDE_FLOAT16_VALUE( - 32.109), + SIMDE_FLOAT16_VALUE( - 46.923), SIMDE_FLOAT16_VALUE( - 99.472), SIMDE_FLOAT16_VALUE( 64.420), SIMDE_FLOAT16_VALUE( 94.805), + SIMDE_FLOAT16_VALUE( 7.983), SIMDE_FLOAT16_VALUE( 71.674), SIMDE_FLOAT16_VALUE( 25.662), SIMDE_FLOAT16_VALUE( - 88.933), + SIMDE_FLOAT16_VALUE( 63.394), SIMDE_FLOAT16_VALUE( 11.632), SIMDE_FLOAT16_VALUE( - 50.221), SIMDE_FLOAT16_VALUE( 10.966), + SIMDE_FLOAT16_VALUE( - 89.809), SIMDE_FLOAT16_VALUE( 42.301), SIMDE_FLOAT16_VALUE( 27.079), SIMDE_FLOAT16_VALUE( 10.245) }, + { { SIMDE_FLOAT16_VALUE( - 77.566), SIMDE_FLOAT16_VALUE( - 49.632), SIMDE_FLOAT16_VALUE( - 70.236), SIMDE_FLOAT16_VALUE( - 77.678), + SIMDE_FLOAT16_VALUE( - 46.923), SIMDE_FLOAT16_VALUE( 7.983), SIMDE_FLOAT16_VALUE( 63.394), SIMDE_FLOAT16_VALUE( - 89.809) }, + { SIMDE_FLOAT16_VALUE( 49.732), SIMDE_FLOAT16_VALUE( 90.775), SIMDE_FLOAT16_VALUE( 41.518), SIMDE_FLOAT16_VALUE( - 20.756), + SIMDE_FLOAT16_VALUE( - 99.472), SIMDE_FLOAT16_VALUE( 71.674), SIMDE_FLOAT16_VALUE( 11.632), SIMDE_FLOAT16_VALUE( 42.301) }, + { SIMDE_FLOAT16_VALUE( 13.953), SIMDE_FLOAT16_VALUE( 99.231), SIMDE_FLOAT16_VALUE( 63.677), SIMDE_FLOAT16_VALUE( - 63.219), + SIMDE_FLOAT16_VALUE( 64.420), SIMDE_FLOAT16_VALUE( 25.662), SIMDE_FLOAT16_VALUE( - 50.221), SIMDE_FLOAT16_VALUE( 27.079) }, + { 
SIMDE_FLOAT16_VALUE( 73.486), SIMDE_FLOAT16_VALUE( - 60.897), SIMDE_FLOAT16_VALUE( - 42.192), SIMDE_FLOAT16_VALUE( - 32.109), + SIMDE_FLOAT16_VALUE( 94.805), SIMDE_FLOAT16_VALUE( - 88.933), SIMDE_FLOAT16_VALUE( 10.966), SIMDE_FLOAT16_VALUE( 10.245) } } }, + { { SIMDE_FLOAT16_VALUE( 68.132), SIMDE_FLOAT16_VALUE( 98.448), SIMDE_FLOAT16_VALUE( 89.569), SIMDE_FLOAT16_VALUE( 52.859), + SIMDE_FLOAT16_VALUE( - 30.550), SIMDE_FLOAT16_VALUE( 60.162), SIMDE_FLOAT16_VALUE( 64.127), SIMDE_FLOAT16_VALUE( - 36.081), + SIMDE_FLOAT16_VALUE( 22.014), SIMDE_FLOAT16_VALUE( - 4.060), SIMDE_FLOAT16_VALUE( - 1.772), SIMDE_FLOAT16_VALUE( - 61.338), + SIMDE_FLOAT16_VALUE( 73.559), SIMDE_FLOAT16_VALUE( - 56.442), SIMDE_FLOAT16_VALUE( 92.937), SIMDE_FLOAT16_VALUE( 46.595), + SIMDE_FLOAT16_VALUE( 96.771), SIMDE_FLOAT16_VALUE( 67.430), SIMDE_FLOAT16_VALUE( 36.578), SIMDE_FLOAT16_VALUE( 86.457), + SIMDE_FLOAT16_VALUE( 18.411), SIMDE_FLOAT16_VALUE( - 54.887), SIMDE_FLOAT16_VALUE( - 40.622), SIMDE_FLOAT16_VALUE( - 29.592), + SIMDE_FLOAT16_VALUE( 15.246), SIMDE_FLOAT16_VALUE( 9.582), SIMDE_FLOAT16_VALUE( - 96.924), SIMDE_FLOAT16_VALUE( 86.899), + SIMDE_FLOAT16_VALUE( - 52.384), SIMDE_FLOAT16_VALUE( - 92.867), SIMDE_FLOAT16_VALUE( - 44.117), SIMDE_FLOAT16_VALUE( - 90.337) }, + { { SIMDE_FLOAT16_VALUE( 68.132), SIMDE_FLOAT16_VALUE( - 30.550), SIMDE_FLOAT16_VALUE( 22.014), SIMDE_FLOAT16_VALUE( 73.559), + SIMDE_FLOAT16_VALUE( 96.771), SIMDE_FLOAT16_VALUE( 18.411), SIMDE_FLOAT16_VALUE( 15.246), SIMDE_FLOAT16_VALUE( - 52.384) }, + { SIMDE_FLOAT16_VALUE( 98.448), SIMDE_FLOAT16_VALUE( 60.162), SIMDE_FLOAT16_VALUE( - 4.060), SIMDE_FLOAT16_VALUE( - 56.442), + SIMDE_FLOAT16_VALUE( 67.430), SIMDE_FLOAT16_VALUE( - 54.887), SIMDE_FLOAT16_VALUE( 9.582), SIMDE_FLOAT16_VALUE( - 92.867) }, + { SIMDE_FLOAT16_VALUE( 89.569), SIMDE_FLOAT16_VALUE( 64.127), SIMDE_FLOAT16_VALUE( - 1.772), SIMDE_FLOAT16_VALUE( 92.937), + SIMDE_FLOAT16_VALUE( 36.578), SIMDE_FLOAT16_VALUE( - 40.622), SIMDE_FLOAT16_VALUE( - 96.924), 
SIMDE_FLOAT16_VALUE( - 44.117) }, + { SIMDE_FLOAT16_VALUE( 52.859), SIMDE_FLOAT16_VALUE( - 36.081), SIMDE_FLOAT16_VALUE( - 61.338), SIMDE_FLOAT16_VALUE( 46.595), + SIMDE_FLOAT16_VALUE( 86.457), SIMDE_FLOAT16_VALUE( - 29.592), SIMDE_FLOAT16_VALUE( 86.899), SIMDE_FLOAT16_VALUE( - 90.337) } } }, + { { SIMDE_FLOAT16_VALUE( 86.525), SIMDE_FLOAT16_VALUE( - 7.364), SIMDE_FLOAT16_VALUE( 76.938), SIMDE_FLOAT16_VALUE( - 33.673), + SIMDE_FLOAT16_VALUE( 17.284), SIMDE_FLOAT16_VALUE( 86.559), SIMDE_FLOAT16_VALUE( - 98.760), SIMDE_FLOAT16_VALUE( 64.214), + SIMDE_FLOAT16_VALUE( - 82.404), SIMDE_FLOAT16_VALUE( - 55.915), SIMDE_FLOAT16_VALUE( - 99.164), SIMDE_FLOAT16_VALUE( 57.475), + SIMDE_FLOAT16_VALUE( 37.359), SIMDE_FLOAT16_VALUE( 4.426), SIMDE_FLOAT16_VALUE( 96.766), SIMDE_FLOAT16_VALUE( 1.291), + SIMDE_FLOAT16_VALUE( 67.966), SIMDE_FLOAT16_VALUE( 48.150), SIMDE_FLOAT16_VALUE( - 90.178), SIMDE_FLOAT16_VALUE( 40.405), + SIMDE_FLOAT16_VALUE( 38.096), SIMDE_FLOAT16_VALUE( 52.337), SIMDE_FLOAT16_VALUE( - 71.332), SIMDE_FLOAT16_VALUE( 60.795), + SIMDE_FLOAT16_VALUE( - 66.608), SIMDE_FLOAT16_VALUE( 83.954), SIMDE_FLOAT16_VALUE( - 75.610), SIMDE_FLOAT16_VALUE( 72.697), + SIMDE_FLOAT16_VALUE( 88.933), SIMDE_FLOAT16_VALUE( 58.545), SIMDE_FLOAT16_VALUE( 55.080), SIMDE_FLOAT16_VALUE( - 1.566) }, + { { SIMDE_FLOAT16_VALUE( 86.525), SIMDE_FLOAT16_VALUE( 17.284), SIMDE_FLOAT16_VALUE( - 82.404), SIMDE_FLOAT16_VALUE( 37.359), + SIMDE_FLOAT16_VALUE( 67.966), SIMDE_FLOAT16_VALUE( 38.096), SIMDE_FLOAT16_VALUE( - 66.608), SIMDE_FLOAT16_VALUE( 88.933) }, + { SIMDE_FLOAT16_VALUE( - 7.364), SIMDE_FLOAT16_VALUE( 86.559), SIMDE_FLOAT16_VALUE( - 55.915), SIMDE_FLOAT16_VALUE( 4.426), + SIMDE_FLOAT16_VALUE( 48.150), SIMDE_FLOAT16_VALUE( 52.337), SIMDE_FLOAT16_VALUE( 83.954), SIMDE_FLOAT16_VALUE( 58.545) }, + { SIMDE_FLOAT16_VALUE( 76.938), SIMDE_FLOAT16_VALUE( - 98.760), SIMDE_FLOAT16_VALUE( - 99.164), SIMDE_FLOAT16_VALUE( 96.766), + SIMDE_FLOAT16_VALUE( - 90.178), SIMDE_FLOAT16_VALUE( - 71.332), 
SIMDE_FLOAT16_VALUE( - 75.610), SIMDE_FLOAT16_VALUE( 55.080) }, + { SIMDE_FLOAT16_VALUE( - 33.673), SIMDE_FLOAT16_VALUE( 64.214), SIMDE_FLOAT16_VALUE( 57.475), SIMDE_FLOAT16_VALUE( 1.291), + SIMDE_FLOAT16_VALUE( 40.405), SIMDE_FLOAT16_VALUE( 60.795), SIMDE_FLOAT16_VALUE( 72.697), SIMDE_FLOAT16_VALUE( - 1.566) } } }, + { { SIMDE_FLOAT16_VALUE( 61.363), SIMDE_FLOAT16_VALUE( - 94.735), SIMDE_FLOAT16_VALUE( 71.030), SIMDE_FLOAT16_VALUE( - 54.274), + SIMDE_FLOAT16_VALUE( 88.136), SIMDE_FLOAT16_VALUE( - 5.456), SIMDE_FLOAT16_VALUE( 47.220), SIMDE_FLOAT16_VALUE( - 29.565), + SIMDE_FLOAT16_VALUE( 22.067), SIMDE_FLOAT16_VALUE( - 60.529), SIMDE_FLOAT16_VALUE( - 86.561), SIMDE_FLOAT16_VALUE( 59.456), + SIMDE_FLOAT16_VALUE( 18.893), SIMDE_FLOAT16_VALUE( - 3.232), SIMDE_FLOAT16_VALUE( - 5.500), SIMDE_FLOAT16_VALUE( 38.212), + SIMDE_FLOAT16_VALUE( 64.008), SIMDE_FLOAT16_VALUE( 23.363), SIMDE_FLOAT16_VALUE( - 52.655), SIMDE_FLOAT16_VALUE( - 83.862), + SIMDE_FLOAT16_VALUE( 82.969), SIMDE_FLOAT16_VALUE( - 51.413), SIMDE_FLOAT16_VALUE( - 6.933), SIMDE_FLOAT16_VALUE( 82.898), + SIMDE_FLOAT16_VALUE( 66.306), SIMDE_FLOAT16_VALUE( - 60.470), SIMDE_FLOAT16_VALUE( 30.553), SIMDE_FLOAT16_VALUE( 84.495), + SIMDE_FLOAT16_VALUE( 23.918), SIMDE_FLOAT16_VALUE( 73.675), SIMDE_FLOAT16_VALUE( 29.921), SIMDE_FLOAT16_VALUE( - 13.427) }, + { { SIMDE_FLOAT16_VALUE( 61.363), SIMDE_FLOAT16_VALUE( 88.136), SIMDE_FLOAT16_VALUE( 22.067), SIMDE_FLOAT16_VALUE( 18.893), + SIMDE_FLOAT16_VALUE( 64.008), SIMDE_FLOAT16_VALUE( 82.969), SIMDE_FLOAT16_VALUE( 66.306), SIMDE_FLOAT16_VALUE( 23.918) }, + { SIMDE_FLOAT16_VALUE( - 94.735), SIMDE_FLOAT16_VALUE( - 5.456), SIMDE_FLOAT16_VALUE( - 60.529), SIMDE_FLOAT16_VALUE( - 3.232), + SIMDE_FLOAT16_VALUE( 23.363), SIMDE_FLOAT16_VALUE( - 51.413), SIMDE_FLOAT16_VALUE( - 60.470), SIMDE_FLOAT16_VALUE( 73.675) }, + { SIMDE_FLOAT16_VALUE( 71.030), SIMDE_FLOAT16_VALUE( 47.220), SIMDE_FLOAT16_VALUE( - 86.561), SIMDE_FLOAT16_VALUE( - 5.500), + SIMDE_FLOAT16_VALUE( - 52.655), 
SIMDE_FLOAT16_VALUE( - 6.933), SIMDE_FLOAT16_VALUE( 30.553), SIMDE_FLOAT16_VALUE( 29.921) }, + { SIMDE_FLOAT16_VALUE( - 54.274), SIMDE_FLOAT16_VALUE( - 29.565), SIMDE_FLOAT16_VALUE( 59.456), SIMDE_FLOAT16_VALUE( 38.212), + SIMDE_FLOAT16_VALUE( - 83.862), SIMDE_FLOAT16_VALUE( 82.898), SIMDE_FLOAT16_VALUE( 84.495), SIMDE_FLOAT16_VALUE( - 13.427) } } }, + { { SIMDE_FLOAT16_VALUE( 35.383), SIMDE_FLOAT16_VALUE( - 89.112), SIMDE_FLOAT16_VALUE( 26.970), SIMDE_FLOAT16_VALUE( 7.485), + SIMDE_FLOAT16_VALUE( - 82.298), SIMDE_FLOAT16_VALUE( 74.716), SIMDE_FLOAT16_VALUE( - 98.970), SIMDE_FLOAT16_VALUE( 39.884), + SIMDE_FLOAT16_VALUE( - 46.463), SIMDE_FLOAT16_VALUE( 57.795), SIMDE_FLOAT16_VALUE( - 1.432), SIMDE_FLOAT16_VALUE( 99.301), + SIMDE_FLOAT16_VALUE( 32.827), SIMDE_FLOAT16_VALUE( - 87.007), SIMDE_FLOAT16_VALUE( - 21.524), SIMDE_FLOAT16_VALUE( 27.532), + SIMDE_FLOAT16_VALUE( - 80.235), SIMDE_FLOAT16_VALUE( - 6.353), SIMDE_FLOAT16_VALUE( - 90.137), SIMDE_FLOAT16_VALUE( 13.171), + SIMDE_FLOAT16_VALUE( 76.820), SIMDE_FLOAT16_VALUE( 54.313), SIMDE_FLOAT16_VALUE( 54.393), SIMDE_FLOAT16_VALUE( - 32.240), + SIMDE_FLOAT16_VALUE( - 70.987), SIMDE_FLOAT16_VALUE( - 72.386), SIMDE_FLOAT16_VALUE( 33.691), SIMDE_FLOAT16_VALUE( 8.833), + SIMDE_FLOAT16_VALUE( - 74.879), SIMDE_FLOAT16_VALUE( - 3.497), SIMDE_FLOAT16_VALUE( - 98.523), SIMDE_FLOAT16_VALUE( 53.411) }, + { { SIMDE_FLOAT16_VALUE( 35.383), SIMDE_FLOAT16_VALUE( - 82.298), SIMDE_FLOAT16_VALUE( - 46.463), SIMDE_FLOAT16_VALUE( 32.827), + SIMDE_FLOAT16_VALUE( - 80.235), SIMDE_FLOAT16_VALUE( 76.820), SIMDE_FLOAT16_VALUE( - 70.987), SIMDE_FLOAT16_VALUE( - 74.879) }, + { SIMDE_FLOAT16_VALUE( - 89.112), SIMDE_FLOAT16_VALUE( 74.716), SIMDE_FLOAT16_VALUE( 57.795), SIMDE_FLOAT16_VALUE( - 87.007), + SIMDE_FLOAT16_VALUE( - 6.353), SIMDE_FLOAT16_VALUE( 54.313), SIMDE_FLOAT16_VALUE( - 72.386), SIMDE_FLOAT16_VALUE( - 3.497) }, + { SIMDE_FLOAT16_VALUE( 26.970), SIMDE_FLOAT16_VALUE( - 98.970), SIMDE_FLOAT16_VALUE( - 1.432), SIMDE_FLOAT16_VALUE( - 
21.524), + SIMDE_FLOAT16_VALUE( - 90.137), SIMDE_FLOAT16_VALUE( 54.393), SIMDE_FLOAT16_VALUE( 33.691), SIMDE_FLOAT16_VALUE( - 98.523) }, + { SIMDE_FLOAT16_VALUE( 7.485), SIMDE_FLOAT16_VALUE( 39.884), SIMDE_FLOAT16_VALUE( 99.301), SIMDE_FLOAT16_VALUE( 27.532), + SIMDE_FLOAT16_VALUE( 13.171), SIMDE_FLOAT16_VALUE( - 32.240), SIMDE_FLOAT16_VALUE( 8.833), SIMDE_FLOAT16_VALUE( 53.411) } } }, + { { SIMDE_FLOAT16_VALUE( - 50.312), SIMDE_FLOAT16_VALUE( - 24.102), SIMDE_FLOAT16_VALUE( - 97.391), SIMDE_FLOAT16_VALUE( 3.022), + SIMDE_FLOAT16_VALUE( 78.988), SIMDE_FLOAT16_VALUE( - 25.237), SIMDE_FLOAT16_VALUE( - 90.131), SIMDE_FLOAT16_VALUE( - 97.836), + SIMDE_FLOAT16_VALUE( - 76.208), SIMDE_FLOAT16_VALUE( - 18.262), SIMDE_FLOAT16_VALUE( 23.059), SIMDE_FLOAT16_VALUE( - 86.163), + SIMDE_FLOAT16_VALUE( 65.842), SIMDE_FLOAT16_VALUE( - 75.486), SIMDE_FLOAT16_VALUE( 67.646), SIMDE_FLOAT16_VALUE( 59.138), + SIMDE_FLOAT16_VALUE( 55.197), SIMDE_FLOAT16_VALUE( 52.575), SIMDE_FLOAT16_VALUE( - 52.818), SIMDE_FLOAT16_VALUE( 58.329), + SIMDE_FLOAT16_VALUE( - 28.835), SIMDE_FLOAT16_VALUE( - 10.915), SIMDE_FLOAT16_VALUE( 75.879), SIMDE_FLOAT16_VALUE( - 0.973), + SIMDE_FLOAT16_VALUE( - 58.718), SIMDE_FLOAT16_VALUE( 12.668), SIMDE_FLOAT16_VALUE( - 98.837), SIMDE_FLOAT16_VALUE( 76.632), + SIMDE_FLOAT16_VALUE( 79.542), SIMDE_FLOAT16_VALUE( 34.512), SIMDE_FLOAT16_VALUE( - 94.397), SIMDE_FLOAT16_VALUE( 26.405) }, + { { SIMDE_FLOAT16_VALUE( - 50.312), SIMDE_FLOAT16_VALUE( 78.988), SIMDE_FLOAT16_VALUE( - 76.208), SIMDE_FLOAT16_VALUE( 65.842), + SIMDE_FLOAT16_VALUE( 55.197), SIMDE_FLOAT16_VALUE( - 28.835), SIMDE_FLOAT16_VALUE( - 58.718), SIMDE_FLOAT16_VALUE( 79.542) }, + { SIMDE_FLOAT16_VALUE( - 24.102), SIMDE_FLOAT16_VALUE( - 25.237), SIMDE_FLOAT16_VALUE( - 18.262), SIMDE_FLOAT16_VALUE( - 75.486), + SIMDE_FLOAT16_VALUE( 52.575), SIMDE_FLOAT16_VALUE( - 10.915), SIMDE_FLOAT16_VALUE( 12.668), SIMDE_FLOAT16_VALUE( 34.512) }, + { SIMDE_FLOAT16_VALUE( - 97.391), SIMDE_FLOAT16_VALUE( - 90.131), 
SIMDE_FLOAT16_VALUE( 23.059), SIMDE_FLOAT16_VALUE( 67.646), + SIMDE_FLOAT16_VALUE( - 52.818), SIMDE_FLOAT16_VALUE( 75.879), SIMDE_FLOAT16_VALUE( - 98.837), SIMDE_FLOAT16_VALUE( - 94.397) }, + { SIMDE_FLOAT16_VALUE( 3.022), SIMDE_FLOAT16_VALUE( - 97.836), SIMDE_FLOAT16_VALUE( - 86.163), SIMDE_FLOAT16_VALUE( 59.138), + SIMDE_FLOAT16_VALUE( 58.329), SIMDE_FLOAT16_VALUE( - 0.973), SIMDE_FLOAT16_VALUE( 76.632), SIMDE_FLOAT16_VALUE( 26.405) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x4_t r = simde_vld4q_f16(test_vec[i].a); + + simde_float16x8x4_t expected = { + {simde_vld1q_f16(test_vec[i].r[0]), simde_vld1q_f16(test_vec[i].r[1]), + simde_vld1q_f16(test_vec[i].r[2]), simde_vld1q_f16(test_vec[i].r[3])}}; + + simde_test_arm_neon_assert_equal_f16x8(r.val[0], expected.val[0], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], expected.val[1], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[2], expected.val[2], INT_MAX); + simde_test_arm_neon_assert_equal_f16x8(r.val[3], expected.val[3], INT_MAX); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x32_t a = simde_test_arm_neon_random_f16x32(-100.0f, 100.0f); + simde_float16x4_t r[4] = simde_vld4q_f16(a); + + simde_test_arm_neon_write_f16x32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r[4], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) SIMDE_TEST_FUNC_LIST_ENTRY(vld4_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vld4q_f16) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/qrshl.c b/test/arm/neon/qrshl.c new file mode 100644 index 000000000..97180cb28 --- /dev/null +++ b/test/arm/neon/qrshl.c @@ -0,0 +1,1708 @@ +#define SIMDE_TEST_ARM_NEON_INSN qrshl + +#include "test-neon.h" + +/* Check that both of these work */ +#if 
defined(__cplusplus) + #include "../../../simde/arm/neon/qrshl.h" +#else + #include "../../../simde/arm/neon.h" +#endif + +#if 0 +#define PROBABILITY 80 +#define probability(p) (rand() < (((int64_t) RAND_MAX * (p)) / 100)) +#endif + +static int +test_simde_vqrshlb_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t a; + int8_t b; + int8_t r; + } test_vec[] = { + { INT8_C( 50), + INT8_C( 41), + INT8_MAX }, + { INT8_C( 25), + -INT8_C( 6), + INT8_C( 0) }, + { INT8_C( 8), + -INT8_C( 56), + INT8_C( 0) }, + { -INT8_C( 120), + INT8_C( 7), + INT8_MIN }, + { -INT8_C( 116), + INT8_C( 23), + INT8_MIN }, + { -INT8_C( 123), + INT8_C( 8), + INT8_MIN }, + { INT8_C( 63), + -INT8_C( 58), + INT8_C( 0) }, + { INT8_C( 7), + INT8_C( 0), + INT8_C( 7) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int8_t r = simde_vqrshlb_s8(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_i8(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8_t a = simde_test_arm_neon_random_i8(); + simde_int8_t b = simde_test_arm_neon_random_i8(); + simde_int8_t r = simde_vqrshlb_s8(a, b); + + simde_test_arm_neon_write_i8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlh_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a; + int16_t b; + int16_t r; + } test_vec[] = { + { -INT16_C( 18529), + -INT16_C( 16129), + -INT16_C( 9264) }, + { INT16_C( 23969), + INT16_C( 5), + INT16_MAX }, + { INT16_C( 12527), + INT16_C( 19725), + INT16_MAX }, + { -INT16_C( 22762), + -INT16_C( 16), + INT16_C( 0) }, + { -INT16_C( 17650), + -INT16_C( 19012), + INT16_C( 0) }, + { -INT16_C( 18897), + -INT16_C( 2), + -INT16_C( 4724) }, + { -INT16_C( 19983), + -INT16_C( 27798), + INT16_MIN }, + { INT16_C( 329), + INT16_C( 14), + INT16_MAX 
}, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t r = simde_vqrshlh_s16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16_t a = simde_test_arm_neon_random_i16(); + simde_int16_t b = simde_test_arm_neon_random_i16(); + simde_int16_t r = simde_vqrshlh_s16(a, b); + + simde_test_arm_neon_write_i16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshls_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a; + int32_t b; + int32_t r; + } test_vec[] = { + { -INT32_C( 356193628), + -INT32_C( 6), + -INT32_C( 5565525) }, + { -INT32_C( 1204375858), + INT32_C( 57), + INT32_MIN }, + { -INT32_C( 1282375307), + INT32_C( 36), + INT32_MIN }, + { INT32_C( 66359825), + INT32_C( 46), + INT32_MAX }, + { -INT32_C( 79322588), + -INT32_C( 55), + INT32_C( 0) }, + { -INT32_C( 930923386), + INT32_C( 9), + INT32_MIN }, + { -INT32_C( 649560211), + -INT32_C( 27), + -INT32_C( 5) }, + { INT32_C( 1204135454), + INT32_C( 63), + INT32_MAX }, + { INT32_C( 1162581765), + INT32_C( 32), + INT32_MAX }, + { INT32_C( 155224493), + INT32_C( 32), + INT32_MAX }, + { INT32_C( 109880828), + INT32_C( 32), + INT32_MAX }, + { INT32_C( 882539136), + INT32_C( 32), + INT32_MAX }, + { -INT32_C( 25440898), + INT32_C( 32), + INT32_MIN }, + { -INT32_C( 1324715201), + INT32_C( 32), + INT32_MIN }, + { -INT32_C( 2085551108), + INT32_C( 32), + INT32_MIN }, + { INT32_C( 435262094), + INT32_C( 32), + INT32_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t r = simde_vqrshls_s32(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 
; i++) { + simde_int32_t a = simde_test_arm_neon_random_i32(); + simde_int32_t b = simde_test_arm_neon_random_i32(); + simde_int32_t r = simde_vqrshls_s32(a, b); + + simde_test_arm_neon_write_i32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshld_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a; + int64_t b; + int64_t r; + } test_vec[] = { + { INT64_C( 4843512680152437875), + INT64_C( 63), + INT64_MAX }, + { INT64_C( 8241406571090019417), + INT64_C( 56), + INT64_MAX }, + { -INT64_C( 7297452753530989252), + -INT64_C( 8), + -INT64_C( 28505674818480427) }, + { -INT64_C( 7093894782106403155), + INT64_C( 12), + INT64_MIN }, + { -INT64_C( 5066332558751241991), + -INT64_C( 16), + -INT64_C( 77306099834461) }, + { INT64_C( 1160675192825562685), + INT64_C( 48), + INT64_MAX }, + { INT64_C( 3559677482238416902), + -INT64_C( 17), + INT64_C( 27158183916004) }, + { INT64_C( 3143776020433277350), + INT64_C( 21), + INT64_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t r = simde_vqrshld_s64(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64_t a = simde_test_arm_neon_random_i64(); + simde_int64_t b = simde_test_arm_neon_random_i64(); + simde_int64_t r = simde_vqrshld_s64(a, b); + + simde_test_arm_neon_write_i64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlb_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t a; + int8_t b; + uint8_t r; + } test_vec[] = { + { UINT8_C( 156), + INT8_C( 7), + UINT8_C( 255) }, + { UINT8_C( 128), + 
-INT8_C( 7), + UINT8_C( 1) }, + { UINT8_C( 191), + INT8_C( 0), + UINT8_C( 191) }, + { UINT8_C( 212), + INT8_C( 0), + UINT8_C( 212) }, + { UINT8_C( 45), + INT8_C( 1), + UINT8_C( 90) }, + { UINT8_C( 141), + -INT8_C( 2), + UINT8_C( 35) }, + { UINT8_C( 0), + -INT8_C( 3), + UINT8_C( 0) }, + { UINT8_C( 147), + -INT8_C( 8), + UINT8_C( 1) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint8_t r = simde_vqrshlb_u8(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u8(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8_t a = simde_test_arm_neon_random_u8(); + simde_int8_t b = simde_test_arm_neon_random_i8(); + simde_uint8_t r = simde_vqrshlb_u8(a, b); + + simde_test_arm_neon_write_u8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlh_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a; + int16_t b; + uint16_t r; + } test_vec[] = { + { UINT16_C( 6825), + -INT16_C( 7), + UINT16_C( 53) }, + { UINT16_C( 37778), + INT16_C( 4), + UINT16_C( 65535) }, + { UINT16_C( 5210), + INT16_C( 10), + UINT16_C( 65535) }, + { UINT16_C( 58556), + -INT16_C( 21), + UINT16_C( 0) }, + { UINT16_C( 42502), + -INT16_C( 24), + UINT16_C( 0) }, + { UINT16_C( 63509), + -INT16_C( 32), + UINT16_C( 0) }, + { UINT16_C( 47814), + -INT16_C( 3), + UINT16_C( 5977) }, + { UINT16_C( 52229), + INT16_C( 4), + UINT16_C( 65535) }, + { UINT16_C( 60609), + INT16_C( 16), + UINT16_C( 65535) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r = simde_vqrshlh_u16(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16_t a = simde_test_arm_neon_random_u16(); + 
simde_int16_t b = simde_test_arm_neon_random_i16(); + simde_uint16_t r = simde_vqrshlh_u16(a, b); + + simde_test_arm_neon_write_u16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshls_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a; + int32_t b; + uint32_t r; + } test_vec[] = { + { UINT32_C( 266226573), + INT32_C( 376254835), + UINT32_MAX }, + { UINT32_C( 627439137), + -INT32_C( 30), + UINT32_C( 1) }, + { UINT32_C( 1026024828), + INT32_C( 827959370), + UINT32_MAX }, + { UINT32_C( 1847505906), + -INT32_C( 13), + UINT32_C( 225526) }, + { UINT32_C( 912395655), + INT32_C( 1671239939), + UINT32_MAX }, + { UINT32_C( 2010933546), + -INT32_C( 2), + UINT32_C( 502733387) }, + { UINT32_C( 2631640868), + INT32_C( 1562231698), + UINT32_C( 0) }, + { UINT32_C( 468197513), + -INT32_C( 24), + UINT32_C( 28) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t r = simde_vqrshls_u32(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32_t a = simde_test_arm_neon_random_u32(); + simde_int32_t b = simde_test_arm_neon_random_i32(); + simde_uint32_t r = simde_vqrshls_u32(a, b); + + simde_test_arm_neon_write_u32(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshld_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint64_t a; + int64_t b; + uint64_t r; + } test_vec[] = { + { UINT64_C( 2962349988052721578), + INT64_C( 5539606120659599506), + UINT64_C( 0) }, + { UINT64_C(10064468895855897213), + INT64_C( 16), + UINT64_MAX }, + { 
UINT64_C(15743453338776996870), + INT64_C( 3803046405739062919), + UINT64_C( 0) }, + { UINT64_C( 5520169503289739711), + INT64_C( 31), + UINT64_MAX }, + { UINT64_C( 5174950593436841605), + INT64_C( 6612268300308619863), + UINT64_MAX }, + { UINT64_C(13897644625553180665), + -INT64_C( 12), + UINT64_C( 3392979644910444) }, + { UINT64_C(15140102277279441050), + -INT64_C( 8156124063209764011), + UINT64_MAX }, + { UINT64_C( 9024129550253675895), + INT64_C( 26), + UINT64_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t r = simde_vqrshld_u64(test_vec[i].a, test_vec[i].b); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64_t a = simde_test_arm_neon_random_u64(); + simde_int64_t b = simde_test_arm_neon_random_i64(); + simde_uint64_t r = simde_vqrshld_u64(a, b); + + simde_test_arm_neon_write_u64(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int8_t a[8]; + int8_t b[8]; + int8_t r[8]; + } test_vec[] = { + { { -INT8_C( 87), -INT8_C( 1), INT8_C( 80), INT8_C( 77), + INT8_C( 56), -INT8_C( 54), INT8_C( 102), INT8_C( 111) }, + { INT8_C( 8), INT8_C( 3), -INT8_C( 7), INT8_C( 2), + INT8_C( 1), INT8_C( 7), INT8_C( 4), -INT8_C( 7) }, + { INT8_MIN, -INT8_C( 8), INT8_C( 1), INT8_MAX, + INT8_C( 112), INT8_MIN, INT8_MAX, INT8_C( 1) } }, + { { INT8_C( 42), INT8_C( 74), -INT8_C( 106), -INT8_C( 29), + -INT8_C( 94), -INT8_C( 66), -INT8_C( 22), INT8_C( 88) }, + { INT8_C( 5), INT8_C( 4), -INT8_C( 5), INT8_C( 1), + -INT8_C( 7), -INT8_C( 6), -INT8_C( 4), INT8_C( 0) }, + { INT8_MAX, INT8_MAX, -INT8_C( 3), -INT8_C( 58), + -INT8_C( 1), -INT8_C( 1), -INT8_C( 1), INT8_C( 88) } }, + { { INT8_MIN, INT8_C( 100), INT8_C( 4), INT8_C( 14), + 
INT8_C( 92), INT8_C( 74), INT8_C( 59), -INT8_C( 98) }, + { -INT8_C( 3), INT8_C( 8), INT8_C( 8), INT8_C( 6), + INT8_C( 5), INT8_C( 5), INT8_C( 5), -INT8_C( 6) }, + { -INT8_C( 16), INT8_MAX, INT8_MAX, INT8_MAX, + INT8_MAX, INT8_MAX, INT8_MAX, -INT8_C( 2) } }, + { { INT8_C( 40), INT8_C( 61), INT8_C( 73), -INT8_C( 94), + INT8_C( 120), INT8_C( 65), INT8_C( 61), -INT8_C( 71) }, + { INT8_C( 7), INT8_C( 1), -INT8_C( 6), -INT8_C( 8), + INT8_C( 6), -INT8_C( 5), -INT8_C( 4), -INT8_C( 6) }, + { INT8_MAX, INT8_C( 122), INT8_C( 1), INT8_C( 0), + INT8_MAX, INT8_C( 2), INT8_C( 4), -INT8_C( 1) } }, + { { INT8_C( 32), -INT8_C( 33), -INT8_C( 42), -INT8_C( 124), + INT8_C( 13), -INT8_C( 93), INT8_C( 29), INT8_C( 93) }, + { -INT8_C( 8), -INT8_C( 1), INT8_C( 1), -INT8_C( 1), + INT8_C( 7), -INT8_C( 2), INT8_C( 0), INT8_C( 6) }, + { INT8_C( 0), -INT8_C( 16), -INT8_C( 84), -INT8_C( 62), + INT8_MAX, -INT8_C( 23), INT8_C( 29), INT8_MAX } }, + { { INT8_C( 109), -INT8_C( 121), -INT8_C( 67), -INT8_C( 26), + INT8_C( 116), -INT8_C( 14), -INT8_C( 107), INT8_C( 120) }, + { -INT8_C( 8), INT8_C( 1), -INT8_C( 7), INT8_C( 2), + INT8_C( 1), -INT8_C( 5), INT8_C( 2), INT8_C( 2) }, + { INT8_C( 0), INT8_MIN, -INT8_C( 1), -INT8_C( 104), + INT8_MAX, INT8_C( 0), INT8_MIN, INT8_MAX } }, + { { -INT8_C( 75), INT8_C( 9), INT8_C( 17), INT8_C( 9), + -INT8_C( 82), INT8_C( 82), -INT8_C( 91), -INT8_C( 24) }, + { INT8_C( 1), INT8_C( 7), INT8_C( 4), -INT8_C( 4), + INT8_C( 0), INT8_C( 6), -INT8_C( 8), -INT8_C( 7) }, + { INT8_MIN, INT8_MAX, INT8_MAX, INT8_C( 1), + -INT8_C( 82), INT8_MAX, INT8_C( 0), INT8_C( 0) } }, + { { -INT8_C( 3), -INT8_C( 107), INT8_C( 50), -INT8_C( 31), + INT8_MIN, INT8_C( 61), INT8_C( 53), -INT8_C( 4) }, + { -INT8_C( 5), INT8_C( 5), INT8_C( 6), INT8_C( 2), + -INT8_C( 2), INT8_C( 5), -INT8_C( 6), -INT8_C( 5) }, + { INT8_C( 0), INT8_MIN, INT8_MAX, -INT8_C( 124), + -INT8_C( 32), INT8_MAX, INT8_C( 1), INT8_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + 
simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + simde_int8x8_t r = simde_vqrshl_s8(a, b); + + simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vqrshl_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +static int +test_simde_vqrshl_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int16_t a[4]; + int16_t b[4]; + int16_t r[4]; + } test_vec[] = { + { { INT16_C( 31263), INT16_C( 18737), INT16_C( 14706), -INT16_C( 22084) }, + { INT16_C( 0), -INT16_C( 6), INT16_C( 5), INT16_C( 11) }, + { INT16_C( 31263), INT16_C( 293), INT16_MAX, INT16_MIN } }, + { { -INT16_C( 27185), INT16_C( 1410), INT16_C( 26995), -INT16_C( 8974) }, + { -INT16_C( 15), INT16_C( 9), -INT16_C( 4), INT16_C( 4) }, + { -INT16_C( 1), INT16_MAX, INT16_C( 1687), INT16_MIN } }, + { { -INT16_C( 5300), INT16_C( 1064), -INT16_C( 4694), INT16_C( 25472) }, + { INT16_C( 3), INT16_C( 0), INT16_C( 5), INT16_C( 8) }, + { INT16_MIN, INT16_C( 1064), INT16_MIN, INT16_MAX } }, + { { -INT16_C( 9080), INT16_C( 16802), -INT16_C( 10592), -INT16_C( 30376) }, + { -INT16_C( 4), -INT16_C( 8), INT16_C( 0), INT16_C( 16) }, + { -INT16_C( 567), INT16_C( 66), -INT16_C( 10592), INT16_MIN } }, + { { -INT16_C( 10167), INT16_C( 25018), INT16_C( 28423), -INT16_C( 9608) }, + { -INT16_C( 2), -INT16_C( 4), -INT16_C( 6), -INT16_C( 8) }, + { -INT16_C( 2542), INT16_C( 1564), INT16_C( 444), -INT16_C( 38) } }, + { { INT16_C( 32142), -INT16_C( 28370), INT16_C( 24856), INT16_C( 26195) }, + { -INT16_C( 3), -INT16_C( 8), INT16_C( 15), INT16_C( 9) }, + { INT16_C( 4018), 
-INT16_C( 111), INT16_MAX, INT16_MAX } }, + { { -INT16_C( 23494), -INT16_C( 25691), -INT16_C( 8401), -INT16_C( 31132) }, + { INT16_C( 3), -INT16_C( 13), -INT16_C( 16), INT16_C( 14) }, + { INT16_MIN, -INT16_C( 3), INT16_C( 0), INT16_MIN } }, + { { INT16_C( 14453), -INT16_C( 12196), INT16_C( 27445), INT16_C( 31840) }, + { INT16_C( 14), INT16_C( 5), INT16_C( 13), -INT16_C( 9) }, + { INT16_MAX, INT16_MIN, INT16_MAX, INT16_C( 62) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_int16x4_t r = simde_vqrshl_s16(a, b); + + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vqrshl_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int32_t a[2]; + int32_t b[2]; + int32_t r[2]; + } test_vec[] = { + { { INT32_C( 37316345), INT32_C( 852588016) }, + { -INT32_C( 21), INT32_C( 22) }, + { INT32_C( 18), INT32_MAX } }, + { { INT32_C( 1713047364), -INT32_C( 718155773) }, + { -INT32_C( 2), INT32_C( 18) }, + { INT32_C( 428261841), INT32_MIN } }, + { { INT32_C( 2011282434), -INT32_C( 1644508231) }, + { INT32_C( 12), -INT32_C( 21) }, + { INT32_MAX, -INT32_C( 784) } }, + { { -INT32_C( 8819381), -INT32_C( 112184656) }, + { -INT32_C( 12), -INT32_C( 21) }, + { -INT32_C( 2153), -INT32_C( 53) } }, + { { INT32_C( 305438996), INT32_C( 1662430416) }, + { INT32_C( 29), -INT32_C( 19) }, + { INT32_MAX, INT32_C( 3171) } }, + { { INT32_C( 
1246050521), -INT32_C( 703046011) }, + { INT32_C( 11), INT32_C( 11) }, + { INT32_MAX, INT32_MIN } }, + { { -INT32_C( 129492298), -INT32_C( 1643446120) }, + { INT32_C( 13), INT32_C( 25) }, + { INT32_MIN, INT32_MIN } }, + { { INT32_C( 58593943), -INT32_C( 594339506) }, + { -INT32_C( 21), -INT32_C( 9) }, + { INT32_C( 28), -INT32_C( 1160819) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_int32x2_t r = simde_vqrshl_s32(a, b); + + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vqrshl_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int64_t a[1]; + int64_t b[1]; + int64_t r[1]; + } test_vec[] = { + { { INT64_C( 2379025802943267314) }, + { -INT64_C( 3) }, + { INT64_C( 297378225367908414) } }, + { { -INT64_C( 7880843795017100943) }, + { INT64_C( 13) }, + { INT64_MIN } }, + { { INT64_C( 5060086036473607234) }, + { -INT64_C( 12) }, + { INT64_C( 1235372567498439) } }, + { { -INT64_C( 1251288881944570923) }, + { INT64_C( 59) }, + { INT64_MIN } }, + { { -INT64_C( 818197169458855311) }, + { -INT64_C( 46) }, + { -INT64_C( 11627) } }, + { { -INT64_C( 3470264563169298453) }, + { -INT64_C( 56) }, + { -INT64_C( 48) } }, + { { -INT64_C( 6566310065777805324) }, + { -INT64_C( 54) }, + { -INT64_C( 365) } }, + { { INT64_C( 8846445702649439056) }, + { -INT64_C( 5) }, + { INT64_C( 276451428207794971) } }, + }; + + for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); + simde_int64x1_t r = simde_vqrshl_s64(a, b); + + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x1_t a = simde_test_arm_neon_random_i64x1(); + simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); + simde_int64x1_t r = simde_vqrshl_s64(a, b); + + simde_test_arm_neon_write_i64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint8_t a[8]; + int8_t b[8]; + uint8_t r[8]; + } test_vec[] = { + { { UINT8_C( 17), UINT8_C( 254), UINT8_C( 166), UINT8_C( 242), + UINT8_C( 224), UINT8_C( 96), UINT8_C( 134), UINT8_C( 31) }, + { -INT8_C( 2), INT8_C( 6), -INT8_C( 5), -INT8_C( 2), + -INT8_C( 1), INT8_C( 0), -INT8_C( 4), INT8_C( 4) }, + { UINT8_C( 4), UINT8_C( 255), UINT8_C( 5), UINT8_C( 61), + UINT8_C( 112), UINT8_C( 96), UINT8_C( 8), UINT8_C( 255) } }, + { { UINT8_C( 236), UINT8_C( 17), UINT8_C( 172), UINT8_C( 17), + UINT8_C( 72), UINT8_C( 97), UINT8_C( 108), UINT8_C( 128) }, + { INT8_C( 6), INT8_C( 0), INT8_C( 6), INT8_C( 7), + -INT8_C( 4), -INT8_C( 5), INT8_C( 1), -INT8_C( 7) }, + { UINT8_C( 255), UINT8_C( 17), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 5), UINT8_C( 3), UINT8_C( 216), UINT8_C( 1) } }, + { { UINT8_C( 101), UINT8_C( 167), UINT8_C( 60), UINT8_C( 143), + UINT8_C( 234), UINT8_C( 177), UINT8_C( 216), UINT8_C( 221) }, + { -INT8_C( 3), -INT8_C( 8), INT8_C( 3), -INT8_C( 6), + INT8_C( 3), INT8_C( 5), -INT8_C( 8), INT8_C( 5) }, + { UINT8_C( 13), UINT8_C( 1), UINT8_C( 255), UINT8_C( 2), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 1), UINT8_C( 255) } }, + { 
{ UINT8_C( 9), UINT8_C( 223), UINT8_C( 27), UINT8_C( 204), + UINT8_C( 106), UINT8_C( 241), UINT8_C( 32), UINT8_C( 220) }, + { -INT8_C( 5), INT8_C( 8), INT8_C( 3), INT8_C( 4), + -INT8_C( 4), INT8_C( 6), INT8_C( 6), -INT8_C( 8) }, + { UINT8_C( 0), UINT8_C( 255), UINT8_C( 216), UINT8_C( 255), + UINT8_C( 7), UINT8_C( 255), UINT8_C( 255), UINT8_C( 1) } }, + { { UINT8_C( 175), UINT8_C( 114), UINT8_C( 53), UINT8_C( 46), + UINT8_C( 26), UINT8_C( 170), UINT8_C( 171), UINT8_C( 213) }, + { -INT8_C( 6), -INT8_C( 6), -INT8_C( 4), INT8_C( 3), + INT8_C( 1), -INT8_C( 2), -INT8_C( 3), -INT8_C( 8) }, + { UINT8_C( 3), UINT8_C( 2), UINT8_C( 3), UINT8_C( 255), + UINT8_C( 52), UINT8_C( 43), UINT8_C( 21), UINT8_C( 1) } }, + { { UINT8_C( 37), UINT8_C( 113), UINT8_C( 207), UINT8_C( 209), + UINT8_C( 209), UINT8_C( 8), UINT8_C( 239), UINT8_C( 42) }, + { -INT8_C( 8), -INT8_C( 6), -INT8_C( 7), -INT8_C( 5), + INT8_C( 2), INT8_C( 7), INT8_C( 1), -INT8_C( 5) }, + { UINT8_C( 0), UINT8_C( 2), UINT8_C( 2), UINT8_C( 7), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 1) } }, + { { UINT8_C( 249), UINT8_C( 98), UINT8_C( 62), UINT8_C( 112), + UINT8_C( 21), UINT8_C( 89), UINT8_C( 159), UINT8_C( 208) }, + { INT8_C( 4), -INT8_C( 3), INT8_C( 8), INT8_C( 0), + -INT8_C( 4), -INT8_C( 6), -INT8_C( 3), INT8_C( 2) }, + { UINT8_C( 255), UINT8_C( 12), UINT8_C( 255), UINT8_C( 112), + UINT8_C( 1), UINT8_C( 1), UINT8_C( 20), UINT8_C( 255) } }, + { { UINT8_C( 38), UINT8_C( 128), UINT8_C( 239), UINT8_C( 139), + UINT8_C( 165), UINT8_C( 233), UINT8_C( 134), UINT8_C( 247) }, + { -INT8_C( 3), INT8_C( 5), INT8_C( 6), INT8_C( 3), + -INT8_C( 5), INT8_C( 4), INT8_C( 1), INT8_C( 7) }, + { UINT8_C( 5), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 5), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + simde_uint8x8_t r = 
simde_vqrshl_u8(a, b); + + simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_uint8x8_t r = simde_vqrshl_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint16_t a[4]; + int16_t b[4]; + uint16_t r[4]; + } test_vec[] = { + { { UINT16_C( 36529), UINT16_C( 29829), UINT16_C( 2096), UINT16_C( 2729) }, + { INT16_C( 0), INT16_C( 4), INT16_C( 9), -INT16_C( 4) }, + { UINT16_C( 36529), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 171) } }, + { { UINT16_C( 39946), UINT16_C( 27950), UINT16_C( 23605), UINT16_C( 4911) }, + { INT16_C( 11), -INT16_C( 14), -INT16_C( 7), INT16_C( 9) }, + { UINT16_C( 65535), UINT16_C( 2), UINT16_C( 184), UINT16_C( 65535) } }, + { { UINT16_C( 42859), UINT16_C( 2240), UINT16_C( 20043), UINT16_C( 10036) }, + { INT16_C( 8), INT16_C( 14), -INT16_C( 15), -INT16_C( 12) }, + { UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 1), UINT16_C( 2) } }, + { { UINT16_C( 50980), UINT16_C( 165), UINT16_C( 10039), UINT16_C( 30538) }, + { INT16_C( 8), -INT16_C( 8), INT16_C( 11), -INT16_C( 9) }, + { UINT16_C( 65535), UINT16_C( 1), UINT16_C( 65535), UINT16_C( 60) } }, + { { UINT16_C( 61605), UINT16_C( 27801), UINT16_C( 14514), UINT16_C( 31459) }, + { -INT16_C( 2), INT16_C( 9), INT16_C( 7), INT16_C( 13) }, + { UINT16_C( 15401), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535) } }, + { { UINT16_C( 27895), UINT16_C( 11683), UINT16_C( 16781), UINT16_C( 38533) }, + { -INT16_C( 10), -INT16_C( 4), INT16_C( 16), -INT16_C( 10) }, + { UINT16_C( 27), UINT16_C( 730), UINT16_C( 65535), 
UINT16_C( 38) } }, + { { UINT16_C( 53996), UINT16_C( 31874), UINT16_C( 57883), UINT16_C( 4747) }, + { -INT16_C( 16), INT16_C( 8), -INT16_C( 9), INT16_C( 9) }, + { UINT16_C( 1), UINT16_C( 65535), UINT16_C( 113), UINT16_C( 65535) } }, + { { UINT16_C( 37059), UINT16_C( 57524), UINT16_C( 51263), UINT16_C( 61052) }, + { INT16_C( 6), -INT16_C( 11), -INT16_C( 1), -INT16_C( 6) }, + { UINT16_C( 65535), UINT16_C( 28), UINT16_C( 25632), UINT16_C( 954) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + simde_uint16x4_t r = simde_vqrshl_u16(a, b); + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_uint16x4_t r = simde_vqrshl_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint32_t a[2]; + int32_t b[2]; + uint32_t r[2]; + } test_vec[] = { + { { UINT32_C( 4032142366), UINT32_C( 2503991721) }, + { INT32_C( 23), -INT32_C( 11) }, + { UINT32_C( 4294967295), UINT32_C( 1222652) } }, + { { UINT32_C( 1849889977), UINT32_C( 1539239213) }, + { INT32_C( 3), -INT32_C( 31) }, + { UINT32_C( 4294967295), UINT32_C( 1) } }, + { { UINT32_C( 3780982194), UINT32_C( 1551554012) }, + { INT32_C( 8), INT32_C( 30) }, + { UINT32_C( 4294967295), UINT32_C( 4294967295) } }, + { { UINT32_C( 457931350), UINT32_C( 2322708363) }, + { -INT32_C( 24), -INT32_C( 15) }, + { UINT32_C( 27), UINT32_C( 70883) } }, + { { UINT32_C( 1302772324), UINT32_C( 3149457440) }, + { 
-INT32_C( 8), INT32_C( 31) }, + { UINT32_C( 5088954), UINT32_C( 4294967295) } }, + { { UINT32_C( 2656566271), UINT32_C( 10727285) }, + { -INT32_C( 11), INT32_C( 23) }, + { UINT32_C( 1297151), UINT32_C( 4294967295) } }, + { { UINT32_C( 580186040), UINT32_C( 879645142) }, + { INT32_C( 26), INT32_C( 8) }, + { UINT32_C( 4294967295), UINT32_C( 4294967295) } }, + { { UINT32_C( 2842364758), UINT32_C( 627566619) }, + { -INT32_C( 19), -INT32_C( 17) }, + { UINT32_C( 5421), UINT32_C( 4788) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + simde_uint32x2_t r = simde_vqrshl_u32(a, b); + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_uint32x2_t r = simde_vqrshl_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshl_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint64_t a[1]; + int64_t b[1]; + uint64_t r[1]; + } test_vec[] = { + { { UINT64_C(10606652271688138073) }, + { -INT64_C( 24) }, + { UINT64_C( 632205740910) } }, + { { UINT64_C( 6951000685328207734) }, + { -INT64_C( 41) }, + { UINT64_C( 3160949) } }, + { { UINT64_C( 2652753700446653166) }, + { INT64_C( 16) }, + { UINT64_C(18446744073709551615) } }, + { { UINT64_C( 2887850072060195408) }, + { INT64_C( 8) }, + { UINT64_C(18446744073709551615) } }, + { { UINT64_C( 3003532981166020057) }, + { -INT64_C( 18) }, + { UINT64_C( 11457569050469) } }, + { { UINT64_C( 2467308484911859140) }, + { -INT64_C( 4) }, + { UINT64_C( 
154206780306991196) } }, + { { UINT64_C(10329885492628171968) }, + { INT64_C( 8) }, + { UINT64_C(18446744073709551615) } }, + { { UINT64_C( 2016469675685954892) }, + { -INT64_C( 62) }, + { UINT64_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); + simde_uint64x1_t r = simde_vqrshl_u64(a, b); + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x1_t a = simde_test_arm_neon_random_u64x1(); + simde_int64x1_t b = simde_test_arm_neon_random_i64x1(); + simde_uint64x1_t r = simde_vqrshl_u64(a, b); + + simde_test_arm_neon_write_u64x1(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x1(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x1(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int8_t a[16]; + int8_t b[16]; + int8_t r[16]; + } test_vec[] = { + { { INT8_C( 40), -INT8_C( 12), -INT8_C( 107), INT8_C( 91), + INT8_C( 39), INT8_C( 70), -INT8_C( 66), -INT8_C( 32), + INT8_C( 44), INT8_C( 106), INT8_C( 13), -INT8_C( 17), + INT8_C( 42), -INT8_C( 33), INT8_C( 0), INT8_C( 99) }, + { -INT8_C( 1), INT8_C( 5), INT8_C( 7), -INT8_C( 6), + INT8_C( 5), INT8_C( 3), -INT8_C( 4), -INT8_C( 2), + INT8_C( 8), INT8_C( 7), -INT8_C( 8), -INT8_C( 8), + INT8_C( 8), INT8_C( 4), INT8_C( 4), -INT8_C( 7) }, + { INT8_C( 20), INT8_MIN, INT8_MIN, INT8_C( 1), + INT8_MAX, INT8_MAX, -INT8_C( 4), -INT8_C( 8), + INT8_MAX, INT8_MAX, INT8_C( 0), INT8_C( 0), + INT8_MAX, INT8_MIN, INT8_C( 0), INT8_C( 1) } }, + { { INT8_C( 13), -INT8_C( 18), -INT8_C( 109), INT8_C( 117), + -INT8_C( 115), INT8_C( 11), INT8_C( 28), INT8_C( 34), + INT8_C( 84), INT8_C( 24), -INT8_C( 56), INT8_C( 69), + INT8_C( 112), -INT8_C( 93), -INT8_C( 57), 
INT8_C( 46) }, + { INT8_C( 3), INT8_C( 1), -INT8_C( 6), INT8_C( 4), + -INT8_C( 7), INT8_C( 1), INT8_C( 3), -INT8_C( 4), + INT8_C( 6), INT8_C( 1), -INT8_C( 2), -INT8_C( 3), + INT8_C( 0), INT8_C( 6), INT8_C( 3), INT8_C( 8) }, + { INT8_C( 104), -INT8_C( 36), -INT8_C( 2), INT8_MAX, + -INT8_C( 1), INT8_C( 22), INT8_MAX, INT8_C( 2), + INT8_MAX, INT8_C( 48), -INT8_C( 14), INT8_C( 9), + INT8_C( 112), INT8_MIN, INT8_MIN, INT8_MAX } }, + { { -INT8_C( 20), INT8_C( 92), -INT8_C( 120), INT8_C( 62), + -INT8_C( 85), INT8_C( 61), -INT8_C( 9), -INT8_C( 121), + INT8_C( 18), -INT8_C( 16), -INT8_C( 124), -INT8_C( 109), + INT8_C( 101), INT8_C( 14), -INT8_C( 15), -INT8_C( 69) }, + { INT8_C( 2), -INT8_C( 3), INT8_C( 3), INT8_C( 2), + -INT8_C( 2), -INT8_C( 2), -INT8_C( 2), INT8_C( 6), + INT8_C( 0), INT8_C( 2), INT8_C( 4), INT8_C( 5), + -INT8_C( 8), -INT8_C( 3), -INT8_C( 1), INT8_C( 8) }, + { -INT8_C( 80), INT8_C( 12), INT8_MIN, INT8_MAX, + -INT8_C( 21), INT8_C( 15), -INT8_C( 2), INT8_MIN, + INT8_C( 18), -INT8_C( 64), INT8_MIN, INT8_MIN, + INT8_C( 0), INT8_C( 2), -INT8_C( 7), INT8_MIN } }, + { { INT8_C( 38), INT8_C( 60), -INT8_C( 70), INT8_C( 91), + INT8_C( 120), -INT8_C( 121), INT8_C( 71), -INT8_C( 22), + INT8_C( 92), INT8_C( 8), INT8_C( 124), -INT8_C( 117), + INT8_C( 115), INT8_C( 44), -INT8_C( 93), INT8_C( 66) }, + { INT8_C( 1), INT8_C( 5), INT8_C( 2), INT8_C( 8), + -INT8_C( 6), -INT8_C( 4), INT8_C( 8), -INT8_C( 2), + INT8_C( 5), INT8_C( 4), -INT8_C( 2), INT8_C( 2), + INT8_C( 8), INT8_C( 0), -INT8_C( 5), INT8_C( 0) }, + { INT8_C( 76), INT8_MAX, INT8_MIN, INT8_MAX, + INT8_C( 2), -INT8_C( 8), INT8_MAX, -INT8_C( 5), + INT8_MAX, INT8_MAX, INT8_C( 31), INT8_MIN, + INT8_MAX, INT8_C( 44), -INT8_C( 3), INT8_C( 66) } }, + { { INT8_C( 122), -INT8_C( 112), -INT8_C( 124), INT8_C( 42), + INT8_C( 87), -INT8_C( 117), INT8_C( 56), -INT8_C( 93), + INT8_C( 78), INT8_C( 31), -INT8_C( 3), -INT8_C( 50), + INT8_C( 116), INT8_C( 68), INT8_C( 64), INT8_C( 69) }, + { INT8_C( 2), INT8_C( 4), -INT8_C( 7), INT8_C( 
4), + -INT8_C( 3), -INT8_C( 8), INT8_C( 8), INT8_C( 1), + -INT8_C( 5), -INT8_C( 1), INT8_C( 6), -INT8_C( 3), + -INT8_C( 3), INT8_C( 5), INT8_C( 2), INT8_C( 1) }, + { INT8_MAX, INT8_MIN, -INT8_C( 1), INT8_MAX, + INT8_C( 11), INT8_C( 0), INT8_MAX, INT8_MIN, + INT8_C( 2), INT8_C( 16), INT8_MIN, -INT8_C( 6), + INT8_C( 15), INT8_MAX, INT8_MAX, INT8_MAX } }, + { { -INT8_C( 34), -INT8_C( 5), -INT8_C( 85), -INT8_C( 77), + INT8_C( 98), INT8_C( 107), INT8_C( 84), -INT8_C( 115), + -INT8_C( 127), -INT8_C( 1), -INT8_C( 33), INT8_C( 111), + INT8_C( 33), INT8_C( 97), INT8_C( 46), INT8_C( 76) }, + { INT8_C( 8), INT8_C( 4), INT8_C( 0), -INT8_C( 7), + INT8_C( 2), -INT8_C( 6), INT8_C( 4), -INT8_C( 8), + INT8_C( 4), INT8_C( 6), INT8_C( 4), -INT8_C( 8), + INT8_C( 1), -INT8_C( 5), -INT8_C( 6), -INT8_C( 7) }, + { INT8_MIN, -INT8_C( 80), -INT8_C( 85), -INT8_C( 1), + INT8_MAX, INT8_C( 2), INT8_MAX, INT8_C( 0), + INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_C( 0), + INT8_C( 66), INT8_C( 3), INT8_C( 1), INT8_C( 1) } }, + { { -INT8_C( 69), -INT8_C( 87), INT8_C( 96), INT8_C( 21), + INT8_C( 66), -INT8_C( 29), -INT8_C( 72), -INT8_C( 116), + -INT8_C( 125), INT8_C( 72), INT8_C( 98), INT8_C( 60), + INT8_C( 99), INT8_C( 102), -INT8_C( 72), -INT8_C( 39) }, + { INT8_C( 2), -INT8_C( 3), -INT8_C( 6), -INT8_C( 3), + -INT8_C( 3), INT8_C( 4), INT8_C( 4), -INT8_C( 2), + INT8_C( 0), -INT8_C( 5), INT8_C( 1), INT8_C( 5), + -INT8_C( 3), -INT8_C( 3), -INT8_C( 7), -INT8_C( 7) }, + { INT8_MIN, -INT8_C( 11), INT8_C( 2), INT8_C( 3), + INT8_C( 8), INT8_MIN, INT8_MIN, -INT8_C( 29), + -INT8_C( 125), INT8_C( 2), INT8_MAX, INT8_MAX, + INT8_C( 12), INT8_C( 13), -INT8_C( 1), INT8_C( 0) } }, + { { -INT8_C( 121), INT8_C( 38), INT8_C( 117), INT8_C( 37), + -INT8_C( 14), -INT8_C( 96), INT8_C( 74), INT8_C( 115), + -INT8_C( 24), INT8_C( 64), INT8_C( 105), -INT8_C( 122), + -INT8_C( 82), INT8_C( 2), INT8_C( 119), INT8_C( 84) }, + { INT8_C( 5), INT8_C( 1), INT8_C( 1), INT8_C( 3), + -INT8_C( 1), INT8_C( 3), INT8_C( 8), INT8_C( 2), + 
INT8_C( 0), -INT8_C( 4), INT8_C( 8), INT8_C( 3), + INT8_C( 0), INT8_C( 0), -INT8_C( 6), -INT8_C( 1) }, + { INT8_MIN, INT8_C( 76), INT8_MAX, INT8_MAX, + -INT8_C( 7), INT8_MIN, INT8_MAX, INT8_MAX, + -INT8_C( 24), INT8_C( 4), INT8_MAX, INT8_MIN, + -INT8_C( 82), INT8_C( 2), INT8_C( 2), INT8_C( 42) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + simde_int8x16_t r = simde_vqrshlq_s8(a, b); + + simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = simde_vqrshlq_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int16_t a[8]; + int16_t b[8]; + int16_t r[8]; + } test_vec[] = { + { { INT16_C( 17), -INT16_C( 18060), INT16_C( 2082), INT16_C( 3026), + -INT16_C( 23678), INT16_C( 13144), -INT16_C( 19898), INT16_C( 26465) }, + { -INT16_C( 9), INT16_C( 4), -INT16_C( 9), -INT16_C( 7), + INT16_C( 6), -INT16_C( 8), -INT16_C( 5), -INT16_C( 7) }, + { INT16_C( 0), INT16_MIN, INT16_C( 4), INT16_C( 24), + INT16_MIN, INT16_C( 51), -INT16_C( 622), INT16_C( 207) } }, + { { -INT16_C( 31358), INT16_C( 28497), -INT16_C( 24101), -INT16_C( 29556), + INT16_C( 23640), INT16_C( 26523), INT16_C( 30555), INT16_C( 32733) }, + { INT16_C( 8), INT16_C( 12), INT16_C( 11), -INT16_C( 9), + -INT16_C( 8), -INT16_C( 11), INT16_C( 4), -INT16_C( 3) }, + { INT16_MIN, INT16_MAX, INT16_MIN, -INT16_C( 58), + INT16_C( 92), INT16_C( 13), INT16_MAX, INT16_C( 4092) } }, + 
{ { -INT16_C( 25371), -INT16_C( 28626), -INT16_C( 10661), -INT16_C( 29662), + INT16_C( 8039), INT16_C( 27599), INT16_C( 27158), INT16_C( 14531) }, + { INT16_C( 0), -INT16_C( 10), INT16_C( 2), INT16_C( 11), + -INT16_C( 2), -INT16_C( 9), INT16_C( 15), INT16_C( 6) }, + { -INT16_C( 25371), -INT16_C( 28), INT16_MIN, INT16_MIN, + INT16_C( 2010), INT16_C( 54), INT16_MAX, INT16_MAX } }, + { { INT16_C( 29383), INT16_C( 13775), INT16_C( 1669), INT16_C( 16370), + -INT16_C( 31950), -INT16_C( 10488), -INT16_C( 4281), -INT16_C( 15443) }, + { INT16_C( 6), -INT16_C( 14), INT16_C( 6), -INT16_C( 13), + -INT16_C( 7), INT16_C( 5), INT16_C( 12), -INT16_C( 2) }, + { INT16_MAX, INT16_C( 1), INT16_MAX, INT16_C( 2), + -INT16_C( 250), INT16_MIN, INT16_MIN, -INT16_C( 3861) } }, + { { -INT16_C( 16008), INT16_C( 6576), INT16_C( 25375), -INT16_C( 2472), + -INT16_C( 22062), -INT16_C( 1594), -INT16_C( 33), -INT16_C( 29140) }, + { -INT16_C( 10), -INT16_C( 16), -INT16_C( 14), INT16_C( 15), + -INT16_C( 11), -INT16_C( 13), INT16_C( 3), -INT16_C( 13) }, + { -INT16_C( 16), INT16_C( 0), INT16_C( 2), INT16_MIN, + -INT16_C( 11), INT16_C( 0), -INT16_C( 264), -INT16_C( 4) } }, + { { -INT16_C( 17438), INT16_C( 21035), -INT16_C( 4482), -INT16_C( 5337), + -INT16_C( 25235), -INT16_C( 6754), -INT16_C( 9668), -INT16_C( 27862) }, + { -INT16_C( 1), -INT16_C( 15), INT16_C( 5), -INT16_C( 4), + -INT16_C( 5), INT16_C( 14), -INT16_C( 8), -INT16_C( 9) }, + { -INT16_C( 8719), INT16_C( 1), INT16_MIN, -INT16_C( 334), + -INT16_C( 789), INT16_MIN, -INT16_C( 38), -INT16_C( 54) } }, + { { INT16_C( 10172), -INT16_C( 6237), -INT16_C( 25431), -INT16_C( 30731), + -INT16_C( 2806), -INT16_C( 24307), -INT16_C( 1303), -INT16_C( 17051) }, + { -INT16_C( 4), INT16_C( 13), -INT16_C( 11), -INT16_C( 1), + INT16_C( 10), INT16_C( 7), -INT16_C( 16), INT16_C( 1) }, + { INT16_C( 636), INT16_MIN, -INT16_C( 12), -INT16_C( 15365), + INT16_MIN, INT16_MIN, INT16_C( 0), INT16_MIN } }, + { { -INT16_C( 1297), INT16_C( 6102), INT16_C( 12306), INT16_C( 
23191), + -INT16_C( 2630), -INT16_C( 28109), -INT16_C( 20844), INT16_C( 271) }, + { INT16_C( 3), INT16_C( 15), INT16_C( 8), INT16_C( 6), + INT16_C( 0), INT16_C( 15), -INT16_C( 5), -INT16_C( 13) }, + { -INT16_C( 10376), INT16_MAX, INT16_MAX, INT16_MAX, + -INT16_C( 2630), INT16_MIN, -INT16_C( 651), INT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int16x8_t r = simde_vqrshlq_s16(a, b); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vqrshlq_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int32_t a[4]; + int32_t b[4]; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 1814886595), -INT32_C( 1646956830), INT32_C( 1237502269), INT32_C( 534733024) }, + { -INT32_C( 21), -INT32_C( 25), INT32_C( 31), -INT32_C( 11) }, + { INT32_C( 865), -INT32_C( 49), INT32_MAX, INT32_C( 261100) } }, + { { INT32_C( 1736113552), INT32_C( 1258318563), INT32_C( 1069774949), INT32_C( 755812542) }, + { -INT32_C( 32), INT32_C( 8), INT32_C( 32), INT32_C( 19) }, + { INT32_C( 0), INT32_MAX, INT32_MAX, INT32_MAX } }, + { { -INT32_C( 1923447339), -INT32_C( 1220148547), -INT32_C( 1485011002), -INT32_C( 1143271085) }, + { -INT32_C( 30), -INT32_C( 24), -INT32_C( 25), INT32_C( 30) }, + { -INT32_C( 2), -INT32_C( 73), -INT32_C( 44), INT32_MIN } }, + { { INT32_C( 1865665830), INT32_C( 2143927441), -INT32_C( 1135678127), 
INT32_C( 123259338) }, + { INT32_C( 20), -INT32_C( 9), -INT32_C( 31), -INT32_C( 14) }, + { INT32_MAX, INT32_C( 4187358), -INT32_C( 1), INT32_C( 7523) } }, + { { INT32_C( 3352422), -INT32_C( 698249524), INT32_C( 1806447909), INT32_C( 800823842) }, + { -INT32_C( 6), INT32_C( 4), -INT32_C( 6), INT32_C( 32) }, + { INT32_C( 52382), INT32_MIN, INT32_C( 28225749), INT32_MAX } }, + { { -INT32_C( 337342704), INT32_C( 1303521110), INT32_C( 425408117), INT32_C( 231996299) }, + { -INT32_C( 32), -INT32_C( 16), -INT32_C( 31), INT32_C( 2) }, + { INT32_C( 0), INT32_C( 19890), INT32_C( 0), INT32_C( 927985196) } }, + { { -INT32_C( 529877133), -INT32_C( 1960225014), -INT32_C( 1757285913), INT32_C( 1764816475) }, + { INT32_C( 20), -INT32_C( 6), INT32_C( 32), INT32_C( 24) }, + { INT32_MIN, -INT32_C( 30628516), INT32_MIN, INT32_MAX } }, + { { INT32_C( 219035158), INT32_C( 2118934887), INT32_C( 641686675), INT32_C( 264846024) }, + { INT32_C( 6), INT32_C( 28), -INT32_C( 1), INT32_C( 12) }, + { INT32_MAX, INT32_MAX, INT32_C( 320843338), INT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int32x4_t r = simde_vqrshlq_s32(a, b); + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vqrshlq_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + int64_t a[2]; + int64_t b[2]; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 
5414884328532004708), -INT64_C( 7732031084687741673) }, + { -INT64_C( 63), INT64_C( 61) }, + { -INT64_C( 1), INT64_MIN } }, + { { INT64_C( 3932969372243887233), -INT64_C( 7443478377457562434) }, + { -INT64_C( 45), -INT64_C( 1) }, + { INT64_C( 111782), -INT64_C( 3721739188728781217) } }, + { { -INT64_C( 4624332781887269652), INT64_C( 9194359439567769551) }, + { INT64_C( 18), INT64_C( 12) }, + { INT64_MIN, INT64_MAX } }, + { { INT64_C( 1303116637892605202), INT64_C( 2411425157371539502) }, + { -INT64_C( 35), -INT64_C( 12) }, + { INT64_C( 37925686), INT64_C( 588726845061411) } }, + { { -INT64_C( 5953546370211056151), INT64_C( 4807810721811645268) }, + { -INT64_C( 15), INT64_C( 44) }, + { -INT64_C( 181687816473726), INT64_MAX } }, + { { -INT64_C( 671209687377174666), -INT64_C( 2105758642682840471) }, + { INT64_C( 35), INT64_C( 6) }, + { INT64_MIN, INT64_MIN } }, + { { -INT64_C( 1318546396640194493), INT64_C( 4726180957159977633) }, + { INT64_C( 33), INT64_C( 3) }, + { INT64_MIN, INT64_MAX } }, + { { INT64_C( 570160631599597498), INT64_C( 7291438976119207855) }, + { -INT64_C( 54), INT64_C( 30) }, + { INT64_C( 32), INT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int64x2_t r = simde_vqrshlq_s64(a, b); + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vqrshlq_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_u8 (SIMDE_MUNIT_TEST_ARGS) { 
+#if 1 + struct { + uint8_t a[16]; + int8_t b[16]; + uint8_t r[16]; + } test_vec[] = { + { { UINT8_C( 48), UINT8_C( 110), UINT8_C( 160), UINT8_C( 230), + UINT8_C( 244), UINT8_C( 14), UINT8_C( 38), UINT8_C( 180), + UINT8_C( 44), UINT8_C( 4), UINT8_C( 143), UINT8_C( 172), + UINT8_C( 51), UINT8_C( 77), UINT8_C( 188), UINT8_C( 81) }, + { INT8_C( 2), -INT8_C( 3), INT8_C( 1), INT8_C( 7), + INT8_C( 1), -INT8_C( 7), -INT8_C( 3), -INT8_C( 3), + -INT8_C( 6), -INT8_C( 6), INT8_C( 0), -INT8_C( 5), + -INT8_C( 7), INT8_C( 0), INT8_C( 1), -INT8_C( 7) }, + { UINT8_C( 192), UINT8_C( 14), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 255), UINT8_C( 0), UINT8_C( 5), UINT8_C( 23), + UINT8_C( 1), UINT8_C( 0), UINT8_C( 143), UINT8_C( 5), + UINT8_C( 0), UINT8_C( 77), UINT8_C( 255), UINT8_C( 1) } }, + { { UINT8_C( 145), UINT8_C( 198), UINT8_C( 59), UINT8_C( 229), + UINT8_C( 193), UINT8_C( 40), UINT8_C( 202), UINT8_C( 18), + UINT8_C( 189), UINT8_C( 226), UINT8_C( 53), UINT8_C( 190), + UINT8_C( 171), UINT8_C( 119), UINT8_C( 184), UINT8_C( 228) }, + { -INT8_C( 5), INT8_C( 2), INT8_C( 3), -INT8_C( 1), + -INT8_C( 6), INT8_C( 6), INT8_C( 0), INT8_C( 5), + INT8_C( 5), INT8_C( 1), -INT8_C( 3), INT8_C( 1), + -INT8_C( 5), INT8_C( 7), -INT8_C( 4), INT8_C( 4) }, + { UINT8_C( 5), UINT8_C( 255), UINT8_C( 255), UINT8_C( 115), + UINT8_C( 3), UINT8_C( 255), UINT8_C( 202), UINT8_C( 255), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 7), UINT8_C( 255), + UINT8_C( 5), UINT8_C( 255), UINT8_C( 12), UINT8_C( 255) } }, + { { UINT8_C( 144), UINT8_C( 70), UINT8_C( 74), UINT8_C( 4), + UINT8_C( 109), UINT8_C( 142), UINT8_C( 116), UINT8_C( 159), + UINT8_C( 142), UINT8_C( 25), UINT8_C( 177), UINT8_C( 177), + UINT8_C( 76), UINT8_C( 150), UINT8_C( 86), UINT8_C( 238) }, + { INT8_C( 0), -INT8_C( 3), INT8_C( 3), -INT8_C( 3), + INT8_C( 5), -INT8_C( 4), -INT8_C( 3), -INT8_C( 2), + INT8_C( 3), -INT8_C( 4), -INT8_C( 3), INT8_C( 3), + INT8_C( 1), INT8_C( 5), INT8_C( 7), INT8_C( 5) }, + { UINT8_C( 144), UINT8_C( 9), UINT8_C( 255), 
UINT8_C( 1), + UINT8_C( 255), UINT8_C( 9), UINT8_C( 15), UINT8_C( 40), + UINT8_C( 255), UINT8_C( 2), UINT8_C( 22), UINT8_C( 255), + UINT8_C( 152), UINT8_C( 255), UINT8_C( 255), UINT8_C( 255) } }, + { { UINT8_C( 199), UINT8_C( 104), UINT8_C( 74), UINT8_C( 67), + UINT8_C( 128), UINT8_C( 140), UINT8_C( 56), UINT8_C( 104), + UINT8_C( 88), UINT8_C( 100), UINT8_C( 110), UINT8_C( 42), + UINT8_C( 123), UINT8_C( 111), UINT8_C( 252), UINT8_C( 156) }, + { -INT8_C( 8), INT8_C( 0), -INT8_C( 7), -INT8_C( 1), + -INT8_C( 8), INT8_C( 4), -INT8_C( 5), INT8_C( 4), + -INT8_C( 2), INT8_C( 5), INT8_C( 0), -INT8_C( 6), + INT8_C( 5), -INT8_C( 8), INT8_C( 8), INT8_C( 5) }, + { UINT8_C( 1), UINT8_C( 104), UINT8_C( 1), UINT8_C( 34), + UINT8_C( 1), UINT8_C( 255), UINT8_C( 2), UINT8_C( 255), + UINT8_C( 22), UINT8_C( 255), UINT8_C( 110), UINT8_C( 1), + UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), UINT8_C( 255) } }, + { { UINT8_C( 140), UINT8_C( 24), UINT8_C( 20), UINT8_C( 116), + UINT8_C( 5), UINT8_C( 138), UINT8_C( 42), UINT8_C( 115), + UINT8_C( 111), UINT8_C( 129), UINT8_C( 142), UINT8_C( 213), + UINT8_C( 155), UINT8_C( 130), UINT8_C( 150), UINT8_C( 231) }, + { INT8_C( 2), -INT8_C( 8), -INT8_C( 8), -INT8_C( 6), + INT8_C( 1), -INT8_C( 5), INT8_C( 1), INT8_C( 2), + -INT8_C( 1), -INT8_C( 2), INT8_C( 5), INT8_C( 5), + -INT8_C( 3), INT8_C( 6), -INT8_C( 1), -INT8_C( 6) }, + { UINT8_C( 255), UINT8_C( 0), UINT8_C( 0), UINT8_C( 2), + UINT8_C( 10), UINT8_C( 4), UINT8_C( 84), UINT8_C( 255), + UINT8_C( 56), UINT8_C( 32), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 19), UINT8_C( 255), UINT8_C( 75), UINT8_C( 4) } }, + { { UINT8_C( 123), UINT8_C( 36), UINT8_C( 41), UINT8_C( 105), + UINT8_C( 46), UINT8_C( 64), UINT8_C( 195), UINT8_C( 242), + UINT8_C( 155), UINT8_C( 176), UINT8_C( 255), UINT8_C( 148), + UINT8_C( 51), UINT8_C( 35), UINT8_C( 168), UINT8_C( 226) }, + { INT8_C( 0), INT8_C( 6), INT8_C( 5), -INT8_C( 6), + INT8_C( 1), -INT8_C( 5), -INT8_C( 4), INT8_C( 2), + INT8_C( 6), INT8_C( 3), -INT8_C( 1), INT8_C( 
7), + -INT8_C( 2), -INT8_C( 2), -INT8_C( 1), INT8_C( 4) }, + { UINT8_C( 123), UINT8_C( 255), UINT8_C( 255), UINT8_C( 2), + UINT8_C( 92), UINT8_C( 2), UINT8_C( 12), UINT8_C( 255), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 128), UINT8_C( 255), + UINT8_C( 13), UINT8_C( 9), UINT8_C( 84), UINT8_C( 255) } }, + { { UINT8_C( 209), UINT8_C( 246), UINT8_C( 234), UINT8_C( 223), + UINT8_C( 203), UINT8_C( 186), UINT8_C( 139), UINT8_C( 148), + UINT8_C( 180), UINT8_C( 18), UINT8_C( 7), UINT8_C( 172), + UINT8_C( 225), UINT8_C( 26), UINT8_C( 149), UINT8_C( 28) }, + { INT8_C( 5), -INT8_C( 1), INT8_C( 4), INT8_C( 1), + INT8_C( 1), INT8_C( 1), INT8_C( 7), -INT8_C( 5), + -INT8_C( 2), INT8_C( 4), -INT8_C( 7), INT8_C( 1), + -INT8_C( 3), -INT8_C( 2), INT8_C( 7), INT8_C( 0) }, + { UINT8_C( 255), UINT8_C( 123), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 255), UINT8_C( 5), + UINT8_C( 45), UINT8_C( 255), UINT8_C( 0), UINT8_C( 255), + UINT8_C( 28), UINT8_C( 7), UINT8_C( 255), UINT8_C( 28) } }, + { { UINT8_C( 186), UINT8_C( 147), UINT8_C( 83), UINT8_C( 114), + UINT8_C( 130), UINT8_C( 80), UINT8_C( 252), UINT8_C( 63), + UINT8_C( 137), UINT8_C( 214), UINT8_C( 119), UINT8_C( 227), + UINT8_C( 29), UINT8_C( 181), UINT8_C( 148), UINT8_C( 21) }, + { INT8_C( 2), INT8_C( 0), INT8_C( 4), INT8_C( 1), + INT8_C( 8), -INT8_C( 6), INT8_C( 3), INT8_C( 6), + INT8_C( 5), -INT8_C( 2), -INT8_C( 6), INT8_C( 0), + INT8_C( 6), INT8_C( 5), INT8_C( 0), INT8_C( 7) }, + { UINT8_C( 255), UINT8_C( 147), UINT8_C( 255), UINT8_C( 228), + UINT8_C( 255), UINT8_C( 1), UINT8_C( 255), UINT8_C( 255), + UINT8_C( 255), UINT8_C( 54), UINT8_C( 2), UINT8_C( 227), + UINT8_C( 255), UINT8_C( 255), UINT8_C( 148), UINT8_C( 255) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + simde_uint8x16_t r = simde_vqrshlq_u8(a, b); + + simde_test_arm_neon_assert_equal_u8x16(r, 
simde_vld1q_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_uint8x16_t r = simde_vqrshlq_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint16_t a[8]; + int16_t b[8]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 61506), UINT16_C( 19083), UINT16_C( 60908), UINT16_C( 40546), + UINT16_C( 29927), UINT16_C( 62112), UINT16_C( 60795), UINT16_C( 39381) }, + { INT16_C( 5), -INT16_C( 9), -INT16_C( 2), -INT16_C( 8), + -INT16_C( 7), -INT16_C( 4), INT16_C( 4), -INT16_C( 9) }, + { UINT16_C( 65535), UINT16_C( 37), UINT16_C( 15227), UINT16_C( 158), + UINT16_C( 234), UINT16_C( 3882), UINT16_C( 65535), UINT16_C( 77) } }, + { { UINT16_C( 52703), UINT16_C( 30753), UINT16_C( 21922), UINT16_C( 13244), + UINT16_C( 23660), UINT16_C( 33835), UINT16_C( 6910), UINT16_C( 21793) }, + { -INT16_C( 11), -INT16_C( 12), INT16_C( 15), INT16_C( 11), + INT16_C( 6), -INT16_C( 1), -INT16_C( 16), -INT16_C( 1) }, + { UINT16_C( 26), UINT16_C( 8), UINT16_C( 65535), UINT16_C( 65535), + UINT16_C( 65535), UINT16_C( 16918), UINT16_C( 0), UINT16_C( 10897) } }, + { { UINT16_C( 49424), UINT16_C( 1272), UINT16_C( 11256), UINT16_C( 15451), + UINT16_C( 38439), UINT16_C( 22519), UINT16_C( 53624), UINT16_C( 56406) }, + { -INT16_C( 11), -INT16_C( 3), INT16_C( 10), -INT16_C( 10), + INT16_C( 12), INT16_C( 3), INT16_C( 9), INT16_C( 6) }, + { UINT16_C( 24), UINT16_C( 159), UINT16_C( 65535), UINT16_C( 15), + UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535), UINT16_C( 65535) } }, + { { UINT16_C( 34648), UINT16_C( 38658), UINT16_C( 18124), UINT16_C( 29610), + 
UINT16_C( 34257), UINT16_C( 23288), UINT16_C( 2479), UINT16_C( 51223) }, + { -INT16_C( 11), -INT16_C( 3), INT16_C( 4), -INT16_C( 10), + INT16_C( 7), -INT16_C( 7), INT16_C( 10), INT16_C( 5) }, + { UINT16_C( 17), UINT16_C( 4832), UINT16_C( 65535), UINT16_C( 29), + UINT16_C( 65535), UINT16_C( 182), UINT16_C( 65535), UINT16_C( 65535) } }, + { { UINT16_C( 34910), UINT16_C( 29561), UINT16_C( 57208), UINT16_C( 321), + UINT16_C( 59682), UINT16_C( 10906), UINT16_C( 56329), UINT16_C( 122) }, + { -INT16_C( 9), INT16_C( 4), -INT16_C( 16), -INT16_C( 1), + INT16_C( 10), -INT16_C( 7), -INT16_C( 1), -INT16_C( 1) }, + { UINT16_C( 68), UINT16_C( 65535), UINT16_C( 1), UINT16_C( 161), + UINT16_C( 65535), UINT16_C( 85), UINT16_C( 28165), UINT16_C( 61) } }, + { { UINT16_C( 17592), UINT16_C( 628), UINT16_C( 30585), UINT16_C( 18746), + UINT16_C( 2909), UINT16_C( 4209), UINT16_C( 6780), UINT16_C( 29795) }, + { -INT16_C( 12), INT16_C( 9), -INT16_C( 11), -INT16_C( 9), + INT16_C( 8), INT16_C( 0), INT16_C( 1), INT16_C( 7) }, + { UINT16_C( 4), UINT16_C( 65535), UINT16_C( 15), UINT16_C( 37), + UINT16_C( 65535), UINT16_C( 4209), UINT16_C( 13560), UINT16_C( 65535) } }, + { { UINT16_C( 27761), UINT16_C( 46906), UINT16_C( 25315), UINT16_C( 54517), + UINT16_C( 3930), UINT16_C( 38934), UINT16_C( 33158), UINT16_C( 15978) }, + { -INT16_C( 5), INT16_C( 3), -INT16_C( 14), -INT16_C( 8), + -INT16_C( 13), INT16_C( 16), -INT16_C( 1), -INT16_C( 15) }, + { UINT16_C( 868), UINT16_C( 65535), UINT16_C( 2), UINT16_C( 213), + UINT16_C( 0), UINT16_C( 65535), UINT16_C( 16579), UINT16_C( 0) } }, + { { UINT16_C( 52730), UINT16_C( 21619), UINT16_C( 31344), UINT16_C( 36500), + UINT16_C( 25393), UINT16_C( 44945), UINT16_C( 18048), UINT16_C( 31672) }, + { -INT16_C( 6), -INT16_C( 9), -INT16_C( 7), INT16_C( 1), + INT16_C( 8), -INT16_C( 5), INT16_C( 0), INT16_C( 7) }, + { UINT16_C( 824), UINT16_C( 42), UINT16_C( 245), UINT16_C( 65535), + UINT16_C( 65535), UINT16_C( 1405), UINT16_C( 18048), UINT16_C( 65535) } }, + }; + + for 
(size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_uint16x8_t r = simde_vqrshlq_u16(a, b); + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_uint16x8_t r = simde_vqrshlq_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint32_t a[4]; + int32_t b[4]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 1500975547), UINT32_C( 2108068888), UINT32_C( 2683398431), UINT32_C( 367549241) }, + { INT32_C( 32), INT32_C( 4), INT32_C( 12), INT32_C( 7) }, + { UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 4294967295) } }, + { { UINT32_C( 3541445283), UINT32_C( 2942869736), UINT32_C( 2791986498), UINT32_C( 3847214939) }, + { -INT32_C( 15), -INT32_C( 2), INT32_C( 3), -INT32_C( 5) }, + { UINT32_C( 108076), UINT32_C( 735717434), UINT32_C( 4294967295), UINT32_C( 120225467) } }, + { { UINT32_C( 2758985098), UINT32_C( 1301200120), UINT32_C( 2500937308), UINT32_C( 1482943544) }, + { -INT32_C( 15), INT32_C( 26), INT32_C( 4), -INT32_C( 31) }, + { UINT32_C( 84198), UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 1) } }, + { { UINT32_C( 1695104610), UINT32_C( 2154042730), UINT32_C( 1247813531), UINT32_C( 539880566) }, + { -INT32_C( 5), -INT32_C( 21), INT32_C( 12), INT32_C( 6) }, + { UINT32_C( 52972019), UINT32_C( 1027), UINT32_C( 4294967295), UINT32_C( 4294967295) } }, + { { UINT32_C( 4087492662), UINT32_C( 
442729018), UINT32_C( 640768667), UINT32_C( 1635123388) }, + { INT32_C( 7), -INT32_C( 3), INT32_C( 7), INT32_C( 31) }, + { UINT32_C( 4294967295), UINT32_C( 55341127), UINT32_C( 4294967295), UINT32_C( 4294967295) } }, + { { UINT32_C( 4260977946), UINT32_C( 2996827211), UINT32_C( 864415427), UINT32_C( 4108108071) }, + { -INT32_C( 16), INT32_C( 7), INT32_C( 10), -INT32_C( 27) }, + { UINT32_C( 65017), UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 31) } }, + { { UINT32_C( 4249729102), UINT32_C( 3434311192), UINT32_C( 2916969933), UINT32_C( 1847713867) }, + { -INT32_C( 31), -INT32_C( 15), -INT32_C( 21), -INT32_C( 21) }, + { UINT32_C( 2), UINT32_C( 104807), UINT32_C( 1391), UINT32_C( 881) } }, + { { UINT32_C( 2544619547), UINT32_C( 3247194592), UINT32_C( 2391939879), UINT32_C( 2988854179) }, + { INT32_C( 5), INT32_C( 5), INT32_C( 30), -INT32_C( 29) }, + { UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 4294967295), UINT32_C( 6) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_uint32x4_t r = simde_vqrshlq_u32(a, b); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_uint32x4_t r = simde_vqrshlq_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vqrshlq_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + uint64_t a[2]; + int64_t b[2]; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C(12257908650438209556), UINT64_C( 734662789645664798) }, + { -INT64_C( 53), 
-INT64_C( 24) }, + { UINT64_C( 1361), UINT64_C( 43789314607) } }, + { { UINT64_C(15401328157929644581), UINT64_C(11927978306316211307) }, + { INT64_C( 54), -INT64_C( 26) }, + { UINT64_C(18446744073709551615), UINT64_C( 177740727459) } }, + { { UINT64_C( 9806006901024356755), UINT64_C( 2220988353704575177) }, + { INT64_C( 18), INT64_C( 48) }, + { UINT64_C(18446744073709551615), UINT64_C(18446744073709551615) } }, + { { UINT64_C(17911317278255258189), UINT64_C(16943905161406011814) }, + { INT64_C( 60), INT64_C( 1) }, + { UINT64_C(18446744073709551615), UINT64_C(18446744073709551615) } }, + { { UINT64_C( 2887718461795313073), UINT64_C(17639433086009224754) }, + { INT64_C( 32), INT64_C( 23) }, + { UINT64_C(18446744073709551615), UINT64_C(18446744073709551615) } }, + { { UINT64_C( 5907043747386205374), UINT64_C(11542381187061170644) }, + { INT64_C( 57), -INT64_C( 15) }, + { UINT64_C(18446744073709551615), UINT64_C( 352245519624670) } }, + { { UINT64_C(15325393461537196969), UINT64_C(16407202451519969415) }, + { -INT64_C( 15), -INT64_C( 50) }, + { UINT64_C( 467693892258826), UINT64_C( 14573) } }, + { { UINT64_C(17890801295880430688), UINT64_C(12355916188791687384) }, + { -INT64_C( 38), -INT64_C( 25) }, + { UINT64_C( 65086356), UINT64_C( 368234997654) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_uint64x2_t r = simde_vqrshlq_u64(a, b); + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_uint64x2_t r = simde_vqrshlq_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + 
simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlb_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlh_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshls_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshld_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlb_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlh_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshls_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshld_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshl_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshlq_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qrshrn_high_n.c b/test/arm/neon/qrshrn_high_n.c new file mode 100644 index 000000000..d41933fc4 --- /dev/null +++ b/test/arm/neon/qrshrn_high_n.c @@ -0,0 +1,550 @@ +#define SIMDE_TEST_ARM_NEON_INSN qrshrn_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qrshrn_high_n.h" + +static int +test_simde_vqrshrn_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int8_t r[8]; + int8_t r1[16]; + int8_t r3[16]; + int8_t r5[16]; + int8_t r6[16]; + int8_t r8[16]; + } test_vec[] = { + { { -INT16_C( 8959), -INT16_C( 20900), INT16_C( 18988), INT16_C( 21099), INT16_C( 29503), INT16_C( 4915), -INT16_C( 29844), -INT16_C( 19802) }, + { INT8_C( 96), INT8_C( 14), -INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63) }, + { INT8_C( 96), INT8_C( 14), 
-INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 96), INT8_C( 14), -INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 96), INT8_C( 14), -INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 96), INT8_C( 14), -INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 77), INT8_MIN, INT8_MIN }, + { INT8_C( 96), INT8_C( 14), -INT8_C(126), -INT8_C( 88), INT8_C( 1), INT8_C( 73), INT8_C( 48), INT8_C( 63), -INT8_C( 35), -INT8_C( 82), INT8_C( 74), INT8_C( 82), INT8_C(115), INT8_C( 19), -INT8_C(117), -INT8_C( 77) } }, + { { INT16_C( 26238), -INT16_C( 985), INT16_C( 29239), -INT16_C( 23094), -INT16_C( 4541), -INT16_C( 17388), INT16_C( 17256), INT16_C( 20751) }, + { INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6) }, + { INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6), INT8_MAX, -INT8_C(123), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6), INT8_MAX, -INT8_C( 31), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6), INT8_MAX, -INT8_C( 15), INT8_MAX, INT8_MIN, -INT8_C( 71), INT8_MIN, INT8_MAX, INT8_MAX }, + { 
INT8_C( 9), -INT8_C(118), -INT8_C(118), -INT8_C( 12), INT8_C( 56), INT8_C( 23), INT8_C( 83), INT8_C( 6), INT8_C(102), -INT8_C( 4), INT8_C(114), -INT8_C( 90), -INT8_C( 18), -INT8_C( 68), INT8_C( 67), INT8_C( 81) } }, + { { INT16_C( 31174), INT16_C( 29637), INT16_C( 8980), -INT16_C( 10006), INT16_C( 10437), -INT16_C( 1914), -INT16_C( 3412), INT16_C( 17144) }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43) }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, -INT8_C( 60), -INT8_C(107), INT8_MAX }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, -INT8_C( 30), -INT8_C( 53), INT8_MAX }, + { -INT8_C( 94), INT8_C( 93), -INT8_C( 37), -INT8_C( 86), INT8_C( 32), -INT8_C(102), -INT8_C( 70), INT8_C( 43), INT8_C(122), INT8_C(116), INT8_C( 35), -INT8_C( 39), INT8_C( 41), -INT8_C( 7), -INT8_C( 13), INT8_C( 67) } }, + { { INT16_C( 21045), -INT16_C( 23324), INT16_C( 22922), INT16_C( 25211), INT16_C( 8402), INT16_C( 2492), INT16_C( 20769), INT16_C( 8198) }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1) }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), 
INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 78), INT8_MAX, INT8_MAX }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_C( 39), INT8_MAX, INT8_MAX }, + { INT8_C( 94), -INT8_C( 29), -INT8_C( 54), INT8_C( 6), INT8_C(109), INT8_C( 3), INT8_C( 41), INT8_C( 1), INT8_C( 82), -INT8_C( 91), INT8_C( 90), INT8_C( 98), INT8_C( 33), INT8_C( 10), INT8_C( 81), INT8_C( 32) } }, + { { -INT16_C( 11612), -INT16_C( 1033), -INT16_C( 17168), -INT16_C( 12007), INT16_C( 22319), INT16_C( 32085), -INT16_C( 9898), -INT16_C( 3457) }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4) }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4), INT8_MIN, -INT8_C( 32), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C(108) }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4), INT8_MIN, -INT8_C( 16), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 54) }, + { INT8_C( 66), INT8_C(109), INT8_C(115), -INT8_C(125), INT8_C( 82), -INT8_C( 81), INT8_C( 46), INT8_C( 4), -INT8_C( 45), -INT8_C( 4), -INT8_C( 67), -INT8_C( 47), INT8_C( 87), INT8_C(125), -INT8_C( 39), -INT8_C( 14) } }, + { { 
INT16_C( 2289), INT16_C( 19821), -INT16_C( 9734), INT16_C( 13238), -INT16_C( 14567), INT16_C( 5610), -INT16_C( 31769), INT16_C( 11544) }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58) }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58), INT8_C( 72), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58), INT8_C( 36), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 88), INT8_MIN, INT8_MAX }, + { -INT8_C( 81), INT8_C( 17), -INT8_C(123), INT8_C( 83), -INT8_C( 4), INT8_C(116), -INT8_C( 22), -INT8_C( 58), INT8_C( 9), INT8_C( 77), -INT8_C( 38), INT8_C( 52), -INT8_C( 57), INT8_C( 22), -INT8_C(124), INT8_C( 45) } }, + { { -INT16_C( 31286), INT16_C( 28817), -INT16_C( 3444), -INT16_C( 20538), -INT16_C( 23929), -INT16_C( 24903), INT16_C( 25277), INT16_C( 25921) }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 12), INT8_C( 38) }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 12), INT8_C( 38), INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 12), INT8_C( 38), INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 
12), INT8_C( 38), INT8_MIN, INT8_MAX, -INT8_C(108), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 12), INT8_C( 38), INT8_MIN, INT8_MAX, -INT8_C( 54), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX }, + { INT8_C( 58), -INT8_C( 55), INT8_C( 59), INT8_C( 21), -INT8_C(101), -INT8_C(119), INT8_C( 12), INT8_C( 38), -INT8_C(122), INT8_C(113), -INT8_C( 13), -INT8_C( 80), -INT8_C( 93), -INT8_C( 97), INT8_C( 99), INT8_C(101) } }, + { { -INT16_C( 27367), INT16_C( 16601), INT16_C( 25930), -INT16_C( 21136), -INT16_C( 25785), INT16_C( 22041), -INT16_C( 21785), INT16_C( 3537) }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48) }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C(111) }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_C( 55) }, + { INT8_C( 12), INT8_C( 59), -INT8_C( 49), INT8_C(103), INT8_C(101), INT8_C( 39), INT8_C( 21), -INT8_C( 48), -INT8_C(107), INT8_C( 65), INT8_C(101), -INT8_C( 83), -INT8_C(101), INT8_C( 86), -INT8_C( 85), INT8_C( 14) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int8x8_t r = simde_vld1_s8(test_vec[i].r); + + simde_int8x16_t r1 = 
simde_vqrshrn_high_n_s16(r, a, 1); + simde_int8x16_t r3 = simde_vqrshrn_high_n_s16(r, a, 3); + simde_int8x16_t r5 = simde_vqrshrn_high_n_s16(r, a, 5); + simde_int8x16_t r6 = simde_vqrshrn_high_n_s16(r, a, 6); + simde_int8x16_t r8 = simde_vqrshrn_high_n_s16(r, a, 8); + + simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vqrshrn_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t r[4]; + int16_t r3[8]; + int16_t r6[8]; + int16_t r10[8]; + int16_t r13[8]; + int16_t r16[8]; + } test_vec[] = { + { { -INT32_C(838827553), INT32_C(1607126122), -INT32_C(694124064), INT32_C(211448551) }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125) }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125), INT16_MIN, INT16_MAX, INT16_MIN, INT16_C(25812) }, + { INT16_C(11897), INT16_C(12501), INT16_C(11247), INT16_C( 9125), -INT16_C(12799), INT16_C(24523), -INT16_C(10591), INT16_C( 3226) } }, + { { INT32_C(849016275), -INT32_C(875822714), -INT32_C(1739265907), INT32_C(973245645) }, + { -INT16_C(27935), -INT16_C( 4116), INT16_C(25434), INT16_C(22961) }, + { -INT16_C(27935), -INT16_C( 4116), INT16_C(25434), INT16_C(22961), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C(27935), -INT16_C( 4116), 
INT16_C(25434), INT16_C(22961), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C(27935), -INT16_C( 4116), INT16_C(25434), INT16_C(22961), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C(27935), -INT16_C( 4116), INT16_C(25434), INT16_C(22961), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C(27935), -INT16_C( 4116), INT16_C(25434), INT16_C(22961), INT16_C(12955), -INT16_C(13364), -INT16_C(26539), INT16_C(14851) } }, + { { -INT32_C(1957939153), -INT32_C(1625665171), -INT32_C(1499812866), INT32_C(707976967) }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046) }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(11870), INT16_C(13351), INT16_C(13405), INT16_C(30046), -INT16_C(29876), -INT16_C(24806), -INT16_C(22885), INT16_C(10803) } }, + { { -INT32_C(2082889817), -INT32_C(1620731234), -INT32_C(1429774637), INT32_C(1507838808) }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538) }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538), INT16_MIN, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C(30166), INT16_C( 2317), INT16_C(30082), INT16_C( 6538), -INT16_C(31782), -INT16_C(24730), -INT16_C(21817), INT16_C(23008) } }, + { { -INT32_C(513980245), 
-INT32_C(1640959032), INT32_C(1061604646), -INT32_C(1446821021) }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706) }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C(24229), INT16_C( 3795), -INT16_C( 2212), -INT16_C(30706), -INT16_C( 7843), -INT16_C(25039), INT16_C(16199), -INT16_C(22077) } }, + { { INT32_C(1589592708), INT32_C(1213245629), INT32_C(1766780598), -INT32_C(429338091) }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784) }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784), INT16_MAX, INT16_MAX, INT16_MAX, INT16_MIN }, + { INT16_C(30697), -INT16_C(32657), INT16_C(31358), -INT16_C(23784), INT16_C(24255), INT16_C(18513), INT16_C(26959), -INT16_C( 6551) } }, + { { -INT32_C(202005891), -INT32_C(1323207242), INT32_C(177620803), INT32_C(2037880450) }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733) }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733), INT16_MIN, 
INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733), -INT16_C(24659), INT16_MIN, INT16_C(21682), INT16_MAX }, + { -INT16_C(23811), -INT16_C(12176), -INT16_C( 1895), INT16_C(24733), -INT16_C( 3082), -INT16_C(20191), INT16_C( 2710), INT16_C(31096) } }, + { { -INT32_C(2081077396), INT32_C(1188157182), INT32_C(587049621), INT32_C(34538214) }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102) }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102), INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102), INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102), INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102), INT16_MIN, INT16_MAX, INT16_MAX, INT16_C( 4216) }, + { INT16_C( 689), INT16_C(15545), INT16_C(18263), INT16_C( 7102), -INT16_C(31755), INT16_C(18130), INT16_C( 8958), INT16_C( 527) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t r = simde_vld1_s16(test_vec[i].r); + + simde_int16x8_t r3 = simde_vqrshrn_high_n_s32(r, a, 3); + simde_int16x8_t r6 = simde_vqrshrn_high_n_s32(r, a, 6); + simde_int16x8_t r10 = simde_vqrshrn_high_n_s32(r, a, 10); + simde_int16x8_t r13 = simde_vqrshrn_high_n_s32(r, a, 13); + simde_int16x8_t r16 = simde_vqrshrn_high_n_s32(r, a, 16); + + simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + } + + return 0; +} + +static int 
+test_simde_vqrshrn_high_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t r[2]; + int32_t r6[4]; + int32_t r13[4]; + int32_t r19[4]; + int32_t r26[4]; + int32_t r32[4]; + } test_vec[] = { + { { -INT64_C(8015303269875855703), -INT64_C(777811017774347363) }, + { INT32_C( 574933518), -INT32_C( 1140181041) }, + { INT32_C( 574933518), -INT32_C( 1140181041), INT32_MIN, INT32_MIN }, + { INT32_C( 574933518), -INT32_C( 1140181041), INT32_MIN, INT32_MIN }, + { INT32_C( 574933518), -INT32_C( 1140181041), INT32_MIN, INT32_MIN }, + { INT32_C( 574933518), -INT32_C( 1140181041), INT32_MIN, INT32_MIN }, + { INT32_C( 574933518), -INT32_C( 1140181041), -INT32_C( 1866208219), -INT32_C( 181098240) } }, + { { -INT64_C(587885589199966149), INT64_C(2787293872381207041) }, + { -INT32_C( 813752761), -INT32_C( 1951079511) }, + { -INT32_C( 813752761), -INT32_C( 1951079511), INT32_MIN, INT32_MAX }, + { -INT32_C( 813752761), -INT32_C( 1951079511), INT32_MIN, INT32_MAX }, + { -INT32_C( 813752761), -INT32_C( 1951079511), INT32_MIN, INT32_MAX }, + { -INT32_C( 813752761), -INT32_C( 1951079511), INT32_MIN, INT32_MAX }, + { -INT32_C( 813752761), -INT32_C( 1951079511), -INT32_C( 136877780), INT32_C( 648967426) } }, + { { INT64_C(1565806582601031981), -INT64_C(3247663395406986982) }, + { -INT32_C( 1379490026), INT32_C( 1383927484) }, + { -INT32_C( 1379490026), INT32_C( 1383927484), INT32_MAX, INT32_MIN }, + { -INT32_C( 1379490026), INT32_C( 1383927484), INT32_MAX, INT32_MIN }, + { -INT32_C( 1379490026), INT32_C( 1383927484), INT32_MAX, INT32_MIN }, + { -INT32_C( 1379490026), INT32_C( 1383927484), INT32_MAX, INT32_MIN }, + { -INT32_C( 1379490026), INT32_C( 1383927484), INT32_C( 364567754), -INT32_C( 756155559) } }, + { { INT64_C(4698504543164676664), INT64_C(5218700252402638633) }, + { INT32_C( 856401848), INT32_C( 10657996) }, + { INT32_C( 856401848), INT32_C( 10657996), INT32_MAX, INT32_MAX }, + { INT32_C( 856401848), INT32_C( 10657996), INT32_MAX, INT32_MAX }, + { 
INT32_C( 856401848), INT32_C( 10657996), INT32_MAX, INT32_MAX }, + { INT32_C( 856401848), INT32_C( 10657996), INT32_MAX, INT32_MAX }, + { INT32_C( 856401848), INT32_C( 10657996), INT32_C( 1093955837), INT32_C( 1215073339) } }, + { { INT64_C(8695645370954972431), -INT64_C(1836535992261928359) }, + { -INT32_C( 1064871033), -INT32_C( 121309809) }, + { -INT32_C( 1064871033), -INT32_C( 121309809), INT32_MAX, INT32_MIN }, + { -INT32_C( 1064871033), -INT32_C( 121309809), INT32_MAX, INT32_MIN }, + { -INT32_C( 1064871033), -INT32_C( 121309809), INT32_MAX, INT32_MIN }, + { -INT32_C( 1064871033), -INT32_C( 121309809), INT32_MAX, INT32_MIN }, + { -INT32_C( 1064871033), -INT32_C( 121309809), INT32_C( 2024612709), -INT32_C( 427601857) } }, + { { -INT64_C(7805581545156263502), -INT64_C(2190689431020379460) }, + { -INT32_C( 1254046386), -INT32_C( 754305916) }, + { -INT32_C( 1254046386), -INT32_C( 754305916), INT32_MIN, INT32_MIN }, + { -INT32_C( 1254046386), -INT32_C( 754305916), INT32_MIN, INT32_MIN }, + { -INT32_C( 1254046386), -INT32_C( 754305916), INT32_MIN, INT32_MIN }, + { -INT32_C( 1254046386), -INT32_C( 754305916), INT32_MIN, INT32_MIN }, + { -INT32_C( 1254046386), -INT32_C( 754305916), -INT32_C( 1817378575), -INT32_C( 510059630) } }, + { { -INT64_C(7462190876657743812), INT64_C(7255948533612809631) }, + { INT32_C( 1609417307), -INT32_C( 632775540) }, + { INT32_C( 1609417307), -INT32_C( 632775540), INT32_MIN, INT32_MAX }, + { INT32_C( 1609417307), -INT32_C( 632775540), INT32_MIN, INT32_MAX }, + { INT32_C( 1609417307), -INT32_C( 632775540), INT32_MIN, INT32_MAX }, + { INT32_C( 1609417307), -INT32_C( 632775540), INT32_MIN, INT32_MAX }, + { INT32_C( 1609417307), -INT32_C( 632775540), -INT32_C( 1737426705), INT32_C( 1689407167) } }, + { { INT64_C(3129277321313730118), INT64_C(9162266446141080176) }, + { INT32_C( 521065375), INT32_C( 1213388315) }, + { INT32_C( 521065375), INT32_C( 1213388315), INT32_MAX, INT32_MAX }, + { INT32_C( 521065375), INT32_C( 1213388315), INT32_MAX, 
INT32_MAX }, + { INT32_C( 521065375), INT32_C( 1213388315), INT32_MAX, INT32_MAX }, + { INT32_C( 521065375), INT32_C( 1213388315), INT32_MAX, INT32_MAX }, + { INT32_C( 521065375), INT32_C( 1213388315), INT32_C( 728591653), INT32_C( 2133256394) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t r = simde_vld1_s32(test_vec[i].r); + + simde_int32x4_t r6 = simde_vqrshrn_high_n_s64(r, a, 6); + simde_int32x4_t r13 = simde_vqrshrn_high_n_s64(r, a, 13); + simde_int32x4_t r19 = simde_vqrshrn_high_n_s64(r, a, 19); + simde_int32x4_t r26 = simde_vqrshrn_high_n_s64(r, a, 26); + simde_int32x4_t r32 = simde_vqrshrn_high_n_s64(r, a, 32); + + simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); + } + + return 0; +} + +static int +test_simde_vqrshrn_high_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint8_t r[8]; + uint8_t r1[16]; + uint8_t r3[16]; + uint8_t r5[16]; + uint8_t r6[16]; + uint8_t r8[16]; + } test_vec[] = { + { { UINT16_C( 24470), UINT16_C( 63986), UINT16_C( 20177), UINT16_C( 31660), UINT16_C( 62352), UINT16_C( 53911), UINT16_C( 56962), UINT16_C( 44856) }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), UINT8_C( 173) }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), UINT8_C( 173), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), 
UINT8_C( 173), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), UINT8_C( 173), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), UINT8_C( 173), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 197), UINT8_C( 177), UINT8_C( 80), UINT8_C( 199), UINT8_C( 199), UINT8_C( 75), UINT8_C( 58), UINT8_C( 173), UINT8_C( 96), UINT8_C( 250), UINT8_C( 79), UINT8_C( 124), UINT8_C( 244), UINT8_C( 211), UINT8_C( 223), UINT8_C( 175) } }, + { { UINT16_C( 5860), UINT16_C( 26864), UINT16_C( 16809), UINT16_C( 27423), UINT16_C( 1079), UINT16_C( 39522), UINT16_C( 37626), UINT16_C( 52910) }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), UINT8_C( 177), UINT8_C( 177) }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), UINT8_C( 177), UINT8_C( 177), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), UINT8_C( 177), UINT8_C( 177), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 135), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), UINT8_C( 177), UINT8_C( 177), UINT8_C( 183), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 34), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), UINT8_C( 177), UINT8_C( 177), UINT8_C( 92), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 17), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 100), UINT8_C( 33), UINT8_C( 61), UINT8_C( 175), UINT8_C( 253), UINT8_C( 92), 
UINT8_C( 177), UINT8_C( 177), UINT8_C( 23), UINT8_C( 105), UINT8_C( 66), UINT8_C( 107), UINT8_C( 4), UINT8_C( 154), UINT8_C( 147), UINT8_C( 207) } }, + { { UINT16_C( 2748), UINT16_C( 108), UINT16_C( 19947), UINT16_C( 21790), UINT16_C( 38555), UINT16_C( 63630), UINT16_C( 14718), UINT16_C( 60222) }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153) }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153), UINT8_MAX, UINT8_C( 54), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153), UINT8_MAX, UINT8_C( 14), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153), UINT8_C( 86), UINT8_C( 3), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153), UINT8_C( 43), UINT8_C( 2), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 230), UINT8_MAX }, + { UINT8_C( 30), UINT8_C( 226), UINT8_C( 46), UINT8_C( 227), UINT8_C( 201), UINT8_C( 102), UINT8_C( 144), UINT8_C( 153), UINT8_C( 11), UINT8_C( 0), UINT8_C( 78), UINT8_C( 85), UINT8_C( 151), UINT8_C( 249), UINT8_C( 57), UINT8_C( 235) } }, + { { UINT16_C( 52200), UINT16_C( 59734), UINT16_C( 60538), UINT16_C( 17968), UINT16_C( 54622), UINT16_C( 48753), UINT16_C( 51901), UINT16_C( 49581) }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_C( 255), UINT8_C( 198), UINT8_C( 172) }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_MAX, UINT8_C( 198), UINT8_C( 172), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, 
UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_MAX, UINT8_C( 198), UINT8_C( 172), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_MAX, UINT8_C( 198), UINT8_C( 172), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_MAX, UINT8_C( 198), UINT8_C( 172), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 10), UINT8_C( 178), UINT8_C( 125), UINT8_C( 66), UINT8_MAX, UINT8_C( 198), UINT8_C( 172), UINT8_C( 204), UINT8_C( 233), UINT8_C( 236), UINT8_C( 70), UINT8_C( 213), UINT8_C( 190), UINT8_C( 203), UINT8_C( 194) } }, + { { UINT16_C( 36034), UINT16_C( 14511), UINT16_C( 22328), UINT16_C( 37025), UINT16_C( 29360), UINT16_C( 36089), UINT16_C( 42636), UINT16_C( 34999) }, + { UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119) }, + { UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119), UINT8_MAX, UINT8_C( 227), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + 
{ UINT8_C( 83), UINT8_C( 110), UINT8_C( 85), UINT8_C( 81), UINT8_C( 158), UINT8_C( 156), UINT8_C( 172), UINT8_C( 119), UINT8_C( 141), UINT8_C( 57), UINT8_C( 87), UINT8_C( 145), UINT8_C( 115), UINT8_C( 141), UINT8_C( 167), UINT8_C( 137) } }, + { { UINT16_C( 30613), UINT16_C( 63781), UINT16_C( 7132), UINT16_C( 20485), UINT16_C( 56272), UINT16_C( 22617), UINT16_C( 27265), UINT16_C( 34256) }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193) }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193), UINT8_MAX, UINT8_MAX, UINT8_C( 223), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193), UINT8_MAX, UINT8_MAX, UINT8_C( 111), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 243), UINT8_C( 105), UINT8_C( 61), UINT8_C( 8), UINT8_C( 81), UINT8_C( 50), UINT8_C( 77), UINT8_C( 193), UINT8_C( 120), UINT8_C( 249), UINT8_C( 28), UINT8_C( 80), UINT8_C( 220), UINT8_C( 88), UINT8_C( 107), UINT8_C( 134) } }, + { { UINT16_C( 48120), UINT16_C( 29559), UINT16_C( 33290), UINT16_C( 12707), UINT16_C( 14758), UINT16_C( 64626), UINT16_C( 33733), UINT16_C( 12553) }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111) }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111), UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 199), UINT8_C( 231), UINT8_MAX, UINT8_MAX, UINT8_C( 196) }, + { UINT8_C( 1), UINT8_C( 250), UINT8_C( 42), UINT8_C( 195), UINT8_C( 208), UINT8_C( 222), UINT8_C( 120), UINT8_C( 111), UINT8_C( 188), UINT8_C( 115), UINT8_C( 130), UINT8_C( 50), UINT8_C( 58), UINT8_C( 252), UINT8_C( 132), UINT8_C( 49) } }, + { { UINT16_C( 16273), UINT16_C( 63973), UINT16_C( 6852), UINT16_C( 53429), UINT16_C( 49781), UINT16_C( 55429), UINT16_C( 2600), UINT16_C( 52231) }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9) }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9), UINT8_MAX, UINT8_MAX, UINT8_C( 214), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 81), UINT8_MAX }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9), UINT8_C( 
254), UINT8_MAX, UINT8_C( 107), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 41), UINT8_MAX }, + { UINT8_C( 99), UINT8_C( 137), UINT8_C( 53), UINT8_C( 146), UINT8_C( 50), UINT8_C( 116), UINT8_C( 190), UINT8_C( 9), UINT8_C( 64), UINT8_C( 250), UINT8_C( 27), UINT8_C( 209), UINT8_C( 194), UINT8_C( 217), UINT8_C( 10), UINT8_C( 204) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); + + simde_uint8x16_t r1 = simde_vqrshrn_high_n_u16(r, a, 1); + simde_uint8x16_t r3 = simde_vqrshrn_high_n_u16(r, a, 3); + simde_uint8x16_t r5 = simde_vqrshrn_high_n_u16(r, a, 5); + simde_uint8x16_t r6 = simde_vqrshrn_high_n_u16(r, a, 6); + simde_uint8x16_t r8 = simde_vqrshrn_high_n_u16(r, a, 8); + + simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vqrshrn_high_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t r[4]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r16[8]; + } test_vec[] = { + { { UINT32_C( 761942977), UINT32_C(3012305286), UINT32_C(2310290612), UINT32_C(2542994018) }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571) }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571), UINT16_MAX, UINT16_MAX, 
UINT16_MAX, UINT16_MAX }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 30761), UINT16_C( 35410), UINT16_C( 34506), UINT16_C( 38571), UINT16_C( 11626), UINT16_C( 45964), UINT16_C( 35252), UINT16_C( 38803) } }, + { { UINT32_C(2782830608), UINT32_C(2268713308), UINT32_C( 49027278), UINT32_C( 818790821) }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941) }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941), UINT16_MAX, UINT16_MAX, UINT16_C( 47878), UINT16_MAX }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941), UINT16_MAX, UINT16_MAX, UINT16_C( 5985), UINT16_MAX }, + { UINT16_C( 45788), UINT16_C( 58593), UINT16_C( 41499), UINT16_C( 24941), UINT16_C( 42463), UINT16_C( 34618), UINT16_C( 748), UINT16_C( 12494) } }, + { { UINT32_C(3585933417), UINT32_C( 926257399), UINT32_C(3632638559), UINT32_C(2995582533) }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494) }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 38100), UINT16_C( 35761), UINT16_C( 3837), UINT16_C( 5494), UINT16_C( 54717), UINT16_C( 14134), UINT16_C( 55430), UINT16_C( 45709) } }, + { { UINT32_C( 340008151), 
UINT32_C(1376253756), UINT32_C(3971226977), UINT32_C(1406890267) }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531) }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531), UINT16_C( 41505), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 11711), UINT16_C( 5240), UINT16_C( 24173), UINT16_C( 29531), UINT16_C( 5188), UINT16_C( 21000), UINT16_C( 60596), UINT16_C( 21467) } }, + { { UINT32_C( 421449578), UINT32_C(3134076478), UINT32_C(3574088918), UINT32_C(2882483406) }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065) }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065), UINT16_C( 51446), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 31960), UINT16_C( 39737), UINT16_C( 30571), UINT16_C( 33065), UINT16_C( 6431), UINT16_C( 47822), UINT16_C( 54536), UINT16_C( 43983) } }, + { { UINT32_C( 583537289), UINT32_C( 434301634), UINT32_C(1444419016), UINT32_C(3837079129) }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), UINT16_C( 9715) }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), UINT16_C( 9715), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), 
UINT16_C( 9715), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), UINT16_C( 9715), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), UINT16_C( 9715), UINT16_MAX, UINT16_C( 53015), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 35705), UINT16_C( 32452), UINT16_C( 10162), UINT16_C( 9715), UINT16_C( 8904), UINT16_C( 6627), UINT16_C( 22040), UINT16_C( 58549) } }, + { { UINT32_C(1525121211), UINT32_C( 50780994), UINT32_C(3903631585), UINT32_C(2029273804) }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627) }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627), UINT16_MAX, UINT16_C( 49591), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627), UINT16_MAX, UINT16_C( 6199), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 13555), UINT16_C( 7478), UINT16_C( 21263), UINT16_C( 64627), UINT16_C( 23272), UINT16_C( 775), UINT16_C( 59565), UINT16_C( 30964) } }, + { { UINT32_C(3426504747), UINT32_C(2501582580), UINT32_C(3527744463), UINT32_C(3789559335) }, + { UINT16_C( 21127), UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777) }, + { UINT16_C( 21127), UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 21127), UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 21127), UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 21127), UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 21127), 
UINT16_C( 62248), UINT16_C( 28059), UINT16_C( 2777), UINT16_C( 52284), UINT16_C( 38171), UINT16_C( 53829), UINT16_C( 57824) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); + + simde_uint16x8_t r3 = simde_vqrshrn_high_n_u32(r, a, 3); + simde_uint16x8_t r6 = simde_vqrshrn_high_n_u32(r, a, 6); + simde_uint16x8_t r10 = simde_vqrshrn_high_n_u32(r, a, 10); + simde_uint16x8_t r13 = simde_vqrshrn_high_n_u32(r, a, 13); + simde_uint16x8_t r16 = simde_vqrshrn_high_n_u32(r, a, 16); + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + } + + return 0; +} + + +static int +test_simde_vqrshrn_high_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t r[2]; + uint32_t r6[4]; + uint32_t r13[4]; + uint32_t r19[4]; + uint32_t r26[4]; + uint32_t r32[4]; + } test_vec[] = { + { { UINT64_C(7203098596803566357), UINT64_C(7876275415649177280) }, + { UINT32_C( 3637702956), UINT32_C( 1215937871) }, + { UINT32_C( 3637702956), UINT32_C( 1215937871), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3637702956), UINT32_C( 1215937871), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3637702956), UINT32_C( 1215937871), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3637702956), UINT32_C( 1215937871), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3637702956), UINT32_C( 1215937871), UINT32_C( 1677102083), UINT32_C( 1833838275) } }, + { { UINT64_C(3581476421860816228), UINT64_C(9270783597829440304) }, + { UINT32_C( 1646634326), UINT32_C( 245511145) }, + { UINT32_C( 1646634326), UINT32_C( 245511145), UINT32_MAX, 
UINT32_MAX }, + { UINT32_C( 1646634326), UINT32_C( 245511145), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1646634326), UINT32_C( 245511145), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1646634326), UINT32_C( 245511145), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1646634326), UINT32_C( 245511145), UINT32_C( 833877461), UINT32_C( 2158522512) } }, + { { UINT64_C(2090022547083456224), UINT64_C(11194466363031690243) }, + { UINT32_C( 287061320), UINT32_C( 1554413028) }, + { UINT32_C( 287061320), UINT32_C( 1554413028), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 287061320), UINT32_C( 1554413028), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 287061320), UINT32_C( 1554413028), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 287061320), UINT32_C( 1554413028), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 287061320), UINT32_C( 1554413028), UINT32_C( 486621295), UINT32_C( 2606414809) } }, + { { UINT64_C(13755076121551726691), UINT64_C(12745973165242992493) }, + { UINT32_C( 1937771326), UINT32_C( 1133803333) }, + { UINT32_C( 1937771326), UINT32_C( 1133803333), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1937771326), UINT32_C( 1133803333), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1937771326), UINT32_C( 1133803333), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1937771326), UINT32_C( 1133803333), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 1937771326), UINT32_C( 1133803333), UINT32_C( 3202603227), UINT32_C( 2967653136) } }, + { { UINT64_C(7457958405694239083), UINT64_C(12033233808983669995) }, + { UINT32_C( 2159049551), UINT32_C( 1055810932) }, + { UINT32_C( 2159049551), UINT32_C( 1055810932), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2159049551), UINT32_C( 1055810932), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2159049551), UINT32_C( 1055810932), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2159049551), UINT32_C( 1055810932), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2159049551), UINT32_C( 1055810932), UINT32_C( 1736441256), UINT32_C( 2801705573) } }, + { { UINT64_C(18146717995352602586), UINT64_C(6450929778631403120) }, + { UINT32_C( 
3226552088), UINT32_C( 732607272) }, + { UINT32_C( 3226552088), UINT32_C( 732607272), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3226552088), UINT32_C( 732607272), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3226552088), UINT32_C( 732607272), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3226552088), UINT32_C( 732607272), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3226552088), UINT32_C( 732607272), UINT32_C( 4225112031), UINT32_C( 1501974133) } }, + { { UINT64_C(10272196743332054363), UINT64_C(6156165500414793683) }, + { UINT32_C( 535844076), UINT32_C( 3274829162) }, + { UINT32_C( 535844076), UINT32_C( 3274829162), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 535844076), UINT32_C( 3274829162), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 535844076), UINT32_C( 3274829162), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 535844076), UINT32_C( 3274829162), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 535844076), UINT32_C( 3274829162), UINT32_C( 2391682179), UINT32_C( 1433343976) } }, + { { UINT64_C(6350607551009767646), UINT64_C(13198785150916825998) }, + { UINT32_C( 2487826802), UINT32_C( 2586560249) }, + { UINT32_C( 2487826802), UINT32_C( 2586560249), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2487826802), UINT32_C( 2586560249), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2487826802), UINT32_C( 2586560249), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2487826802), UINT32_C( 2586560249), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 2487826802), UINT32_C( 2586560249), UINT32_C( 1478616044), UINT32_C( 3073081642) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x2_t r = simde_vld1_u32(test_vec[i].r); + + simde_uint32x4_t r6 = simde_vqrshrn_high_n_u64(r, a, 6); + simde_uint32x4_t r13 = simde_vqrshrn_high_n_u64(r, a, 13); + simde_uint32x4_t r19 = simde_vqrshrn_high_n_u64(r, a, 19); + simde_uint32x4_t r26 = simde_vqrshrn_high_n_u64(r, a, 26); + simde_uint32x4_t r32 = simde_vqrshrn_high_n_u64(r, a, 32); + + 
simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrn_high_n_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qrshrun_high_n.c b/test/arm/neon/qrshrun_high_n.c new file mode 100644 index 000000000..574e571bb --- /dev/null +++ b/test/arm/neon/qrshrun_high_n.c @@ -0,0 +1,279 @@ +#define SIMDE_TEST_ARM_NEON_INSN qrshrun_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qrshrun_high_n.h" + +static int +test_simde_vqrshrun_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + uint8_t r[8]; + uint8_t r1[16]; + uint8_t r3[16]; + uint8_t r5[16]; + uint8_t r6[16]; + uint8_t r8[16]; + } test_vec[] = { + { { -INT16_C( 26901), INT16_C( 24912), -INT16_C( 23060), INT16_C( 11637), INT16_C( 29762), -INT16_C( 28735), INT16_C( 31144), -INT16_C( 31799) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107), UINT8_C( 0), UINT8_MAX, 
UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 182), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0) }, + { UINT8_C( 154), UINT8_C( 89), UINT8_C( 202), UINT8_C( 149), UINT8_C( 247), UINT8_C( 250), UINT8_C( 171), UINT8_C( 107), UINT8_C( 0), UINT8_C( 97), UINT8_C( 0), UINT8_C( 45), UINT8_C( 116), UINT8_C( 0), UINT8_C( 122), UINT8_C( 0) } }, + { { INT16_C( 24401), -INT16_C( 2085), INT16_C( 30663), INT16_C( 29805), INT16_C( 4918), -INT16_C( 25820), -INT16_C( 9103), INT16_C( 17542) }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 18) }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 18), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 18), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 18), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 154), UINT8_C( 0), UINT8_C( 0), UINT8_MAX }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 18), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 77), UINT8_C( 0), UINT8_C( 0), UINT8_MAX }, + { UINT8_C( 245), UINT8_C( 174), UINT8_C( 88), UINT8_C( 87), UINT8_C( 50), UINT8_C( 88), UINT8_C( 33), UINT8_C( 
18), UINT8_C( 95), UINT8_C( 0), UINT8_C( 120), UINT8_C( 116), UINT8_C( 19), UINT8_C( 0), UINT8_C( 0), UINT8_C( 69) } }, + { { INT16_C( 1440), INT16_C( 15455), INT16_C( 29476), INT16_C( 6151), -INT16_C( 10272), INT16_C( 6729), -INT16_C( 10868), -INT16_C( 27237) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_C( 255), UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_MAX, UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_MAX, UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243), UINT8_C( 180), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_MAX, UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243), UINT8_C( 45), UINT8_MAX, UINT8_MAX, UINT8_C( 192), UINT8_C( 0), UINT8_C( 210), UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_MAX, UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243), UINT8_C( 23), UINT8_C( 241), UINT8_MAX, UINT8_C( 96), UINT8_C( 0), UINT8_C( 105), UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 218), UINT8_C( 81), UINT8_C( 95), UINT8_MAX, UINT8_C( 146), UINT8_C( 139), UINT8_C( 110), UINT8_C( 243), UINT8_C( 6), UINT8_C( 60), UINT8_C( 115), UINT8_C( 24), UINT8_C( 0), UINT8_C( 26), UINT8_C( 0), UINT8_C( 0) } }, + { { -INT16_C( 26011), INT16_C( 29256), -INT16_C( 4522), INT16_C( 13968), INT16_C( 12300), INT16_C( 2638), -INT16_C( 10285), INT16_C( 1479) }, + { UINT8_C( 46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177) }, + { UINT8_C( 46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX }, + { UINT8_C( 
46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 185) }, + { UINT8_C( 46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 82), UINT8_C( 0), UINT8_C( 46) }, + { UINT8_C( 46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 218), UINT8_C( 192), UINT8_C( 41), UINT8_C( 0), UINT8_C( 23) }, + { UINT8_C( 46), UINT8_C( 201), UINT8_C( 162), UINT8_C( 0), UINT8_C( 40), UINT8_C( 76), UINT8_C( 118), UINT8_C( 177), UINT8_C( 0), UINT8_C( 114), UINT8_C( 0), UINT8_C( 55), UINT8_C( 48), UINT8_C( 10), UINT8_C( 0), UINT8_C( 6) } }, + { { -INT16_C( 22025), -INT16_C( 8648), INT16_C( 27418), -INT16_C( 6829), INT16_C( 15803), -INT16_C( 8520), INT16_C( 7722), INT16_C( 12519) }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27) }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_C( 241), UINT8_MAX }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), 
UINT8_C( 247), UINT8_C( 0), UINT8_C( 121), UINT8_C( 196) }, + { UINT8_C( 62), UINT8_C( 106), UINT8_C( 236), UINT8_C( 116), UINT8_C( 113), UINT8_C( 139), UINT8_C( 132), UINT8_C( 27), UINT8_C( 0), UINT8_C( 0), UINT8_C( 107), UINT8_C( 0), UINT8_C( 62), UINT8_C( 0), UINT8_C( 30), UINT8_C( 49) } }, + { { INT16_C( 2745), INT16_C( 14738), -INT16_C( 5721), -INT16_C( 30689), INT16_C( 3405), INT16_C( 8929), INT16_C( 58), -INT16_C( 3352) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 29), UINT8_C( 0) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 7), UINT8_C( 0) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76), UINT8_C( 86), UINT8_MAX, UINT8_C( 0), UINT8_C( 0), UINT8_C( 106), UINT8_MAX, UINT8_C( 2), UINT8_C( 0) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76), UINT8_C( 43), UINT8_C( 230), UINT8_C( 0), UINT8_C( 0), UINT8_C( 53), UINT8_C( 140), UINT8_C( 1), UINT8_C( 0) }, + { UINT8_C( 107), UINT8_C( 100), UINT8_C( 114), UINT8_C( 2), UINT8_C( 187), UINT8_C( 235), UINT8_C( 199), UINT8_C( 76), UINT8_C( 11), UINT8_C( 58), UINT8_C( 0), UINT8_C( 0), UINT8_C( 13), UINT8_C( 35), UINT8_C( 0), UINT8_C( 0) } }, + { { -INT16_C( 6193), INT16_C( 8857), INT16_C( 26831), -INT16_C( 2865), INT16_C( 7898), INT16_C( 31948), -INT16_C( 25650), -INT16_C( 24171) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), 
UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234), UINT8_C( 0), UINT8_MAX, UINT8_MAX, UINT8_C( 0), UINT8_C( 247), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234), UINT8_C( 0), UINT8_C( 138), UINT8_MAX, UINT8_C( 0), UINT8_C( 123), UINT8_MAX, UINT8_C( 0), UINT8_C( 0) }, + { UINT8_C( 124), UINT8_C( 47), UINT8_C( 3), UINT8_C( 133), UINT8_C( 190), UINT8_C( 204), UINT8_C( 29), UINT8_C( 234), UINT8_C( 0), UINT8_C( 35), UINT8_C( 105), UINT8_C( 0), UINT8_C( 31), UINT8_C( 125), UINT8_C( 0), UINT8_C( 0) } }, + { { -INT16_C( 1239), -INT16_C( 3301), INT16_C( 3917), -INT16_C( 12085), INT16_C( 14863), -INT16_C( 5705), INT16_C( 9730), INT16_C( 6678) }, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129) }, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_C( 122), UINT8_C( 0), UINT8_MAX, UINT8_C( 0), UINT8_MAX, UINT8_C( 209) 
}, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_C( 61), UINT8_C( 0), UINT8_C( 232), UINT8_C( 0), UINT8_C( 152), UINT8_C( 104) }, + { UINT8_C( 31), UINT8_C( 43), UINT8_C( 45), UINT8_C( 150), UINT8_C( 141), UINT8_C( 252), UINT8_C( 112), UINT8_C( 129), UINT8_C( 0), UINT8_C( 0), UINT8_C( 15), UINT8_C( 0), UINT8_C( 58), UINT8_C( 0), UINT8_C( 38), UINT8_C( 26) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); + + simde_uint8x16_t r1 = simde_vqrshrun_high_n_s16(r, a, 1); + simde_uint8x16_t r3 = simde_vqrshrun_high_n_s16(r, a, 3); + simde_uint8x16_t r5 = simde_vqrshrun_high_n_s16(r, a, 5); + simde_uint8x16_t r6 = simde_vqrshrun_high_n_s16(r, a, 6); + simde_uint8x16_t r8 = simde_vqrshrun_high_n_s16(r, a, 8); + + simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vqrshrun_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + uint16_t r[4]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r16[8]; + } test_vec[] = { + { { -INT32_C(1363234875), -INT32_C( 417022877), -INT32_C( 258303598), INT32_C(1951319462) }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093) }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093), 
UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_MAX }, + { UINT16_C( 15475), UINT16_C( 43243), UINT16_C( 52233), UINT16_C( 36093), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 29775) } }, + { { -INT32_C(1053233509), -INT32_C(1268107216), -INT32_C(2036943405), -INT32_C( 350219836) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 36487), UINT16_C( 65004), UINT16_C( 33777), UINT16_C( 51946), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { -INT32_C( 786909360), -INT32_C(1579932703), INT32_C(1606062187), INT32_C( 838808178) }, + { UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766) }, + { UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX }, + { UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766), UINT16_C( 0), UINT16_C( 0), UINT16_MAX, UINT16_MAX }, + 
{ UINT16_C( 52851), UINT16_C( 16940), UINT16_C( 39624), UINT16_C( 45766), UINT16_C( 0), UINT16_C( 0), UINT16_C( 24507), UINT16_C( 12799) } }, + { { -INT32_C(2051899715), INT32_C(1170898446), INT32_C(1151727967), INT32_C(2128278906) }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942) }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C( 25343), UINT16_C( 36824), UINT16_C( 54187), UINT16_C( 53942), UINT16_C( 0), UINT16_C( 17866), UINT16_C( 17574), UINT16_C( 32475) } }, + { { INT32_C(2121284719), INT32_C( 132979968), -INT32_C(1351241229), -INT32_C(1056295181) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509), UINT16_MAX, UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509), UINT16_MAX, UINT16_C( 16233), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 48579), UINT16_C( 61937), UINT16_C( 45916), UINT16_C( 45509), UINT16_C( 32368), UINT16_C( 2029), UINT16_C( 0), UINT16_C( 0) } }, + { { -INT32_C(1284406174), -INT32_C(1229648600), -INT32_C(2034326506), -INT32_C(1122523652) }, + { UINT16_C( 4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429) }, + { UINT16_C( 
4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 4801), UINT16_C( 9858), UINT16_C( 54817), UINT16_C( 11429), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0), UINT16_C( 0) } }, + { { -INT32_C(1887739279), INT32_C(2111374391), INT32_C( 588014740), -INT32_C(1460994965) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371), UINT16_C( 0), UINT16_MAX, UINT16_MAX, UINT16_C( 0) }, + { UINT16_C( 1614), UINT16_C( 44820), UINT16_C( 9628), UINT16_C( 56371), UINT16_C( 0), UINT16_C( 32217), UINT16_C( 8972), UINT16_C( 0) } }, + { { -INT32_C(1056644303), INT32_C(1476868805), -INT32_C(2146154485), -INT32_C( 936697718) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147), UINT16_C( 0), 
UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147), UINT16_C( 0), UINT16_MAX, UINT16_C( 0), UINT16_C( 0) }, + { UINT16_C( 41652), UINT16_C( 63603), UINT16_C( 12915), UINT16_C( 44147), UINT16_C( 0), UINT16_C( 22535), UINT16_C( 0), UINT16_C( 0) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); + + simde_uint16x8_t r3 = simde_vqrshrun_high_n_s32(r, a, 3); + simde_uint16x8_t r6 = simde_vqrshrun_high_n_s32(r, a, 6); + simde_uint16x8_t r10 = simde_vqrshrun_high_n_s32(r, a, 10); + simde_uint16x8_t r13 = simde_vqrshrun_high_n_s32(r, a, 13); + simde_uint16x8_t r16 = simde_vqrshrun_high_n_s32(r, a, 16); + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + } + + return 0; +} + +static int +test_simde_vqrshrun_high_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + uint32_t r[2]; + uint32_t r6[4]; + uint32_t r13[4]; + uint32_t r19[4]; + uint32_t r26[4]; + uint32_t r32[4]; + } test_vec[] = { + { { INT64_C(6922806263380053875), -INT64_C(8099866424266803793) }, + { UINT32_C( 760640094), UINT32_C( 594382363) }, + { UINT32_C( 760640094), UINT32_C( 594382363), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 760640094), UINT32_C( 594382363), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 760640094), UINT32_C( 594382363), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 760640094), UINT32_C( 594382363), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 760640094), UINT32_C( 594382363), UINT32_C( 1611841438), UINT32_C( 0) } }, + { { 
-INT64_C(2271711873451389669), INT64_C(7783093147260015164) }, + { UINT32_C( 2062287409), UINT32_C( 405592575) }, + { UINT32_C( 2062287409), UINT32_C( 405592575), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 2062287409), UINT32_C( 405592575), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 2062287409), UINT32_C( 405592575), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 2062287409), UINT32_C( 405592575), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 2062287409), UINT32_C( 405592575), UINT32_C( 0), UINT32_C( 1812142587) } }, + { { INT64_C(8517667088258568146), INT64_C(270917303239280145) }, + { UINT32_C( 3262654376), UINT32_C( 2356467133) }, + { UINT32_C( 3262654376), UINT32_C( 2356467133), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3262654376), UINT32_C( 2356467133), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3262654376), UINT32_C( 2356467133), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 3262654376), UINT32_C( 2356467133), UINT32_MAX, UINT32_C( 4036982406) }, + { UINT32_C( 3262654376), UINT32_C( 2356467133), UINT32_C( 1983173911), UINT32_C( 63077850) } }, + { { INT64_C(4853728554929975710), -INT64_C(4081064942288582998) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 4281591674), UINT32_C( 1973792079), UINT32_C( 1130096744), UINT32_C( 0) } }, + { { INT64_C(3014540358851394141), INT64_C(5706656414358883996) }, + { UINT32_C( 4260730317), UINT32_C( 4070936592) }, + { UINT32_C( 4260730317), UINT32_C( 4070936592), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 4260730317), UINT32_C( 4070936592), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 4260730317), UINT32_C( 4070936592), UINT32_MAX, UINT32_MAX }, + { UINT32_C( 4260730317), UINT32_C( 4070936592), UINT32_MAX, UINT32_MAX }, + { 
UINT32_C( 4260730317), UINT32_C( 4070936592), UINT32_C( 701877372), UINT32_C( 1328684486) } }, + { { -INT64_C(7138003406927655947), -INT64_C(3703770648631034745) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415), UINT32_C( 0), UINT32_C( 0) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415), UINT32_C( 0), UINT32_C( 0) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415), UINT32_C( 0), UINT32_C( 0) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415), UINT32_C( 0), UINT32_C( 0) }, + { UINT32_C( 2372538698), UINT32_C( 2756569415), UINT32_C( 0), UINT32_C( 0) } }, + { { INT64_C(6877059253522308642), -INT64_C(4587225836650138282) }, + { UINT32_C( 271301593), UINT32_C( 3580377968) }, + { UINT32_C( 271301593), UINT32_C( 3580377968), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 271301593), UINT32_C( 3580377968), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 271301593), UINT32_C( 3580377968), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 271301593), UINT32_C( 3580377968), UINT32_MAX, UINT32_C( 0) }, + { UINT32_C( 271301593), UINT32_C( 3580377968), UINT32_C( 1601190132), UINT32_C( 0) } }, + { { -INT64_C(4940186427400519059), INT64_C(3197242908979989750) }, + { UINT32_C( 3107228164), UINT32_C( 2643418518) }, + { UINT32_C( 3107228164), UINT32_C( 2643418518), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 3107228164), UINT32_C( 2643418518), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 3107228164), UINT32_C( 2643418518), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 3107228164), UINT32_C( 2643418518), UINT32_C( 0), UINT32_MAX }, + { UINT32_C( 3107228164), UINT32_C( 2643418518), UINT32_C( 0), UINT32_C( 744416124) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_uint32x2_t r = simde_vld1_u32(test_vec[i].r); + + simde_uint32x4_t r6 = simde_vqrshrun_high_n_s64(r, a, 6); + simde_uint32x4_t r13 = simde_vqrshrun_high_n_s64(r, a, 13); + simde_uint32x4_t r19 = 
simde_vqrshrun_high_n_s64(r, a, 19); + simde_uint32x4_t r26 = simde_vqrshrun_high_n_s64(r, a, 26); + simde_uint32x4_t r32 = simde_vqrshrun_high_n_s64(r, a, 32); + + simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrun_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrun_high_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqrshrun_high_n_s64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qshl_n.c b/test/arm/neon/qshl_n.c new file mode 100644 index 000000000..920764613 --- /dev/null +++ b/test/arm/neon/qshl_n.c @@ -0,0 +1,1269 @@ +#define SIMDE_TEST_ARM_NEON_INSN qshl + +#include "test-neon.h" +#include "../../../simde/arm/neon/qshl_n.h" + +/* Until v12, clang used unsigned parameters on the scalar versions, so + * when testing we need to disable the -Wsign-conversion to avoid a + * diagnostic. If this is a problem in your code you might want to + * consider using simde_vqshl* instead of vqshl* so you can avoid an + * ifdef. 
*/ + +#if HEDLEY_HAS_WARNING("-Wsign-conversion") && defined(SIMDE_NATIVE_ALIASES_TESTING) + #pragma clang diagnostic ignored "-Wsign-conversion" +#endif + +static int +test_simde_vqshlb_n_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a; + int8_t n; + int8_t r; + } test_vec[] = { + { INT8_C( 84), + INT8_C( 1), + INT8_MAX }, + { INT8_C( 76), + INT8_C( 3), + INT8_MAX }, + { -INT8_C( 66), + INT8_C( 0), + -INT8_C( 66) }, + { -INT8_C( 71), + INT8_C( 2), + INT8_MIN }, + { -INT8_C( 123), + INT8_C( 6), + INT8_MIN }, + { -INT8_C( 64), + INT8_C( 6), + INT8_MIN }, + { INT8_C( 53), + INT8_C( 4), + INT8_MAX }, + { -INT8_C( 36), + INT8_C( 1), + -INT8_C( 72) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int8_t r; + SIMDE_CONSTIFY_8_(simde_vqshlb_n_s8, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_i8(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshlh_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a; + int16_t n; + int16_t r; + } test_vec[] = { + { -INT16_C( 24158), + INT16_C( 6), + INT16_MIN }, + { -INT16_C( 3172), + INT16_C( 0), + -INT16_C( 3172) }, + { -INT16_C( 1716), + INT16_C( 9), + INT16_MIN }, + { INT16_C( 12698), + INT16_C( 0), + INT16_C( 12698) }, + { -INT16_C( 20989), + INT16_C( 13), + INT16_MIN }, + { INT16_C( 29643), + INT16_C( 2), + INT16_MAX }, + { -INT16_C( 32563), + INT16_C( 15), + INT16_MIN }, + { -INT16_C( 9395), + INT16_C( 9), + INT16_MIN }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int16_t r; + SIMDE_CONSTIFY_16_(simde_vqshlh_n_s16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_i16(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshls_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a; + int32_t n; + int32_t r; + } test_vec[] = { + { INT32_C( 734070998), + INT32_C( 22), + INT32_MAX }, + { -INT32_C( 1381815004), 
+ INT32_C( 9), + INT32_MIN }, + { INT32_C( 56553928), + INT32_C( 0), + INT32_C( 56553928) }, + { -INT32_C( 2086394238), + INT32_C( 23), + INT32_MIN }, + { -INT32_C( 2044455317), + INT32_C( 14), + INT32_MIN }, + { -INT32_C( 1355920605), + INT32_C( 6), + INT32_MIN }, + { -INT32_C( 1446659077), + INT32_C( 17), + INT32_MIN }, + { -INT32_C( 474948635), + INT32_C( 17), + INT32_MIN }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int32_t r; + SIMDE_CONSTIFY_32_(simde_vqshls_n_s32, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_i32(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshld_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a; + int64_t n; + int64_t r; + } test_vec[] = { + { -INT64_C( 3292810008776062702), + INT64_C( 25), + INT64_MIN }, + { INT64_C( 2746279207385181801), + INT64_C( 4), + INT64_MAX }, + { INT64_C( 6492565024277658433), + INT64_C( 59), + INT64_MAX }, + { INT64_C( 9109811792200353237), + INT64_C( 17), + INT64_MAX }, + { INT64_C( 8088818188310847726), + INT64_C( 46), + INT64_MAX }, + { -INT64_C( 1088132690814548021), + INT64_C( 58), + INT64_MIN }, + { -INT64_C( 6033727136566393495), + INT64_C( 12), + INT64_MIN }, + { -INT64_C( 7914593034248993296), + INT64_C( 40), + INT64_MIN }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t r; + SIMDE_CONSTIFY_64_(simde_vqshld_n_s64, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_i64(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshlb_n_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a; + int8_t n; + uint8_t r; + } test_vec[] = { + { UINT8_C( 179), + INT8_C( 0), + UINT8_C( 179) }, + { UINT8_C( 125), + INT8_C( 0), + UINT8_C( 125) }, + { UINT8_C( 162), + INT8_C( 4), + UINT8_MAX }, + { UINT8_C( 107), + INT8_C( 7), + UINT8_MAX }, + { UINT8_C( 149), + INT8_C( 0), + UINT8_C( 149) }, + { UINT8_C( 
80), + INT8_C( 2), + UINT8_MAX }, + { UINT8_C( 209), + INT8_C( 5), + UINT8_MAX }, + { UINT8_C( 248), + INT8_C( 3), + UINT8_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint8_t r; + SIMDE_CONSTIFY_8_(simde_vqshlb_n_u8, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_u8(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshlh_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a; + int16_t n; + uint16_t r; + } test_vec[] = { + { UINT16_C( 16208), + INT16_C( 2), + UINT16_C( 64832) }, + { UINT16_C( 6064), + INT16_C( 9), + UINT16_MAX }, + { UINT16_C( 24349), + INT16_C( 12), + UINT16_MAX }, + { UINT16_C( 43044), + INT16_C( 15), + UINT16_MAX }, + { UINT16_C( 17835), + INT16_C( 10), + UINT16_MAX }, + { UINT16_C( 35158), + INT16_C( 7), + UINT16_MAX }, + { UINT16_C( 29392), + INT16_C( 4), + UINT16_MAX }, + { UINT16_C( 45176), + INT16_C( 3), + UINT16_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint16_t r; + SIMDE_CONSTIFY_16_(simde_vqshlh_n_u16, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_u16(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshls_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a; + int32_t n; + uint32_t r; + } test_vec[] = { + { UINT32_C(449556519), + INT32_C( 0), + UINT32_C(449556519) }, + { UINT32_C(789920460), + INT32_C( 3), + UINT32_MAX }, + { UINT32_C(34137786), + INT32_C( 21), + UINT32_MAX }, + { UINT32_C(949374081), + INT32_C( 4), + UINT32_MAX }, + { UINT32_C(3915653368), + INT32_C( 2), + UINT32_MAX }, + { UINT32_C(1460287780), + INT32_C( 25), + UINT32_MAX }, + { UINT32_C(4178386259), + INT32_C( 20), + UINT32_MAX }, + { UINT32_C(750328005), + INT32_C( 11), + UINT32_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint32_t r; + SIMDE_CONSTIFY_32_(simde_vqshls_n_u32, r, 
(HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_u32(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshld_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a; + int64_t n; + uint64_t r; + } test_vec[] = { + { UINT64_C( 8344124081252985275), + INT64_C( 55), + UINT64_MAX }, + { UINT64_C( 636454356997588084), + INT64_C( 52), + UINT64_MAX }, + { UINT64_C(13982781360687174434), + INT64_C( 35), + UINT64_MAX }, + { UINT64_C(12813806123922193343), + INT64_C( 10), + UINT64_MAX }, + { UINT64_C( 8314098057120347358), + INT64_C( 23), + UINT64_MAX }, + { UINT64_C( 1432565870918233790), + INT64_C( 31), + UINT64_MAX }, + { UINT64_C(16708691832672280797), + INT64_C( 20), + UINT64_MAX }, + { UINT64_C(10506927218060336816), + INT64_C( 28), + UINT64_MAX }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint64_t r; + SIMDE_CONSTIFY_64_(simde_vqshld_n_u64, r, (HEDLEY_UNREACHABLE(), r), test_vec[i].n, test_vec[i].a); + + simde_assert_equal_u64(r, test_vec[i].r); + } + + return 0; +} + +static int +test_simde_vqshl_n_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[8]; + int8_t n; + int8_t r[8]; + } test_vec[] = { + { { -INT8_C( 80), -INT8_C( 100), -INT8_C( 63), INT8_C( 1), + INT8_C( 115), -INT8_C( 83), INT8_C( 75), -INT8_C( 33) }, + INT8_C( 7), + { INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, + INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN } }, + { { INT8_C( 5), -INT8_C( 106), INT8_C( 59), INT8_C( 120), + -INT8_C( 119), -INT8_C( 120), -INT8_C( 53), INT8_C( 28) }, + INT8_C( 4), + { INT8_C( 80), INT8_MIN, INT8_MAX, INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX } }, + { { INT8_C( 115), INT8_C( 109), -INT8_C( 77), -INT8_C( 38), + -INT8_C( 98), -INT8_C( 64), -INT8_C( 55), INT8_C( 118) }, + INT8_C( 3), + { INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX } }, + { { -INT8_C( 32), INT8_C( 121), -INT8_C( 61), INT8_C( 46), + -INT8_C( 110), INT8_C( 
119), INT8_C( 37), INT8_C( 70) }, + INT8_C( 1), + { -INT8_C( 64), INT8_MAX, -INT8_C( 122), INT8_C( 92), + INT8_MIN, INT8_MAX, INT8_C( 74), INT8_MAX } }, + { { -INT8_C( 46), -INT8_C( 52), INT8_C( 66), INT8_C( 116), + INT8_C( 16), INT8_C( 16), -INT8_C( 118), INT8_C( 75) }, + INT8_C( 5), + { INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, + INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX } }, + { { -INT8_C( 114), -INT8_C( 34), INT8_C( 123), INT8_C( 13), + INT8_C( 125), INT8_C( 7), INT8_MAX, INT8_C( 87) }, + INT8_C( 7), + { INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, + INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX } }, + { { -INT8_C( 125), INT8_C( 70), -INT8_C( 83), -INT8_C( 88), + INT8_C( 125), -INT8_C( 81), -INT8_C( 97), INT8_C( 116) }, + INT8_C( 5), + { INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, + INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX } }, + { { -INT8_C( 123), -INT8_C( 110), INT8_C( 87), -INT8_C( 18), + INT8_C( 43), INT8_C( 53), INT8_C( 1), -INT8_C( 74) }, + INT8_C( 2), + { INT8_MIN, INT8_MIN, INT8_MAX, -INT8_C( 72), + INT8_MAX, INT8_MAX, INT8_C( 4), INT8_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); + int8_t n = test_vec[i].n; + simde_int8x8_t r; + SIMDE_CONSTIFY_8_(simde_vqshl_n_s8, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[4]; + int16_t n; + int16_t r[4]; + } test_vec[] = { + { { INT16_C( 9725), INT16_C( 16146), INT16_C( 30907), INT16_C( 10580) }, + INT16_C( 11), + { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } }, + { { -INT16_C( 31565), -INT16_C( 4370), INT16_C( 3592), -INT16_C( 9974) }, + INT16_C( 9), + { INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN } }, + { { -INT16_C( 30344), INT16_C( 13984), INT16_C( 7545), INT16_C( 22501) }, + INT16_C( 7), + { INT16_MIN, INT16_MAX, INT16_MAX, INT16_MAX } }, + { { 
-INT16_C( 14652), -INT16_C( 20646), -INT16_C( 25937), INT16_C( 31330) }, + INT16_C( 1), + { -INT16_C( 29304), INT16_MIN, INT16_MIN, INT16_MAX } }, + { { INT16_C( 5001), INT16_C( 16082), INT16_C( 243), INT16_C( 17363) }, + INT16_C( 8), + { INT16_MAX, INT16_MAX, INT16_MAX, INT16_MAX } }, + { { INT16_C( 6796), -INT16_C( 10974), -INT16_C( 26277), INT16_C( 20908) }, + INT16_C( 7), + { INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, + { { -INT16_C( 27688), -INT16_C( 2852), INT16_C( 160), -INT16_C( 10339) }, + INT16_C( 12), + { INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN } }, + { { INT16_C( 31921), -INT16_C( 242), INT16_C( 31809), -INT16_C( 5166) }, + INT16_C( 11), + { INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + int16_t n = test_vec[i].n; + simde_int16x4_t r; + SIMDE_CONSTIFY_16_(simde_vqshl_n_s16, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[2]; + int32_t n; + int32_t r[2]; + } test_vec[] = { + { { INT32_C( 1884530711), INT32_C( 365640215) }, + INT32_C( 5), + { INT32_MAX, INT32_MAX } }, + { { INT32_C( 221212881), INT32_C( 337739226) }, + INT32_C( 14), + { INT32_MAX, INT32_MAX } }, + { { -INT32_C( 607613899), INT32_C( 1197518390) }, + INT32_C( 27), + { INT32_MIN, INT32_MAX } }, + { { -INT32_C( 1520164690), INT32_C( 1403295604) }, + INT32_C( 1), + { INT32_MIN, INT32_MAX } }, + { { INT32_C( 334195012), INT32_C( 1395495779) }, + INT32_C( 25), + { INT32_MAX, INT32_MAX } }, + { { -INT32_C( 546807629), -INT32_C( 1654755203) }, + INT32_C( 27), + { INT32_MIN, INT32_MIN } }, + { { -INT32_C( 2111406087), -INT32_C( 945040322) }, + INT32_C( 2), + { INT32_MIN, INT32_MIN } }, + { { INT32_C( 698760047), INT32_C( 1029414862) }, + INT32_C( 4), + { INT32_MAX, 
INT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + int32_t n = test_vec[i].n; + simde_int32x2_t r; + SIMDE_CONSTIFY_32_(simde_vqshl_n_s32, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[1]; + int64_t n; + int64_t r[1]; + } test_vec[] = { + { { INT64_C( 317419771032513583) }, + INT64_C( 18), + { INT64_MAX } }, + { { -INT64_C( 222543486516657844) }, + INT64_C( 52), + { INT64_MIN } }, + { { INT64_C( 4878483773718223654) }, + INT64_C( 41), + { INT64_MAX } }, + { { -INT64_C( 1366810703334118649) }, + INT64_C( 58), + { INT64_MIN } }, + { { INT64_C( 8647965643940482670) }, + INT64_C( 56), + { INT64_MAX } }, + { { INT64_C( 5183741275295133239) }, + INT64_C( 3), + { INT64_MAX } }, + { { -INT64_C( 4131520758077874604) }, + INT64_C( 53), + { INT64_MIN } }, + { { INT64_C( 1919316409459330283) }, + INT64_C( 1), + { INT64_C( 3838632818918660566) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + int64_t n = test_vec[i].n; + simde_int64x1_t r; + SIMDE_CONSTIFY_64_(simde_vqshl_n_s64, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i64x1(r, simde_vld1_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[8]; + int8_t n; + uint8_t r[8]; + } test_vec[] = { + { { UINT8_C( 146), UINT8_C( 209), UINT8_C( 5), UINT8_C( 55), + UINT8_C( 228), UINT8_C( 113), UINT8_C( 10), UINT8_C( 75) }, + INT8_C( 0), + { UINT8_C( 146), UINT8_C( 209), UINT8_C( 5), UINT8_C( 55), + UINT8_C( 228), UINT8_C( 113), UINT8_C( 10), UINT8_C( 75) } }, + { { UINT8_C( 8), UINT8_C( 125), UINT8_C( 50), UINT8_C( 60), + UINT8_C( 168), 
UINT8_C( 192), UINT8_C( 133), UINT8_C( 210) }, + INT8_C( 5), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 47), UINT8_C( 234), UINT8_C( 171), UINT8_C( 41), + UINT8_C( 212), UINT8_C( 203), UINT8_C( 184), UINT8_C( 9) }, + INT8_C( 3), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 72) } }, + { { UINT8_C( 123), UINT8_C( 74), UINT8_C( 23), UINT8_C( 204), + UINT8_C( 97), UINT8_C( 84), UINT8_C( 249), UINT8_C( 45) }, + INT8_C( 2), + { UINT8_MAX, UINT8_MAX, UINT8_C( 92), UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 180) } }, + { { UINT8_C( 141), UINT8_C( 216), UINT8_C( 188), UINT8_C( 30), + UINT8_C( 231), UINT8_C( 14), UINT8_C( 129), UINT8_C( 227) }, + INT8_C( 2), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 120), + UINT8_MAX, UINT8_C( 56), UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 221), UINT8_C( 8), UINT8_C( 50), UINT8_C( 126), + UINT8_C( 44), UINT8_C( 181), UINT8_C( 112), UINT8_C( 221) }, + INT8_C( 1), + { UINT8_MAX, UINT8_C( 16), UINT8_C( 100), UINT8_C( 252), + UINT8_C( 88), UINT8_MAX, UINT8_C( 224), UINT8_MAX } }, + { { UINT8_C( 135), UINT8_C( 224), UINT8_C( 43), UINT8_C( 156), + UINT8_C( 223), UINT8_C( 65), UINT8_C( 24), UINT8_C( 124) }, + INT8_C( 3), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_C( 192), UINT8_MAX } }, + { { UINT8_C( 160), UINT8_C( 69), UINT8_C( 70), UINT8_C( 3), + UINT8_C( 34), UINT8_C( 141), UINT8_C( 170), UINT8_C( 146) }, + INT8_C( 2), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 12), + UINT8_C( 136), UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); + int8_t n = test_vec[i].n; + simde_uint8x8_t r; + SIMDE_CONSTIFY_8_(simde_vqshl_n_u8, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); + } + + return 0; +} + +static 
int +test_simde_vqshl_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[4]; + int16_t n; + uint16_t r[4]; + } test_vec[] = { + { { UINT16_C(23557), UINT16_C(62573), UINT16_C(56816), UINT16_C(33942) }, + INT16_C( 0), + { UINT16_C(23557), UINT16_C(62573), UINT16_C(56816), UINT16_C(33942) } }, + { { UINT16_C(16407), UINT16_C(21374), UINT16_C( 769), UINT16_C(30772) }, + INT16_C( 6), + { UINT16_MAX, UINT16_MAX, UINT16_C(49216), UINT16_MAX } }, + { { UINT16_C(59806), UINT16_C( 6417), UINT16_C(55258), UINT16_C(55600) }, + INT16_C( 11), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(42142), UINT16_C(38610), UINT16_C(40324), UINT16_C(26101) }, + INT16_C( 15), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(11943), UINT16_C(59523), UINT16_C(33503), UINT16_C(21075) }, + INT16_C( 13), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(50967), UINT16_C( 2200), UINT16_C(35906), UINT16_C(34630) }, + INT16_C( 14), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(10546), UINT16_C(48207), UINT16_C(52042), UINT16_C(37855) }, + INT16_C( 12), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(29483), UINT16_C(43989), UINT16_C(56132), UINT16_C(31170) }, + INT16_C( 2), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + int16_t n = test_vec[i].n; + simde_uint16x4_t r; + SIMDE_CONSTIFY_16_(simde_vqshl_n_u16, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[2]; + int32_t n; + uint32_t r[2]; + } test_vec[] = { + { { UINT32_C( 3837263964), UINT32_C( 4037083674) }, + INT32_C( 2), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 827273341), 
UINT32_C( 1695249226) }, + INT32_C( 11), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 3696103338), UINT32_C( 1664182853) }, + INT32_C( 6), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 2503106558), UINT32_C( 3522453693) }, + INT32_C( 7), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 1922781787), UINT32_C( 1012930042) }, + INT32_C( 24), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 3662817880), UINT32_C( 1393521286) }, + INT32_C( 28), + { UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 406630999), UINT32_C( 2068454351) }, + INT32_C( 3), + { UINT32_C( 3253047992), UINT32_MAX } }, + { { UINT32_C( 2431908342), UINT32_C( 1900918880) }, + INT32_C( 0), + { UINT32_C( 2431908342), UINT32_C( 1900918880) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + int32_t n = test_vec[i].n; + simde_uint32x2_t r; + SIMDE_CONSTIFY_32_(simde_vqshl_n_u32, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshl_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[1]; + int64_t n; + uint64_t r[1]; + } test_vec[] = { + { { UINT64_C(14791235225441363636) }, + INT64_C( 15), + { UINT64_MAX } }, + { { UINT64_C( 6444339481039961058) }, + INT64_C( 33), + { UINT64_MAX } }, + { { UINT64_C( 165573460250930159) }, + INT64_C( 19), + { UINT64_MAX } }, + { { UINT64_C( 1670120376566874410) }, + INT64_C( 13), + { UINT64_MAX } }, + { { UINT64_C( 8676255852403586864) }, + INT64_C( 2), + { UINT64_MAX } }, + { { UINT64_C( 3980418442267725771) }, + INT64_C( 55), + { UINT64_MAX } }, + { { UINT64_C(13746015185500539990) }, + INT64_C( 47), + { UINT64_MAX } }, + { { UINT64_C( 5376833857251923407) }, + INT64_C( 38), + { UINT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + int64_t n = 
test_vec[i].n; + simde_uint64x1_t r; + SIMDE_CONSTIFY_64_(simde_vqshl_n_u64, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u64x1(r, simde_vld1_u64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int8_t a[16]; + int8_t n; + int8_t r[16]; + } test_vec[] = { + { { INT8_MAX, INT8_C( 68), INT8_C( 19), -INT8_C( 19), + INT8_C( 112), -INT8_C( 21), -INT8_C( 7), -INT8_C( 32), + -INT8_C( 115), -INT8_C( 57), INT8_C( 3), INT8_C( 93), + -INT8_C( 16), -INT8_C( 46), -INT8_C( 12), -INT8_C( 51) }, + INT8_C( 5), + { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, + INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, + INT8_MIN, INT8_MIN, INT8_C( 96), INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN } }, + { { INT8_C( 75), INT8_C( 41), -INT8_C( 49), -INT8_C( 3), + -INT8_C( 26), -INT8_C( 18), -INT8_C( 72), INT8_C( 123), + -INT8_C( 111), INT8_C( 52), -INT8_C( 1), INT8_C( 125), + -INT8_C( 94), -INT8_C( 127), -INT8_C( 67), INT8_C( 70) }, + INT8_C( 4), + { INT8_MAX, INT8_MAX, INT8_MIN, -INT8_C( 48), + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, + INT8_MIN, INT8_MAX, -INT8_C( 16), INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX } }, + { { -INT8_C( 7), INT8_C( 17), INT8_C( 62), -INT8_C( 33), + -INT8_C( 7), INT8_C( 54), -INT8_C( 34), INT8_C( 126), + INT8_C( 44), INT8_C( 36), INT8_C( 112), INT8_C( 21), + -INT8_C( 65), -INT8_C( 103), -INT8_C( 42), -INT8_C( 73) }, + INT8_C( 3), + { -INT8_C( 56), INT8_MAX, INT8_MAX, INT8_MIN, + -INT8_C( 56), INT8_MAX, INT8_MIN, INT8_MAX, + INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN } }, + { { INT8_C( 45), -INT8_C( 127), -INT8_C( 113), INT8_C( 84), + INT8_C( 2), -INT8_C( 111), -INT8_C( 122), INT8_C( 92), + -INT8_C( 123), -INT8_C( 117), -INT8_C( 67), -INT8_C( 30), + -INT8_C( 7), INT8_C( 18), -INT8_C( 74), -INT8_C( 20) }, + INT8_C( 2), + { INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, + INT8_C( 8), INT8_MIN, INT8_MIN, INT8_MAX, + INT8_MIN, 
INT8_MIN, INT8_MIN, -INT8_C( 120), + -INT8_C( 28), INT8_C( 72), INT8_MIN, -INT8_C( 80) } }, + { { INT8_C( 109), INT8_C( 46), INT8_C( 12), INT8_C( 78), + INT8_C( 18), INT8_C( 3), INT8_C( 80), INT8_C( 7), + -INT8_C( 55), -INT8_C( 56), INT8_C( 4), -INT8_C( 63), + INT8_C( 114), -INT8_C( 82), -INT8_C( 66), INT8_C( 119) }, + INT8_C( 5), + { INT8_MAX, INT8_MAX, INT8_MAX, INT8_MAX, + INT8_MAX, INT8_C( 96), INT8_MAX, INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, + INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX } }, + { { INT8_C( 35), -INT8_C( 115), -INT8_C( 60), -INT8_C( 100), + INT8_C( 55), -INT8_C( 23), INT8_C( 40), INT8_C( 84), + -INT8_C( 104), INT8_C( 3), -INT8_C( 91), INT8_C( 109), + -INT8_C( 120), INT8_C( 98), -INT8_C( 110), INT8_C( 34) }, + INT8_C( 3), + { INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, + INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, + INT8_MIN, INT8_C( 24), INT8_MIN, INT8_MAX, + INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX } }, + { { INT8_C( 103), -INT8_C( 47), INT8_C( 52), -INT8_C( 121), + -INT8_C( 67), -INT8_C( 59), -INT8_C( 28), -INT8_C( 29), + INT8_C( 2), INT8_C( 21), INT8_C( 21), -INT8_C( 43), + -INT8_C( 60), INT8_C( 31), -INT8_C( 34), INT8_C( 113) }, + INT8_C( 1), + { INT8_MAX, -INT8_C( 94), INT8_C( 104), INT8_MIN, + INT8_MIN, -INT8_C( 118), -INT8_C( 56), -INT8_C( 58), + INT8_C( 4), INT8_C( 42), INT8_C( 42), -INT8_C( 86), + -INT8_C( 120), INT8_C( 62), -INT8_C( 68), INT8_MAX } }, + { { -INT8_C( 79), INT8_C( 78), INT8_C( 39), INT8_C( 89), + -INT8_C( 89), -INT8_C( 100), INT8_C( 115), -INT8_C( 48), + -INT8_C( 33), -INT8_C( 101), -INT8_C( 53), INT8_C( 88), + -INT8_C( 81), INT8_C( 27), -INT8_C( 18), INT8_C( 19) }, + INT8_C( 6), + { INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, + INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, + INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, + INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + int8_t n = test_vec[i].n; + 
simde_int8x16_t r; + SIMDE_CONSTIFY_8_(simde_vqshlq_n_s8, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int16_t n; + int16_t r[8]; + } test_vec[] = { + { { INT16_C( 11402), -INT16_C( 20682), -INT16_C( 13768), INT16_C( 31974), + -INT16_C( 6132), -INT16_C( 21000), INT16_C( 8430), -INT16_C( 10986) }, + INT16_C( 4), + { INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX, + INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN } }, + { { -INT16_C( 24232), INT16_C( 13117), -INT16_C( 17819), -INT16_C( 14174), + INT16_C( 26072), -INT16_C( 32178), -INT16_C( 11710), INT16_C( 21944) }, + INT16_C( 7), + { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MIN, + INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, + { { INT16_C( 7021), -INT16_C( 21234), INT16_C( 5328), -INT16_C( 28727), + INT16_C( 14164), -INT16_C( 27093), -INT16_C( 17534), INT16_C( 2033) }, + INT16_C( 12), + { INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN, + INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, + { { -INT16_C( 30558), -INT16_C( 31585), INT16_C( 22733), -INT16_C( 28634), + INT16_C( 15106), -INT16_C( 14901), -INT16_C( 18342), INT16_C( 10197) }, + INT16_C( 2), + { INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN, + INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, + { { -INT16_C( 18543), INT16_C( 14497), -INT16_C( 1780), INT16_C( 13368), + INT16_C( 32003), INT16_C( 13327), -INT16_C( 21518), -INT16_C( 7936) }, + INT16_C( 13), + { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, + INT16_MAX, INT16_MAX, INT16_MIN, INT16_MIN } }, + { { INT16_C( 155), -INT16_C( 13819), -INT16_C( 26598), -INT16_C( 19722), + INT16_C( 19524), -INT16_C( 18703), -INT16_C( 30213), -INT16_C( 29351) }, + INT16_C( 2), + { INT16_C( 620), INT16_MIN, INT16_MIN, INT16_MIN, + INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN } }, + { { -INT16_C( 20411), INT16_C( 30376), -INT16_C( 17575), INT16_C( 
15232), + INT16_C( 16824), -INT16_C( 25087), -INT16_C( 28836), INT16_C( 29871) }, + INT16_C( 14), + { INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX, + INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX } }, + { { INT16_C( 14794), -INT16_C( 27275), -INT16_C( 22632), -INT16_C( 5225), + -INT16_C( 11754), -INT16_C( 17095), INT16_C( 9679), -INT16_C( 29724) }, + INT16_C( 2), + { INT16_MAX, INT16_MIN, INT16_MIN, -INT16_C( 20900), + INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + int16_t n = test_vec[i].n; + simde_int16x8_t r; + SIMDE_CONSTIFY_16_(simde_vqshlq_n_s16, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int32_t n; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 1074729899), -INT32_C( 350055315), -INT32_C( 594235603), -INT32_C( 2026764391) }, + INT32_C( 21), + { INT32_MAX, INT32_MIN, INT32_MIN, INT32_MIN } }, + { { -INT32_C( 1545159008), INT32_C( 1350275068), INT32_C( 1650390489), -INT32_C( 1738000333) }, + INT32_C( 16), + { INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN } }, + { { -INT32_C( 494308548), INT32_C( 694414272), INT32_C( 1701559315), INT32_C( 611372704) }, + INT32_C( 14), + { INT32_MIN, INT32_MAX, INT32_MAX, INT32_MAX } }, + { { -INT32_C( 2137731107), INT32_C( 914115316), -INT32_C( 540329282), INT32_C( 2086573817) }, + INT32_C( 8), + { INT32_MIN, INT32_MAX, INT32_MIN, INT32_MAX } }, + { { -INT32_C( 1951684747), INT32_C( 1204536179), INT32_C( 922714634), -INT32_C( 1800556517) }, + INT32_C( 15), + { INT32_MIN, INT32_MAX, INT32_MAX, INT32_MIN } }, + { { INT32_C( 945136202), -INT32_C( 472613203), INT32_C( 1912776620), -INT32_C( 164578039) }, + INT32_C( 0), + { INT32_C( 945136202), -INT32_C( 472613203), INT32_C( 1912776620), -INT32_C( 
164578039) } }, + { { INT32_C( 750743177), -INT32_C( 1595705273), -INT32_C( 774986565), INT32_C( 1814824482) }, + INT32_C( 2), + { INT32_MAX, INT32_MIN, INT32_MIN, INT32_MAX } }, + { { -INT32_C( 484591267), -INT32_C( 1792996225), INT32_C( 1020785704), -INT32_C( 2059266172) }, + INT32_C( 6), + { INT32_MIN, INT32_MIN, INT32_MAX, INT32_MIN } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + int32_t n = test_vec[i].n; + simde_int32x4_t r; + SIMDE_CONSTIFY_32_(simde_vqshlq_n_s32, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int64_t n; + int64_t r[2]; + } test_vec[] = { + { { -INT64_C( 4042258481157755441), -INT64_C( 5433841412613238413) }, + INT64_C( 59), + { INT64_MIN, INT64_MIN } }, + { { -INT64_C( 2646123060523157640), INT64_C( 3641907379906915618) }, + INT64_C( 9), + { INT64_MIN, INT64_MAX } }, + { { INT64_C( 2503202682359869433), INT64_C( 1978542443908903786) }, + INT64_C( 11), + { INT64_MAX, INT64_MAX } }, + { { -INT64_C( 7077121127573266665), -INT64_C( 7559225274706753147) }, + INT64_C( 40), + { INT64_MIN, INT64_MIN } }, + { { INT64_C( 2763049935594417468), INT64_C( 9127028968420169616) }, + INT64_C( 47), + { INT64_MAX, INT64_MAX } }, + { { INT64_C( 5001850119325242351), INT64_C( 3813836005973833327) }, + INT64_C( 25), + { INT64_MAX, INT64_MAX } }, + { { INT64_C( 8591594818391808077), INT64_C( 4108322868374008343) }, + INT64_C( 45), + { INT64_MAX, INT64_MAX } }, + { { -INT64_C( 4504095822229056778), INT64_C( 6849365327622501025) }, + INT64_C( 5), + { INT64_MIN, INT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + int64_t n = test_vec[i].n; + simde_int64x2_t r; + 
SIMDE_CONSTIFY_64_(simde_vqshlq_n_s64, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_i64x2(r, simde_vld1q_s64(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint8_t a[16]; + int8_t n; + uint8_t r[16]; + } test_vec[] = { + { { UINT8_C( 186), UINT8_C( 109), UINT8_C( 81), UINT8_C( 134), + UINT8_C( 246), UINT8_C( 134), UINT8_C( 185), UINT8_C( 141), + UINT8_C( 53), UINT8_C( 217), UINT8_C( 4), UINT8_C( 118), + UINT8_C( 66), UINT8_C( 45), UINT8_C( 17), UINT8_C( 65) }, + INT8_C( 0), + { UINT8_C( 186), UINT8_C( 109), UINT8_C( 81), UINT8_C( 134), + UINT8_C( 246), UINT8_C( 134), UINT8_C( 185), UINT8_C( 141), + UINT8_C( 53), UINT8_C( 217), UINT8_C( 4), UINT8_C( 118), + UINT8_C( 66), UINT8_C( 45), UINT8_C( 17), UINT8_C( 65) } }, + { { UINT8_C( 132), UINT8_C( 39), UINT8_C( 54), UINT8_C( 85), + UINT8_C( 113), UINT8_C( 202), UINT8_C( 126), UINT8_C( 234), + UINT8_C( 66), UINT8_C( 109), UINT8_C( 75), UINT8_C( 160), + UINT8_C( 23), UINT8_C( 174), UINT8_C( 144), UINT8_C( 169) }, + INT8_C( 4), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 107), UINT8_C( 195), UINT8_C( 7), UINT8_C( 117), + UINT8_C( 8), UINT8_C( 215), UINT8_C( 35), UINT8_C( 11), + UINT8_C( 23), UINT8_C( 194), UINT8_C( 151), UINT8_C( 21), + UINT8_C( 181), UINT8_C( 123), UINT8_C( 222), UINT8_C( 212) }, + INT8_C( 7), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 58), UINT8_C( 168), UINT8_C( 225), UINT8_C( 204), + UINT8_C( 148), UINT8_C( 52), UINT8_C( 237), UINT8_C( 118), + UINT8_C( 62), UINT8_C( 154), UINT8_MAX, UINT8_C( 103), + UINT8_C( 31), UINT8_C( 152), UINT8_C( 61), UINT8_C( 177) }, + INT8_C( 6), + { 
UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 117), UINT8_C( 3), UINT8_C( 6), UINT8_C( 170), + UINT8_C( 220), UINT8_C( 137), UINT8_C( 137), UINT8_C( 114), + UINT8_C( 139), UINT8_C( 224), UINT8_C( 117), UINT8_C( 34), + UINT8_C( 126), UINT8_C( 143), UINT8_C( 47), UINT8_C( 245) }, + INT8_C( 5), + { UINT8_MAX, UINT8_C( 96), UINT8_C( 192), UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 100), UINT8_C( 121), UINT8_C( 133), UINT8_C( 168), + UINT8_C( 236), UINT8_C( 205), UINT8_C( 116), UINT8_C( 163), + UINT8_C( 147), UINT8_C( 117), UINT8_C( 41), UINT8_C( 166), + UINT8_C( 170), UINT8_C( 76), UINT8_C( 212), UINT8_C( 62) }, + INT8_C( 7), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 193), UINT8_C( 91), UINT8_C( 159), UINT8_C( 251), + UINT8_C( 51), UINT8_C( 137), UINT8_C( 73), UINT8_C( 100), + UINT8_C( 86), UINT8_C( 110), UINT8_C( 7), UINT8_C( 116), + UINT8_C( 62), UINT8_C( 173), UINT8_C( 209), UINT8_C( 170) }, + INT8_C( 6), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX } }, + { { UINT8_C( 223), UINT8_C( 187), UINT8_C( 86), UINT8_C( 89), + UINT8_C( 188), UINT8_C( 139), UINT8_C( 84), UINT8_C( 48), + UINT8_C( 199), UINT8_C( 56), UINT8_C( 174), UINT8_C( 180), + UINT8_C( 121), UINT8_C( 197), UINT8_C( 124), UINT8_C( 123) }, + INT8_C( 5), + { UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, + UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX 
} }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + int8_t n = test_vec[i].n; + simde_uint8x16_t r; + SIMDE_CONSTIFY_8_(simde_vqshlq_n_u8, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + int16_t n; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C(20921), UINT16_C(44565), UINT16_C(48346), UINT16_C(18082), + UINT16_C(31795), UINT16_C(38471), UINT16_C( 261), UINT16_C( 9990) }, + INT16_C( 10), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(19165), UINT16_C(14129), UINT16_C(23876), UINT16_C(44179), + UINT16_C(14014), UINT16_C(45088), UINT16_C(35405), UINT16_C(21342) }, + INT16_C( 1), + { UINT16_C(38330), UINT16_C(28258), UINT16_C(47752), UINT16_MAX, + UINT16_C(28028), UINT16_MAX, UINT16_MAX, UINT16_C(42684) } }, + { { UINT16_C( 2818), UINT16_C(48024), UINT16_C(31015), UINT16_C(28717), + UINT16_C(39774), UINT16_C(43608), UINT16_C(31534), UINT16_C(13717) }, + INT16_C( 9), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(19963), UINT16_C(59131), UINT16_C(34949), UINT16_C(19923), + UINT16_C(36646), UINT16_C( 4078), UINT16_C(53516), UINT16_C(24038) }, + INT16_C( 1), + { UINT16_C(39926), UINT16_MAX, UINT16_MAX, UINT16_C(39846), + UINT16_MAX, UINT16_C( 8156), UINT16_MAX, UINT16_C(48076) } }, + { { UINT16_C(11014), UINT16_C( 7857), UINT16_C(61776), UINT16_C(20318), + UINT16_C(15467), UINT16_C(53633), UINT16_C(44067), UINT16_C(42553) }, + INT16_C( 7), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(13019), UINT16_C(19351), UINT16_C(10797), UINT16_C(64624), + UINT16_C(36591), 
UINT16_C(29446), UINT16_C(42245), UINT16_C(33465) }, + INT16_C( 6), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(15341), UINT16_C(22299), UINT16_C(14571), UINT16_C(29620), + UINT16_C(54193), UINT16_C(26425), UINT16_C(32143), UINT16_C( 6653) }, + INT16_C( 8), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + { { UINT16_C(51956), UINT16_C(21950), UINT16_C(38324), UINT16_C(54094), + UINT16_C(41677), UINT16_C(14867), UINT16_C(39747), UINT16_C(55099) }, + INT16_C( 4), + { UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, + UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + int16_t n = test_vec[i].n; + simde_uint16x8_t r; + SIMDE_CONSTIFY_16_(simde_vqshlq_n_u16, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + int32_t n; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 77096255), UINT32_C( 3070792671), UINT32_C( 2120128025), UINT32_C( 452322529) }, + INT32_C( 30), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 3510062727), UINT32_C( 1881790280), UINT32_C( 1143057217), UINT32_C( 1329755098) }, + INT32_C( 11), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 1998881456), UINT32_C( 2891577961), UINT32_C( 3965455494), UINT32_C( 1907172378) }, + INT32_C( 5), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 3960644261), UINT32_C( 2580070008), UINT32_C( 3811852699), UINT32_C( 3327946214) }, + INT32_C( 5), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 2229128376), UINT32_C( 2833259986), UINT32_C( 489617211), UINT32_C( 
2210012836) }, + INT32_C( 30), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 4163257641), UINT32_C( 3821803140), UINT32_C( 1681327744), UINT32_C( 3941018405) }, + INT32_C( 31), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 3661383228), UINT32_C( 325227988), UINT32_C( 1713852693), UINT32_C( 682566008) }, + INT32_C( 30), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + { { UINT32_C( 2377386162), UINT32_C( 3794665476), UINT32_C( 2685443980), UINT32_C( 3553657506) }, + INT32_C( 19), + { UINT32_MAX, UINT32_MAX, UINT32_MAX, UINT32_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + int32_t n = test_vec[i].n; + simde_uint32x4_t r; + SIMDE_CONSTIFY_32_(simde_vqshlq_n_u32, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; +} + +static int +test_simde_vqshlq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + int64_t n; + uint64_t r[2]; + } test_vec[] = { + { { UINT64_C(10322855567311411961), UINT64_C(11440125154735590159) }, + INT64_C( 28), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C( 9512585635638000888), UINT64_C( 4961040813524946467) }, + INT64_C( 13), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C( 2911675089381803092), UINT64_C(17223170097124323585) }, + INT64_C( 35), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C(13241740830667826537), UINT64_C(10243761294436612052) }, + INT64_C( 6), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C(12871405697179562635), UINT64_C( 5888130355122776938) }, + INT64_C( 51), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C( 3754104458717627412), UINT64_C( 1305152970539606068) }, + INT64_C( 35), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C( 9656345281810485236), UINT64_C(12018909814426922402) }, + INT64_C( 37), + { UINT64_MAX, UINT64_MAX } }, + { { UINT64_C(10478671498998178654), 
UINT64_C( 9663870709090986916) }, + INT64_C( 8), + { UINT64_MAX, UINT64_MAX } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + int64_t n = test_vec[i].n; + simde_uint64x2_t r; + SIMDE_CONSTIFY_64_(simde_vqshlq_n_u64, r, (HEDLEY_UNREACHABLE(), r), n, a); + + simde_test_arm_neon_assert_equal_u64x2(r, simde_vld1q_u64(test_vec[i].r)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlb_n_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlh_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshls_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshld_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlb_n_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlh_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshls_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshld_n_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshl_n_u64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshlq_n_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qshrn_high_n.c b/test/arm/neon/qshrn_high_n.c new file mode 100644 index 000000000..7fe2ebd58 --- /dev/null +++ b/test/arm/neon/qshrn_high_n.c @@ -0,0 +1,549 @@ +#define SIMDE_TEST_ARM_NEON_INSN qshrn_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/qshrn_high_n.h" + +static int +test_simde_vqshrn_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int8_t r[8]; + int8_t r1[16]; + int8_t 
r3[16]; + int8_t r5[16]; + int8_t r6[16]; + int8_t r8[16]; + } test_vec[] = { + { { INT16_C( 10216), -INT16_C( 24429), -INT16_C( 14067), INT16_C( 9392), INT16_C( 19587), -INT16_C( 32492), INT16_C( 28881), INT16_C( 10954) }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121) }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX }, + { -INT8_C( 121), INT8_C( 23), -INT8_C( 125), -INT8_C( 11), INT8_C( 79), -INT8_C( 3), INT8_C( 106), -INT8_C( 121), INT8_C( 39), -INT8_C( 96), -INT8_C( 55), INT8_C( 36), INT8_C( 76), -INT8_C( 127), INT8_C( 112), INT8_C( 42) } }, + { { -INT16_C( 22100), -INT16_C( 27202), INT16_C( 31275), INT16_C( 29280), -INT16_C( 27394), INT16_C( 8677), -INT16_C( 8078), -INT16_C( 13655) }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30) }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, 
INT8_MIN, INT8_MIN }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30), INT8_MIN, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, -INT8_C( 127), INT8_MIN }, + { -INT8_C( 22), INT8_C( 103), INT8_C( 106), INT8_C( 121), -INT8_C( 74), INT8_C( 26), -INT8_C( 122), INT8_C( 30), -INT8_C( 87), -INT8_C( 107), INT8_C( 122), INT8_C( 114), -INT8_C( 108), INT8_C( 33), -INT8_C( 32), -INT8_C( 54) } }, + { { -INT16_C( 14844), INT16_C( 23748), INT16_C( 15764), INT16_C( 11881), -INT16_C( 14418), INT16_C( 14298), INT16_C( 22528), INT16_C( 16001) }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83) }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83), INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MAX, INT8_MAX }, + { INT8_C( 5), INT8_C( 52), -INT8_C( 101), -INT8_C( 65), -INT8_C( 50), INT8_C( 52), INT8_C( 73), -INT8_C( 83), -INT8_C( 58), INT8_C( 92), INT8_C( 61), INT8_C( 46), -INT8_C( 57), INT8_C( 55), INT8_C( 88), INT8_C( 62) } }, + { { -INT16_C( 23532), -INT16_C( 12659), -INT16_C( 29861), INT16_C( 16868), -INT16_C( 
22458), INT16_C( 28700), -INT16_C( 18092), -INT16_C( 7765) }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68) }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, -INT8_C( 122) }, + { INT8_C( 56), INT8_C( 77), -INT8_C( 23), -INT8_C( 108), -INT8_C( 86), INT8_C( 93), INT8_C( 52), INT8_C( 68), -INT8_C( 92), -INT8_C( 50), -INT8_C( 117), INT8_C( 65), -INT8_C( 88), INT8_C( 112), -INT8_C( 71), -INT8_C( 31) } }, + { { INT16_C( 20461), -INT16_C( 741), -INT16_C( 4533), -INT16_C( 9203), -INT16_C( 31212), -INT16_C( 3743), -INT16_C( 21233), INT16_C( 27370) }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44) }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44), INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44), INT8_MAX, -INT8_C( 93), INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44), INT8_MAX, -INT8_C( 24), INT8_MIN, INT8_MIN, 
INT8_MIN, -INT8_C( 117), INT8_MIN, INT8_MAX }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44), INT8_MAX, -INT8_C( 12), -INT8_C( 71), INT8_MIN, INT8_MIN, -INT8_C( 59), INT8_MIN, INT8_MAX }, + { -INT8_C( 17), INT8_C( 23), -INT8_C( 109), INT8_C( 27), -INT8_C( 77), INT8_C( 99), INT8_C( 76), -INT8_C( 44), INT8_C( 79), -INT8_C( 3), -INT8_C( 18), -INT8_C( 36), -INT8_C( 122), -INT8_C( 15), -INT8_C( 83), INT8_C( 106) } }, + { { INT16_C( 19560), INT16_C( 13455), -INT16_C( 28972), INT16_C( 26862), -INT16_C( 30243), -INT16_C( 30157), -INT16_C( 12184), INT16_C( 20625) }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109) }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MAX }, + { -INT8_C( 72), -INT8_C( 18), INT8_C( 24), -INT8_C( 24), -INT8_C( 59), -INT8_C( 62), -INT8_C( 102), INT8_C( 109), INT8_C( 76), INT8_C( 52), -INT8_C( 114), INT8_C( 104), -INT8_C( 119), -INT8_C( 118), -INT8_C( 48), INT8_C( 80) } }, + { { INT16_C( 1314), INT16_C( 14771), -INT16_C( 18655), INT16_C( 21202), -INT16_C( 13604), -INT16_C( 4043), -INT16_C( 28213), -INT16_C( 19543) }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), 
-INT8_C( 59), -INT8_C( 62), INT8_C( 70) }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), -INT8_C( 59), -INT8_C( 62), INT8_C( 70), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), -INT8_C( 59), -INT8_C( 62), INT8_C( 70), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), -INT8_C( 59), -INT8_C( 62), INT8_C( 70), INT8_C( 41), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, -INT8_C( 127), INT8_MIN, INT8_MIN }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), -INT8_C( 59), -INT8_C( 62), INT8_C( 70), INT8_C( 20), INT8_MAX, INT8_MIN, INT8_MAX, INT8_MIN, -INT8_C( 64), INT8_MIN, INT8_MIN }, + { INT8_C( 104), -INT8_C( 117), -INT8_C( 52), -INT8_C( 101), -INT8_C( 25), -INT8_C( 59), -INT8_C( 62), INT8_C( 70), INT8_C( 5), INT8_C( 57), -INT8_C( 73), INT8_C( 82), -INT8_C( 54), -INT8_C( 16), -INT8_C( 111), -INT8_C( 77) } }, + { { INT16_C( 21984), INT16_C( 7535), -INT16_C( 11054), -INT16_C( 15541), INT16_C( 6702), -INT16_C( 19675), -INT16_C( 10927), -INT16_C( 26787) }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43) }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43), INT8_MAX, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MAX, INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), 
INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43), INT8_MAX, INT8_C( 117), INT8_MIN, INT8_MIN, INT8_C( 104), INT8_MIN, INT8_MIN, INT8_MIN }, + { INT8_C( 122), -INT8_C( 123), INT8_C( 84), -INT8_C( 127), INT8_C( 82), -INT8_C( 67), -INT8_C( 85), INT8_C( 43), INT8_C( 85), INT8_C( 29), -INT8_C( 44), -INT8_C( 61), INT8_C( 26), -INT8_C( 77), -INT8_C( 43), -INT8_C( 105) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int8x8_t r = simde_vld1_s8(test_vec[i].r); + + simde_int8x16_t r1 = simde_vqshrn_high_n_s16(r, a, 1); + simde_int8x16_t r3 = simde_vqshrn_high_n_s16(r, a, 3); + simde_int8x16_t r5 = simde_vqshrn_high_n_s16(r, a, 5); + simde_int8x16_t r6 = simde_vqshrn_high_n_s16(r, a, 6); + simde_int8x16_t r8 = simde_vqshrn_high_n_s16(r, a, 8); + + simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vqshrn_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t r[4]; + int16_t r3[8]; + int16_t r6[8]; + int16_t r10[8]; + int16_t r13[8]; + int16_t r16[8]; + } test_vec[] = { + { { INT32_C(1129261466), -INT32_C(1639403369), INT32_C( 71313492), -INT32_C(1496180707) }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424) }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MIN }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424), INT16_MAX, INT16_MIN, 
INT16_MAX, INT16_MIN }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424), INT16_MAX, INT16_MIN, INT16_C( 8705), INT16_MIN }, + { INT16_C( 1331), -INT16_C( 29205), -INT16_C( 30587), INT16_C( 25424), INT16_C( 17231), -INT16_C( 25016), INT16_C( 1088), -INT16_C( 22830) } }, + { { -INT32_C(1459014421), -INT32_C(1515670200), INT32_C( 82559049), -INT32_C(2077923617) }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378) }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378), INT16_MIN, INT16_MIN, INT16_C( 10078), INT16_MIN }, + { -INT16_C( 28066), -INT16_C( 29382), -INT16_C( 27745), INT16_C( 20378), -INT16_C( 22263), -INT16_C( 23128), INT16_C( 1259), -INT16_C( 31707) } }, + { { -INT32_C(1669005205), -INT32_C( 361496326), INT32_C(1142570396), -INT32_C(1170924398) }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489) }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489), INT16_MIN, INT16_MIN, INT16_MAX, INT16_MIN }, + { -INT16_C( 21246), -INT16_C( 7755), -INT16_C( 21005), -INT16_C( 8489), -INT16_C( 25467), -INT16_C( 5516), INT16_C( 17434), -INT16_C( 17867) } }, + { { INT32_C( 991737227), -INT32_C( 247956976), INT32_C(1830904832), INT32_C( 925103331) }, + { 
-INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449) }, + { -INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449), INT16_MAX, INT16_MIN, INT16_MAX, INT16_MAX }, + { -INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449), INT16_MAX, -INT16_C( 30269), INT16_MAX, INT16_MAX }, + { -INT16_C( 20572), -INT16_C( 16805), -INT16_C( 11538), INT16_C( 29449), INT16_C( 15132), -INT16_C( 3784), INT16_C( 27937), INT16_C( 14115) } }, + { { INT32_C( 521682632), -INT32_C(1873599869), -INT32_C(1886854197), INT32_C( 834818051) }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910) }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { -INT16_C( 8241), INT16_C( 15746), -INT16_C( 2), INT16_C( 2910), INT16_C( 7960), -INT16_C( 28589), -INT16_C( 28792), INT16_C( 12738) } }, + { { INT32_C( 752569105), -INT32_C(1056108898), -INT32_C( 55610933), -INT32_C(1139322037) }, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795) }, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN }, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN }, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MIN 
}, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795), INT16_MAX, INT16_MIN, -INT16_C( 6789), INT16_MIN }, + { -INT16_C( 30067), -INT16_C( 9119), -INT16_C( 6313), -INT16_C( 28795), INT16_C( 11483), -INT16_C( 16115), -INT16_C( 849), -INT16_C( 17385) } }, + { { -INT32_C( 13538846), INT32_C( 294134544), -INT32_C( 36755682), INT32_C( 542562851) }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305) }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305), INT16_MIN, INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305), -INT16_C( 13222), INT16_MAX, INT16_MIN, INT16_MAX }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305), -INT16_C( 1653), INT16_MAX, -INT16_C( 4487), INT16_MAX }, + { INT16_C( 4346), INT16_C( 1839), -INT16_C( 32259), INT16_C( 30305), -INT16_C( 207), INT16_C( 4488), -INT16_C( 561), INT16_C( 8278) } }, + { { INT32_C( 173726430), -INT32_C(1101904268), -INT32_C( 100277379), INT32_C( 952928107) }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019) }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019), INT16_MAX, INT16_MIN, INT16_MIN, INT16_MAX }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019), INT16_C( 21206), INT16_MIN, -INT16_C( 12241), INT16_MAX }, + { INT16_C( 2900), -INT16_C( 20521), INT16_C( 26827), INT16_C( 7019), INT16_C( 2650), -INT16_C( 16814), -INT16_C( 1531), INT16_C( 14540) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + 
simde_int16x4_t r = simde_vld1_s16(test_vec[i].r); + + simde_int16x8_t r3 = simde_vqshrn_high_n_s32(r, a, 3); + simde_int16x8_t r6 = simde_vqshrn_high_n_s32(r, a, 6); + simde_int16x8_t r10 = simde_vqshrn_high_n_s32(r, a, 10); + simde_int16x8_t r13 = simde_vqshrn_high_n_s32(r, a, 13); + simde_int16x8_t r16 = simde_vqshrn_high_n_s32(r, a, 16); + + simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + } + + return 0; +} + +static int +test_simde_vqshrn_high_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t r[2]; + int32_t r6[4]; + int32_t r13[4]; + int32_t r19[4]; + int32_t r26[4]; + int32_t r32[4]; + } test_vec[] = { + { { -INT64_C(765171249508011034), -INT64_C(4525319093864694895) }, + { -INT32_C(1566369049), -INT32_C(984125454) }, + { -INT32_C(1566369049), -INT32_C(984125454), INT32_MIN, INT32_MIN }, + { -INT32_C(1566369049), -INT32_C(984125454), INT32_MIN, INT32_MIN }, + { -INT32_C(1566369049), -INT32_C(984125454), INT32_MIN, INT32_MIN }, + { -INT32_C(1566369049), -INT32_C(984125454), INT32_MIN, INT32_MIN }, + { -INT32_C(1566369049), -INT32_C(984125454), -INT32_C(178155315), -INT32_C(1053632958) } }, + { { -INT64_C(3161110402208284557), INT64_C(1691993124931351958) }, + { -INT32_C(360166921), INT32_C(56480042) }, + { -INT32_C(360166921), INT32_C(56480042), INT32_MIN, INT32_MAX }, + { -INT32_C(360166921), INT32_C(56480042), INT32_MIN, INT32_MAX }, + { -INT32_C(360166921), INT32_C(56480042), INT32_MIN, INT32_MAX }, + { -INT32_C(360166921), INT32_C(56480042), INT32_MIN, INT32_MAX }, + { -INT32_C(360166921), INT32_C(56480042), -INT32_C(736003370), INT32_C(393947848) } }, + { { 
-INT64_C(6810743514283196592), -INT64_C(3960885338091995016) }, + { INT32_C(1536115396), -INT32_C(290743308) }, + { INT32_C(1536115396), -INT32_C(290743308), INT32_MIN, INT32_MIN }, + { INT32_C(1536115396), -INT32_C(290743308), INT32_MIN, INT32_MIN }, + { INT32_C(1536115396), -INT32_C(290743308), INT32_MIN, INT32_MIN }, + { INT32_C(1536115396), -INT32_C(290743308), INT32_MIN, INT32_MIN }, + { INT32_C(1536115396), -INT32_C(290743308), -INT32_C(1585749797), -INT32_C(922215483) } }, + { { INT64_C(1012374666276996287), INT64_C(5445809416200044057) }, + { -INT32_C(1308095952), -INT32_C(784194256) }, + { -INT32_C(1308095952), -INT32_C(784194256), INT32_MAX, INT32_MAX }, + { -INT32_C(1308095952), -INT32_C(784194256), INT32_MAX, INT32_MAX }, + { -INT32_C(1308095952), -INT32_C(784194256), INT32_MAX, INT32_MAX }, + { -INT32_C(1308095952), -INT32_C(784194256), INT32_MAX, INT32_MAX }, + { -INT32_C(1308095952), -INT32_C(784194256), INT32_C(235711845), INT32_C(1267951311) } }, + { { INT64_C(378766495890448787), -INT64_C(7944518708464066992) }, + { -INT32_C(366409358), INT32_C(1396245526) }, + { -INT32_C(366409358), INT32_C(1396245526), INT32_MAX, INT32_MIN }, + { -INT32_C(366409358), INT32_C(1396245526), INT32_MAX, INT32_MIN }, + { -INT32_C(366409358), INT32_C(1396245526), INT32_MAX, INT32_MIN }, + { -INT32_C(366409358), INT32_C(1396245526), INT32_MAX, INT32_MIN }, + { -INT32_C(366409358), INT32_C(1396245526), INT32_C(88188447), -INT32_C(1849727405) } }, + { { -INT64_C(8933309892358046918), -INT64_C(1273532278696929068) }, + { INT32_C(443286716), -INT32_C(1983115849) }, + { INT32_C(443286716), -INT32_C(1983115849), INT32_MIN, INT32_MIN }, + { INT32_C(443286716), -INT32_C(1983115849), INT32_MIN, INT32_MIN }, + { INT32_C(443286716), -INT32_C(1983115849), INT32_MIN, INT32_MIN }, + { INT32_C(443286716), -INT32_C(1983115849), INT32_MIN, INT32_MIN }, + { INT32_C(443286716), -INT32_C(1983115849), -INT32_C(2079948293), -INT32_C(296517341) } }, + { { INT64_C(2163653472670588741), 
-INT64_C(2442964316617231425) }, + { -INT32_C(1546863875), INT32_C(1785966531) }, + { -INT32_C(1546863875), INT32_C(1785966531), INT32_MAX, INT32_MIN }, + { -INT32_C(1546863875), INT32_C(1785966531), INT32_MAX, INT32_MIN }, + { -INT32_C(1546863875), INT32_C(1785966531), INT32_MAX, INT32_MIN }, + { -INT32_C(1546863875), INT32_C(1785966531), INT32_MAX, INT32_MIN }, + { -INT32_C(1546863875), INT32_C(1785966531), INT32_C(503764830), -INT32_C(568796955) } }, + { { -INT64_C(3574845421579539086), -INT64_C(2305885010680092105) }, + { INT32_C(91844711), -INT32_C(723757095) }, + { INT32_C(91844711), -INT32_C(723757095), INT32_MIN, INT32_MIN }, + { INT32_C(91844711), -INT32_C(723757095), INT32_MIN, INT32_MIN }, + { INT32_C(91844711), -INT32_C(723757095), INT32_MIN, INT32_MIN }, + { INT32_C(91844711), -INT32_C(723757095), INT32_MIN, INT32_MIN }, + { INT32_C(91844711), -INT32_C(723757095), -INT32_C(832333561), -INT32_C(536880692) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t r = simde_vld1_s32(test_vec[i].r); + + simde_int32x4_t r6 = simde_vqshrn_high_n_s64(r, a, 6); + simde_int32x4_t r13 = simde_vqshrn_high_n_s64(r, a, 13); + simde_int32x4_t r19 = simde_vqshrn_high_n_s64(r, a, 19); + simde_int32x4_t r26 = simde_vqshrn_high_n_s64(r, a, 26); + simde_int32x4_t r32 = simde_vqshrn_high_n_s64(r, a, 32); + + simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); + } + + return 0; +} + +static int +test_simde_vqshrn_high_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint8_t r[8]; + uint8_t r1[16]; + 
uint8_t r3[16]; + uint8_t r5[16]; + uint8_t r6[16]; + uint8_t r8[16]; + } test_vec[] = { + { { UINT16_C( 1105), UINT16_C( 6921), UINT16_C( 48895), UINT16_C( 26266), UINT16_C( 24330), UINT16_C( 16692), UINT16_C( 7067), UINT16_C( 37251) }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92) }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92), UINT8_C( 138), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92), UINT8_C( 34), UINT8_C( 216), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 220), UINT8_MAX }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92), UINT8_C( 17), UINT8_C( 108), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 110), UINT8_MAX }, + { UINT8_C( 147), UINT8_C( 72), UINT8_C( 41), UINT8_C( 186), UINT8_C( 224), UINT8_C( 59), UINT8_C( 125), UINT8_C( 92), UINT8_C( 4), UINT8_C( 27), UINT8_C( 190), UINT8_C( 102), UINT8_C( 95), UINT8_C( 65), UINT8_C( 27), UINT8_C( 145) } }, + { { UINT16_C( 54415), UINT16_C( 52396), UINT16_C( 44348), UINT16_C( 19843), UINT16_C( 21304), UINT16_C( 58585), UINT16_C( 20228), UINT16_C( 37437) }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49) }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 
150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 83), UINT8_C( 107), UINT8_C( 8), UINT8_C( 150), UINT8_C( 99), UINT8_C( 77), UINT8_C( 232), UINT8_C( 49), UINT8_C( 212), UINT8_C( 204), UINT8_C( 173), UINT8_C( 77), UINT8_C( 83), UINT8_C( 228), UINT8_C( 79), UINT8_C( 146) } }, + { { UINT16_C( 2035), UINT16_C( 27462), UINT16_C( 56467), UINT16_C( 311), UINT16_C( 791), UINT16_C( 31983), UINT16_C( 27022), UINT16_C( 28573) }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117) }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 155), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117), UINT8_C( 254), UINT8_MAX, UINT8_MAX, UINT8_C( 38), UINT8_C( 98), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117), UINT8_C( 63), UINT8_MAX, UINT8_MAX, UINT8_C( 9), UINT8_C( 24), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117), UINT8_C( 31), UINT8_MAX, UINT8_MAX, UINT8_C( 4), UINT8_C( 12), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 68), UINT8_C( 37), UINT8_C( 8), UINT8_C( 62), 
UINT8_C( 24), UINT8_C( 133), UINT8_C( 210), UINT8_C( 117), UINT8_C( 7), UINT8_C( 107), UINT8_C( 220), UINT8_C( 1), UINT8_C( 3), UINT8_C( 124), UINT8_C( 105), UINT8_C( 111) } }, + { { UINT16_C( 24051), UINT16_C( 261), UINT16_C( 14783), UINT16_C( 63186), UINT16_C( 2150), UINT16_C( 16210), UINT16_C( 15740), UINT16_C( 31367) }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45) }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45), UINT8_MAX, UINT8_C( 130), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45), UINT8_MAX, UINT8_C( 32), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45), UINT8_MAX, UINT8_C( 8), UINT8_MAX, UINT8_MAX, UINT8_C( 67), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45), UINT8_MAX, UINT8_C( 4), UINT8_C( 230), UINT8_MAX, UINT8_C( 33), UINT8_C( 253), UINT8_C( 245), UINT8_MAX }, + { UINT8_C( 26), UINT8_C( 102), UINT8_C( 106), UINT8_C( 141), UINT8_C( 181), UINT8_C( 117), UINT8_C( 54), UINT8_C( 45), UINT8_C( 93), UINT8_C( 1), UINT8_C( 57), UINT8_C( 246), UINT8_C( 8), UINT8_C( 63), UINT8_C( 61), UINT8_C( 122) } }, + { { UINT16_C( 8851), UINT16_C( 30702), UINT16_C( 11103), UINT16_C( 45088), UINT16_C( 21022), UINT16_C( 2460), UINT16_C( 17797), UINT16_C( 18061) }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195) }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 76), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195), UINT8_C( 138), UINT8_MAX, UINT8_C( 173), UINT8_MAX, UINT8_MAX, UINT8_C( 38), UINT8_MAX, UINT8_MAX }, + { UINT8_C( 61), UINT8_C( 231), UINT8_C( 92), UINT8_C( 213), UINT8_C( 101), UINT8_C( 122), UINT8_C( 50), UINT8_C( 195), UINT8_C( 34), UINT8_C( 119), UINT8_C( 43), UINT8_C( 176), UINT8_C( 82), UINT8_C( 9), UINT8_C( 69), UINT8_C( 70) } }, + { { UINT16_C( 52972), UINT16_C( 55056), UINT16_C( 8297), UINT16_C( 49512), UINT16_C( 26081), UINT16_C( 29542), UINT16_C( 35273), UINT16_C( 26373) }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96) }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96), UINT8_MAX, UINT8_MAX, UINT8_C( 129), UINT8_MAX, 
UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 16), UINT8_C( 151), UINT8_C( 73), UINT8_C( 22), UINT8_C( 214), UINT8_C( 25), UINT8_C( 123), UINT8_C( 96), UINT8_C( 206), UINT8_C( 215), UINT8_C( 32), UINT8_C( 193), UINT8_C( 101), UINT8_C( 115), UINT8_C( 137), UINT8_C( 103) } }, + { { UINT16_C( 34631), UINT16_C( 19191), UINT16_C( 10930), UINT16_C( 25238), UINT16_C( 4654), UINT16_C( 17428), UINT16_C( 38959), UINT16_C( 64073) }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231) }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 145), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231), UINT8_MAX, UINT8_MAX, UINT8_C( 170), UINT8_MAX, UINT8_C( 72), UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 78), UINT8_C( 84), UINT8_C( 151), UINT8_C( 79), UINT8_C( 210), UINT8_C( 96), UINT8_C( 183), UINT8_C( 231), UINT8_C( 135), UINT8_C( 74), UINT8_C( 42), UINT8_C( 98), UINT8_C( 18), UINT8_C( 68), UINT8_C( 152), UINT8_C( 250) } }, + { { UINT16_C( 11068), UINT16_C( 33326), UINT16_C( 23470), UINT16_C( 17170), UINT16_C( 12419), UINT16_C( 39286), UINT16_C( 59545), UINT16_C( 8118) }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152) }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 
228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 253) }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152), UINT8_C( 172), UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_C( 194), UINT8_MAX, UINT8_MAX, UINT8_C( 126) }, + { UINT8_C( 199), UINT8_C( 1), UINT8_C( 73), UINT8_C( 132), UINT8_C( 228), UINT8_C( 136), UINT8_C( 135), UINT8_C( 152), UINT8_C( 43), UINT8_C( 130), UINT8_C( 91), UINT8_C( 67), UINT8_C( 48), UINT8_C( 153), UINT8_C( 232), UINT8_C( 31) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); + + simde_uint8x16_t r1 = simde_vqshrn_high_n_u16(r, a, 1); + simde_uint8x16_t r3 = simde_vqshrn_high_n_u16(r, a, 3); + simde_uint8x16_t r5 = simde_vqshrn_high_n_u16(r, a, 5); + simde_uint8x16_t r6 = simde_vqshrn_high_n_u16(r, a, 6); + simde_uint8x16_t r8 = simde_vqshrn_high_n_u16(r, a, 8); + + simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vqshrn_high_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + 
static const struct { + uint32_t a[4]; + uint16_t r[4]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r16[8]; + } test_vec[] = { + { { UINT32_C(2361451199), UINT32_C(3928353983), UINT32_C(2203416184), UINT32_C(2489499857) }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088) }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(26690), UINT16_C(31011), UINT16_C( 645), UINT16_C(43088), UINT16_C(36032), UINT16_C(59941), UINT16_C(33621), UINT16_C(37986) } }, + { { UINT32_C(306270981), UINT32_C(4245752686), UINT32_C(1259241961), UINT32_C(1394342818) }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605) }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605), UINT16_C(37386), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53839), UINT16_C(54788), UINT16_C(41601), UINT16_C(52605), UINT16_C(4673), UINT16_C(64785), UINT16_C(19214), UINT16_C(21275) } }, + { { UINT32_C(1329624828), UINT32_C(2258156829), UINT32_C(2018646584), UINT32_C(3202299265) }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), UINT16_C(1206) }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), 
UINT16_C(1206), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), UINT16_C(1206), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), UINT16_C(1206), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), UINT16_C(1206), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(53341), UINT16_C(23984), UINT16_C(34377), UINT16_C(1206), UINT16_C(20288), UINT16_C(34456), UINT16_C(30802), UINT16_C(48863) } }, + { { UINT32_C(56824135), UINT32_C(2334585118), UINT32_C(2534667093), UINT32_C(1779495218) }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798) }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798), UINT16_C(55492), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798), UINT16_C(6936), UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(48657), UINT16_C(6925), UINT16_C(4288), UINT16_C(28798), UINT16_C( 867), UINT16_C(35622), UINT16_C(38675), UINT16_C(27152) } }, + { { UINT32_C(2307010385), UINT32_C(786094078), UINT32_C(4213990281), UINT32_C(514701508) }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908) }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908), UINT16_MAX, UINT16_MAX, 
UINT16_MAX, UINT16_C(62829) }, + { UINT16_C(50041), UINT16_C(42001), UINT16_C(29125), UINT16_C(15908), UINT16_C(35202), UINT16_C(11994), UINT16_C(64300), UINT16_C(7853) } }, + { { UINT32_C(2928194567), UINT32_C(711630444), UINT32_C(1409143013), UINT32_C(91264893) }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136) }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_C(11140) }, + { UINT16_C(46467), UINT16_C(49351), UINT16_C(47593), UINT16_C(14136), UINT16_C(44680), UINT16_C(10858), UINT16_C(21501), UINT16_C(1392) } }, + { { UINT32_C(3282209478), UINT32_C(2025208263), UINT32_C(1022708302), UINT32_C(4060576734) }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549) }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(40872), UINT16_C(52756), UINT16_C(49418), UINT16_C( 549), UINT16_C(50082), UINT16_C(30902), UINT16_C(15605), UINT16_C(61959) } }, + { { UINT32_C(2112066869), UINT32_C(2506561151), UINT32_C(3159742828), UINT32_C(1886997040) }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), UINT16_C(41095) }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), 
UINT16_C(41095), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), UINT16_C(41095), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), UINT16_C(41095), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), UINT16_C(41095), UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX }, + { UINT16_C(33103), UINT16_C(44697), UINT16_C(3298), UINT16_C(41095), UINT16_C(32227), UINT16_C(38247), UINT16_C(48213), UINT16_C(28793) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x4_t r = simde_vld1_u16(test_vec[i].r); + + simde_uint16x8_t r3 = simde_vqshrn_high_n_u32(r, a, 3); + simde_uint16x8_t r6 = simde_vqshrn_high_n_u32(r, a, 6); + simde_uint16x8_t r10 = simde_vqshrn_high_n_u32(r, a, 10); + simde_uint16x8_t r13 = simde_vqshrn_high_n_u32(r, a, 13); + simde_uint16x8_t r16 = simde_vqshrn_high_n_u32(r, a, 16); + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + } + + return 0; +} + +static int +test_simde_vqshrn_high_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t r[2]; + uint32_t r6[4]; + uint32_t r13[4]; + uint32_t r19[4]; + uint32_t r26[4]; + uint32_t r32[4]; + } test_vec[] = { + { { UINT64_C(5153608494813095252), UINT64_C(15801484430927645942) }, + { UINT32_C(1149783980), UINT32_C(778382986) }, + { UINT32_C(1149783980), UINT32_C(778382986), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1149783980), UINT32_C(778382986), UINT32_MAX, UINT32_MAX }, + { 
UINT32_C(1149783980), UINT32_C(778382986), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1149783980), UINT32_C(778382986), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1149783980), UINT32_C(778382986), UINT32_C(1199917982), UINT32_C(3679069790) } }, + { { UINT64_C(13858666046941017502), UINT64_C(17181432487780316057) }, + { UINT32_C(2555790628), UINT32_C(4208570593) }, + { UINT32_C(2555790628), UINT32_C(4208570593), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2555790628), UINT32_C(4208570593), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2555790628), UINT32_C(4208570593), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2555790628), UINT32_C(4208570593), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2555790628), UINT32_C(4208570593), UINT32_C(3226722135), UINT32_C(4000363985) } }, + { { UINT64_C(7611247938741977904), UINT64_C(16019258039644149976) }, + { UINT32_C(813398151), UINT32_C(3091258712) }, + { UINT32_C(813398151), UINT32_C(3091258712), UINT32_MAX, UINT32_MAX }, + { UINT32_C(813398151), UINT32_C(3091258712), UINT32_MAX, UINT32_MAX }, + { UINT32_C(813398151), UINT32_C(3091258712), UINT32_MAX, UINT32_MAX }, + { UINT32_C(813398151), UINT32_C(3091258712), UINT32_MAX, UINT32_MAX }, + { UINT32_C(813398151), UINT32_C(3091258712), UINT32_C(1772131756), UINT32_C(3729774160) } }, + { { UINT64_C(11739101929472879634), UINT64_C(15478438981995682412) }, + { UINT32_C(160744076), UINT32_C(3176789119) }, + { UINT32_C(160744076), UINT32_C(3176789119), UINT32_MAX, UINT32_MAX }, + { UINT32_C(160744076), UINT32_C(3176789119), UINT32_MAX, UINT32_MAX }, + { UINT32_C(160744076), UINT32_C(3176789119), UINT32_MAX, UINT32_MAX }, + { UINT32_C(160744076), UINT32_C(3176789119), UINT32_MAX, UINT32_MAX }, + { UINT32_C(160744076), UINT32_C(3176789119), UINT32_C(2733222658), UINT32_C(3603854910) } }, + { { UINT64_C(13918965383802178048), UINT64_C(988305148310810066) }, + { UINT32_C(1680278688), UINT32_C(551691632) }, + { UINT32_C(1680278688), UINT32_C(551691632), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1680278688), 
UINT32_C(551691632), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1680278688), UINT32_C(551691632), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1680278688), UINT32_C(551691632), UINT32_MAX, UINT32_MAX }, + { UINT32_C(1680278688), UINT32_C(551691632), UINT32_C(3240761669), UINT32_C(230107723) } }, + { { UINT64_C(12655452248050541842), UINT64_C(9682272096452148561) }, + { UINT32_C(780484345), UINT32_C(2945636890) }, + { UINT32_C(780484345), UINT32_C(2945636890), UINT32_MAX, UINT32_MAX }, + { UINT32_C(780484345), UINT32_C(2945636890), UINT32_MAX, UINT32_MAX }, + { UINT32_C(780484345), UINT32_C(2945636890), UINT32_MAX, UINT32_MAX }, + { UINT32_C(780484345), UINT32_C(2945636890), UINT32_MAX, UINT32_MAX }, + { UINT32_C(780484345), UINT32_C(2945636890), UINT32_C(2946577092), UINT32_C(2254329644) } }, + { { UINT64_C(4711763874410978498), UINT64_C(9840987415755556929) }, + { UINT32_C(275444936), UINT32_C(1358980049) }, + { UINT32_C(275444936), UINT32_C(1358980049), UINT32_MAX, UINT32_MAX }, + { UINT32_C(275444936), UINT32_C(1358980049), UINT32_MAX, UINT32_MAX }, + { UINT32_C(275444936), UINT32_C(1358980049), UINT32_MAX, UINT32_MAX }, + { UINT32_C(275444936), UINT32_C(1358980049), UINT32_MAX, UINT32_MAX }, + { UINT32_C(275444936), UINT32_C(1358980049), UINT32_C(1097043015), UINT32_C(2291283434) } }, + { { UINT64_C(18279397249888037655), UINT64_C(4293313567125042273) }, + { UINT32_C(2733055859), UINT32_C(2742207168) }, + { UINT32_C(2733055859), UINT32_C(2742207168), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2733055859), UINT32_C(2742207168), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2733055859), UINT32_C(2742207168), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2733055859), UINT32_C(2742207168), UINT32_MAX, UINT32_MAX }, + { UINT32_C(2733055859), UINT32_C(2742207168), UINT32_C(4256003827), UINT32_C(999614961) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint32x2_t r = 
simde_vld1_u32(test_vec[i].r); + + simde_uint32x4_t r6 = simde_vqshrn_high_n_u64(r, a, 6); + simde_uint32x4_t r13 = simde_vqshrn_high_n_u64(r, a, 13); + simde_uint32x4_t r19 = simde_vqshrn_high_n_u64(r, a, 19); + simde_uint32x4_t r26 = simde_vqshrn_high_n_u64(r, a, 26); + simde_uint32x4_t r32 = simde_vqshrn_high_n_u64(r, a, 32); + + simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_high_n_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/qshrn_n.c b/test/arm/neon/qshrn_n.c index 4115262df..e5727c311 100644 --- a/test/arm/neon/qshrn_n.c +++ b/test/arm/neon/qshrn_n.c @@ -605,6 +605,160 @@ test_simde_vqshrn_n_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vqshrnh_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a; + int8_t r1; + int8_t r3; + int8_t r5; + int8_t r6; + int8_t r8; + } test_vec[] = { + { INT16_C( 11473), + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_C( 44) }, + { INT16_C( 17084), + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_C( 66) }, + { -INT16_C( 29873), + INT8_MIN, + INT8_MIN, + INT8_MIN, + INT8_MIN, + -INT8_C( 117) }, + { INT16_C( 32294), + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_C( 126) }, + { INT16_C( 7196), + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_C( 112), + INT8_C( 28) }, + { 
-INT16_C( 17570), + INT8_MIN, + INT8_MIN, + INT8_MIN, + INT8_MIN, + -INT8_C( 69) }, + { INT16_C( 7104), + INT8_MAX, + INT8_MAX, + INT8_MAX, + INT8_C( 111), + INT8_C( 27) }, + { -INT16_C( 28856), + INT8_MIN, + INT8_MIN, + INT8_MIN, + INT8_MIN, + -INT8_C( 113) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int8_t r1 = simde_vqshrnh_n_s16(test_vec[i].a, 1); + int8_t r3 = simde_vqshrnh_n_s16(test_vec[i].a, 3); + int8_t r5 = simde_vqshrnh_n_s16(test_vec[i].a, 5); + int8_t r6 = simde_vqshrnh_n_s16(test_vec[i].a, 6); + int8_t r8 = simde_vqshrnh_n_s16(test_vec[i].a, 8); + + simde_assert_equal_i8(r1, test_vec[i].r1); + simde_assert_equal_i8(r3, test_vec[i].r3); + simde_assert_equal_i8(r5, test_vec[i].r5); + simde_assert_equal_i8(r6, test_vec[i].r6); + simde_assert_equal_i8(r8, test_vec[i].r8); + } + + return 0; +} + +static int +test_simde_vqshrnh_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a; + uint8_t r1; + uint8_t r3; + uint8_t r5; + uint8_t r6; + uint8_t r8; + } test_vec[] = { + { UINT16_C( 6711), + UINT8_MAX, + UINT8_MAX, + UINT8_C(209), + UINT8_C(104), + UINT8_C( 26) }, + { UINT16_C(44505), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C(173) }, + { UINT16_C(23584), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C( 92) }, + { UINT16_C(28962), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C(113) }, + { UINT16_C(15333), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C(239), + UINT8_C( 59) }, + { UINT16_C(65383), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX }, + { UINT16_C(28623), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C(111) }, + { UINT16_C(45572), + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_MAX, + UINT8_C(178) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + uint8_t r1 = simde_vqshrnh_n_u16(test_vec[i].a, 1); + uint8_t r3 = simde_vqshrnh_n_u16(test_vec[i].a, 3); + uint8_t r5 = 
simde_vqshrnh_n_u16(test_vec[i].a, 5); + uint8_t r6 = simde_vqshrnh_n_u16(test_vec[i].a, 6); + uint8_t r8 = simde_vqshrnh_n_u16(test_vec[i].a, 8); + + simde_assert_equal_u8(r1, test_vec[i].r1); + simde_assert_equal_u8(r3, test_vec[i].r3); + simde_assert_equal_u8(r5, test_vec[i].r5); + simde_assert_equal_u8(r6, test_vec[i].r6); + simde_assert_equal_u8(r8, test_vec[i].r8); + } + + return 0; +} + static int test_simde_vqshrns_n_s32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1004,6 +1158,8 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_n_s64) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrn_n_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrnh_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vqshrnh_n_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrns_n_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrns_n_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vqshrnd_n_s64) diff --git a/test/arm/neon/raddhn.c b/test/arm/neon/raddhn.c new file mode 100644 index 000000000..1f930e6a6 --- /dev/null +++ b/test/arm/neon/raddhn.c @@ -0,0 +1,370 @@ +#define SIMDE_TEST_ARM_NEON_INSN raddhn + +#include "test-neon.h" +#include "../../../simde/arm/neon/raddhn.h" + +static int +test_simde_vraddhn_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a[8]; + int16_t b[8]; + int8_t r[8]; + } test_vec[] = { + { { -INT16_C( 2880), -INT16_C( 31645), -INT16_C( 2485), INT16_C( 9929), INT16_C( 16914), INT16_C( 32272), INT16_C( 6442), -INT16_C( 14878) }, + { -INT16_C( 29699), INT16_C( 1511), -INT16_C( 14208), -INT16_C( 13119), -INT16_C( 26161), -INT16_C( 5298), -INT16_C( 19688), INT16_C( 4298) }, + { -INT8_C( 127), -INT8_C( 118), -INT8_C( 65), -INT8_C( 12), -INT8_C( 36), INT8_C( 105), -INT8_C( 52), -INT8_C( 41) } }, + { { -INT16_C( 5246), -INT16_C( 5174), INT16_C( 30494), -INT16_C( 7937), -INT16_C( 32450), INT16_C( 22308), -INT16_C( 538), -INT16_C( 10035) }, + { INT16_C( 4128), -INT16_C( 6045), INT16_C( 16379), INT16_C( 8081), INT16_C( 22818), -INT16_C( 20555), -INT16_C( 2824), INT16_C( 14798) 
}, + { -INT8_C( 4), -INT8_C( 44), -INT8_C( 73), INT8_C( 1), -INT8_C( 38), INT8_C( 7), -INT8_C( 13), INT8_C( 19) } }, + { { INT16_C( 2221), INT16_C( 27907), -INT16_C( 27153), -INT16_C( 29480), INT16_C( 8357), INT16_C( 14864), -INT16_C( 24039), INT16_C( 11221) }, + { -INT16_C( 2187), INT16_C( 9428), INT16_C( 311), INT16_C( 10685), -INT16_C( 13024), -INT16_C( 19804), -INT16_C( 18044), INT16_C( 28594) }, + { INT8_C( 0), -INT8_C( 110), -INT8_C( 105), -INT8_C( 73), -INT8_C( 18), -INT8_C( 19), INT8_C( 92), -INT8_C( 100) } }, + { { -INT16_C( 21915), -INT16_C( 30855), INT16_C( 31063), INT16_C( 10494), INT16_C( 28275), -INT16_C( 22384), -INT16_C( 18957), INT16_C( 24066) }, + { INT16_C( 31119), INT16_C( 12866), -INT16_C( 28606), -INT16_C( 720), INT16_C( 30735), -INT16_C( 6009), -INT16_C( 3153), INT16_C( 26950) }, + { INT8_C( 36), -INT8_C( 70), INT8_C( 10), INT8_C( 38), -INT8_C( 25), -INT8_C( 111), -INT8_C( 86), -INT8_C( 57) } }, + { { -INT16_C( 13275), -INT16_C( 26026), INT16_C( 11914), -INT16_C( 15143), -INT16_C( 19155), INT16_C( 26572), INT16_C( 15561), INT16_C( 22917) }, + { -INT16_C( 1816), -INT16_C( 5803), INT16_C( 765), -INT16_C( 23566), INT16_C( 959), -INT16_C( 1974), -INT16_C( 27160), -INT16_C( 32154) }, + { -INT8_C( 59), -INT8_C( 124), INT8_C( 50), INT8_C( 105), -INT8_C( 71), INT8_C( 96), -INT8_C( 45), -INT8_C( 36) } }, + { { INT16_C( 19105), INT16_C( 6883), INT16_C( 21461), -INT16_C( 4395), -INT16_C( 12916), -INT16_C( 25031), -INT16_C( 8163), -INT16_C( 5903) }, + { INT16_C( 29537), -INT16_C( 12101), INT16_C( 3094), -INT16_C( 21595), -INT16_C( 11652), INT16_C( 19683), INT16_C( 15385), -INT16_C( 32317) }, + { -INT8_C( 66), -INT8_C( 20), INT8_C( 96), -INT8_C( 102), -INT8_C( 96), -INT8_C( 21), INT8_C( 28), INT8_C( 107) } }, + { { -INT16_C( 8550), INT16_C( 6411), INT16_C( 32264), -INT16_C( 24002), INT16_C( 18977), INT16_C( 19612), INT16_C( 11311), -INT16_C( 14670) }, + { INT16_C( 4703), -INT16_C( 5021), INT16_C( 25452), INT16_C( 74), INT16_C( 10589), INT16_C( 32145), 
INT16_C( 5380), -INT16_C( 4592) }, + { -INT8_C( 15), INT8_C( 5), -INT8_C( 31), -INT8_C( 93), INT8_C( 115), -INT8_C( 54), INT8_C( 65), -INT8_C( 75) } }, + { { INT16_C( 11372), -INT16_C( 29543), -INT16_C( 6523), INT16_C( 12759), -INT16_C( 4369), -INT16_C( 9669), -INT16_C( 8170), -INT16_C( 3584) }, + { -INT16_C( 24236), INT16_C( 14447), INT16_C( 11838), INT16_C( 22580), INT16_C( 20919), -INT16_C( 21841), INT16_C( 19904), -INT16_C( 28699) }, + { -INT8_C( 50), -INT8_C( 59), INT8_C( 21), -INT8_C( 118), INT8_C( 65), -INT8_C( 123), INT8_C( 46), -INT8_C( 126) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int8x8_t r = simde_vraddhn_s16(a, b); + + simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int8x8_t r = simde_vraddhn_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a[4]; + int32_t b[4]; + int16_t r[4]; + } test_vec[] = { + { { -INT32_C(1269894241), -INT32_C( 908985094), INT32_C(1593942511), INT32_C(1873000021) }, + { -INT32_C(1333379287), INT32_C( 986415016), INT32_C( 675741472), -INT32_C( 128971027) }, + { INT16_C( 25813), INT16_C( 1181), -INT16_C( 30903), INT16_C( 26612) } }, + { { -INT32_C(1810269671), -INT32_C(1392301115), -INT32_C( 302921755), -INT32_C(1464759437) }, + { INT32_C(1128784732), INT32_C(1723276060), -INT32_C(2025469489), INT32_C(1054571984) }, + { -INT16_C( 10399), INT16_C( 5050), 
INT16_C( 30008), -INT16_C( 6259) } }, + { { -INT32_C(1761194184), INT32_C( 588647362), -INT32_C(2039025143), INT32_C(1310305073) }, + { -INT32_C(1673982185), -INT32_C( 150884409), -INT32_C(1727266209), INT32_C(1820153119) }, + { INT16_C( 13119), INT16_C( 6680), INT16_C( 8067), -INT16_C( 17769) } }, + { { -INT32_C(2109029467), INT32_C( 646608259), INT32_C( 121275142), -INT32_C( 983498707) }, + { -INT32_C( 764346087), -INT32_C( 869225140), -INT32_C(1137915263), INT32_C(1742440656) }, + { INT16_C( 21692), -INT16_C( 3397), -INT16_C( 15513), INT16_C( 11581) } }, + { { INT32_C(1191080987), -INT32_C(1518951834), INT32_C(1681129750), INT32_C(1556794469) }, + { -INT32_C(1947985800), -INT32_C(1780488219), INT32_C(1270142499), INT32_C(1362476998) }, + { -INT16_C( 11549), INT16_C( 15191), -INT16_C( 20503), -INT16_C( 20991) } }, + { { INT32_C( 450395115), INT32_C(1265186736), -INT32_C(1762907759), INT32_C(1247737336) }, + { INT32_C( 648615904), INT32_C(1603026436), INT32_C(1624246441), -INT32_C( 762443713) }, + { INT16_C( 16770), -INT16_C( 21771), -INT16_C( 2116), INT16_C( 7405) } }, + { { -INT32_C( 774864597), -INT32_C(1855413885), INT32_C(1412322840), -INT32_C( 139448448) }, + { INT32_C(1992971614), -INT32_C( 622334636), INT32_C( 320697101), INT32_C(1784563166) }, + { INT16_C( 18587), INT16_C( 27729), INT16_C( 26444), INT16_C( 25102) } }, + { { -INT32_C(1195535887), -INT32_C(1186085804), INT32_C( 275985911), INT32_C( 29234507) }, + { INT32_C( 55469721), INT32_C( 629254), -INT32_C( 955594344), INT32_C( 58737092) }, + { -INT16_C( 17396), -INT16_C( 18089), -INT16_C( 10370), INT16_C( 1342) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int16x4_t r = simde_vraddhn_s32(a, b); + + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; 
i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int16x4_t r = simde_vraddhn_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a[2]; + int64_t b[2]; + int32_t r[2]; + } test_vec[] = { + { { -INT64_C(7820212674889991878), -INT64_C(2197004029258549139) }, + { -INT64_C(2938160914834829241), INT64_C(2333620714609775238) }, + { INT32_C( 1790088248), INT32_C( 31808551) } }, + { { INT64_C(6552516639254251817), -INT64_C(5014084713135562430) }, + { INT64_C(5319635273832147844), -INT64_C(1063024156379059705) }, + { -INT32_C( 1530766525), -INT32_C( 1414937170) } }, + { { -INT64_C(635693026954200109), -INT64_C(4329254798521084257) }, + { -INT64_C(7265744133308457668), -INT64_C(3507927426648019494) }, + { -INT32_C( 1839696700), -INT32_C( 1824736182) } }, + { { INT64_C(7332702632804291068), INT64_C(692062730911992544) }, + { -INT64_C(3650629987000525309), INT64_C(3415485980961630011) }, + { INT32_C( 857299344), INT32_C( 956363210) } }, + { { INT64_C(5058425555123709374), INT64_C(4080346964852498566) }, + { -INT64_C(9081086642602264799), -INT64_C(1475383767075495508) }, + { -INT32_C( 936598770), INT32_C( 606515258) } }, + { { -INT64_C(155691658938406433), -INT64_C(3890983314662465847) }, + { -INT64_C(4870030385317886886), -INT64_C(8161888701331778290) }, + { -INT32_C( 1170142098), INT32_C( 1488689347) } }, + { { INT64_C(320476305215413560), -INT64_C(392761775469036542) }, + { -INT64_C(2571124399223038738), -INT64_C(7295895571957081136) }, + { -INT32_C( 524019844), -INT32_C( 1790155039) } }, + { { INT64_C(6934714921123309528), INT64_C(1713895963837179772) }, + { INT64_C(6914907651883119212), 
INT64_C(390602164641798265) }, + { -INT32_C( 1070350758), INT32_C( 489991654) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int32x2_t r = simde_vraddhn_s64(a, b); + + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int32x2_t r = simde_vraddhn_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint8_t r[8]; + } test_vec[] = { + { { UINT16_C( 37613), UINT16_C( 43116), UINT16_C( 23884), UINT16_C( 20354), UINT16_C( 17594), UINT16_C( 35041), UINT16_C( 41771), UINT16_C( 50717) }, + { UINT16_C( 14183), UINT16_C( 40062), UINT16_C( 58286), UINT16_C( 60308), UINT16_C( 11310), UINT16_C( 30322), UINT16_C( 45046), UINT16_C( 11691) }, + { UINT8_C( 202), UINT8_C( 69), UINT8_C( 65), UINT8_C( 59), UINT8_C( 113), UINT8_MAX, UINT8_C( 83), UINT8_C( 244) } }, + { { UINT16_C( 57703), UINT16_C( 38482), UINT16_C( 36008), UINT16_C( 2614), UINT16_C( 28450), UINT16_C( 8241), UINT16_C( 48645), UINT16_C( 56796) }, + { UINT16_C( 1851), UINT16_C( 6016), UINT16_C( 17360), UINT16_C( 45761), UINT16_C( 13621), UINT16_C( 64150), UINT16_C( 40219), UINT16_C( 11061) }, + { UINT8_C( 233), UINT8_C( 174), UINT8_C( 208), UINT8_C( 189), UINT8_C( 164), UINT8_C( 27), UINT8_C( 91), UINT8_C( 9) } }, + { { UINT16_C( 31428), UINT16_C( 7367), UINT16_C( 45732), UINT16_C( 24472), UINT16_C( 54660), UINT16_C( 7748), 
UINT16_C( 54606), UINT16_C( 39752) }, + { UINT16_C( 51706), UINT16_C( 15878), UINT16_C( 58199), UINT16_C( 6507), UINT16_C( 19974), UINT16_C( 29962), UINT16_C( 28578), UINT16_C( 4687) }, + { UINT8_C( 69), UINT8_C( 91), UINT8_C( 150), UINT8_C( 121), UINT8_C( 36), UINT8_C( 147), UINT8_C( 69), UINT8_C( 174) } }, + { { UINT16_C( 11063), UINT16_C( 28529), UINT16_C( 26453), UINT16_C( 21236), UINT16_C( 57102), UINT16_C( 38754), UINT16_C( 61126), UINT16_C( 34925) }, + { UINT16_C( 46442), UINT16_C( 33134), UINT16_C( 17636), UINT16_C( 36223), UINT16_C( 21723), UINT16_C( 56442), UINT16_C( 10026), UINT16_C( 61597) }, + { UINT8_C( 225), UINT8_C( 241), UINT8_C( 172), UINT8_C( 224), UINT8_C( 52), UINT8_C( 116), UINT8_C( 22), UINT8_C( 121) } }, + { { UINT16_C( 2731), UINT16_C( 9316), UINT16_C( 49854), UINT16_C( 18896), UINT16_C( 49267), UINT16_C( 45904), UINT16_C( 29001), UINT16_C( 2858) }, + { UINT16_C( 20162), UINT16_C( 1776), UINT16_C( 13357), UINT16_C( 7672), UINT16_C( 16606), UINT16_C( 5538), UINT16_C( 38719), UINT16_C( 12760) }, + { UINT8_C( 89), UINT8_C( 43), UINT8_C( 247), UINT8_C( 104), UINT8_C( 1), UINT8_C( 201), UINT8_C( 9), UINT8_C( 61) } }, + { { UINT16_C( 12795), UINT16_C( 4896), UINT16_C( 23330), UINT16_C( 10108), UINT16_C( 23802), UINT16_C( 27438), UINT16_C( 8867), UINT16_C( 30490) }, + { UINT16_C( 28135), UINT16_C( 46983), UINT16_C( 24268), UINT16_C( 25599), UINT16_C( 20157), UINT16_C( 41969), UINT16_C( 61408), UINT16_C( 41777) }, + { UINT8_C( 160), UINT8_C( 203), UINT8_C( 186), UINT8_C( 139), UINT8_C( 172), UINT8_C( 15), UINT8_C( 19), UINT8_C( 26) } }, + { { UINT16_C( 37491), UINT16_C( 17703), UINT16_C( 9458), UINT16_C( 65074), UINT16_C( 20959), UINT16_C( 32245), UINT16_C( 26919), UINT16_C( 60176) }, + { UINT16_C( 22889), UINT16_C( 32794), UINT16_C( 15720), UINT16_C( 31240), UINT16_C( 32411), UINT16_C( 62788), UINT16_C( 43813), UINT16_C( 63876) }, + { UINT8_C( 236), UINT8_C( 197), UINT8_C( 98), UINT8_C( 120), UINT8_C( 208), UINT8_C( 115), UINT8_C( 20), UINT8_C( 
229) } }, + { { UINT16_C( 12277), UINT16_C( 32281), UINT16_C( 37982), UINT16_C( 34110), UINT16_C( 47012), UINT16_C( 58861), UINT16_C( 64783), UINT16_C( 43738) }, + { UINT16_C( 23822), UINT16_C( 44289), UINT16_C( 23707), UINT16_C( 12158), UINT16_C( 15480), UINT16_C( 49961), UINT16_C( 5675), UINT16_C( 59593) }, + { UINT8_C( 141), UINT8_C( 43), UINT8_C( 241), UINT8_C( 181), UINT8_C( 244), UINT8_C( 169), UINT8_C( 19), UINT8_C( 148) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint8x8_t r = simde_vraddhn_u16(a, b); + + simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint8x8_t r = simde_vraddhn_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint16_t r[4]; + } test_vec[] = { + { { UINT32_C(1599077134), UINT32_C(2685955823), UINT32_C(1535880878), UINT32_C(1407804329) }, + { UINT32_C(3668802610), UINT32_C( 533029653), UINT32_C(2777570489), UINT32_C( 85112890) }, + { UINT16_C( 14845), UINT16_C( 49118), UINT16_C( 282), UINT16_C( 22780) } }, + { { UINT32_C( 844280000), UINT32_C(1299592057), UINT32_C(2935929945), UINT32_C(1509650752) }, + { UINT32_C( 586314966), UINT32_C( 647283132), UINT32_C(1980614668), UINT32_C(1085490525) }, + { UINT16_C( 21829), UINT16_C( 29707), UINT16_C( 9485), UINT16_C( 39599) } }, + { { UINT32_C(2656176301), UINT32_C(2150911055), UINT32_C( 
952074682), UINT32_C(3083072123) }, + { UINT32_C(3672636508), UINT32_C( 752995622), UINT32_C(2357424942), UINT32_C(2352627710) }, + { UINT16_C( 31034), UINT16_C( 44310), UINT16_C( 50499), UINT16_C( 17406) } }, + { { UINT32_C(1590271315), UINT32_C(1734070346), UINT32_C(4243366597), UINT32_C( 181144014) }, + { UINT32_C( 875291705), UINT32_C(4233293053), UINT32_C(2871217210), UINT32_C(3618956669) }, + { UINT16_C( 37622), UINT16_C( 25519), UINT16_C( 43024), UINT16_C( 57985) } }, + { { UINT32_C( 893970267), UINT32_C( 47668152), UINT32_C( 342513840), UINT32_C(2414300529) }, + { UINT32_C(3185116321), UINT32_C(1517676379), UINT32_C(1426651672), UINT32_C(1979158981) }, + { UINT16_C( 62242), UINT16_C( 23885), UINT16_C( 26995), UINT16_C( 1503) } }, + { { UINT32_C(3923868843), UINT32_C(2329754825), UINT32_C(3687450793), UINT32_C(3801169291) }, + { UINT32_C(1949399041), UINT32_C( 508959889), UINT32_C(1171199549), UINT32_C(2658068365) }, + { UINT16_C( 24083), UINT16_C( 43315), UINT16_C( 8601), UINT16_C( 33024) } }, + { { UINT32_C( 700616631), UINT32_C( 439195780), UINT32_C(3182273828), UINT32_C(1928850695) }, + { UINT32_C(3813632070), UINT32_C(2483472338), UINT32_C(1045877070), UINT32_C(4233527679) }, + { UINT16_C( 3346), UINT16_C( 44596), UINT16_C( 64516), UINT16_C( 28494) } }, + { { UINT32_C(4193873059), UINT32_C(1223881358), UINT32_C( 576680884), UINT32_C(1649826296) }, + { UINT32_C( 657078515), UINT32_C( 653216189), UINT32_C(2522008160), UINT32_C(4176276405) }, + { UINT16_C( 8484), UINT16_C( 28642), UINT16_C( 47282), UINT16_C( 23363) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint16x4_t r = simde_vraddhn_u32(a, b); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = 
simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint16x4_t r = simde_vraddhn_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint64_t a[2]; + uint64_t b[2]; + uint32_t r[2]; + } test_vec[] = { + { { UINT64_C( 3658221119983586522), UINT64_C(13624450708070574745) }, + { UINT64_C( 7046718151405549136), UINT64_C( 3296843776638760148) }, + { UINT32_C( 2492437901), UINT32_C( 3939795886) } }, + { { UINT64_C(10310172816141081280), UINT64_C( 3376994261991074207) }, + { UINT64_C(11494608161389779618), UINT64_C( 3612232709931170060) }, + { UINT32_C( 781853894), UINT32_C( 1627306215) } }, + { { UINT64_C( 3983515379071047640), UINT64_C( 683812206328697011) }, + { UINT64_C(17437067970154921917), UINT64_C( 3851963522941548347) }, + { UINT32_C( 692400913), UINT32_C( 1056067583) } }, + { { UINT64_C( 1751127879165911272), UINT64_C( 5926739473801593863) }, + { UINT64_C(16950044079879418680), UINT64_C(17928468287003282561) }, + { UINT32_C( 59238608), UINT32_C( 1259256081) } }, + { { UINT64_C(11355337902764347302), UINT64_C( 8883462255544729120) }, + { UINT64_C(12547381047271968305), UINT64_C(10516517387439026716) }, + { UINT32_C( 1270318142), UINT32_C( 221942451) } }, + { { UINT64_C( 8246918623211434206), UINT64_C( 519113934888829026) }, + { UINT64_C(15938487908952821021), UINT64_C( 5324233429962735477) }, + { UINT32_C( 1336136474), UINT32_C( 1360510328) } }, + { { UINT64_C(17751366330943116804), UINT64_C(14347530208924216630) }, + { UINT64_C(11330187298328579592), UINT64_C(13898502737226142347) }, + { UINT32_C( 2476109554), UINT32_C( 2281574736) } }, + { { UINT64_C( 954615016345450251), UINT64_C( 6743074694315865715) }, + { 
UINT64_C(16957999455362759714), UINT64_C( 28138425858485463) }, + { UINT32_C( 4170605557), UINT32_C( 1576545909) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + simde_uint32x2_t r = simde_vraddhn_u64(a, b); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint32x2_t r = simde_vraddhn_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_s64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/raddhn_high.c b/test/arm/neon/raddhn_high.c new file mode 100644 index 000000000..dd66287eb --- /dev/null +++ b/test/arm/neon/raddhn_high.c @@ -0,0 +1,443 @@ +#define SIMDE_TEST_ARM_NEON_INSN raddhn_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/raddhn_high.h" + +static int +test_simde_vraddhn_high_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t r_[8]; + int16_t a[8]; + int16_t b[8]; + int8_t r[16]; + } test_vec[] = { + { { INT8_C( 15), -INT8_C( 19), -INT8_C( 48), -INT8_C( 13), INT8_C( 45), -INT8_C( 48), -INT8_C( 26), INT8_C( 33) }, + { -INT16_C( 12095), INT16_C( 17211), -INT16_C( 2398), INT16_C( 19853), -INT16_C( 6852), INT16_C( 27120), INT16_C( 31418), INT16_C( 
18743) }, + { INT16_C( 5866), INT16_C( 24735), -INT16_C( 32002), INT16_C( 26625), INT16_C( 15989), -INT16_C( 12779), -INT16_C( 11120), -INT16_C( 30611) }, + { INT8_C( 15), -INT8_C( 19), -INT8_C( 48), -INT8_C( 13), INT8_C( 45), -INT8_C( 48), -INT8_C( 26), INT8_C( 33), -INT8_C( 24), -INT8_C( 92), INT8_C( 122), -INT8_C( 74), INT8_C( 36), INT8_C( 56), INT8_C( 79), -INT8_C( 46) } }, + { { -INT8_C( 49), -INT8_C( 118), -INT8_C( 5), -INT8_C( 36), INT8_C( 117), -INT8_C( 43), INT8_C( 69), INT8_C( 124) }, + { -INT16_C( 16110), INT16_C( 1089), -INT16_C( 23667), INT16_C( 18054), -INT16_C( 13983), -INT16_C( 27284), INT16_C( 13014), INT16_C( 15012) }, + { INT16_C( 15410), -INT16_C( 3301), -INT16_C( 20015), INT16_C( 13021), -INT16_C( 23055), INT16_C( 31860), -INT16_C( 16803), -INT16_C( 12129) }, + { -INT8_C( 49), -INT8_C( 118), -INT8_C( 5), -INT8_C( 36), INT8_C( 117), -INT8_C( 43), INT8_C( 69), INT8_C( 124), -INT8_C( 3), -INT8_C( 9), INT8_C( 85), INT8_C( 121), INT8_C( 111), INT8_C( 18), -INT8_C( 15), INT8_C( 11) } }, + { { -INT8_C( 80), INT8_C( 118), -INT8_C( 21), -INT8_C( 40), -INT8_C( 62), -INT8_C( 105), INT8_C( 54), -INT8_C( 23) }, + { INT16_C( 14270), -INT16_C( 13117), -INT16_C( 10907), INT16_C( 7523), -INT16_C( 29065), -INT16_C( 18066), INT16_C( 27774), INT16_C( 27516) }, + { -INT16_C( 8467), INT16_C( 24014), INT16_C( 31552), -INT16_C( 26398), INT16_C( 2507), INT16_C( 19417), INT16_C( 26328), -INT16_C( 29014) }, + { -INT8_C( 80), INT8_C( 118), -INT8_C( 21), -INT8_C( 40), -INT8_C( 62), -INT8_C( 105), INT8_C( 54), -INT8_C( 23), INT8_C( 23), INT8_C( 43), INT8_C( 81), -INT8_C( 74), -INT8_C( 104), INT8_C( 5), -INT8_C( 45), -INT8_C( 6) } }, + { { -INT8_C( 121), -INT8_C( 5), INT8_C( 14), INT8_C( 123), -INT8_C( 28), INT8_C( 83), INT8_C( 46), -INT8_C( 41) }, + { -INT16_C( 23781), INT16_C( 4459), -INT16_C( 7012), -INT16_C( 27719), INT16_C( 4462), INT16_C( 5161), INT16_C( 10500), INT16_C( 15575) }, + { -INT16_C( 21166), INT16_C( 2663), -INT16_C( 30048), -INT16_C( 22763), -INT16_C( 
10851), -INT16_C( 2881), INT16_C( 16199), INT16_C( 9949) }, + { -INT8_C( 121), -INT8_C( 5), INT8_C( 14), INT8_C( 123), -INT8_C( 28), INT8_C( 83), INT8_C( 46), -INT8_C( 41), INT8_C( 80), INT8_C( 28), INT8_C( 111), INT8_C( 59), -INT8_C( 25), INT8_C( 9), INT8_C( 104), INT8_C( 100) } }, + { { -INT8_C( 21), INT8_C( 37), -INT8_C( 111), -INT8_C( 34), INT8_C( 53), INT8_C( 35), INT8_C( 125), INT8_C( 1) }, + { -INT16_C( 6601), INT16_C( 30366), INT16_C( 8187), -INT16_C( 17595), INT16_C( 20361), INT16_C( 28645), INT16_C( 22636), INT16_C( 9236) }, + { -INT16_C( 19082), -INT16_C( 23095), INT16_C( 9752), INT16_C( 1312), -INT16_C( 16805), INT16_C( 4906), -INT16_C( 6993), -INT16_C( 11096) }, + { -INT8_C( 21), INT8_C( 37), -INT8_C( 111), -INT8_C( 34), INT8_C( 53), INT8_C( 35), INT8_C( 125), INT8_C( 1), -INT8_C( 100), INT8_C( 28), INT8_C( 70), -INT8_C( 64), INT8_C( 14), -INT8_C( 125), INT8_C( 61), -INT8_C( 7) } }, + { { -INT8_C( 45), INT8_C( 48), INT8_C( 0), -INT8_C( 50), -INT8_C( 71), INT8_C( 39), INT8_C( 35), INT8_C( 0) }, + { -INT16_C( 16149), INT16_C( 24317), -INT16_C( 12198), -INT16_C( 29400), INT16_C( 22461), INT16_C( 28947), -INT16_C( 30952), INT16_C( 6308) }, + { INT16_C( 18236), -INT16_C( 943), INT16_C( 31257), INT16_C( 601), -INT16_C( 30010), -INT16_C( 2903), -INT16_C( 25370), -INT16_C( 2531) }, + { -INT8_C( 45), INT8_C( 48), INT8_C( 0), -INT8_C( 50), -INT8_C( 71), INT8_C( 39), INT8_C( 35), INT8_C( 0), INT8_C( 8), INT8_C( 91), INT8_C( 74), -INT8_C( 112), -INT8_C( 29), INT8_C( 102), INT8_C( 36), INT8_C( 15) } }, + { { INT8_C( 96), -INT8_C( 23), -INT8_C( 112), INT8_C( 70), -INT8_C( 40), INT8_C( 43), INT8_C( 7), -INT8_C( 66) }, + { -INT16_C( 9336), -INT16_C( 24505), INT16_C( 26635), -INT16_C( 6194), INT16_C( 2886), INT16_C( 11325), INT16_C( 24921), INT16_C( 17239) }, + { -INT16_C( 10313), INT16_C( 15520), -INT16_C( 28030), INT16_C( 9813), -INT16_C( 5805), INT16_C( 25109), INT16_C( 2107), INT16_C( 10636) }, + { INT8_C( 96), -INT8_C( 23), -INT8_C( 112), INT8_C( 70), -INT8_C( 
40), INT8_C( 43), INT8_C( 7), -INT8_C( 66), -INT8_C( 77), -INT8_C( 35), -INT8_C( 5), INT8_C( 14), -INT8_C( 11), -INT8_C( 114), INT8_C( 106), INT8_C( 109) } }, + { { -INT8_C( 48), INT8_C( 8), INT8_C( 104), -INT8_C( 17), INT8_C( 53), INT8_C( 86), -INT8_C( 63), -INT8_C( 74) }, + { -INT16_C( 18853), INT16_C( 18970), INT16_C( 8488), INT16_C( 27692), -INT16_C( 27894), INT16_C( 685), INT16_C( 28318), -INT16_C( 13629) }, + { INT16_C( 29901), -INT16_C( 3380), -INT16_C( 31158), -INT16_C( 1358), -INT16_C( 32145), -INT16_C( 27772), INT16_C( 22434), -INT16_C( 14729) }, + { -INT8_C( 48), INT8_C( 8), INT8_C( 104), -INT8_C( 17), INT8_C( 53), INT8_C( 86), -INT8_C( 63), -INT8_C( 74), INT8_C( 43), INT8_C( 61), -INT8_C( 89), INT8_C( 103), INT8_C( 21), -INT8_C( 106), -INT8_C( 58), -INT8_C( 111) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t r_ = simde_vld1_s8(test_vec[i].r_); + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int8x16_t r = simde_vraddhn_high_s16(r_, a, b); + + simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t r_ = simde_test_arm_neon_random_i8x8(); + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int8x16_t r = simde_vraddhn_high_s16(r_, a, b); + + simde_test_arm_neon_write_i8x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_high_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t r_[4]; + int32_t a[4]; + int32_t b[4]; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C( 27776), INT16_C( 
16425), -INT16_C( 28190), INT16_C( 6709) }, + { -INT32_C(2073946203), -INT32_C(1363725588), -INT32_C(2019342500), -INT32_C( 640904559) }, + { INT32_C( 881477788), -INT32_C( 142697688), -INT32_C(1708525767), INT32_C(1635527412) }, + { -INT16_C( 27776), INT16_C( 16425), -INT16_C( 28190), INT16_C( 6709), -INT16_C( 18196), -INT16_C( 22986), INT16_C( 8653), INT16_C( 15177) } }, + { { -INT16_C( 27280), -INT16_C( 6940), -INT16_C( 2049), -INT16_C( 22912) }, + { INT32_C( 709292876), INT32_C(1720435809), INT32_C(2140505914), -INT32_C( 286823063) }, + { INT32_C( 439340421), INT32_C( 701800638), INT32_C( 103055639), -INT32_C( 469027266) }, + { -INT16_C( 27280), -INT16_C( 6940), -INT16_C( 2049), -INT16_C( 22912), INT16_C( 17527), -INT16_C( 28576), -INT16_C( 31302), -INT16_C( 11533) } }, + { { INT16_C( 25933), -INT16_C( 21517), INT16_C( 243), INT16_C( 15365) }, + { -INT32_C(1427898835), INT32_C( 646153999), INT32_C( 79164467), -INT32_C(1917300873) }, + { INT32_C( 146363064), INT32_C( 209990670), INT32_C( 540618427), -INT32_C( 131110686) }, + { INT16_C( 25933), -INT16_C( 21517), INT16_C( 243), INT16_C( 15365), -INT16_C( 19555), INT16_C( 13064), INT16_C( 9457), -INT16_C( 31256) } }, + { { INT16_C( 9500), INT16_C( 8869), INT16_C( 3952), INT16_C( 28525) }, + { -INT32_C( 309207677), -INT32_C(1888868347), INT32_C(1186727381), INT32_C(1604773216) }, + { INT32_C( 972938125), INT32_C(1919153794), -INT32_C( 117373942), INT32_C(1664140487) }, + { INT16_C( 9500), INT16_C( 8869), INT16_C( 3952), INT16_C( 28525), INT16_C( 10128), INT16_C( 462), INT16_C( 16317), -INT16_C( 15656) } }, + { { INT16_C( 4181), INT16_C( 25047), -INT16_C( 30655), -INT16_C( 17944) }, + { -INT32_C(1606382942), INT32_C( 793175988), INT32_C( 477026851), INT32_C(1629245270) }, + { INT32_C(1708901256), INT32_C(1914777322), -INT32_C( 521976337), INT32_C(1565363748) }, + { INT16_C( 4181), INT16_C( 25047), -INT16_C( 30655), -INT16_C( 17944), INT16_C( 1564), -INT16_C( 24216), -INT16_C( 686), -INT16_C( 16790) } }, + { { 
-INT16_C( 13165), -INT16_C( 4492), INT16_C( 5206), INT16_C( 12539) }, + { INT32_C( 900494898), INT32_C(1609560050), -INT32_C(1696559543), -INT32_C(1701266285) }, + { INT32_C(1115327487), -INT32_C( 521044433), INT32_C(1199557110), -INT32_C( 199795036) }, + { -INT16_C( 13165), -INT16_C( 4492), INT16_C( 5206), INT16_C( 12539), INT16_C( 30759), INT16_C( 16609), -INT16_C( 7584), -INT16_C( 29008) } }, + { { -INT16_C( 11831), INT16_C( 30921), INT16_C( 20387), -INT16_C( 7959) }, + { INT32_C( 769588418), INT32_C(1958588476), INT32_C(1142744858), -INT32_C( 320146418) }, + { INT32_C( 226742139), -INT32_C(1855718613), INT32_C( 591576080), -INT32_C( 375028857) }, + { -INT16_C( 11831), INT16_C( 30921), INT16_C( 20387), -INT16_C( 7959), INT16_C( 15203), INT16_C( 1570), INT16_C( 26464), -INT16_C( 10608) } }, + { { INT16_C( 27776), INT16_C( 29860), INT16_C( 12208), -INT16_C( 10081) }, + { INT32_C(1533933143), INT32_C(1029248519), INT32_C( 917892597), INT32_C(2126715181) }, + { -INT32_C( 678177319), -INT32_C(1186000480), -INT32_C( 443480976), INT32_C(1722207944) }, + { INT16_C( 27776), INT16_C( 29860), INT16_C( 12208), -INT16_C( 10081), INT16_C( 13058), -INT16_C( 2392), INT16_C( 7239), -INT16_C( 6806) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t r_ = simde_vld1_s16(test_vec[i].r_); + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int16x8_t r = simde_vraddhn_high_s32(r_, a, b); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t r_ = simde_test_arm_neon_random_i16x4(); + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int16x8_t r = simde_vraddhn_high_s32(r_, a, b); + + simde_test_arm_neon_write_i16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_high_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t r_[2]; + int64_t a[2]; + int64_t b[2]; + int32_t r[4]; + } test_vec[] = { + { { INT32_C( 197796418), INT32_C( 601210734) }, + { INT64_C(6827770305986114088), -INT64_C(3424434265917398540) }, + { -INT64_C(4870080611125820418), -INT64_C(1280185501390225677) }, + { INT32_C( 197796418), INT32_C( 601210734), INT32_C( 455810152), -INT32_C(1095379649) } }, + { { -INT32_C(1222698691), -INT32_C( 343308185) }, + { -INT64_C(1578692060818245119), -INT64_C(6924763905932136884) }, + { INT64_C(1395720023092767828), INT64_C(3170804204799828580) }, + { -INT32_C(1222698691), -INT32_C( 343308185), -INT32_C( 42601497), -INT32_C( 874036853) } }, + { { INT32_C(1845387845), INT32_C( 93834540) }, + { INT64_C(3027541176452433987), -INT64_C(1869318515588614046) }, + { -INT64_C(5554700006883697809), -INT64_C(4084970758226857716) }, + { INT32_C(1845387845), INT32_C( 93834540), -INT32_C( 588400017), -INT32_C(1386341004) } }, + { { INT32_C(1156951160), -INT32_C( 995571895) }, + { -INT64_C(5015092532983367815), -INT64_C(3034235131876238016) }, + { INT64_C(7727319190775619735), INT64_C(5183591802858582339) }, + { INT32_C(1156951160), -INT32_C( 995571895), INT32_C( 631489478), INT32_C( 500436097) } }, + { { INT32_C( 565302416), -INT32_C(1351836223) }, + { -INT64_C(2072574158439994684), INT64_C(3072145173928151423) }, + { INT64_C(6430252040398349117), INT64_C(4075474499164742695) }, + { INT32_C( 565302416), -INT32_C(1351836223), INT32_C(1014600946), INT32_C(1664184889) } }, + { { -INT32_C(2122233933), INT32_C(1928251825) }, + { -INT64_C(4381041607828903039), INT64_C(6832580255290064194) }, + { -INT64_C(2447376711268383613), INT64_C(6815464404358872476) }, + { 
-INT32_C(2122233933), INT32_C(1928251825), -INT32_C(1589865032), -INT32_C(1117284273) } }, + { { -INT32_C(1558854147), -INT32_C( 463274853) }, + { -INT64_C(5259775076737191257), -INT64_C(2017160973710465181) }, + { -INT64_C(2456389913055062980), INT64_C(1004965748914492018) }, + { -INT32_C(1558854147), -INT32_C( 463274853), -INT32_C(1796559661), -INT32_C( 235670066) } }, + { { -INT32_C( 551418001), INT32_C( 490707555) }, + { INT64_C(5975796747031062393), -INT64_C(6489678503113015990) }, + { -INT64_C(2243550412969459196), -INT64_C(852550114939876794) }, + { -INT32_C( 551418001), INT32_C( 490707555), INT32_C( 868981316), -INT32_C(1709495815) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t r_ = simde_vld1_s32(test_vec[i].r_); + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int32x4_t r = simde_vraddhn_high_s64(r_, a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t r_ = simde_test_arm_neon_random_i32x2(); + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int32x4_t r = simde_vraddhn_high_s64(r_, a, b); + + simde_test_arm_neon_write_i32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_high_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t r_[8]; + uint16_t a[8]; + uint16_t b[8]; + uint8_t r[16]; + } test_vec[] = { + { { UINT8_C( 248), UINT8_C( 188), UINT8_C( 97), UINT8_C( 28), UINT8_C( 205), UINT8_C( 80), UINT8_C( 73), UINT8_C( 137) }, + { UINT16_C( 6831), UINT16_C(57295), 
UINT16_C(13995), UINT16_C(51267), UINT16_C( 2675), UINT16_C(45311), UINT16_C(15899), UINT16_C(40484) }, + { UINT16_C(13621), UINT16_C(62747), UINT16_C(55364), UINT16_C(50561), UINT16_C( 6210), UINT16_C(10039), UINT16_C(54493), UINT16_C( 4603) }, + { UINT8_C( 248), UINT8_C( 188), UINT8_C( 97), UINT8_C( 28), UINT8_C( 205), UINT8_C( 80), UINT8_C( 73), UINT8_C( 137), UINT8_C( 80), UINT8_C( 213), UINT8_C( 15), UINT8_C( 142), UINT8_C( 35), UINT8_C( 216), UINT8_C( 19), UINT8_C( 176) } }, + { { UINT8_C( 179), UINT8_C( 238), UINT8_C( 151), UINT8_C( 173), UINT8_C( 227), UINT8_C( 0), UINT8_C( 171), UINT8_C( 109) }, + { UINT16_C(43179), UINT16_C(29009), UINT16_C(62133), UINT16_C(47539), UINT16_C( 5586), UINT16_C(25158), UINT16_C(18035), UINT16_C(31969) }, + { UINT16_C(27797), UINT16_C(35816), UINT16_C(23267), UINT16_C(29229), UINT16_C(34402), UINT16_C(51753), UINT16_C(53150), UINT16_C(28175) }, + { UINT8_C( 179), UINT8_C( 238), UINT8_C( 151), UINT8_C( 173), UINT8_C( 227), UINT8_C( 0), UINT8_C( 171), UINT8_C( 109), UINT8_C( 21), UINT8_C( 253), UINT8_C( 78), UINT8_C( 44), UINT8_C( 156), UINT8_C( 44), UINT8_C( 22), UINT8_C( 235) } }, + { { UINT8_C( 37), UINT8_C( 127), UINT8_C( 87), UINT8_C( 17), UINT8_C( 135), UINT8_C( 44), UINT8_C( 118), UINT8_C( 68) }, + { UINT16_C(19129), UINT16_C(46467), UINT16_C( 2498), UINT16_C(35093), UINT16_C(43496), UINT16_C(34612), UINT16_C( 934), UINT16_C(34885) }, + { UINT16_C(45931), UINT16_C(13891), UINT16_C(17386), UINT16_C(62037), UINT16_C(50957), UINT16_C(62197), UINT16_C(53339), UINT16_C(61720) }, + { UINT8_C( 37), UINT8_C( 127), UINT8_C( 87), UINT8_C( 17), UINT8_C( 135), UINT8_C( 44), UINT8_C( 118), UINT8_C( 68), UINT8_C( 254), UINT8_C( 236), UINT8_C( 78), UINT8_C( 123), UINT8_C( 113), UINT8_C( 122), UINT8_C( 212), UINT8_C( 121) } }, + { { UINT8_C( 213), UINT8_C( 157), UINT8_C( 114), UINT8_C( 125), UINT8_C( 50), UINT8_C( 125), UINT8_C( 116), UINT8_C( 134) }, + { UINT16_C(34704), UINT16_C(26249), UINT16_C(40822), UINT16_C(21393), 
UINT16_C(40953), UINT16_C( 3885), UINT16_C(41658), UINT16_C( 3130) }, + { UINT16_C(42703), UINT16_C(33844), UINT16_C(26839), UINT16_C(45040), UINT16_C(55653), UINT16_C(24812), UINT16_C(42037), UINT16_C(11219) }, + { UINT8_C( 213), UINT8_C( 157), UINT8_C( 114), UINT8_C( 125), UINT8_C( 50), UINT8_C( 125), UINT8_C( 116), UINT8_C( 134), UINT8_C( 46), UINT8_C( 235), UINT8_C( 8), UINT8_C( 4), UINT8_C( 121), UINT8_C( 112), UINT8_C( 71), UINT8_C( 56) } }, + { { UINT8_C( 247), UINT8_C( 189), UINT8_C( 115), UINT8_C( 214), UINT8_C( 169), UINT8_C( 148), UINT8_C( 48), UINT8_C( 97) }, + { UINT16_C( 4319), UINT16_C( 5865), UINT16_C(60338), UINT16_C(56217), UINT16_C(36234), UINT16_C(59539), UINT16_C(53208), UINT16_C(51404) }, + { UINT16_C( 4908), UINT16_C( 3243), UINT16_C(37878), UINT16_C(25620), UINT16_C(20768), UINT16_C(44230), UINT16_C(63129), UINT16_C(14658) }, + { UINT8_C( 247), UINT8_C( 189), UINT8_C( 115), UINT8_C( 214), UINT8_C( 169), UINT8_C( 148), UINT8_C( 48), UINT8_C( 97), UINT8_C( 36), UINT8_C( 36), UINT8_C( 128), UINT8_C( 64), UINT8_C( 223), UINT8_C( 149), UINT8_C( 198), UINT8_C( 2) } }, + { { UINT8_C( 123), UINT8_C( 207), UINT8_C( 90), UINT8_C( 66), UINT8_C( 35), UINT8_C( 202), UINT8_C( 11), UINT8_C( 166) }, + { UINT16_C(47921), UINT16_C(52841), UINT16_C(39201), UINT16_C(62255), UINT16_C(26454), UINT16_C(46295), UINT16_C(55742), UINT16_C(65404) }, + { UINT16_C(34926), UINT16_C( 4068), UINT16_C( 9426), UINT16_C(47581), UINT16_C(31106), UINT16_C(61539), UINT16_C(11933), UINT16_C( 7762) }, + { UINT8_C( 123), UINT8_C( 207), UINT8_C( 90), UINT8_C( 66), UINT8_C( 35), UINT8_C( 202), UINT8_C( 11), UINT8_C( 166), UINT8_C( 68), UINT8_C( 222), UINT8_C( 190), UINT8_C( 173), UINT8_C( 225), UINT8_C( 165), UINT8_C( 8), UINT8_C( 30) } }, + { { UINT8_C( 214), UINT8_C( 91), UINT8_C( 217), UINT8_C( 199), UINT8_C( 144), UINT8_C( 76), UINT8_C( 91), UINT8_C( 148) }, + { UINT16_C(14970), UINT16_C(59825), UINT16_C(39864), UINT16_C(30311), UINT16_C(12793), UINT16_C(29806), UINT16_C( 4536), 
UINT16_C(55184) }, + { UINT16_C(14464), UINT16_C(50342), UINT16_C(22620), UINT16_C(44678), UINT16_C(45214), UINT16_C(50951), UINT16_C(63568), UINT16_C(63058) }, + { UINT8_C( 214), UINT8_C( 91), UINT8_C( 217), UINT8_C( 199), UINT8_C( 144), UINT8_C( 76), UINT8_C( 91), UINT8_C( 148), UINT8_C( 115), UINT8_C( 174), UINT8_C( 244), UINT8_C( 37), UINT8_C( 227), UINT8_C( 59), UINT8_C( 10), UINT8_C( 206) } }, + { { UINT8_C( 200), UINT8_C( 252), UINT8_C( 181), UINT8_C( 219), UINT8_C( 210), UINT8_C( 30), UINT8_C( 224), UINT8_C( 92) }, + { UINT16_C(64475), UINT16_C(41045), UINT16_C(48667), UINT16_C(15480), UINT16_C( 5615), UINT16_C(48980), UINT16_C(40869), UINT16_C( 5807) }, + { UINT16_C( 3562), UINT16_C(52315), UINT16_C(62713), UINT16_C(18955), UINT16_C( 183), UINT16_C(59516), UINT16_C( 382), UINT16_C(15339) }, + { UINT8_C( 200), UINT8_C( 252), UINT8_C( 181), UINT8_C( 219), UINT8_C( 210), UINT8_C( 30), UINT8_C( 224), UINT8_C( 92), UINT8_C( 10), UINT8_C( 109), UINT8_C( 179), UINT8_C( 135), UINT8_C( 23), UINT8_C( 168), UINT8_C( 161), UINT8_C( 83) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t r_ = simde_vld1_u8(test_vec[i].r_); + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint8x16_t r = simde_vraddhn_high_u16(r_, a, b); + + simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t r_ = simde_test_arm_neon_random_u8x8(); + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint8x16_t r = simde_vraddhn_high_u16(r_, a, b); + + simde_test_arm_neon_write_u8x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_high_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t r_[4]; + uint32_t a[4]; + uint32_t b[4]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 23077), UINT16_C( 46664), UINT16_C( 36278), UINT16_C( 22401) }, + { UINT32_C( 212850905), UINT32_C(1467528063), UINT32_C(1489007594), UINT32_C(2026089729) }, + { UINT32_C( 436730000), UINT32_C(1221238491), UINT32_C(2481626902), UINT32_C(2509229408) }, + { UINT16_C( 23077), UINT16_C( 46664), UINT16_C( 36278), UINT16_C( 22401), UINT16_C( 9912), UINT16_C( 41027), UINT16_C( 60587), UINT16_C( 3667) } }, + { { UINT16_C( 26972), UINT16_C( 25134), UINT16_C( 54033), UINT16_C( 46084) }, + { UINT32_C( 194934793), UINT32_C(2438547165), UINT32_C(4072150333), UINT32_C(2075239274) }, + { UINT32_C(1753146815), UINT32_C(3111271966), UINT32_C(2964399735), UINT32_C(2843280803) }, + { UINT16_C( 26972), UINT16_C( 25134), UINT16_C( 54033), UINT16_C( 46084), UINT16_C( 29725), UINT16_C( 19148), UINT16_C( 41833), UINT16_C( 9515) } }, + { { UINT16_C( 993), UINT16_C( 21997), UINT16_C( 9029), UINT16_C( 23301) }, + { UINT32_C(4255305104), UINT32_C(3169505596), UINT32_C(1310219469), UINT32_C( 809894928) }, + { UINT32_C(2689553230), UINT32_C(2556160718), UINT32_C(1341585880), UINT32_C(2876365377) }, + { UINT16_C( 993), UINT16_C( 21997), UINT16_C( 9029), UINT16_C( 23301), UINT16_C( 40434), UINT16_C( 21831), UINT16_C( 40463), UINT16_C( 56248) } }, + { { UINT16_C( 54382), UINT16_C( 485), UINT16_C( 47179), UINT16_C( 37991) }, + { UINT32_C( 912277427), UINT32_C(2598040794), UINT32_C(2358557871), UINT32_C( 494170348) }, + { UINT32_C( 86749149), UINT32_C(4016885293), UINT32_C(1102033753), UINT32_C(3795884245) }, + { UINT16_C( 54382), UINT16_C( 485), UINT16_C( 47179), UINT16_C( 37991), UINT16_C( 15244), UINT16_C( 35400), UINT16_C( 52804), UINT16_C( 65461) } }, + { { UINT16_C( 40375), UINT16_C( 24908), UINT16_C( 46986), UINT16_C( 40104) }, + { 
UINT32_C(3645706326), UINT32_C(2447247706), UINT32_C( 319248759), UINT32_C( 658432665) }, + { UINT32_C(1671320241), UINT32_C(3353978200), UINT32_C( 428610015), UINT32_C(1633781989) }, + { UINT16_C( 40375), UINT16_C( 24908), UINT16_C( 46986), UINT16_C( 40104), UINT16_C( 15595), UINT16_C( 22984), UINT16_C( 11411), UINT16_C( 34976) } }, + { { UINT16_C( 3244), UINT16_C( 25605), UINT16_C( 17142), UINT16_C( 40589) }, + { UINT32_C(3342312085), UINT32_C( 843603761), UINT32_C(4192013367), UINT32_C(2339055831) }, + { UINT32_C(1884188089), UINT32_C( 86746460), UINT32_C(2553337019), UINT32_C(2959916542) }, + { UINT16_C( 3244), UINT16_C( 25605), UINT16_C( 17142), UINT16_C( 40589), UINT16_C( 14214), UINT16_C( 14196), UINT16_C( 37390), UINT16_C( 15320) } }, + { { UINT16_C( 33641), UINT16_C( 2906), UINT16_C( 19825), UINT16_C( 24144) }, + { UINT32_C(1982882836), UINT32_C(2281243819), UINT32_C( 946004145), UINT32_C( 395418665) }, + { UINT32_C(1663450668), UINT32_C(4273103743), UINT32_C(3004646997), UINT32_C(1238387319) }, + { UINT16_C( 33641), UINT16_C( 2906), UINT16_C( 19825), UINT16_C( 24144), UINT16_C( 55639), UINT16_C( 34475), UINT16_C( 60282), UINT16_C( 24930) } }, + { { UINT16_C( 6456), UINT16_C( 56808), UINT16_C( 26148), UINT16_C( 7183) }, + { UINT32_C(2335372976), UINT32_C(3737714976), UINT32_C(1714085649), UINT32_C(3659772348) }, + { UINT32_C(3806344607), UINT32_C( 127876483), UINT32_C( 823615594), UINT32_C(1708678494) }, + { UINT16_C( 6456), UINT16_C( 56808), UINT16_C( 26148), UINT16_C( 7183), UINT16_C( 28179), UINT16_C( 58984), UINT16_C( 38722), UINT16_C( 16380) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t r_ = simde_vld1_u16(test_vec[i].r_); + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint16x8_t r = simde_vraddhn_high_u32(r_, a, b); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else 
+ fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t r_ = simde_test_arm_neon_random_u16x4(); + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint16x8_t r = simde_vraddhn_high_u32(r_, a, b); + + simde_test_arm_neon_write_u16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vraddhn_high_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t r_[2]; + uint64_t a[2]; + uint64_t b[2]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 1833090949), UINT32_C( 159173040) }, + { UINT64_C( 7595685821160238545), UINT64_C( 5460164061812335858) }, + { UINT64_C( 2326480958303370666), UINT64_C( 8560840016854787856) }, + { UINT32_C( 1833090949), UINT32_C( 159173040), UINT32_C( 2310184478), UINT32_C( 3264519404) } }, + { { UINT32_C( 1340950353), UINT32_C( 2345674004) }, + { UINT64_C(14288960134217676844), UINT64_C( 9711330258489778561) }, + { UINT64_C( 5010837924671730567), UINT64_C(12706785817686801043) }, + { UINT32_C( 1340950353), UINT32_C( 2345674004), UINT32_C( 198617108), UINT32_C( 924657100) } }, + { { UINT32_C( 167826599), UINT32_C( 2840686835) }, + { UINT64_C(16502632842102257134), UINT64_C( 9241563249155967922) }, + { UINT64_C(12310152090430206381), UINT64_C( 2353024520452018480) }, + { UINT32_C( 167826599), UINT32_C( 2840686835), UINT32_C( 2413531965), UINT32_C( 2699575333) } }, + { { UINT32_C( 2476932675), UINT32_C( 2198905573) }, + { UINT64_C( 3807331566796253338), UINT64_C(18435570049911670851) }, + { UINT64_C( 3478240210011748394), UINT64_C( 334130327661001499) }, + { UINT32_C( 2476932675), UINT32_C( 2198905573), UINT32_C( 1696304366), UINT32_C( 75194124) } }, + { { UINT32_C( 2749035874), UINT32_C( 1323215606) }, 
+ { UINT64_C(11951820368738700930), UINT64_C( 2454177180675006729) }, + { UINT64_C( 862517325357223436), UINT64_C(12539832836604929412) }, + { UINT32_C( 2749035874), UINT32_C( 1323215606), UINT32_C( 2983570493), UINT32_C( 3491065003) } }, + { { UINT32_C( 2261109996), UINT32_C( 2703799959) }, + { UINT64_C(17439214036731252933), UINT64_C( 4281041593916534883) }, + { UINT64_C( 5627168252961347552), UINT64_C( 4478691679238507457) }, + { UINT32_C( 2261109996), UINT32_C( 2703799959), UINT32_C( 1075593339), UINT32_C( 2039534336) } }, + { { UINT32_C( 4089219629), UINT32_C( 4037508497) }, + { UINT64_C( 6537037128722859108), UINT64_C( 9912793837125092983) }, + { UINT64_C( 192154116194292355), UINT64_C( 3961895855238817793) }, + { UINT32_C( 4089219629), UINT32_C( 4037508497), UINT32_C( 1566761929), UINT32_C( 3230452932) } }, + { { UINT32_C( 2292246865), UINT32_C( 776206411) }, + { UINT64_C(16001022629824472191), UINT64_C(14157820913506211794) }, + { UINT64_C( 2190238845283507523), UINT64_C(17377647137311425031) }, + { UINT32_C( 2292246865), UINT32_C( 776206411), UINT32_C( 4235483118), UINT32_C( 3047456028) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t r_ = simde_vld1_u32(test_vec[i].r_); + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + simde_uint32x4_t r = simde_vraddhn_high_u64(r_, a, b); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t r_ = simde_test_arm_neon_random_u32x2(); + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint32x4_t r = simde_vraddhn_high_u64(r_, a, b); + + simde_test_arm_neon_write_u32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_s64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vraddhn_high_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/reinterpret.c b/test/arm/neon/reinterpret.c index fe0c92ac7..81c74ad15 100644 --- a/test/arm/neon/reinterpret.c +++ b/test/arm/neon/reinterpret.c @@ -37,6 +37,7 @@ test_simde_vreinterpret_f32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -73,6 +74,7 @@ test_simde_vreinterpret_f64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -109,6 +111,7 @@ test_simde_vreinterpret_s16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -145,6 +148,7 @@ test_simde_vreinterpret_s32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -181,6 +185,7 @@ test_simde_vreinterpret_s64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -217,6 +222,7 @@ test_simde_vreinterpret_u8_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -252,6 +258,7 @@ test_simde_vreinterpret_u16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -287,6 +294,7 @@ test_simde_vreinterpret_u32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -322,6 +330,7 @@ test_simde_vreinterpret_u64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -365,6 +374,7 @@ test_simde_vreinterpretq_f32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -409,6 +419,7 @@ test_simde_vreinterpretq_f64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -453,6 +464,7 @@ test_simde_vreinterpretq_s16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -496,6 +508,7 @@ test_simde_vreinterpretq_s32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; 
+ } static int @@ -539,6 +552,7 @@ test_simde_vreinterpretq_s64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -583,6 +597,7 @@ test_simde_vreinterpretq_u8_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -626,6 +641,7 @@ test_simde_vreinterpretq_u16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -670,6 +686,7 @@ test_simde_vreinterpretq_u32_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -713,6 +730,7 @@ test_simde_vreinterpretq_u64_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -749,6 +767,7 @@ test_simde_vreinterpret_f32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -785,6 +804,7 @@ test_simde_vreinterpret_f64_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -821,6 +841,7 @@ test_simde_vreinterpret_s8_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -857,6 +878,7 @@ test_simde_vreinterpret_s32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -893,6 +915,7 @@ test_simde_vreinterpret_s64_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -929,6 +952,7 @@ test_simde_vreinterpret_u8_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -964,6 +988,7 @@ test_simde_vreinterpret_u16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -999,6 +1024,7 @@ test_simde_vreinterpret_u32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1034,6 +1060,7 @@ test_simde_vreinterpret_u64_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1069,6 +1096,7 @@ test_simde_vreinterpretq_f32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1105,6 +1133,7 @@ test_simde_vreinterpretq_f64_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1141,6 +1170,7 @@ test_simde_vreinterpretq_s8_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1176,6 +1206,7 @@ test_simde_vreinterpretq_s32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1211,6 +1242,7 @@ test_simde_vreinterpretq_s64_s16 (SIMDE_MUNIT_TEST_ARGS) { } 
return 0; + } static int @@ -1247,6 +1279,7 @@ test_simde_vreinterpretq_u8_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1282,6 +1315,7 @@ test_simde_vreinterpretq_u16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1318,6 +1352,7 @@ test_simde_vreinterpretq_u32_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1353,6 +1388,7 @@ test_simde_vreinterpretq_u64_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1389,6 +1425,7 @@ test_simde_vreinterpret_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1425,6 +1462,7 @@ test_simde_vreinterpret_f64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1461,6 +1499,7 @@ test_simde_vreinterpret_s8_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1497,6 +1536,7 @@ test_simde_vreinterpret_s16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1533,6 +1573,7 @@ test_simde_vreinterpret_s64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1569,6 +1610,7 @@ test_simde_vreinterpret_u8_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1604,6 +1646,7 @@ test_simde_vreinterpret_u16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1639,6 +1682,7 @@ test_simde_vreinterpret_u32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1674,6 +1718,7 @@ test_simde_vreinterpret_u64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1709,6 +1754,7 @@ test_simde_vreinterpretq_f32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1745,6 +1791,7 @@ test_simde_vreinterpretq_f64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1781,6 +1828,7 @@ test_simde_vreinterpretq_s8_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1817,6 +1865,7 @@ test_simde_vreinterpretq_s16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1852,6 +1901,7 @@ test_simde_vreinterpretq_s64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1888,6 +1938,7 @@ 
test_simde_vreinterpretq_u8_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1923,6 +1974,7 @@ test_simde_vreinterpretq_u16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1959,6 +2011,7 @@ test_simde_vreinterpretq_u32_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -1994,6 +2047,7 @@ test_simde_vreinterpretq_u64_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2030,6 +2084,7 @@ test_simde_vreinterpret_f32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2066,6 +2121,7 @@ test_simde_vreinterpret_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2102,6 +2158,7 @@ test_simde_vreinterpret_s8_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2138,6 +2195,7 @@ test_simde_vreinterpret_s16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2174,6 +2232,7 @@ test_simde_vreinterpret_s32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2210,6 +2269,7 @@ test_simde_vreinterpret_u8_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2245,6 +2305,7 @@ test_simde_vreinterpret_u16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2280,6 +2341,7 @@ test_simde_vreinterpret_u32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2315,6 +2377,7 @@ test_simde_vreinterpret_u64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2350,6 +2413,7 @@ test_simde_vreinterpretq_f32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2386,6 +2450,7 @@ test_simde_vreinterpretq_f64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2422,6 +2487,7 @@ test_simde_vreinterpretq_s8_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2458,6 +2524,7 @@ test_simde_vreinterpretq_s16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2493,6 +2560,7 @@ test_simde_vreinterpretq_s32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2529,6 +2597,7 @@ test_simde_vreinterpretq_u8_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 
0; + } static int @@ -2564,6 +2633,7 @@ test_simde_vreinterpretq_u16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2600,6 +2670,7 @@ test_simde_vreinterpretq_u32_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2635,6 +2706,7 @@ test_simde_vreinterpretq_u64_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2671,6 +2743,7 @@ test_simde_vreinterpret_f32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2707,6 +2780,7 @@ test_simde_vreinterpret_f64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2743,6 +2817,7 @@ test_simde_vreinterpret_s8_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2779,6 +2854,7 @@ test_simde_vreinterpret_s16_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2815,6 +2891,7 @@ test_simde_vreinterpret_s32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2851,6 +2928,7 @@ test_simde_vreinterpret_s64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2886,6 +2964,7 @@ test_simde_vreinterpret_u16_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2921,6 +3000,7 @@ test_simde_vreinterpret_u32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2956,6 +3036,7 @@ test_simde_vreinterpret_u64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -2999,6 +3080,7 @@ test_simde_vreinterpretq_f32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3043,6 +3125,7 @@ test_simde_vreinterpretq_f64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3087,6 +3170,7 @@ test_simde_vreinterpretq_s8_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3131,6 +3215,7 @@ test_simde_vreinterpretq_s16_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3174,6 +3259,7 @@ test_simde_vreinterpretq_s32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3217,6 +3303,7 @@ test_simde_vreinterpretq_s64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3260,6 +3347,7 @@ test_simde_vreinterpretq_u16_u8 
(SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3304,6 +3392,7 @@ test_simde_vreinterpretq_u32_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3347,6 +3436,7 @@ test_simde_vreinterpretq_u64_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3383,6 +3473,7 @@ test_simde_vreinterpret_f32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3419,6 +3510,7 @@ test_simde_vreinterpret_f64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3455,6 +3547,7 @@ test_simde_vreinterpret_s8_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3491,6 +3584,7 @@ test_simde_vreinterpret_s16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3527,6 +3621,7 @@ test_simde_vreinterpret_s32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3563,6 +3658,7 @@ test_simde_vreinterpret_s64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3599,6 +3695,7 @@ test_simde_vreinterpret_u8_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3634,6 +3731,7 @@ test_simde_vreinterpret_u32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3669,6 +3767,7 @@ test_simde_vreinterpret_u64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3704,6 +3803,7 @@ test_simde_vreinterpretq_f32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3740,6 +3840,7 @@ test_simde_vreinterpretq_f64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3776,6 +3877,7 @@ test_simde_vreinterpretq_s8_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3812,6 +3914,7 @@ test_simde_vreinterpretq_s16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3847,6 +3950,7 @@ test_simde_vreinterpretq_s32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3882,6 +3986,7 @@ test_simde_vreinterpretq_s64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3918,6 +4023,7 @@ test_simde_vreinterpretq_u8_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3954,6 
+4060,7 @@ test_simde_vreinterpretq_u32_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -3989,6 +4096,7 @@ test_simde_vreinterpretq_u64_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4016,6 +4124,7 @@ test_simde_vreinterpret_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4052,6 +4161,7 @@ test_simde_vreinterpret_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4088,6 +4198,7 @@ test_simde_vreinterpret_f64_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4124,6 +4235,7 @@ test_simde_vreinterpret_s8_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4160,6 +4272,7 @@ test_simde_vreinterpret_s16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4196,6 +4309,7 @@ test_simde_vreinterpret_s32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4232,6 +4346,7 @@ test_simde_vreinterpret_s64_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4268,6 +4383,7 @@ test_simde_vreinterpret_u8_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4303,6 +4419,7 @@ test_simde_vreinterpret_u16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4338,6 +4455,7 @@ test_simde_vreinterpret_u64_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4365,6 +4483,7 @@ test_simde_vreinterpretq_f16_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4400,6 +4519,7 @@ test_simde_vreinterpretq_f32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4436,6 +4556,7 @@ test_simde_vreinterpretq_f64_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4472,6 +4593,7 @@ test_simde_vreinterpretq_s8_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4508,6 +4630,7 @@ test_simde_vreinterpretq_s16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4543,6 +4666,7 @@ test_simde_vreinterpretq_s32_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4578,6 +4702,7 @@ test_simde_vreinterpretq_s64_u32 (SIMDE_MUNIT_TEST_ARGS) 
{ } return 0; + } static int @@ -4614,6 +4739,7 @@ test_simde_vreinterpretq_u8_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4649,6 +4775,7 @@ test_simde_vreinterpretq_u16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4684,6 +4811,7 @@ test_simde_vreinterpretq_u64_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4720,6 +4848,7 @@ test_simde_vreinterpret_f32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4756,6 +4885,7 @@ test_simde_vreinterpret_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4792,6 +4922,7 @@ test_simde_vreinterpret_s8_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4828,6 +4959,7 @@ test_simde_vreinterpret_s16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4864,6 +4996,7 @@ test_simde_vreinterpret_s32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4900,6 +5033,7 @@ test_simde_vreinterpret_s64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4936,6 +5070,7 @@ test_simde_vreinterpret_u8_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -4971,6 +5106,7 @@ test_simde_vreinterpret_u16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5006,6 +5142,7 @@ test_simde_vreinterpret_u32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5041,6 +5178,7 @@ test_simde_vreinterpretq_f32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5077,6 +5215,7 @@ test_simde_vreinterpretq_f64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5113,6 +5252,7 @@ test_simde_vreinterpretq_s8_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5149,6 +5289,7 @@ test_simde_vreinterpretq_s16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5184,6 +5325,7 @@ test_simde_vreinterpretq_s32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5219,6 +5361,7 @@ test_simde_vreinterpretq_s64_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5255,6 +5398,7 @@ 
test_simde_vreinterpretq_u8_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5290,6 +5434,7 @@ test_simde_vreinterpretq_u16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5326,6 +5471,7 @@ test_simde_vreinterpretq_u32_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5362,6 +5508,7 @@ test_simde_vreinterpret_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5398,6 +5545,7 @@ test_simde_vreinterpret_s8_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5434,6 +5582,7 @@ test_simde_vreinterpret_s16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5470,6 +5619,7 @@ test_simde_vreinterpret_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5506,6 +5656,7 @@ test_simde_vreinterpret_s64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5542,6 +5693,7 @@ test_simde_vreinterpret_u8_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5577,6 +5729,7 @@ test_simde_vreinterpret_u16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5603,6 +5756,7 @@ test_simde_vreinterpret_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5638,33 +5792,7 @@ test_simde_vreinterpret_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; -} - -static int -test_simde_vreinterpret_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { - struct { - simde_float16 a[4]; - } test_vec[] = { - { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, - { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, - { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, - { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, - { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( 
-439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, - { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, - { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, - { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, - { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, - }; - - for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { - simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); - simde_float16x4_private a_ = simde_float16x4_to_private(a); - simde_uint64x1_t r = simde_vreinterpret_u64_f16(a); - simde_uint64x1_private r_ = simde_uint64x1_to_private(r); - simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); - } - return 0; } static int @@ -5700,6 +5828,7 @@ test_simde_vreinterpret_u64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5736,6 +5865,7 @@ test_simde_vreinterpretq_f64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5772,6 +5902,7 @@ test_simde_vreinterpretq_s8_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5808,6 +5939,7 @@ test_simde_vreinterpretq_s16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5843,6 +5975,7 @@ test_simde_vreinterpretq_s32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5878,6 +6011,7 @@ test_simde_vreinterpretq_s64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5914,6 +6048,7 @@ test_simde_vreinterpretq_u8_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5949,6 +6084,7 @@ test_simde_vreinterpretq_u16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -5983,6 +6119,7 @@ test_simde_vreinterpretq_u16_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6019,6 +6156,7 @@ 
test_simde_vreinterpretq_u32_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6054,6 +6192,7 @@ test_simde_vreinterpretq_u64_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6090,6 +6229,7 @@ test_simde_vreinterpret_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6126,6 +6266,7 @@ test_simde_vreinterpret_s8_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6162,6 +6303,7 @@ test_simde_vreinterpret_s16_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6198,6 +6340,7 @@ test_simde_vreinterpret_s32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6234,6 +6377,7 @@ test_simde_vreinterpret_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6270,6 +6414,7 @@ test_simde_vreinterpret_u8_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6305,6 +6450,7 @@ test_simde_vreinterpret_u16_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6340,6 +6486,7 @@ test_simde_vreinterpret_u32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6375,6 +6522,7 @@ test_simde_vreinterpret_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6410,6 +6558,7 @@ test_simde_vreinterpretq_f32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6446,6 +6595,7 @@ test_simde_vreinterpretq_s8_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6482,6 +6632,7 @@ test_simde_vreinterpretq_s16_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6517,6 +6668,7 @@ test_simde_vreinterpretq_s32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6552,6 +6704,7 @@ test_simde_vreinterpretq_s64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6588,6 +6741,7 @@ test_simde_vreinterpretq_u8_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6623,6 +6777,7 @@ test_simde_vreinterpretq_u16_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6659,6 +6814,7 @@ test_simde_vreinterpretq_u32_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 
0; + } static int @@ -6694,6 +6850,7 @@ test_simde_vreinterpretq_u64_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6730,6 +6887,7 @@ test_simde_vreinterpret_f16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6766,6 +6924,7 @@ test_simde_vreinterpret_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6802,6 +6961,7 @@ test_simde_vreinterpret_f16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6838,6 +6998,7 @@ test_simde_vreinterpret_f16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6874,6 +7035,7 @@ test_simde_vreinterpret_f16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6910,6 +7072,7 @@ test_simde_vreinterpret_f16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6946,6 +7109,7 @@ test_simde_vreinterpret_f16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -6982,6 +7146,7 @@ test_simde_vreinterpret_f16_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7018,6 +7183,7 @@ test_simde_vreinterpretq_f16_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7053,6 +7219,7 @@ test_simde_vreinterpretq_f16_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7089,6 +7256,7 @@ test_simde_vreinterpretq_f16_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7124,6 +7292,7 @@ test_simde_vreinterpretq_f16_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7167,6 +7336,7 @@ test_simde_vreinterpretq_f16_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7203,6 +7373,7 @@ test_simde_vreinterpretq_f16_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7238,6 +7409,7 @@ test_simde_vreinterpretq_f16_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } static int @@ -7281,20 +7453,657 @@ test_simde_vreinterpretq_f16_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + } -SIMDE_TEST_FUNC_LIST_BEGIN -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s8) 
-SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s8) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s8) +static int +test_simde_vreinterpret_f16_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 a[1]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( 28.67) }, + }, + { { SIMDE_FLOAT64_C( 473.75) }, + }, + { { SIMDE_FLOAT64_C( 415.56) }, + }, + { { SIMDE_FLOAT64_C( 753.78) }, + }, + { { SIMDE_FLOAT64_C( 619.03) }, + }, + { { SIMDE_FLOAT64_C( 870.52) }, + }, + { { SIMDE_FLOAT64_C( -321.27) }, + }, + { { SIMDE_FLOAT64_C( -568.79) }, + }, -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s8) + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x1_t a = simde_vld1_f64(test_vec[i].a); + simde_float64x1_private a_ = simde_float64x1_to_private(a); + simde_float16x4_t r = simde_vreinterpret_f16_f64(a); + simde_float16x4_private r_ = simde_float16x4_to_private(r); + + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_f16_f64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float64 a[2]; + } test_vec[] = { + { { SIMDE_FLOAT64_C( -143.25), SIMDE_FLOAT64_C( 834.85) }, + }, + { { SIMDE_FLOAT64_C( -261.67), SIMDE_FLOAT64_C( -140.07) }, + }, + { { SIMDE_FLOAT64_C( 513.01), SIMDE_FLOAT64_C( -658.64) }, + }, + { { SIMDE_FLOAT64_C( -687.83), SIMDE_FLOAT64_C( 764.02) }, + }, + { { SIMDE_FLOAT64_C( -465.12), SIMDE_FLOAT64_C( -765.44) }, + }, + { { SIMDE_FLOAT64_C( 433.84), SIMDE_FLOAT64_C( -259.91) }, + }, + { { SIMDE_FLOAT64_C( -578.38), SIMDE_FLOAT64_C( 507.83) }, + }, + { { SIMDE_FLOAT64_C( 973.67), SIMDE_FLOAT64_C( -82.78) }, + }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float64x2_t a = 
simde_vld1q_f64(test_vec[i].a); + simde_float64x2_private a_ = simde_float64x2_to_private(a); + simde_float16x8_t r = simde_vreinterpretq_f16_f64(a); + simde_float16x8_private r_ = simde_float16x8_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_f32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_float32x2_t r = simde_vreinterpret_f32_f16(a); + simde_float32x2_private r_ = simde_float32x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + 
+ return 0; + +} + +static int +test_simde_vreinterpretq_f32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), 
SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_float32x4_t r = simde_vreinterpretq_f32_f16(a); + simde_float32x4_private r_ = simde_float32x4_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_f64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + 
simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_float64x1_t r = simde_vreinterpret_f64_f16(a); + simde_float64x1_private r_ = simde_float64x1_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_f64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { 
SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_float64x2_t r = simde_vreinterpretq_f64_f16(a); + simde_float64x2_private r_ = simde_float64x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_s8_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), 
SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int8x8_t r = simde_vreinterpret_s8_f16(a); + simde_int8x8_private r_ = simde_int8x8_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_s8_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), 
SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int8x16_t r = simde_vreinterpretq_s8_f16(a); + simde_int8x16_private r_ = simde_int8x16_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { 
SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int16x4_t r = simde_vreinterpret_s16_f16(a); + simde_int16x4_private r_ = simde_int16x4_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_s16_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 
796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int16x8_t r = simde_vreinterpretq_s16_f16(a); + simde_int16x8_private r_ = simde_int16x8_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), 
SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int32x2_t r = simde_vreinterpret_s32_f16(a); + simde_int32x2_private r_ = simde_int32x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_s32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + 
SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int32x4_t r = simde_vreinterpretq_s32_f16(a); + simde_int32x4_private r_ = simde_int32x4_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), 
SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_int64x1_t r = simde_vreinterpret_s64_f16(a); + simde_int64x1_private r_ = simde_int64x1_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_s64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), 
SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_int64x2_t r = simde_vreinterpretq_s64_f16(a); + simde_int64x2_private r_ = 
simde_int64x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpret_u8_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint8x8_t r = simde_vreinterpret_u8_f16(a); + simde_uint8x8_private r_ = simde_uint8x8_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; + +} + +static int +test_simde_vreinterpretq_u8_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), 
SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 
84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint8x16_t r = simde_vreinterpretq_u8_f16(a); + simde_uint8x16_private r_ = simde_uint8x16_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + +static int +test_simde_vreinterpret_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint32x2_t r = simde_vreinterpret_u32_f16(a); + 
simde_uint32x2_private r_ = simde_uint32x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + +static int +test_simde_vreinterpretq_u32_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), 
SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint32x4_t r = simde_vreinterpretq_u32_f16(a); + simde_uint32x4_private r_ = simde_uint32x4_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + +static int +test_simde_vreinterpret_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( -49.28), SIMDE_FLOAT16_VALUE( -109.00), SIMDE_FLOAT16_VALUE( -626.50), SIMDE_FLOAT16_VALUE( -567.00) } }, + { { SIMDE_FLOAT16_VALUE( -178.88), SIMDE_FLOAT16_VALUE( 10.22), SIMDE_FLOAT16_VALUE( 976.50), SIMDE_FLOAT16_VALUE( -31.19) } }, + { { SIMDE_FLOAT16_VALUE( -228.12), SIMDE_FLOAT16_VALUE( -98.75), SIMDE_FLOAT16_VALUE( 350.00), SIMDE_FLOAT16_VALUE( -598.00) } }, + { { SIMDE_FLOAT16_VALUE( -226.00), SIMDE_FLOAT16_VALUE( -520.50), SIMDE_FLOAT16_VALUE( -252.38), SIMDE_FLOAT16_VALUE( -407.50) } }, + { { SIMDE_FLOAT16_VALUE( 89.44), SIMDE_FLOAT16_VALUE( -200.50), SIMDE_FLOAT16_VALUE( -439.75), SIMDE_FLOAT16_VALUE( -450.75) } }, + { { SIMDE_FLOAT16_VALUE( -136.50), SIMDE_FLOAT16_VALUE( -721.00), SIMDE_FLOAT16_VALUE( -692.00), SIMDE_FLOAT16_VALUE( -858.00) } }, + { { SIMDE_FLOAT16_VALUE( -833.00), SIMDE_FLOAT16_VALUE( -714.00), SIMDE_FLOAT16_VALUE( 428.50), SIMDE_FLOAT16_VALUE( 871.50) } }, + { { SIMDE_FLOAT16_VALUE( -157.12), SIMDE_FLOAT16_VALUE( 972.50), SIMDE_FLOAT16_VALUE( 298.75), SIMDE_FLOAT16_VALUE( -919.50) } }, + { { SIMDE_FLOAT16_VALUE( -990.00), SIMDE_FLOAT16_VALUE( 258.50), SIMDE_FLOAT16_VALUE( 727.00), 
SIMDE_FLOAT16_VALUE( -48.00) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_private a_ = simde_float16x4_to_private(a); + simde_uint64x1_t r = simde_vreinterpret_u64_f16(a); + simde_uint64x1_private r_ = simde_uint64x1_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + +static int +test_simde_vreinterpretq_u64_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16 a[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 615.50), SIMDE_FLOAT16_VALUE( -978.50), SIMDE_FLOAT16_VALUE( -561.50), SIMDE_FLOAT16_VALUE( 508.50), + SIMDE_FLOAT16_VALUE( -968.00), SIMDE_FLOAT16_VALUE( -316.25), SIMDE_FLOAT16_VALUE( -961.50), SIMDE_FLOAT16_VALUE( 786.50) } }, + { { SIMDE_FLOAT16_VALUE( 968.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -0.40), SIMDE_FLOAT16_VALUE( 486.00), + SIMDE_FLOAT16_VALUE( 819.00), SIMDE_FLOAT16_VALUE( 807.00), SIMDE_FLOAT16_VALUE( -690.00), SIMDE_FLOAT16_VALUE( -245.50) } }, + { { SIMDE_FLOAT16_VALUE( -341.50), SIMDE_FLOAT16_VALUE( -184.50), SIMDE_FLOAT16_VALUE( 865.50), SIMDE_FLOAT16_VALUE( 213.38), + SIMDE_FLOAT16_VALUE( 627.50), SIMDE_FLOAT16_VALUE( -115.00), SIMDE_FLOAT16_VALUE( -649.50), SIMDE_FLOAT16_VALUE( 0.66) } }, + { { SIMDE_FLOAT16_VALUE( -466.00), SIMDE_FLOAT16_VALUE( 230.25), SIMDE_FLOAT16_VALUE( 358.50), SIMDE_FLOAT16_VALUE( -979.50), + SIMDE_FLOAT16_VALUE( 474.25), SIMDE_FLOAT16_VALUE( 796.00), SIMDE_FLOAT16_VALUE( 782.00), SIMDE_FLOAT16_VALUE( 89.62) } }, + { { SIMDE_FLOAT16_VALUE( 817.50), SIMDE_FLOAT16_VALUE( -779.50), SIMDE_FLOAT16_VALUE( -402.00), SIMDE_FLOAT16_VALUE( 849.50), + SIMDE_FLOAT16_VALUE( -95.62), SIMDE_FLOAT16_VALUE( -363.50), SIMDE_FLOAT16_VALUE( 636.00), SIMDE_FLOAT16_VALUE( -127.19) } }, + { { SIMDE_FLOAT16_VALUE( -142.88), SIMDE_FLOAT16_VALUE( -364.50), SIMDE_FLOAT16_VALUE( -641.00), SIMDE_FLOAT16_VALUE( -324.00), + SIMDE_FLOAT16_VALUE( -558.00), 
SIMDE_FLOAT16_VALUE( -331.25), SIMDE_FLOAT16_VALUE( 430.50), SIMDE_FLOAT16_VALUE( 100.62) } }, + { { SIMDE_FLOAT16_VALUE( 484.25), SIMDE_FLOAT16_VALUE( 296.00), SIMDE_FLOAT16_VALUE( -686.00), SIMDE_FLOAT16_VALUE( 111.56), + SIMDE_FLOAT16_VALUE( -819.00), SIMDE_FLOAT16_VALUE( -335.50), SIMDE_FLOAT16_VALUE( -888.00), SIMDE_FLOAT16_VALUE( -284.75) } }, + { { SIMDE_FLOAT16_VALUE( 894.50), SIMDE_FLOAT16_VALUE( 470.75), SIMDE_FLOAT16_VALUE( -264.25), SIMDE_FLOAT16_VALUE( 369.00), + SIMDE_FLOAT16_VALUE( 266.75), SIMDE_FLOAT16_VALUE( -482.00), SIMDE_FLOAT16_VALUE( -541.50), SIMDE_FLOAT16_VALUE( 84.12) } } + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_private a_ = simde_float16x8_to_private(a); + simde_uint64x2_t r = simde_vreinterpretq_u64_f16(a); + simde_uint64x2_private r_ = simde_uint64x2_to_private(r); + simde_assert_equal_i(0, simde_memcmp(&r_, &a_, sizeof(r_))); + } + + return 0; +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u16_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_s8) + +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_s8) @@ -7488,8 +8297,6 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u16_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_f64) -SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_f16) - SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_s32) @@ -7507,6 +8314,29 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u64) SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f16_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_f64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s8_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u8_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpret_u64_f16) + + +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f16_f64) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_f64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s8_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s16_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_s64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u8_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u32_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vreinterpretq_u64_f16) + SIMDE_TEST_FUNC_LIST_END #include "test-neon-footer.h" diff --git a/test/arm/neon/rev64.c b/test/arm/neon/rev64.c index 4fa95aba6..623ff924c 100644 --- a/test/arm/neon/rev64.c +++ b/test/arm/neon/rev64.c @@ -9,9 +9,6 @@ #include "../../../simde/arm/neon.h" #endif -/* N.B. CM: vrev64_f16 and vrev64q_f16 are omitted as - * SIMDe has no 16-bit floating point support. 
*/ - static int test_simde_vrev64_s8 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -286,6 +283,53 @@ test_simde_vrev64_u32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vrev64_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 73.456), SIMDE_FLOAT16_VALUE( - 25.018), SIMDE_FLOAT16_VALUE( 37.020), SIMDE_FLOAT16_VALUE( 60.928) }, + { SIMDE_FLOAT16_VALUE( 60.928), SIMDE_FLOAT16_VALUE( 37.020), SIMDE_FLOAT16_VALUE( - 25.018), SIMDE_FLOAT16_VALUE( - 73.456) } }, + { { SIMDE_FLOAT16_VALUE( - 53.291), SIMDE_FLOAT16_VALUE( 85.800), SIMDE_FLOAT16_VALUE( - 72.734), SIMDE_FLOAT16_VALUE( 43.151) }, + { SIMDE_FLOAT16_VALUE( 43.151), SIMDE_FLOAT16_VALUE( - 72.734), SIMDE_FLOAT16_VALUE( 85.800), SIMDE_FLOAT16_VALUE( - 53.291) } }, + { { SIMDE_FLOAT16_VALUE( - 40.886), SIMDE_FLOAT16_VALUE( - 28.870), SIMDE_FLOAT16_VALUE( 14.218), SIMDE_FLOAT16_VALUE( 9.978) }, + { SIMDE_FLOAT16_VALUE( 9.978), SIMDE_FLOAT16_VALUE( 14.218), SIMDE_FLOAT16_VALUE( - 28.870), SIMDE_FLOAT16_VALUE( - 40.886) } }, + { { SIMDE_FLOAT16_VALUE( 70.008), SIMDE_FLOAT16_VALUE( - 19.874), SIMDE_FLOAT16_VALUE( - 54.506), SIMDE_FLOAT16_VALUE( 52.049) }, + { SIMDE_FLOAT16_VALUE( 52.049), SIMDE_FLOAT16_VALUE( - 54.506), SIMDE_FLOAT16_VALUE( - 19.874), SIMDE_FLOAT16_VALUE( 70.008) } }, + { { SIMDE_FLOAT16_VALUE( 66.107), SIMDE_FLOAT16_VALUE( - 68.033), SIMDE_FLOAT16_VALUE( 26.334), SIMDE_FLOAT16_VALUE( - 33.368) }, + { SIMDE_FLOAT16_VALUE( - 33.368), SIMDE_FLOAT16_VALUE( 26.334), SIMDE_FLOAT16_VALUE( - 68.033), SIMDE_FLOAT16_VALUE( 66.107) } }, + { { SIMDE_FLOAT16_VALUE( 20.783), SIMDE_FLOAT16_VALUE( 6.506), SIMDE_FLOAT16_VALUE( 94.787), SIMDE_FLOAT16_VALUE( 39.080) }, + { SIMDE_FLOAT16_VALUE( 39.080), SIMDE_FLOAT16_VALUE( 94.787), SIMDE_FLOAT16_VALUE( 6.506), SIMDE_FLOAT16_VALUE( 20.783) } }, + { { SIMDE_FLOAT16_VALUE( - 85.876), SIMDE_FLOAT16_VALUE( - 43.674), SIMDE_FLOAT16_VALUE( 80.017), SIMDE_FLOAT16_VALUE( 92.475) }, + { 
SIMDE_FLOAT16_VALUE( 92.475), SIMDE_FLOAT16_VALUE( 80.017), SIMDE_FLOAT16_VALUE( - 43.674), SIMDE_FLOAT16_VALUE( - 85.876) } }, + { { SIMDE_FLOAT16_VALUE( - 43.483), SIMDE_FLOAT16_VALUE( 42.449), SIMDE_FLOAT16_VALUE( 78.227), SIMDE_FLOAT16_VALUE( 19.386) }, + { SIMDE_FLOAT16_VALUE( 19.386), SIMDE_FLOAT16_VALUE( 78.227), SIMDE_FLOAT16_VALUE( 42.449), SIMDE_FLOAT16_VALUE( - 43.483) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t r = simde_vrev64_f16(a); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vrev64_f16(a); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vrev64_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -638,6 +682,68 @@ test_simde_vrev64q_u32 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vrev64q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 22.973), SIMDE_FLOAT16_VALUE( 82.785), SIMDE_FLOAT16_VALUE( - 87.788), SIMDE_FLOAT16_VALUE( 54.222), + SIMDE_FLOAT16_VALUE( - 79.878), SIMDE_FLOAT16_VALUE( 61.120), SIMDE_FLOAT16_VALUE( - 1.496), SIMDE_FLOAT16_VALUE( 87.003) }, + { SIMDE_FLOAT16_VALUE( 54.222), SIMDE_FLOAT16_VALUE( - 87.788), SIMDE_FLOAT16_VALUE( 82.785), SIMDE_FLOAT16_VALUE( 22.973), + SIMDE_FLOAT16_VALUE( 87.003), SIMDE_FLOAT16_VALUE( - 1.496), SIMDE_FLOAT16_VALUE( 61.120), SIMDE_FLOAT16_VALUE( - 79.878) } }, + { { SIMDE_FLOAT16_VALUE( 47.119), SIMDE_FLOAT16_VALUE( - 0.580), SIMDE_FLOAT16_VALUE( 95.794), SIMDE_FLOAT16_VALUE( - 97.960), + SIMDE_FLOAT16_VALUE( - 80.926), 
SIMDE_FLOAT16_VALUE( - 97.842), SIMDE_FLOAT16_VALUE( 16.229), SIMDE_FLOAT16_VALUE( 73.027) }, + { SIMDE_FLOAT16_VALUE( - 97.960), SIMDE_FLOAT16_VALUE( 95.794), SIMDE_FLOAT16_VALUE( - 0.580), SIMDE_FLOAT16_VALUE( 47.119), + SIMDE_FLOAT16_VALUE( 73.027), SIMDE_FLOAT16_VALUE( 16.229), SIMDE_FLOAT16_VALUE( - 97.842), SIMDE_FLOAT16_VALUE( - 80.926) } }, + { { SIMDE_FLOAT16_VALUE( 75.505), SIMDE_FLOAT16_VALUE( - 43.371), SIMDE_FLOAT16_VALUE( 75.338), SIMDE_FLOAT16_VALUE( - 74.826), + SIMDE_FLOAT16_VALUE( 63.128), SIMDE_FLOAT16_VALUE( 68.089), SIMDE_FLOAT16_VALUE( 68.500), SIMDE_FLOAT16_VALUE( - 98.789) }, + { SIMDE_FLOAT16_VALUE( - 74.826), SIMDE_FLOAT16_VALUE( 75.338), SIMDE_FLOAT16_VALUE( - 43.371), SIMDE_FLOAT16_VALUE( 75.505), + SIMDE_FLOAT16_VALUE( - 98.789), SIMDE_FLOAT16_VALUE( 68.500), SIMDE_FLOAT16_VALUE( 68.089), SIMDE_FLOAT16_VALUE( 63.128) } }, + { { SIMDE_FLOAT16_VALUE( 7.539), SIMDE_FLOAT16_VALUE( 5.867), SIMDE_FLOAT16_VALUE( 59.154), SIMDE_FLOAT16_VALUE( - 32.053), + SIMDE_FLOAT16_VALUE( - 73.720), SIMDE_FLOAT16_VALUE( 35.809), SIMDE_FLOAT16_VALUE( - 55.954), SIMDE_FLOAT16_VALUE( 35.526) }, + { SIMDE_FLOAT16_VALUE( - 32.053), SIMDE_FLOAT16_VALUE( 59.154), SIMDE_FLOAT16_VALUE( 5.867), SIMDE_FLOAT16_VALUE( 7.539), + SIMDE_FLOAT16_VALUE( 35.526), SIMDE_FLOAT16_VALUE( - 55.954), SIMDE_FLOAT16_VALUE( 35.809), SIMDE_FLOAT16_VALUE( - 73.720) } }, + { { SIMDE_FLOAT16_VALUE( - 39.271), SIMDE_FLOAT16_VALUE( 97.957), SIMDE_FLOAT16_VALUE( 84.789), SIMDE_FLOAT16_VALUE( - 41.728), + SIMDE_FLOAT16_VALUE( 93.209), SIMDE_FLOAT16_VALUE( - 50.250), SIMDE_FLOAT16_VALUE( 72.873), SIMDE_FLOAT16_VALUE( - 13.690) }, + { SIMDE_FLOAT16_VALUE( - 41.728), SIMDE_FLOAT16_VALUE( 84.789), SIMDE_FLOAT16_VALUE( 97.957), SIMDE_FLOAT16_VALUE( - 39.271), + SIMDE_FLOAT16_VALUE( - 13.690), SIMDE_FLOAT16_VALUE( 72.873), SIMDE_FLOAT16_VALUE( - 50.250), SIMDE_FLOAT16_VALUE( 93.209) } }, + { { SIMDE_FLOAT16_VALUE( - 18.105), SIMDE_FLOAT16_VALUE( - 27.704), SIMDE_FLOAT16_VALUE( 34.737), 
SIMDE_FLOAT16_VALUE( - 86.956), + SIMDE_FLOAT16_VALUE( 49.374), SIMDE_FLOAT16_VALUE( 11.743), SIMDE_FLOAT16_VALUE( - 98.777), SIMDE_FLOAT16_VALUE( 42.229) }, + { SIMDE_FLOAT16_VALUE( - 86.956), SIMDE_FLOAT16_VALUE( 34.737), SIMDE_FLOAT16_VALUE( - 27.704), SIMDE_FLOAT16_VALUE( - 18.105), + SIMDE_FLOAT16_VALUE( 42.229), SIMDE_FLOAT16_VALUE( - 98.777), SIMDE_FLOAT16_VALUE( 11.743), SIMDE_FLOAT16_VALUE( 49.374) } }, + { { SIMDE_FLOAT16_VALUE( 80.882), SIMDE_FLOAT16_VALUE( 87.435), SIMDE_FLOAT16_VALUE( 55.813), SIMDE_FLOAT16_VALUE( - 68.226), + SIMDE_FLOAT16_VALUE( - 69.733), SIMDE_FLOAT16_VALUE( 94.941), SIMDE_FLOAT16_VALUE( 24.856), SIMDE_FLOAT16_VALUE( 44.375) }, + { SIMDE_FLOAT16_VALUE( - 68.226), SIMDE_FLOAT16_VALUE( 55.813), SIMDE_FLOAT16_VALUE( 87.435), SIMDE_FLOAT16_VALUE( 80.882), + SIMDE_FLOAT16_VALUE( 44.375), SIMDE_FLOAT16_VALUE( 24.856), SIMDE_FLOAT16_VALUE( 94.941), SIMDE_FLOAT16_VALUE( - 69.733) } }, + { { SIMDE_FLOAT16_VALUE( 48.888), SIMDE_FLOAT16_VALUE( - 56.159), SIMDE_FLOAT16_VALUE( 7.183), SIMDE_FLOAT16_VALUE( - 75.051), + SIMDE_FLOAT16_VALUE( 67.973), SIMDE_FLOAT16_VALUE( 13.410), SIMDE_FLOAT16_VALUE( - 45.628), SIMDE_FLOAT16_VALUE( 45.377) }, + { SIMDE_FLOAT16_VALUE( - 75.051), SIMDE_FLOAT16_VALUE( 7.183), SIMDE_FLOAT16_VALUE( - 56.159), SIMDE_FLOAT16_VALUE( 48.888), + SIMDE_FLOAT16_VALUE( 45.377), SIMDE_FLOAT16_VALUE( - 45.628), SIMDE_FLOAT16_VALUE( 13.410), SIMDE_FLOAT16_VALUE( 67.973) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t r = simde_vrev64q_f16(a); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vrev64q_f16(a); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vrev64q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -690,7 +796,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_u32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_s8) @@ -699,7 +805,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_s32) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_u32) -//SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_f16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vrev64q_f32) SIMDE_TEST_FUNC_LIST_END diff --git a/test/arm/neon/rshrn_high_n.c b/test/arm/neon/rshrn_high_n.c new file mode 100644 index 000000000..cc1e510f6 --- /dev/null +++ b/test/arm/neon/rshrn_high_n.c @@ -0,0 +1,645 @@ +#define SIMDE_TEST_ARM_NEON_INSN rshrn_high_n + +#include "test-neon.h" +#include "../../../simde/arm/neon/rshrn_high_n.h" + +static int +test_simde_vrshrn_high_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int16_t a[8]; + int8_t r[8]; + int8_t r1[16]; + int8_t r3[16]; + int8_t r5[16]; + int8_t r6[16]; + int8_t r8[16]; + } test_vec[] = { + { { -INT16_C(22001), -INT16_C(26905), -INT16_C( 9851), INT16_C(17234), + -INT16_C(24080), INT16_C(18965), INT16_C(27794), INT16_C( 7768) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), + INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85), + INT8_C( 8), INT8_C( 116), -INT8_C( 61), -INT8_C( 87), -INT8_C( 8), INT8_C( 11), INT8_C( 73), INT8_C( 44) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85), + INT8_C( 66), -INT8_C( 35), INT8_C( 49), 
INT8_C( 106), INT8_C( 62), INT8_C( 67), -INT8_C( 110), -INT8_C( 53) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85), + INT8_C( 80), -INT8_C( 73), -INT8_C( 52), INT8_C( 27), INT8_C( 16), INT8_C( 81), INT8_C( 101), -INT8_C( 13) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85), + -INT8_C( 88), INT8_C( 92), INT8_C( 102), INT8_C( 13), -INT8_C( 120), INT8_C( 40), -INT8_C( 78), INT8_C( 121) }, + { -INT8_C( 89), INT8_C( 77), INT8_C( 28), INT8_C( 28), INT8_C( 123), INT8_C( 20), -INT8_C( 111), -INT8_C( 85), + -INT8_C( 86), -INT8_C( 105), -INT8_C( 38), INT8_C( 67), -INT8_C( 94), INT8_C( 74), INT8_C( 109), INT8_C( 30) } }, + { { INT16_C(28383), INT16_C( 4710), INT16_C(12658), INT16_C(17402), + INT16_C(32513), INT16_C( 6690), -INT16_C( 3459), -INT16_C(14763) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), + -INT8_C( 106), INT8_C( 124), INT8_C( 119), INT8_C( 108) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), -INT8_C( 106), INT8_C( 124), INT8_C( 119), INT8_C( 108), + INT8_C( 112), INT8_C( 51), -INT8_C( 71), -INT8_C( 3), -INT8_C( 127), INT8_C( 17), INT8_C( 63), INT8_C( 43) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), -INT8_C( 106), INT8_C( 124), INT8_C( 119), INT8_C( 108), + -INT8_C( 36), INT8_C( 77), INT8_C( 46), INT8_C( 127), -INT8_C( 32), INT8_C( 68), INT8_C( 80), -INT8_C( 53) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), -INT8_C( 106), INT8_C( 124), INT8_C( 119), INT8_C( 108), + INT8_C( 119), -INT8_C( 109), -INT8_C( 116), INT8_C( 32), -INT8_C( 8), -INT8_C( 47), -INT8_C( 108), INT8_C( 51) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), -INT8_C( 106), INT8_C( 124), INT8_C( 119), INT8_C( 108), + -INT8_C( 69), INT8_C( 74), -INT8_C( 58), INT8_C( 16), -INT8_C( 4), INT8_C( 105), -INT8_C( 54), INT8_C( 25) }, + { INT8_C( 11), INT8_C( 29), INT8_C( 25), -INT8_C( 61), -INT8_C( 106), INT8_C( 
124), INT8_C( 119), INT8_C( 108), + INT8_C( 111), INT8_C( 18), INT8_C( 49), INT8_C( 68), INT8_C( 127), INT8_C( 26), -INT8_C( 14), -INT8_C( 58) } }, + { { -INT16_C(15297), -INT16_C(14816), -INT16_C(20211), -INT16_C( 4011), + INT16_C( 8230), -INT16_C(24842), INT16_C(24790), -INT16_C(23256) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), + -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103), + INT8_C( 32), INT8_C( 16), -INT8_C( 121), INT8_C( 43), INT8_C( 19), INT8_C( 123), INT8_C( 107), -INT8_C( 108) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103), + -INT8_C( 120), -INT8_C( 60), INT8_C( 34), INT8_C( 11), INT8_C( 5), -INT8_C( 33), INT8_C( 27), -INT8_C( 91) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103), + INT8_C( 34), INT8_C( 49), -INT8_C( 120), -INT8_C( 125), INT8_C( 1), -INT8_C( 8), INT8_C( 7), INT8_C( 41) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103), + INT8_C( 17), INT8_C( 25), -INT8_C( 60), -INT8_C( 63), -INT8_C( 127), INT8_C( 124), -INT8_C( 125), -INT8_C( 107) }, + { -INT8_C( 115), INT8_C( 82), -INT8_C( 121), -INT8_C( 15), -INT8_C( 58), -INT8_C( 37), -INT8_C( 46), -INT8_C( 103), + -INT8_C( 60), -INT8_C( 58), -INT8_C( 79), -INT8_C( 16), INT8_C( 32), -INT8_C( 97), INT8_C( 97), -INT8_C( 91) } }, + { { INT16_C(20309), -INT16_C( 4385), -INT16_C( 4332), -INT16_C(16724), + -INT16_C( 7501), -INT16_C(18956), -INT16_C( 1383), -INT16_C(17787) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), + -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115), + -INT8_C( 85), INT8_C( 112), -INT8_C( 
118), INT8_C( 86), INT8_C( 90), -INT8_C( 6), INT8_C( 77), INT8_C( 67) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115), + -INT8_C( 21), -INT8_C( 36), -INT8_C( 29), -INT8_C( 42), INT8_C( 86), -INT8_C( 65), INT8_C( 83), INT8_C( 81) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115), + INT8_C( 123), INT8_C( 119), INT8_C( 121), -INT8_C( 11), INT8_C( 22), -INT8_C( 80), -INT8_C( 43), -INT8_C( 44) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115), + INT8_C( 61), -INT8_C( 69), -INT8_C( 68), -INT8_C( 5), -INT8_C( 117), -INT8_C( 40), -INT8_C( 22), -INT8_C( 22) }, + { -INT8_C( 73), -INT8_C( 40), -INT8_C( 124), -INT8_C( 84), -INT8_C( 105), INT8_C( 63), -INT8_C( 52), INT8_C( 115), + INT8_C( 79), -INT8_C( 17), -INT8_C( 17), -INT8_C( 65), -INT8_C( 29), -INT8_C( 74), -INT8_C( 5), -INT8_C( 69) } }, + { { INT16_C( 2688), INT16_C(27188), INT16_C( 9401), -INT16_C( 4812), + INT16_C( 2024), -INT16_C(29524), INT16_C( 9138), -INT16_C( 1721) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), + -INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), -INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112), + INT8_C( 64), INT8_C( 26), INT8_C( 93), -INT8_C( 102), -INT8_C( 12), INT8_C( 86), -INT8_C( 39), -INT8_C( 92) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), -INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112), + INT8_C( 80), INT8_C( 71), -INT8_C( 105), -INT8_C( 89), -INT8_C( 3), -INT8_C( 106), INT8_C( 118), INT8_C( 41) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), -INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112), + INT8_C( 84), INT8_C( 82), INT8_C( 38), INT8_C( 106), INT8_C( 63), INT8_C( 101), INT8_C( 30), -INT8_C( 54) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), 
-INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112), + INT8_C( 42), -INT8_C( 87), -INT8_C( 109), -INT8_C( 75), INT8_C( 32), INT8_C( 51), -INT8_C( 113), -INT8_C( 27) }, + { -INT8_C( 22), INT8_C( 112), INT8_C( 26), INT8_C( 3), -INT8_C( 62), INT8_C( 100), INT8_C( 65), -INT8_C( 112), + INT8_C( 11), INT8_C( 106), INT8_C( 37), -INT8_C( 19), INT8_C( 8), -INT8_C( 115), INT8_C( 36), -INT8_C( 7) } }, + { { INT16_C(13927), -INT16_C(10193), INT16_C(23177), -INT16_C(27680), + -INT16_C(21529), INT16_C(23032), -INT16_C(14334), INT16_C(22212) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), + INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101), + INT8_C( 52), INT8_C( 24), INT8_C( 69), -INT8_C( 16), -INT8_C( 12), -INT8_C( 4), INT8_C( 1), INT8_C( 98) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101), + -INT8_C( 51), INT8_C( 6), INT8_C( 81), INT8_C( 124), INT8_C( 125), INT8_C( 63), INT8_C( 0), -INT8_C( 39) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101), + -INT8_C( 77), -INT8_C( 63), -INT8_C( 44), -INT8_C( 97), INT8_C( 95), -INT8_C( 48), INT8_C( 64), -INT8_C( 74) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101), + -INT8_C( 38), INT8_C( 97), INT8_C( 106), INT8_C( 80), -INT8_C( 80), INT8_C( 104), INT8_C( 32), INT8_C( 91) }, + { INT8_C( 119), -INT8_C( 31), INT8_C( 33), INT8_C( 97), INT8_C( 32), INT8_C( 27), INT8_C( 89), INT8_C( 101), + INT8_C( 54), -INT8_C( 40), INT8_C( 91), -INT8_C( 108), -INT8_C( 84), INT8_C( 90), -INT8_C( 56), INT8_C( 87) } }, + { { INT16_C(19797), INT16_C(24848), -INT16_C(12237), INT16_C( 8350), + -INT16_C(23009), INT16_C(32417), INT16_C(15245), INT16_C(14670) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), + -INT8_C( 56), INT8_C( 85), 
-INT8_C( 52), INT8_C( 8) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), -INT8_C( 56), INT8_C( 85), -INT8_C( 52), INT8_C( 8), + -INT8_C( 85), -INT8_C( 120), INT8_C( 26), INT8_C( 79), INT8_C( 16), INT8_C( 81), -INT8_C( 57), -INT8_C( 89) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), -INT8_C( 56), INT8_C( 85), -INT8_C( 52), INT8_C( 8), + -INT8_C( 85), INT8_C( 34), INT8_C( 6), INT8_C( 20), -INT8_C( 60), -INT8_C( 44), INT8_C( 114), INT8_C( 42) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), -INT8_C( 56), INT8_C( 85), -INT8_C( 52), INT8_C( 8), + INT8_C( 107), INT8_C( 9), -INT8_C( 126), INT8_C( 5), INT8_C( 49), -INT8_C( 11), -INT8_C( 36), -INT8_C( 54) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), -INT8_C( 56), INT8_C( 85), -INT8_C( 52), INT8_C( 8), + INT8_C( 53), -INT8_C( 124), INT8_C( 65), -INT8_C( 126), -INT8_C( 104), -INT8_C( 5), -INT8_C( 18), -INT8_C( 27) }, + { INT8_C( 18), -INT8_C( 96), INT8_C( 121), -INT8_C( 105), -INT8_C( 56), INT8_C( 85), -INT8_C( 52), INT8_C( 8), + INT8_C( 77), INT8_C( 97), -INT8_C( 48), INT8_C( 33), -INT8_C( 90), INT8_C( 127), INT8_C( 60), INT8_C( 57) } }, + { { INT16_C(14702), INT16_C(30102), INT16_C(27970), -INT16_C(23274), + INT16_C(21570), -INT16_C(20065), -INT16_C(30909), INT16_C( 8657) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), + INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114), + -INT8_C( 73), -INT8_C( 53), -INT8_C( 95), -INT8_C( 117), INT8_C( 33), -INT8_C( 48), -INT8_C( 94), -INT8_C( 23) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114), + INT8_C( 46), -INT8_C( 77), -INT8_C( 88), -INT8_C( 93), -INT8_C( 120), INT8_C( 52), -INT8_C( 24), INT8_C( 58) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114), + 
-INT8_C( 53), -INT8_C( 83), INT8_C( 106), INT8_C( 41), -INT8_C( 94), -INT8_C( 115), INT8_C( 58), INT8_C( 15) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114), + -INT8_C( 26), -INT8_C( 42), -INT8_C( 75), -INT8_C( 108), INT8_C( 81), -INT8_C( 58), INT8_C( 29), -INT8_C( 121) }, + { -INT8_C( 31), -INT8_C( 119), -INT8_C( 20), INT8_C( 74), INT8_C( 10), -INT8_C( 56), -INT8_C( 52), -INT8_C( 114), + INT8_C( 57), INT8_C( 118), INT8_C( 109), -INT8_C( 91), INT8_C( 84), -INT8_C( 78), -INT8_C( 121), INT8_C( 34) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int8x8_t r = simde_vld1_s8(test_vec[i].r); + + simde_int8x16_t r1 = simde_vrshrn_high_n_s16(r, a, 1); + simde_int8x16_t r3 = simde_vrshrn_high_n_s16(r, a, 3); + simde_int8x16_t r5 = simde_vrshrn_high_n_s16(r, a, 5); + simde_int8x16_t r6 = simde_vrshrn_high_n_s16(r, a, 6); + simde_int8x16_t r8 = simde_vrshrn_high_n_s16(r, a, 8); + + simde_test_arm_neon_assert_equal_i8x16(r1, simde_vld1q_s8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i8x16(r6, simde_vld1q_s8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i8x16(r8, simde_vld1q_s8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vrshrn_high_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int32_t a[4]; + int16_t r[4]; + int16_t r3[8]; + int16_t r6[8]; + int16_t r10[8]; + int16_t r13[8]; + int16_t r16[8]; + } test_vec[] = { + { { -INT32_C( 116519079), -INT32_C( 951911362), INT32_C( 980070372), -INT32_C( 677594881) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891), + -INT16_C( 15893), INT16_C( 24456), INT16_C( 22013), 
-INT16_C( 26848) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891), + INT16_C( 14397), INT16_C( 3057), -INT16_C( 21824), INT16_C( 29412) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891), + INT16_C( 17284), -INT16_C( 12097), -INT16_C( 25940), -INT16_C( 6354) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891), + -INT16_C( 14224), INT16_C( 14872), -INT16_C( 11435), -INT16_C( 17178) }, + { INT16_C( 14937), -INT16_C( 18878), -INT16_C( 26791), INT16_C( 3891), + -INT16_C( 1778), -INT16_C( 14525), INT16_C( 14955), -INT16_C( 10339) } }, + { { -INT32_C( 632818833), -INT32_C( 456806704), INT32_C( 1298096978), -INT32_C( 1734838840) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990), + -INT16_C( 402), -INT16_C( 18982), -INT16_C( 5014), INT16_C( 3769) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990), + INT16_C( 8142), INT16_C( 5819), INT16_C( 32141), INT16_C( 25047) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990), + -INT16_C( 28163), INT16_C( 12652), INT16_C( 22489), INT16_C( 9757) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990), + -INT16_C( 11712), INT16_C( 9773), INT16_C( 27387), -INT16_C( 15164) }, + { -INT16_C( 25001), INT16_C( 7672), -INT16_C( 26306), INT16_C( 7990), + -INT16_C( 9656), -INT16_C( 6970), INT16_C( 19807), -INT16_C( 26472) } }, + { { -INT32_C( 417041437), INT32_C( 920932103), -INT32_C( 884177766), INT32_C( 723147296) }, + { -INT16_C( 28020), INT16_C( 5578), INT16_C( 21674), -INT16_C( 17090) }, + { -INT16_C( 28020), INT16_C( 5578), INT16_C( 21674), -INT16_C( 17090), + -INT16_C( 29060), -INT16_C( 30239), -INT16_C( 28525), INT16_C( 19268) }, + { -INT16_C( 28020), INT16_C( 5578), INT16_C( 21674), -INT16_C( 17090), + -INT16_C( 28208), -INT16_C( 28356), INT16_C( 12818), INT16_C( 26985) }, + { -INT16_C( 28020), INT16_C( 
5578), INT16_C( 21674), -INT16_C( 17090), + -INT16_C( 14051), -INT16_C( 18156), -INT16_C( 11487), -INT16_C( 14697) }, + { -INT16_C( 28020), INT16_C( 5578), INT16_C( 21674), -INT16_C( 17090), + INT16_C( 14628), -INT16_C( 18654), INT16_C( 23140), INT16_C( 22739) }, + { -INT16_C( 28020), INT16_C( 5578), INT16_C( 21674), -INT16_C( 17090), + -INT16_C( 6364), INT16_C( 14052), -INT16_C( 13491), INT16_C( 11034) } }, + { { -INT32_C( 1420450770), INT32_C( 924602372), -INT32_C( 1129829990), INT32_C( 1792300063) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509), + -INT16_C( 19322), -INT16_C( 30207), INT16_C( 1331), -INT16_C( 30076) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509), + INT16_C( 22161), INT16_C( 28992), -INT16_C( 24410), INT16_C( 20816) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509), + -INT16_C( 10903), -INT16_C( 14572), INT16_C( 10762), -INT16_C( 19179) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509), + INT16_C( 23213), -INT16_C( 18205), -INT16_C( 6847), INT16_C( 22179) }, + { -INT16_C( 12913), INT16_C( 1534), INT16_C( 21877), INT16_C( 1509), + -INT16_C( 21674), INT16_C( 14108), -INT16_C( 17240), INT16_C( 27348) } }, + { { INT32_C( 2018419504), -INT32_C( 580211751), -INT32_C( 1658752744), INT32_C( 265814774) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669), + -INT16_C( 11162), INT16_C( 21883), INT16_C( 11811), INT16_C( 95) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669), + INT16_C( 14989), -INT16_C( 21841), -INT16_C( 31292), INT16_C( 24588) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669), + INT16_C( 5033), INT16_C( 23211), INT16_C( 18524), -INT16_C( 2559) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669), + 
-INT16_C( 15755), -INT16_C( 5291), -INT16_C( 5876), INT16_C( 32448) }, + { -INT16_C( 8728), -INT16_C( 18486), INT16_C( 28115), -INT16_C( 19669), + INT16_C( 30799), -INT16_C( 8853), -INT16_C( 25311), INT16_C( 4056) } }, + { { INT32_C( 1943103962), INT32_C( 748733346), -INT32_C( 923904804), -INT32_C( 1357699688) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491), + INT16_C( 11579), INT16_C( 6260), -INT16_C( 13668), INT16_C( 25779) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491), + INT16_C( 17831), -INT16_C( 31985), -INT16_C( 18093), INT16_C( 19606) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491), + -INT16_C( 2982), INT16_C( 10289), INT16_C( 15253), -INT16_C( 15159) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491), + -INT16_C( 24949), INT16_C( 25862), INT16_C( 18291), INT16_C( 30873) }, + { INT16_C( 30943), INT16_C( 2829), -INT16_C( 32063), INT16_C( 15491), + INT16_C( 29649), INT16_C( 11425), -INT16_C( 14098), -INT16_C( 20717) } }, + { { INT32_C( 1223297293), -INT32_C( 1884479338), -INT32_C( 1576465096), -INT32_C( 1524045132) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325), + INT16_C( 16674), -INT16_C( 23533), INT16_C( 8615), INT16_C( 7511) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325), + -INT16_C( 22492), -INT16_C( 19326), INT16_C( 9269), -INT16_C( 23637) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325), + INT16_C( 14978), -INT16_C( 5304), -INT16_C( 32189), INT16_C( 19003) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325), + INT16_C( 18256), INT16_C( 32105), INT16_C( 4168), INT16_C( 10567) }, + { -INT16_C( 15737), -INT16_C( 21460), -INT16_C( 5800), INT16_C( 14325), + INT16_C( 18666), -INT16_C( 28755), -INT16_C( 
24055), -INT16_C( 23255) } }, + { { INT32_C( 1605158937), -INT32_C( 1320407505), -INT32_C( 260976609), INT32_C( 1816201139) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763), + -INT16_C( 26365), -INT16_C( 31290), INT16_C( 14852), INT16_C( 8438) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763), + -INT16_C( 19680), INT16_C( 12473), -INT16_C( 14528), INT16_C( 1055) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763), + -INT16_C( 5326), INT16_C( 21260), INT16_C( 7284), INT16_C( 4162) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763), + -INT16_C( 666), -INT16_C( 30111), -INT16_C( 31857), INT16_C( 25096) }, + { INT16_C( 23690), -INT16_C( 31570), INT16_C( 7151), -INT16_C( 3763), + INT16_C( 24493), -INT16_C( 20148), -INT16_C( 3982), INT16_C( 27713) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int16x4_t r = simde_vld1_s16(test_vec[i].r); + + simde_int16x8_t r3 = simde_vrshrn_high_n_s32(r, a, 3); + simde_int16x8_t r6 = simde_vrshrn_high_n_s32(r, a, 6); + simde_int16x8_t r10 = simde_vrshrn_high_n_s32(r, a, 10); + simde_int16x8_t r13 = simde_vrshrn_high_n_s32(r, a, 13); + simde_int16x8_t r16 = simde_vrshrn_high_n_s32(r, a, 16); + + simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r16, simde_vld1q_s16(test_vec[i].r16)); + } + + return 0; +} + +static int +test_simde_vrshrn_high_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + int64_t a[2]; + int32_t r[2]; + int32_t r6[4]; + int32_t r13[4]; + 
int32_t r19[4]; + int32_t r26[4]; + int32_t r32[4]; + } test_vec[] = { + { { INT64_C(522827392458970780), -INT64_C(347963483864753187) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527), -INT32_C( 113675942), INT32_C( 945142895) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527), -INT32_C( 1510837533), INT32_C( 1215343481) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527), INT32_C( 781699532), INT32_C( 2032255662) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527), -INT32_C( 799199340), -INT32_C( 890092667) }, + { -INT32_C( 1547342019), -INT32_C( 1066804527), INT32_C( 121730238), -INT32_C( 81016562) } }, + { { -INT64_C(7511569915281881734), -INT64_C(8609781473616875962) }, + { -INT32_C( 313650613), -INT32_C( 510439462) }, + { -INT32_C( 313650613), -INT32_C( 510439462), -INT32_C( 686263530), INT32_C( 1998409401) }, + { -INT32_C( 313650613), -INT32_C( 510439462), INT32_C( 1471033574), INT32_C( 1257126557) }, + { -INT32_C( 313650613), -INT32_C( 510439462), INT32_C( 828291268), INT32_C( 2100017386) }, + { -INT32_C( 313650613), -INT32_C( 510439462), -INT32_C( 261964430), INT32_C( 553277298) }, + { -INT32_C( 313650613), -INT32_C( 510439462), -INT32_C( 1748923658), -INT32_C( 2004620962) } }, + { { -INT64_C(5217461664724359317), -INT64_C(3833958958480156550) }, + { INT32_C( 529357399), INT32_C( 818474303) }, + { INT32_C( 529357399), INT32_C( 818474303), INT32_C( 158567326), -INT32_C( 1882098590) }, + { INT32_C( 529357399), INT32_C( 818474303), -INT32_C( 770513129), INT32_C( 1428136681) }, + { INT32_C( 529357399), INT32_C( 818474303), -INT32_C( 79148132), INT32_C( 1632927372) }, + { INT32_C( 529357399), INT32_C( 818474303), -INT32_C( 436825961), -INT32_C( 1295865603) }, + { INT32_C( 529357399), INT32_C( 818474303), -INT32_C( 1214784958), -INT32_C( 892663132) } }, + { { INT64_C(8016619778976487131), -INT64_C(5287942160034288787) }, + { -INT32_C( 1170965380), INT32_C( 1968543732) }, + { -INT32_C( 1170965380), INT32_C( 
1968543732), -INT32_C( 599454117), INT32_C( 2098815150) }, + { -INT32_C( 1170965380), INT32_C( 1968543732), INT32_C( 163088925), -INT32_C( 1527106879) }, + { -INT32_C( 1170965380), INT32_C( 1968543732), INT32_C( 405201448), -INT32_C( 1366038325) }, + { -INT32_C( 1170965380), INT32_C( 1968543732), -INT32_C( 802140732), -INT32_C( 1487067182) }, + { -INT32_C( 1170965380), INT32_C( 1968543732), INT32_C( 1866514743), -INT32_C( 1231194977) } }, + { { INT64_C(2478068447417743253), -INT64_C(4378779670994155852) }, + { -INT32_C( 675478319), INT32_C( 575064699) }, + { -INT32_C( 675478319), INT32_C( 575064699), INT32_C( 2122694878), INT32_C( 1529005083) }, + { -INT32_C( 675478319), INT32_C( 575064699), -INT32_C( 251851902), -INT32_C( 1732885112) }, + { -INT32_C( 675478319), INT32_C( 575064699), INT32_C( 2076439598), INT32_C( 1851971862) }, + { -INT32_C( 675478319), INT32_C( 575064699), -INT32_C( 1728608280), -INT32_C( 824392270) }, + { -INT32_C( 675478319), INT32_C( 575064699), INT32_C( 576970272), -INT32_C( 1019514089) } }, + { { INT64_C(1489761328527602322), INT64_C(2076240676582230606) }, + { INT32_C( 1924015755), INT32_C( 117050398) }, + { INT32_C( 1924015755), INT32_C( 117050398), INT32_C( 604766666), -INT32_C( 1804625367) }, + { INT32_C( 1924015755), INT32_C( 117050398), -INT32_C( 1874323452), INT32_C( 1328078644) }, + { INT32_C( 1924015755), INT32_C( 117050398), -INT32_C( 1774116768), INT32_C( 154968957) }, + { INT32_C( 1924015755), INT32_C( 117050398), INT32_C( 724337217), INT32_C( 873625927) }, + { INT32_C( 1924015755), INT32_C( 117050398), INT32_C( 346862089), INT32_C( 483412453) } }, + { { INT64_C(1404410458338981322), INT64_C(2196521666701930394) }, + { -INT32_C( 1579182798), INT32_C( 1435747810) }, + { -INT32_C( 1579182798), INT32_C( 1435747810), INT32_C( 2078313943), INT32_C( 1171578558) }, + { -INT32_C( 1579182798), INT32_C( 1435747810), -INT32_C( 1091059428), -INT32_C( 427054659) }, + { -INT32_C( 1579182798), INT32_C( 1435747810), -INT32_C( 1359225084), 
INT32_C( 1939484327) }, + { -INT32_C( 1579182798), INT32_C( 1435747810), -INT32_C( 547489858), -INT32_C( 1629014947) }, + { -INT32_C( 1579182798), INT32_C( 1435747810), INT32_C( 326989791), INT32_C( 511417553) } }, + { { INT64_C(467500832710821027), INT64_C(1909358569727587315) }, + { INT32_C( 2012855853), -INT32_C( 296579437) }, + { INT32_C( 2012855853), -INT32_C( 296579437), INT32_C( 522696211), INT32_C( 50714576) }, + { INT32_C( 2012855853), -INT32_C( 296579437), INT32_C( 742281068), INT32_C( 1007029168) }, + { INT32_C( 2012855853), -INT32_C( 296579437), -INT32_C( 1666123458), -INT32_C( 319809489) }, + { INT32_C( 2012855853), -INT32_C( 296579437), -INT32_C( 1623629326), -INT32_C( 1613111248) }, + { INT32_C( 2012855853), -INT32_C( 296579437), INT32_C( 108848520), INT32_C( 444557185) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int32x2_t r = simde_vld1_s32(test_vec[i].r); + + simde_int32x4_t r6 = simde_vrshrn_high_n_s64(r, a, 6); + simde_int32x4_t r13 = simde_vrshrn_high_n_s64(r, a, 13); + simde_int32x4_t r19 = simde_vrshrn_high_n_s64(r, a, 19); + simde_int32x4_t r26 = simde_vrshrn_high_n_s64(r, a, 26); + simde_int32x4_t r32 = simde_vrshrn_high_n_s64(r, a, 32); + + simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i32x4(r32, simde_vld1q_s32(test_vec[i].r32)); + } + + return 0; +} + +static int +test_simde_vrshrn_high_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint16_t a[8]; + uint8_t r[8]; + uint8_t r1[16]; + uint8_t r3[16]; + uint8_t r5[16]; + uint8_t r6[16]; + uint8_t r8[16]; + } test_vec[] = { + { { UINT16_C( 4891), UINT16_C( 61518), UINT16_C( 
56040), UINT16_C( 56087), UINT16_C( 50073), UINT16_C( 24780), UINT16_C( 61165), UINT16_C( 6842) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43), UINT8_C(142), UINT8_C( 39), UINT8_C(116), UINT8_C(140), UINT8_C(205), UINT8_C(102), UINT8_C(119), UINT8_C( 93) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43), UINT8_C( 99), UINT8_C( 10), UINT8_C( 93), UINT8_C( 99), UINT8_C(115), UINT8_C( 26), UINT8_C(222), UINT8_C( 87) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43), UINT8_C(153), UINT8_C(130), UINT8_C(215), UINT8_C(217), UINT8_C( 29), UINT8_C( 6), UINT8_C(119), UINT8_C(214) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43), UINT8_C( 76), UINT8_C(193), UINT8_C(108), UINT8_C(108), UINT8_C( 14), UINT8_C(131), UINT8_C(188), UINT8_C(107) }, + { UINT8_C(229), UINT8_C(181), UINT8_C(120), UINT8_C(103), UINT8_C(107), UINT8_C( 74), UINT8_C( 98), UINT8_C( 43), UINT8_C( 19), UINT8_C(240), UINT8_C(219), UINT8_C(219), UINT8_C(196), UINT8_C( 97), UINT8_C(239), UINT8_C( 27) } }, + { { UINT16_C( 45300), UINT16_C( 60746), UINT16_C( 20949), UINT16_C( 2061), UINT16_C( 22701), UINT16_C( 38253), UINT16_C( 36627), UINT16_C( 39372) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225), UINT8_C(122), UINT8_C(165), UINT8_C(235), UINT8_C( 7), UINT8_C( 87), UINT8_C(183), UINT8_C(138), UINT8_C(230) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225), UINT8_C( 31), 
UINT8_C(169), UINT8_C( 59), UINT8_C( 2), UINT8_C( 22), UINT8_C(174), UINT8_C(226), UINT8_C( 58) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225), UINT8_C(136), UINT8_C(106), UINT8_C(143), UINT8_C( 64), UINT8_C(197), UINT8_C(171), UINT8_C(121), UINT8_C(206) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225), UINT8_C(196), UINT8_C(181), UINT8_C( 71), UINT8_C( 32), UINT8_C( 99), UINT8_C( 86), UINT8_C( 60), UINT8_C(103) }, + { UINT8_C(176), UINT8_C(169), UINT8_C(166), UINT8_C(123), UINT8_C(155), UINT8_C(136), UINT8_C(139), UINT8_C(225), UINT8_C(177), UINT8_C(237), UINT8_C( 82), UINT8_C( 8), UINT8_C( 89), UINT8_C(149), UINT8_C(143), UINT8_C(154) } }, + { { UINT16_C( 50411), UINT16_C( 5375), UINT16_C( 13061), UINT16_C( 55963), UINT16_C( 58746), UINT16_C( 42849), UINT16_C( 39459), UINT16_C( 39900) }, + { UINT8_C( 79), UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38) }, + { UINT8_C( 79), UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38), UINT8_C(118), UINT8_C(128), UINT8_C(131), UINT8_C( 78), UINT8_C(189), UINT8_C(177), UINT8_C( 18), UINT8_C(238) }, + { UINT8_C( 79), UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38), UINT8_C(157), UINT8_C(160), UINT8_C( 97), UINT8_C( 83), UINT8_C(175), UINT8_C(236), UINT8_C( 68), UINT8_C(124) }, + { UINT8_C( 79), UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38), UINT8_C( 39), UINT8_C(168), UINT8_C(152), UINT8_C(213), UINT8_C( 44), UINT8_C( 59), UINT8_C(209), UINT8_C(223) }, + { UINT8_C( 79), UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38), UINT8_C( 20), UINT8_C( 84), UINT8_C(204), UINT8_C(106), UINT8_C(150), UINT8_C(158), UINT8_C(105), UINT8_C(111) }, + { UINT8_C( 79), 
UINT8_C( 19), UINT8_C(207), UINT8_C(136), UINT8_C(181), UINT8_C(169), UINT8_C(171), UINT8_C( 38), UINT8_C(197), UINT8_C( 21), UINT8_C( 51), UINT8_C(219), UINT8_C(229), UINT8_C(167), UINT8_C(154), UINT8_C(156) } }, + { { UINT16_C( 11077), UINT16_C( 7355), UINT16_C( 7596), UINT16_C( 52604), UINT16_C( 46540), UINT16_C( 19909), UINT16_C( 9197), UINT16_C( 7039) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18), UINT8_C(163), UINT8_C( 94), UINT8_C(214), UINT8_C(190), UINT8_C(230), UINT8_C(227), UINT8_C(247), UINT8_C(192) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18), UINT8_C(105), UINT8_C(151), UINT8_C(182), UINT8_C(176), UINT8_C(186), UINT8_C(185), UINT8_C(126), UINT8_C(112) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18), UINT8_C( 90), UINT8_C(230), UINT8_C(237), UINT8_C(108), UINT8_C(174), UINT8_C(110), UINT8_C( 31), UINT8_C(220) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18), UINT8_C(173), UINT8_C(115), UINT8_C(119), UINT8_C( 54), UINT8_C(215), UINT8_C( 55), UINT8_C(144), UINT8_C(110) }, + { UINT8_C(216), UINT8_C(149), UINT8_C(149), UINT8_C(244), UINT8_C(247), UINT8_C(224), UINT8_C( 78), UINT8_C( 18), UINT8_C( 43), UINT8_C( 29), UINT8_C( 30), UINT8_C(205), UINT8_C(182), UINT8_C( 78), UINT8_C( 36), UINT8_C( 27) } }, + { { UINT16_C( 58307), UINT16_C( 29751), UINT16_C( 64220), UINT16_C( 37850), UINT16_C( 11545), UINT16_C( 37711), UINT16_C( 37447), UINT16_C( 29012) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 62), UINT8_C(213) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 
62), UINT8_C(213), UINT8_C(226), UINT8_C( 28), UINT8_C(110), UINT8_C(237), UINT8_C(141), UINT8_C(168), UINT8_C( 36), UINT8_C(170) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 62), UINT8_C(213), UINT8_C(120), UINT8_C(135), UINT8_C( 92), UINT8_C(123), UINT8_C(163), UINT8_C(106), UINT8_C( 73), UINT8_C( 43) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 62), UINT8_C(213), UINT8_C( 30), UINT8_C(162), UINT8_C(215), UINT8_C(159), UINT8_C(105), UINT8_C(154), UINT8_C(146), UINT8_C(139) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 62), UINT8_C(213), UINT8_C(143), UINT8_C(209), UINT8_C(235), UINT8_C( 79), UINT8_C(180), UINT8_C( 77), UINT8_C( 73), UINT8_C(197) }, + { UINT8_C(185), UINT8_C(186), UINT8_C( 7), UINT8_C( 23), UINT8_C(222), UINT8_C(205), UINT8_C( 62), UINT8_C(213), UINT8_C(228), UINT8_C(116), UINT8_C(251), UINT8_C(148), UINT8_C( 45), UINT8_C(147), UINT8_C(146), UINT8_C(113) } }, + { { UINT16_C( 27363), UINT16_C( 2309), UINT16_C( 25695), UINT16_C( 21876), UINT16_C( 35306), UINT16_C( 36813), UINT16_C( 39387), UINT16_C( 17083) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0), UINT8_C(114), UINT8_C(131), UINT8_C( 48), UINT8_C(186), UINT8_C(245), UINT8_C(231), UINT8_C(238), UINT8_C( 94) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0), UINT8_C( 92), UINT8_C( 33), UINT8_C(140), UINT8_C(175), UINT8_C( 61), UINT8_C(250), UINT8_C( 59), UINT8_C( 87) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0), UINT8_C( 87), UINT8_C( 72), UINT8_C( 35), UINT8_C(172), UINT8_C( 79), UINT8_C(126), UINT8_C(207), UINT8_C( 
22) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0), UINT8_C(172), UINT8_C( 36), UINT8_C(145), UINT8_C( 86), UINT8_C( 40), UINT8_C( 63), UINT8_C(103), UINT8_C( 11) }, + { UINT8_C( 57), UINT8_C(244), UINT8_C( 78), UINT8_C( 15), UINT8_C(175), UINT8_C(168), UINT8_C(108), UINT8_C( 0), UINT8_C(107), UINT8_C( 9), UINT8_C(100), UINT8_C( 85), UINT8_C(138), UINT8_C(144), UINT8_C(154), UINT8_C( 67) } }, + { { UINT16_C( 25471), UINT16_C( 24819), UINT16_C( 29221), UINT16_C( 12141), UINT16_C( 38560), UINT16_C( 48255), UINT16_C( 11379), UINT16_C( 48434) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86), UINT8_C(192), UINT8_C(122), UINT8_C( 19), UINT8_C(183), UINT8_C( 80), UINT8_C( 64), UINT8_C( 58), UINT8_C(153) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86), UINT8_C(112), UINT8_C( 30), UINT8_C( 69), UINT8_C(238), UINT8_C(212), UINT8_C(144), UINT8_C(142), UINT8_C(166) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86), UINT8_C( 28), UINT8_C( 8), UINT8_C(145), UINT8_C(123), UINT8_C(181), UINT8_C(228), UINT8_C(100), UINT8_C(234) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86), UINT8_C(142), UINT8_C(132), UINT8_C(201), UINT8_C(190), UINT8_C( 91), UINT8_C(242), UINT8_C(178), UINT8_C(245) }, + { UINT8_C( 89), UINT8_C( 46), UINT8_C( 14), UINT8_C(127), UINT8_C( 99), UINT8_C( 59), UINT8_C(123), UINT8_C( 86), UINT8_C( 99), UINT8_C( 97), UINT8_C(114), UINT8_C( 47), UINT8_C(151), UINT8_C(188), UINT8_C( 44), UINT8_C(189) } }, + { { UINT16_C( 36224), UINT16_C( 8314), UINT16_C( 32940), UINT16_C( 63808), UINT16_C( 56263), UINT16_C( 52379), 
UINT16_C( 17081), UINT16_C( 58652) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117), UINT8_C(192), UINT8_C( 61), UINT8_C( 86), UINT8_C(160), UINT8_C(228), UINT8_C( 78), UINT8_C( 93), UINT8_C(142) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117), UINT8_C(176), UINT8_C( 15), UINT8_C( 22), UINT8_C( 40), UINT8_C(121), UINT8_C(147), UINT8_C( 87), UINT8_C(164) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117), UINT8_C(108), UINT8_C( 4), UINT8_C( 5), UINT8_C(202), UINT8_C(222), UINT8_C(101), UINT8_C( 22), UINT8_C( 41) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117), UINT8_C( 54), UINT8_C(130), UINT8_C( 3), UINT8_C(229), UINT8_C(111), UINT8_C( 50), UINT8_C( 11), UINT8_C(148) }, + { UINT8_C(222), UINT8_C(131), UINT8_C(103), UINT8_C(113), UINT8_C( 90), UINT8_C( 24), UINT8_C(198), UINT8_C(117), UINT8_C(142), UINT8_C( 32), UINT8_C(129), UINT8_C(249), UINT8_C(220), UINT8_C(205), UINT8_C( 67), UINT8_C(229) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint8x8_t r = simde_vld1_u8(test_vec[i].r); + + simde_uint8x16_t r1 = simde_vrshrn_high_n_u16(r, a, 1); + simde_uint8x16_t r3 = simde_vrshrn_high_n_u16(r, a, 3); + simde_uint8x16_t r5 = simde_vrshrn_high_n_u16(r, a, 5); + simde_uint8x16_t r6 = simde_vrshrn_high_n_u16(r, a, 6); + simde_uint8x16_t r8 = simde_vrshrn_high_n_u16(r, a, 8); + + simde_test_arm_neon_assert_equal_u8x16(r1, simde_vld1q_u8(test_vec[i].r1)); + simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x16(r5, 
simde_vld1q_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x16(r6, simde_vld1q_u8(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u8x16(r8, simde_vld1q_u8(test_vec[i].r8)); + } + + return 0; +} + +static int +test_simde_vrshrn_high_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint32_t a[4]; + uint16_t r[4]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r16[8]; + } test_vec[] = { + { { UINT32_C( 87549657), UINT32_C( 3197875600), UINT32_C( 3423107810), UINT32_C( 1562909874) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301), UINT16_C( 64731), UINT16_C( 30386), UINT16_C( 3932), UINT16_C( 918) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301), UINT16_C( 57243), UINT16_C( 28374), UINT16_C( 8684), UINT16_C( 41075) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301), UINT16_C( 19962), UINT16_C( 42733), UINT16_C( 543), UINT16_C( 18951) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301), UINT16_C( 10687), UINT16_C( 62686), UINT16_C( 24644), UINT16_C( 59713) }, + { UINT16_C( 17400), UINT16_C( 19521), UINT16_C( 57287), UINT16_C( 12301), UINT16_C( 1336), UINT16_C( 48796), UINT16_C( 52232), UINT16_C( 23848) } }, + { { UINT32_C( 1133139453), UINT32_C( 368185956), UINT32_C( 458320166), UINT32_C( 3496695836) }, + { UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906) }, + { UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906), UINT16_C( 19136), UINT16_C( 16973), UINT16_C( 11557), UINT16_C( 27396) }, + { UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906), UINT16_C( 10584), UINT16_C( 51274), UINT16_C( 17829), UINT16_C( 44384) }, + { UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906), UINT16_C( 58005), UINT16_C( 31877), UINT16_C( 54362), UINT16_C( 6870) }, + { 
UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906), UINT16_C( 7251), UINT16_C( 44945), UINT16_C( 55947), UINT16_C( 33627) }, + { UINT16_C( 44607), UINT16_C( 33562), UINT16_C( 36745), UINT16_C( 21906), UINT16_C( 17290), UINT16_C( 5618), UINT16_C( 6993), UINT16_C( 53355) } }, + { { UINT32_C( 980851135), UINT32_C( 793656310), UINT32_C( 2985559125), UINT32_C( 560062105) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173), UINT16_C( 54072), UINT16_C( 51071), UINT16_C( 32907), UINT16_C( 15315) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173), UINT16_C( 55911), UINT16_C( 14576), UINT16_C( 53265), UINT16_C( 34682) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173), UINT16_C( 40358), UINT16_C( 54159), UINT16_C( 32001), UINT16_C( 22648) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173), UINT16_C( 54197), UINT16_C( 31346), UINT16_C( 36768), UINT16_C( 2831) }, + { UINT16_C( 38879), UINT16_C( 213), UINT16_C( 15569), UINT16_C( 36173), UINT16_C( 14967), UINT16_C( 12110), UINT16_C( 45556), UINT16_C( 8546) } }, + { { UINT32_C( 1422970508), UINT32_C( 343243953), UINT32_C( 404026163), UINT32_C( 4166559419) }, + { UINT16_C( 14167), UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503) }, + { UINT16_C( 14167), UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503), UINT16_C( 6610), UINT16_C( 44950), UINT16_C( 40550), UINT16_C( 5335) }, + { UINT16_C( 14167), UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503), UINT16_C( 17210), UINT16_C( 54771), UINT16_C( 21453), UINT16_C( 25243) }, + { UINT16_C( 14167), UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503), UINT16_C( 13364), UINT16_C( 7519), UINT16_C( 1341), UINT16_C( 5674) }, + { UINT16_C( 14167), UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503), UINT16_C( 42630), UINT16_C( 41900), UINT16_C( 49320), UINT16_C( 49861) }, + { UINT16_C( 14167), 
UINT16_C( 4288), UINT16_C( 34735), UINT16_C( 50503), UINT16_C( 21713), UINT16_C( 5237), UINT16_C( 6165), UINT16_C( 63577) } }, + { { UINT32_C( 1025494157), UINT32_C( 1044321423), UINT32_C( 3497459030), UINT32_C( 4040193428) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606), UINT16_C( 63890), UINT16_C( 58002), UINT16_C( 57259), UINT16_C( 3763) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606), UINT16_C( 32562), UINT16_C( 64594), UINT16_C( 56309), UINT16_C( 16854) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606), UINT16_C( 18419), UINT16_C( 36805), UINT16_C( 7615), UINT16_C( 13341) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606), UINT16_C( 59646), UINT16_C( 61945), UINT16_C( 33720), UINT16_C( 34436) }, + { UINT16_C( 33360), UINT16_C( 17888), UINT16_C( 49892), UINT16_C( 38606), UINT16_C( 15648), UINT16_C( 15935), UINT16_C( 53367), UINT16_C( 61648) } }, + { { UINT32_C( 3455883293), UINT32_C( 3831475108), UINT32_C( 1766784541), UINT32_C( 3258319112) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497), UINT16_C( 37636), UINT16_C( 62837), UINT16_C( 57284), UINT16_C( 49185) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497), UINT16_C( 62048), UINT16_C( 32431), UINT16_C( 15352), UINT16_C( 55300) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497), UINT16_C( 32550), UINT16_C( 6123), UINT16_C( 21440), UINT16_C( 36224) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497), UINT16_C( 28645), UINT16_C( 8957), UINT16_C( 19064), UINT16_C( 4528) }, + { UINT16_C( 59822), UINT16_C( 57292), UINT16_C( 5363), UINT16_C( 23497), UINT16_C( 52733), UINT16_C( 58464), UINT16_C( 26959), UINT16_C( 49718) } }, + { { UINT32_C( 
2005456876), UINT32_C( 2730066849), UINT32_C( 749824633), UINT32_C( 1930375946) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117), UINT16_C( 6910), UINT16_C( 12404), UINT16_C( 11599), UINT16_C( 58977) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117), UINT16_C( 9056), UINT16_C( 58895), UINT16_C( 50602), UINT16_C( 15564) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117), UINT16_C( 57910), UINT16_C( 44641), UINT16_C( 11355), UINT16_C( 50125) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117), UINT16_C( 48199), UINT16_C( 5580), UINT16_C( 25995), UINT16_C( 39034) }, + { UINT16_C( 42802), UINT16_C( 65518), UINT16_C( 23836), UINT16_C( 42117), UINT16_C( 30601), UINT16_C( 41658), UINT16_C( 11441), UINT16_C( 29455) } }, + { { UINT32_C( 148880009), UINT32_C( 3807681043), UINT32_C( 3754381792), UINT32_C( 3404112735) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767), UINT16_C( 63313), UINT16_C( 37698), UINT16_C( 59964), UINT16_C( 54380) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767), UINT16_C( 32490), UINT16_C( 53864), UINT16_C( 7496), UINT16_C( 39565) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767), UINT16_C( 14319), UINT16_C( 48423), UINT16_C( 61908), UINT16_C( 47529) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767), UINT16_C( 18174), UINT16_C( 6053), UINT16_C( 65083), UINT16_C( 22325) }, + { UINT16_C( 2886), UINT16_C( 62845), UINT16_C( 16231), UINT16_C( 11767), UINT16_C( 2272), UINT16_C( 58101), UINT16_C( 57287), UINT16_C( 51943) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint16x4_t r = 
simde_vld1_u16(test_vec[i].r); + + simde_uint16x8_t r3 = simde_vrshrn_high_n_u32(r, a, 3); + simde_uint16x8_t r6 = simde_vrshrn_high_n_u32(r, a, 6); + simde_uint16x8_t r10 = simde_vrshrn_high_n_u32(r, a, 10); + simde_uint16x8_t r13 = simde_vrshrn_high_n_u32(r, a, 13); + simde_uint16x8_t r16 = simde_vrshrn_high_n_u32(r, a, 16); + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r16, simde_vld1q_u16(test_vec[i].r16)); + } + + return 0; +} + +static int +test_simde_vrshrn_high_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + static const struct { + uint64_t a[2]; + uint32_t r[2]; + uint32_t r6[4]; + uint32_t r13[4]; + uint32_t r19[4]; + uint32_t r26[4]; + uint32_t r32[4]; + } test_vec[] = { + { { UINT64_C(12634522949072977421), UINT64_C(15389710757421385289) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475), UINT32_C( 3120041976), UINT32_C( 2502979609) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475), UINT32_C( 3178491936), UINT32_C( 3811205344) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475), UINT32_C( 3673542592), UINT32_C( 1737271684) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475), UINT32_C( 3585469344), UINT32_C( 1691294035) }, + { UINT32_C( 2551825828), UINT32_C( 3348771475), UINT32_C( 2941704110), UINT32_C( 3583196261) } }, + { { UINT64_C(17407805632220268990), UINT64_C(13386670519694084117) }, + { UINT32_C( 793383030), UINT32_C( 2436206287) }, + { UINT32_C( 793383030), UINT32_C( 2436206287), UINT32_C( 4157481511), UINT32_C( 2783853856) }, + { UINT32_C( 793383030), UINT32_C( 2436206287), UINT32_C( 2549062724), UINT32_C( 256629882) }, + { UINT32_C( 793383030), UINT32_C( 2436206287), UINT32_C( 2657074801), 
UINT32_C( 3762106226) }, + { UINT32_C( 793383030), UINT32_C( 2436206287), UINT32_C( 1698479997), UINT32_C( 1908439647) }, + { UINT32_C( 793383030), UINT32_C( 2436206287), UINT32_C( 4053070590), UINT32_C( 3116827113) } }, + { { UINT64_C(16343870392989174902), UINT64_C(11733149172732130587) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258), UINT32_C( 496276274), UINT32_C( 132478932) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258), UINT32_C( 3157993766), UINT32_C( 672123632) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258), UINT32_C( 586214565), UINT32_C( 2426421036) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258), UINT32_C( 3024478681), UINT32_C( 3038855294) }, + { UINT32_C( 3472215691), UINT32_C( 1649027258), UINT32_C( 3805353863), UINT32_C( 2731836674) } }, + { { UINT64_C(15512433260555789026), UINT64_C(3577193402352101463) }, + { UINT32_C( 713202166), UINT32_C( 277356412) }, + { UINT32_C( 713202166), UINT32_C( 277356412), UINT32_C( 1925547916), UINT32_C( 3378529697) }, + { UINT32_C( 713202166), UINT32_C( 277356412), UINT32_C( 3739585295), UINT32_C( 4086481035) }, + { UINT32_C( 713202166), UINT32_C( 277356412), UINT32_C( 3883636268), UINT32_C( 2546879234) }, + { UINT32_C( 713202166), UINT32_C( 277356412), UINT32_C( 3520001836), UINT32_C( 1764727958) }, + { UINT32_C( 713202166), UINT32_C( 277356412), UINT32_C( 3611769821), UINT32_C( 832880242) } }, + { { UINT64_C(15735521981218693814), UINT64_C(1229963549765769063) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170), UINT32_C( 2088210571), UINT32_C( 1406787166) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170), UINT32_C( 1861807905), UINT32_C( 2863117245) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170), UINT32_C( 4189840317), UINT32_C( 917151439) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170), UINT32_C( 2549315527), UINT32_C( 1148015934) }, + { UINT32_C( 3643688801), UINT32_C( 1421601170), 
UINT32_C( 3663711711), UINT32_C( 286373205) } }, + { { UINT64_C(1235418179107275598), UINT64_C(288733723891023001) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541), UINT32_C( 659226381), UINT32_C( 723307762) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541), UINT32_C( 2991494654), UINT32_C( 1314273690) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541), UINT32_C( 2731096664), UINT32_C( 960059622) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541), UINT32_C( 1229296245), UINT32_C( 7500466) }, + { UINT32_C( 3555891537), UINT32_C( 2563891541), UINT32_C( 287643210), UINT32_C( 67226059) } }, + { { UINT64_C(11340215631633226781), UINT64_C(16474273799472454905) }, + { UINT32_C( 4112139742), UINT32_C( 917585359) }, + { UINT32_C( 4112139742), UINT32_C( 917585359), UINT32_C( 582963824), UINT32_C( 4057485940) }, + { UINT32_C( 4112139742), UINT32_C( 917585359), UINT32_C( 1346731685), UINT32_C( 98807973) }, + { UINT32_C( 4112139742), UINT32_C( 917585359), UINT32_C( 289478139), UINT32_C( 202870467) }, + { UINT32_C( 4112139742), UINT32_C( 917585359), UINT32_C( 1478656556), UINT32_C( 672673566) }, + { UINT32_C( 4112139742), UINT32_C( 917585359), UINT32_C( 2640349705), UINT32_C( 3835715772) } }, + { { UINT64_C(11225533717002403351), UINT64_C(16173979578999132471) }, + { UINT32_C( 470831506), UINT32_C( 534338189) }, + { UINT32_C( 470831506), UINT32_C( 534338189), UINT32_C( 3267388664), UINT32_C( 4014647621) }, + { UINT32_C( 470831506), UINT32_C( 534338189), UINT32_C( 3682959562), UINT32_C( 635344211) }, + { UINT32_C( 470831506), UINT32_C( 534338189), UINT32_C( 594417155), UINT32_C( 2962717269) }, + { UINT32_C( 470831506), UINT32_C( 534338189), UINT32_C( 4064730156), UINT32_C( 492908277) }, + { UINT32_C( 470831506), UINT32_C( 534338189), UINT32_C( 2613648241), UINT32_C( 3765798076) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + 
simde_uint32x2_t r = simde_vld1_u32(test_vec[i].r); + + simde_uint32x4_t r6 = simde_vrshrn_high_n_u64(r, a, 6); + simde_uint32x4_t r13 = simde_vrshrn_high_n_u64(r, a, 13); + simde_uint32x4_t r19 = simde_vrshrn_high_n_u64(r, a, 19); + simde_uint32x4_t r26 = simde_vrshrn_high_n_u64(r, a, 26); + simde_uint32x4_t r32 = simde_vrshrn_high_n_u64(r, a, 32); + + simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x4(r32, simde_vld1q_u32(test_vec[i].r32)); + } + + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrshrn_high_n_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/rshrn_n.c b/test/arm/neon/rshrn_n.c index d3f517c04..3a700bb7c 100644 --- a/test/arm/neon/rshrn_n.c +++ b/test/arm/neon/rshrn_n.c @@ -1,4 +1,3 @@ - #define SIMDE_TEST_ARM_NEON_INSN rshrn_n #include "test-neon.h" diff --git a/test/arm/neon/rsubhn.c b/test/arm/neon/rsubhn.c new file mode 100644 index 000000000..0b80209ca --- /dev/null +++ b/test/arm/neon/rsubhn.c @@ -0,0 +1,370 @@ +#define SIMDE_TEST_ARM_NEON_INSN rsubhn + +#include "test-neon.h" +#include "../../../simde/arm/neon/rsubhn.h" + +static int +test_simde_vrsubhn_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t a[8]; + int16_t b[8]; + int8_t r[8]; + } test_vec[] = { + { { INT16_C( 1165), -INT16_C( 25577), -INT16_C( 19629), INT16_C( 16709), -INT16_C( 10414), INT16_C( 11317), -INT16_C( 25725), -INT16_C( 8539) }, + { INT16_C( 30857), 
INT16_C( 31003), -INT16_C( 19117), INT16_C( 17860), INT16_C( 26198), INT16_C( 3923), INT16_C( 1999), INT16_C( 6514) }, + { -INT8_C( 116), INT8_C( 35), -INT8_C( 2), -INT8_C( 4), INT8_C( 113), INT8_C( 29), -INT8_C( 108), -INT8_C( 59) } }, + { { -INT16_C( 25274), INT16_C( 25728), -INT16_C( 2273), INT16_C( 28917), -INT16_C( 20745), -INT16_C( 17220), INT16_C( 17034), INT16_C( 10018) }, + { INT16_C( 8191), -INT16_C( 17753), INT16_C( 9836), INT16_C( 10532), -INT16_C( 17563), -INT16_C( 19211), -INT16_C( 27082), -INT16_C( 27159) }, + { INT8_C( 125), -INT8_C( 86), -INT8_C( 47), INT8_C( 72), -INT8_C( 12), INT8_C( 8), -INT8_C( 84), -INT8_C( 111) } }, + { { INT16_C( 3119), INT16_C( 22524), INT16_C( 31293), -INT16_C( 17154), -INT16_C( 555), -INT16_C( 4005), INT16_C( 3508), -INT16_C( 18031) }, + { INT16_C( 13560), -INT16_C( 15028), INT16_C( 24566), -INT16_C( 22220), INT16_C( 11155), INT16_C( 3919), -INT16_C( 6841), -INT16_C( 24138) }, + { -INT8_C( 41), -INT8_C( 109), INT8_C( 26), INT8_C( 20), -INT8_C( 46), -INT8_C( 31), INT8_C( 40), INT8_C( 24) } }, + { { INT16_C( 18265), INT16_C( 11526), -INT16_C( 4342), INT16_C( 6872), -INT16_C( 2835), -INT16_C( 19093), -INT16_C( 8816), -INT16_C( 5583) }, + { INT16_C( 7850), -INT16_C( 18627), -INT16_C( 12018), -INT16_C( 7828), -INT16_C( 20995), INT16_C( 25319), -INT16_C( 23122), -INT16_C( 30516) }, + { INT8_C( 41), INT8_C( 118), INT8_C( 30), INT8_C( 57), INT8_C( 71), INT8_C( 83), INT8_C( 56), INT8_C( 97) } }, + { { -INT16_C( 3909), -INT16_C( 7574), -INT16_C( 19525), INT16_C( 25595), -INT16_C( 16839), -INT16_C( 248), -INT16_C( 8970), -INT16_C( 9537) }, + { -INT16_C( 29539), -INT16_C( 29907), INT16_C( 26573), INT16_C( 4862), -INT16_C( 28200), -INT16_C( 16755), -INT16_C( 2122), -INT16_C( 11964) }, + { INT8_C( 100), INT8_C( 87), INT8_C( 76), INT8_C( 81), INT8_C( 44), INT8_C( 64), -INT8_C( 27), INT8_C( 9) } }, + { { -INT16_C( 1140), -INT16_C( 9092), -INT16_C( 26626), INT16_C( 21004), INT16_C( 12235), -INT16_C( 3154), -INT16_C( 13931), INT16_C( 
31373) }, + { INT16_C( 2858), -INT16_C( 940), INT16_C( 20724), INT16_C( 16815), -INT16_C( 20556), INT16_C( 26572), INT16_C( 14270), INT16_C( 17145) }, + { -INT8_C( 16), -INT8_C( 32), INT8_C( 71), INT8_C( 16), INT8_MIN, -INT8_C( 116), -INT8_C( 110), INT8_C( 56) } }, + { { -INT16_C( 23074), -INT16_C( 26996), INT16_C( 4526), INT16_C( 1349), -INT16_C( 8754), INT16_C( 10657), INT16_C( 19463), INT16_C( 30408) }, + { -INT16_C( 19757), -INT16_C( 28653), -INT16_C( 21984), INT16_C( 15377), -INT16_C( 24759), INT16_C( 25557), -INT16_C( 10359), -INT16_C( 25279) }, + { -INT8_C( 13), INT8_C( 6), INT8_C( 104), -INT8_C( 55), INT8_C( 63), -INT8_C( 58), INT8_C( 116), -INT8_C( 38) } }, + { { INT16_C( 32372), INT16_C( 32113), INT16_C( 23030), -INT16_C( 26077), -INT16_C( 7480), -INT16_C( 15356), INT16_C( 20553), INT16_C( 19760) }, + { INT16_C( 25860), INT16_C( 30792), -INT16_C( 29580), -INT16_C( 5399), INT16_C( 26520), -INT16_C( 9373), -INT16_C( 29052), INT16_C( 26833) }, + { INT8_C( 25), INT8_C( 5), -INT8_C( 50), -INT8_C( 81), INT8_C( 123), -INT8_C( 23), -INT8_C( 62), -INT8_C( 28) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int8x8_t r = simde_vrsubhn_s16(a, b); + + simde_test_arm_neon_assert_equal_i8x8(r, simde_vld1_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int8x8_t r = simde_vrsubhn_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t a[4]; + int32_t b[4]; + 
int16_t r[4]; + } test_vec[] = { + { { INT32_C(1812595993), -INT32_C( 667570689), INT32_C( 929976915), -INT32_C(1112037646) }, + { -INT32_C( 420586090), INT32_C( 955493168), -INT32_C(1240693863), INT32_C( 630754689) }, + { -INT16_C( 31460), -INT16_C( 24766), -INT16_C( 32414), -INT16_C( 26593) } }, + { { INT32_C(1653728956), -INT32_C(1166867047), -INT32_C( 410466735), INT32_C(1990199230) }, + { INT32_C(1117439081), -INT32_C( 433903381), INT32_C(1330312943), INT32_C( 502661614) }, + { INT16_C( 8183), -INT16_C( 11184), -INT16_C( 26562), INT16_C( 22698) } }, + { { INT32_C(1227370350), INT32_C(1202094101), INT32_C(1855902672), -INT32_C(1132663354) }, + { -INT32_C(1821465079), -INT32_C( 646635233), INT32_C( 164397446), -INT32_C(1082154980) }, + { -INT16_C( 19014), INT16_C( 28209), INT16_C( 25810), -INT16_C( 771) } }, + { { -INT32_C(2038980475), INT32_C(1726066676), INT32_C(1672820009), INT32_C(1817557340) }, + { INT32_C( 663258981), -INT32_C(1055985839), -INT32_C( 57347798), INT32_C( 547465596) }, + { INT16_C( 24303), -INT16_C( 23085), INT16_C( 26400), INT16_C( 19380) } }, + { { -INT32_C(1997625374), -INT32_C(2093685407), -INT32_C(1254548881), -INT32_C(2146851755) }, + { -INT32_C(1874675543), INT32_C(1522988084), -INT32_C(1415862084), INT32_C( 42689605) }, + { -INT16_C( 1876), INT16_C( 10350), INT16_C( 2461), INT16_C( 32126) } }, + { { -INT32_C(1987048678), -INT32_C( 616391684), INT32_C( 981121408), -INT32_C(1218383321) }, + { -INT32_C( 546058595), -INT32_C(2076611794), -INT32_C( 884668964), INT32_C( 291582647) }, + { -INT16_C( 21988), INT16_C( 22281), INT16_C( 28470), -INT16_C( 23040) } }, + { { -INT32_C(2043385215), INT32_C(1957693319), INT32_C( 927930365), -INT32_C(1269369000) }, + { INT32_C( 195513466), INT32_C(2020898934), -INT32_C(1610338607), INT32_C( 111181957) }, + { INT16_C( 31373), -INT16_C( 964), -INT16_C( 26805), -INT16_C( 21066) } }, + { { -INT32_C( 438254734), INT32_C( 48171334), -INT32_C( 393918305), INT32_C(1540695454) }, + { -INT32_C(1959842633), 
-INT32_C(1766366061), INT32_C( 903709532), -INT32_C(1070121217) }, + { INT16_C( 23218), INT16_C( 27688), -INT16_C( 19800), -INT16_C( 25698) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int16x4_t r = simde_vrsubhn_s32(a, b); + + simde_test_arm_neon_assert_equal_i16x4(r, simde_vld1_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int16x4_t r = simde_vrsubhn_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int64_t a[2]; + int64_t b[2]; + int32_t r[2]; + } test_vec[] = { + { { -INT64_C( 7131656646972370489), -INT64_C( 9191719478619199873) }, + { INT64_C( 6956781457684521314), -INT64_C( 8857608276428189746) }, + { INT32_C( 1014747184), -INT32_C( 77791326) } }, + { { INT64_C( 5467516521056656280), -INT64_C( 5805162430751730775) }, + { INT64_C( 7753254591744264402), INT64_C( 2694334893554550518) }, + { -INT32_C( 532189866), -INT32_C( 1978943433) } }, + { { INT64_C( 2528882371772547758), INT64_C( 2442999325206047751) }, + { -INT64_C( 2912812053305232400), -INT64_C( 4780790296547683821) }, + { INT32_C( 1266993216), INT32_C( 1681919587) } }, + { { INT64_C( 5312513145232874807), INT64_C( 5345053565558734413) }, + { INT64_C( 2551668690573666092), INT64_C( 1866764780563808235) }, + { INT32_C( 642809191), INT32_C( 809852217) } }, + { { -INT64_C( 6982640720335884570), -INT64_C( 553324913872001245) }, + { -INT64_C( 4454923573749713898), INT64_C( 
8917356586784945114) }, + { -INT32_C( 588530010), INT32_C( 2089902426) } }, + { { INT64_C( 5067003250906428870), -INT64_C( 866627165876107086) }, + { -INT64_C( 7807865972529963603), -INT64_C( 4443756904747231981) }, + { -INT32_C( 1297303208), INT32_C( 832865420) } }, + { { INT64_C( 8070297654783732279), INT64_C( 4172239274647255845) }, + { -INT64_C( 5873247660730130865), INT64_C( 1169429670015049654) }, + { -INT32_C( 1048482665), INT32_C( 699146093) } }, + { { INT64_C( 8748447386439770518), -INT64_C( 5024017480446161880) }, + { -INT64_C( 1171128945334562910), INT64_C( 2824347558001420556) }, + { -INT32_C( 1985385954), -INT32_C( 1827339884) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int32x2_t r = simde_vrsubhn_s64(a, b); + + simde_test_arm_neon_assert_equal_i32x2(r, simde_vld1_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int32x2_t r = simde_vrsubhn_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t a[8]; + uint16_t b[8]; + uint8_t r[8]; + } test_vec[] = { + { { UINT16_C( 12161), UINT16_C( 18617), UINT16_C( 52789), UINT16_C( 32554), UINT16_C( 56627), UINT16_C( 51906), UINT16_C( 34962), UINT16_C( 25867) }, + { UINT16_C( 49337), UINT16_C( 32743), UINT16_C( 49165), UINT16_C( 22352), UINT16_C( 18327), UINT16_C( 48111), UINT16_C( 8359), UINT16_C( 29201) }, + { UINT8_C( 111), UINT8_C( 201), UINT8_C( 14), UINT8_C( 40), UINT8_C( 150), UINT8_C( 
15), UINT8_C( 104), UINT8_C( 243) } }, + { { UINT16_C( 933), UINT16_C( 64397), UINT16_C( 24717), UINT16_C( 49563), UINT16_C( 1496), UINT16_C( 39516), UINT16_C( 40868), UINT16_C( 40604) }, + { UINT16_C( 53778), UINT16_C( 12903), UINT16_C( 9411), UINT16_C( 46927), UINT16_C( 6143), UINT16_C( 21411), UINT16_C( 52782), UINT16_C( 41568) }, + { UINT8_C( 50), UINT8_C( 201), UINT8_C( 60), UINT8_C( 10), UINT8_C( 238), UINT8_C( 71), UINT8_C( 209), UINT8_C( 252) } }, + { { UINT16_C( 53842), UINT16_C( 11057), UINT16_C( 8569), UINT16_C( 32369), UINT16_C( 11083), UINT16_C( 45511), UINT16_C( 41076), UINT16_C( 36202) }, + { UINT16_C( 4513), UINT16_C( 33804), UINT16_C( 57068), UINT16_C( 8207), UINT16_C( 33162), UINT16_C( 35568), UINT16_C( 20399), UINT16_C( 28487) }, + { UINT8_C( 193), UINT8_C( 167), UINT8_C( 67), UINT8_C( 94), UINT8_C( 170), UINT8_C( 39), UINT8_C( 81), UINT8_C( 30) } }, + { { UINT16_C( 56256), UINT16_C( 57420), UINT16_C( 48695), UINT16_C( 54799), UINT16_C( 19578), UINT16_C( 52367), UINT16_C( 53650), UINT16_C( 42276) }, + { UINT16_C( 54033), UINT16_C( 28158), UINT16_C( 54584), UINT16_C( 64405), UINT16_C( 41000), UINT16_C( 15510), UINT16_C( 61147), UINT16_C( 15301) }, + { UINT8_C( 9), UINT8_C( 114), UINT8_C( 233), UINT8_C( 218), UINT8_C( 172), UINT8_C( 144), UINT8_C( 227), UINT8_C( 105) } }, + { { UINT16_C( 61976), UINT16_C( 423), UINT16_C( 25133), UINT16_C( 49612), UINT16_C( 53580), UINT16_C( 29501), UINT16_C( 116), UINT16_C( 24297) }, + { UINT16_C( 47876), UINT16_C( 38860), UINT16_C( 9421), UINT16_C( 35565), UINT16_C( 21120), UINT16_C( 45197), UINT16_C( 26024), UINT16_C( 31506) }, + { UINT8_C( 55), UINT8_C( 106), UINT8_C( 61), UINT8_C( 55), UINT8_C( 127), UINT8_C( 195), UINT8_C( 155), UINT8_C( 228) } }, + { { UINT16_C( 23831), UINT16_C( 13364), UINT16_C( 50466), UINT16_C( 47525), UINT16_C( 46256), UINT16_C( 59041), UINT16_C( 48757), UINT16_C( 33825) }, + { UINT16_C( 27017), UINT16_C( 56722), UINT16_C( 18848), UINT16_C( 31122), UINT16_C( 15506), UINT16_C( 10463), 
UINT16_C( 64239), UINT16_C( 24932) }, + { UINT8_C( 244), UINT8_C( 87), UINT8_C( 124), UINT8_C( 64), UINT8_C( 120), UINT8_C( 190), UINT8_C( 196), UINT8_C( 35) } }, + { { UINT16_C( 4668), UINT16_C( 47405), UINT16_C( 1280), UINT16_C( 37355), UINT16_C( 44987), UINT16_C( 61159), UINT16_C( 58037), UINT16_C( 9495) }, + { UINT16_C( 30152), UINT16_C( 23980), UINT16_C( 29476), UINT16_C( 55866), UINT16_C( 6527), UINT16_C( 16904), UINT16_C( 53829), UINT16_C( 44599) }, + { UINT8_C( 156), UINT8_C( 92), UINT8_C( 146), UINT8_C( 184), UINT8_C( 150), UINT8_C( 173), UINT8_C( 16), UINT8_C( 119) } }, + { { UINT16_C( 2917), UINT16_C( 65077), UINT16_C( 49136), UINT16_C( 55605), UINT16_C( 20567), UINT16_C( 28302), UINT16_C( 2185), UINT16_C( 41419) }, + { UINT16_C( 51639), UINT16_C( 17009), UINT16_C( 41304), UINT16_C( 39702), UINT16_C( 58523), UINT16_C( 13517), UINT16_C( 31125), UINT16_C( 63522) }, + { UINT8_C( 66), UINT8_C( 188), UINT8_C( 31), UINT8_C( 62), UINT8_C( 108), UINT8_C( 58), UINT8_C( 143), UINT8_C( 170) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint8x8_t r = simde_vrsubhn_u16(a, b); + + simde_test_arm_neon_assert_equal_u8x8(r, simde_vld1_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint8x8_t r = simde_vrsubhn_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t a[4]; + uint32_t b[4]; + uint16_t r[4]; + } test_vec[] = { + { { UINT32_C( 584512549), 
UINT32_C( 242317329), UINT32_C(4038352609), UINT32_C(1922938507) }, + { UINT32_C( 86125608), UINT32_C(1276565899), UINT32_C(1508346116), UINT32_C(1339170290) }, + { UINT16_C( 7605), UINT16_C( 49755), UINT16_C( 38605), UINT16_C( 8908) } }, + { { UINT32_C( 890467764), UINT32_C( 183007246), UINT32_C(4011717609), UINT32_C( 913780066) }, + { UINT32_C(2320321187), UINT32_C(2559194385), UINT32_C(2289723378), UINT32_C(2752114807) }, + { UINT16_C( 43718), UINT16_C( 29278), UINT16_C( 26276), UINT16_C( 37485) } }, + { { UINT32_C(1047477475), UINT32_C(3144558603), UINT32_C(1468018416), UINT32_C(1111064548) }, + { UINT32_C(2437846973), UINT32_C(1713756673), UINT32_C(1570329625), UINT32_C( 889747248) }, + { UINT16_C( 44321), UINT16_C( 21832), UINT16_C( 63975), UINT16_C( 3377) } }, + { { UINT32_C(3841402048), UINT32_C(1239918712), UINT32_C(3441073611), UINT32_C(3082239364) }, + { UINT32_C(3647154593), UINT32_C(3728341984), UINT32_C(1420184183), UINT32_C( 354953863) }, + { UINT16_C( 2964), UINT16_C( 27566), UINT16_C( 30836), UINT16_C( 41615) } }, + { { UINT32_C(1660863637), UINT32_C(3738580126), UINT32_C(1177643969), UINT32_C(3262696681) }, + { UINT32_C(1896420512), UINT32_C(1257348162), UINT32_C(2435569234), UINT32_C(3525887081) }, + { UINT16_C( 61942), UINT16_C( 37861), UINT16_C( 46342), UINT16_C( 61520) } }, + { { UINT32_C(3096824198), UINT32_C(3621319602), UINT32_C(3142426518), UINT32_C(3645582469) }, + { UINT32_C( 796952733), UINT32_C(2091671964), UINT32_C(1100615639), UINT32_C(2144998799) }, + { UINT16_C( 35093), UINT16_C( 23341), UINT16_C( 31156), UINT16_C( 22897) } }, + { { UINT32_C(1396282785), UINT32_C(3730226499), UINT32_C(3654754584), UINT32_C( 959659968) }, + { UINT32_C(1651744654), UINT32_C(4017901029), UINT32_C(1629456713), UINT32_C(4160987821) }, + { UINT16_C( 61638), UINT16_C( 61146), UINT16_C( 30904), UINT16_C( 16688) } }, + { { UINT32_C(1715960303), UINT32_C(4039562533), UINT32_C(3828483314), UINT32_C(2407488645) }, + { UINT32_C(3912234216), 
UINT32_C(2924013893), UINT32_C( 423861645), UINT32_C(3779652142) }, + { UINT16_C( 32024), UINT16_C( 17022), UINT16_C( 51950), UINT16_C( 44598) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint16x4_t r = simde_vrsubhn_u32(a, b); + + simde_test_arm_neon_assert_equal_u16x4(r, simde_vld1_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint16x4_t r = simde_vrsubhn_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint64_t a[2]; + uint64_t b[2]; + uint32_t r[2]; + } test_vec[] = { + { { UINT64_C( 5720579107695434868), UINT64_C(10158864097640915505) }, + { UINT64_C( 2944425187806881986), UINT64_C( 6881001908194755993) }, + { UINT32_C( 646373704), UINT32_C( 763186763) } }, + { { UINT64_C(14151069090009110415), UINT64_C( 3427730040787600922) }, + { UINT64_C( 1162921553288783224), UINT64_C( 6896780722481148156) }, + { UINT32_C( 3024038751), UINT32_C( 3487265993) } }, + { { UINT64_C( 5957476926263911949), UINT64_C( 7246487387568598101) }, + { UINT64_C( 7480154478098634778), UINT64_C( 7158156165420539990) }, + { UINT32_C( 3940441302), UINT32_C( 20566215) } }, + { { UINT64_C( 7814423941832542988), UINT64_C(14658162422605217111) }, + { UINT64_C(14460842918693877557), UINT64_C(16532515342099580551) }, + { UINT32_C( 2747477288), UINT32_C( 3858560499) } }, + { { UINT64_C(15985923743401242236), UINT64_C(14565191298281482105) }, + { UINT64_C( 3644320369892282462), 
UINT64_C(10380643387119289025) }, + { UINT32_C( 2873503457), UINT32_C( 974290984) } }, + { { UINT64_C(15842686916909780856), UINT64_C( 6007490528874810545) }, + { UINT64_C( 5998344717132929260), UINT64_C(17066458207901803814) }, + { UINT32_C( 2292064531), UINT32_C( 1720100733) } }, + { { UINT64_C( 8906256487824497186), UINT64_C(16273672314943311094) }, + { UINT64_C(14134855410931785930), UINT64_C( 2718685512824199260) }, + { UINT32_C( 3077589243), UINT32_C( 3156016302) } }, + { { UINT64_C(11260902580370141307), UINT64_C( 6856970678142061529) }, + { UINT64_C( 4916313494196341874), UINT64_C(15477497448153835858) }, + { UINT32_C( 1477214761), UINT32_C( 2287844500) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + simde_uint32x2_t r = simde_vrsubhn_u64(a, b); + + simde_test_arm_neon_assert_equal_u32x2(r, simde_vld1_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint32x2_t r = simde_vrsubhn_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_s64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/rsubhn_high.c b/test/arm/neon/rsubhn_high.c new file mode 100644 index 000000000..fb0a3e0a5 --- /dev/null +++ b/test/arm/neon/rsubhn_high.c @@ -0,0 +1,443 @@ 
+#define SIMDE_TEST_ARM_NEON_INSN rsubhn_high + +#include "test-neon.h" +#include "../../../simde/arm/neon/rsubhn_high.h" + +static int +test_simde_vrsubhn_high_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int8_t r_[8]; + int16_t a[8]; + int16_t b[8]; + int8_t r[16]; + } test_vec[] = { + { { -INT8_C( 57), INT8_C( 26), INT8_C( 118), -INT8_C( 106), INT8_C( 22), INT8_C( 120), INT8_C( 9), INT8_C( 108) }, + { INT16_C( 13311), -INT16_C( 31040), -INT16_C( 24932), -INT16_C( 4106), INT16_C( 2488), INT16_C( 18411), -INT16_C( 9260), INT16_C( 15452) }, + { -INT16_C( 30683), -INT16_C( 28493), INT16_C( 4281), -INT16_C( 14327), -INT16_C( 13218), -INT16_C( 6794), -INT16_C( 3631), -INT16_C( 15368) }, + { -INT8_C( 57), INT8_C( 26), INT8_C( 118), -INT8_C( 106), INT8_C( 22), INT8_C( 120), INT8_C( 9), INT8_C( 108), -INT8_C( 84), -INT8_C( 10), -INT8_C( 114), INT8_C( 40), INT8_C( 61), INT8_C( 98), -INT8_C( 22), INT8_C( 120) } }, + { { INT8_C( 124), INT8_C( 84), -INT8_C( 124), -INT8_C( 66), INT8_C( 49), INT8_C( 64), -INT8_C( 55), INT8_C( 120) }, + { -INT16_C( 28562), -INT16_C( 19703), -INT16_C( 2719), INT16_C( 16071), -INT16_C( 24879), INT16_C( 26327), INT16_C( 26938), -INT16_C( 13405) }, + { -INT16_C( 19547), INT16_C( 30137), INT16_C( 27207), INT16_C( 21507), -INT16_C( 31778), -INT16_C( 13807), INT16_C( 4738), -INT16_C( 24177) }, + { INT8_C( 124), INT8_C( 84), -INT8_C( 124), -INT8_C( 66), INT8_C( 49), INT8_C( 64), -INT8_C( 55), INT8_C( 120), -INT8_C( 35), INT8_C( 61), -INT8_C( 117), -INT8_C( 21), INT8_C( 27), -INT8_C( 99), INT8_C( 87), INT8_C( 42) } }, + { { -INT8_C( 109), -INT8_C( 29), -INT8_C( 17), -INT8_C( 36), -INT8_C( 56), INT8_C( 90), -INT8_C( 19), -INT8_C( 55) }, + { INT16_C( 741), INT16_C( 711), -INT16_C( 1724), INT16_C( 7729), INT16_C( 6281), -INT16_C( 13445), INT16_C( 22116), INT16_C( 26179) }, + { INT16_C( 6527), INT16_C( 29841), -INT16_C( 5864), INT16_C( 18441), -INT16_C( 24112), -INT16_C( 30497), -INT16_C( 6294), INT16_C( 7786) }, + { -INT8_C( 109), 
-INT8_C( 29), -INT8_C( 17), -INT8_C( 36), -INT8_C( 56), INT8_C( 90), -INT8_C( 19), -INT8_C( 55), -INT8_C( 23), -INT8_C( 114), INT8_C( 16), -INT8_C( 42), INT8_C( 119), INT8_C( 67), INT8_C( 111), INT8_C( 72) } }, + { { -INT8_C( 16), -INT8_C( 42), -INT8_C( 32), -INT8_C( 55), -INT8_C( 66), -INT8_C( 104), -INT8_C( 62), INT8_C( 124) }, + { INT16_C( 23253), INT16_C( 16023), -INT16_C( 2725), -INT16_C( 2569), -INT16_C( 30790), -INT16_C( 17177), -INT16_C( 30309), -INT16_C( 17320) }, + { INT16_C( 7831), INT16_C( 18555), -INT16_C( 29330), INT16_C( 29911), INT16_C( 13510), INT16_C( 1110), INT16_C( 2441), -INT16_C( 5602) }, + { -INT8_C( 16), -INT8_C( 42), -INT8_C( 32), -INT8_C( 55), -INT8_C( 66), -INT8_C( 104), -INT8_C( 62), INT8_C( 124), INT8_C( 60), -INT8_C( 10), INT8_C( 104), -INT8_C( 127), INT8_C( 83), -INT8_C( 71), INT8_MIN, -INT8_C( 46) } }, + { { -INT8_C( 55), -INT8_C( 23), INT8_C( 91), INT8_C( 57), INT8_C( 44), INT8_C( 3), INT8_C( 51), INT8_C( 118) }, + { INT16_C( 17257), INT16_C( 32178), -INT16_C( 24596), INT16_C( 10324), INT16_C( 87), INT16_C( 27239), -INT16_C( 13293), INT16_C( 15079) }, + { -INT16_C( 25504), -INT16_C( 20474), -INT16_C( 30939), INT16_C( 10568), INT16_C( 5089), -INT16_C( 12537), INT16_C( 7486), -INT16_C( 28349) }, + { -INT8_C( 55), -INT8_C( 23), INT8_C( 91), INT8_C( 57), INT8_C( 44), INT8_C( 3), INT8_C( 51), INT8_C( 118), -INT8_C( 89), -INT8_C( 50), INT8_C( 25), -INT8_C( 1), -INT8_C( 20), -INT8_C( 101), -INT8_C( 81), -INT8_C( 86) } }, + { { -INT8_C( 5), -INT8_C( 77), INT8_C( 112), -INT8_C( 74), -INT8_C( 14), -INT8_C( 82), -INT8_C( 2), -INT8_C( 87) }, + { -INT16_C( 19745), -INT16_C( 16782), -INT16_C( 587), -INT16_C( 16737), INT16_C( 30127), INT16_C( 11759), INT16_C( 19794), INT16_C( 24265) }, + { INT16_C( 24307), -INT16_C( 30221), -INT16_C( 25200), INT16_C( 17173), -INT16_C( 31429), -INT16_C( 11979), INT16_C( 24480), INT16_C( 22617) }, + { -INT8_C( 5), -INT8_C( 77), INT8_C( 112), -INT8_C( 74), -INT8_C( 14), -INT8_C( 82), -INT8_C( 2), -INT8_C( 87), 
INT8_C( 84), INT8_C( 52), INT8_C( 96), INT8_C( 124), -INT8_C( 16), INT8_C( 93), -INT8_C( 18), INT8_C( 6) } }, + { { -INT8_C( 124), -INT8_C( 54), INT8_C( 16), -INT8_C( 78), -INT8_C( 127), -INT8_C( 7), INT8_C( 33), -INT8_C( 61) }, + { -INT16_C( 30078), INT16_C( 8645), -INT16_C( 573), INT16_C( 13811), -INT16_C( 7119), INT16_C( 14999), INT16_C( 20134), -INT16_C( 20511) }, + { INT16_C( 32370), -INT16_C( 26330), -INT16_C( 16041), INT16_C( 7697), INT16_C( 28606), INT16_C( 30738), INT16_C( 30774), INT16_C( 3914) }, + { -INT8_C( 124), -INT8_C( 54), INT8_C( 16), -INT8_C( 78), -INT8_C( 127), -INT8_C( 7), INT8_C( 33), -INT8_C( 61), INT8_C( 12), -INT8_C( 119), INT8_C( 60), INT8_C( 24), INT8_C( 116), -INT8_C( 61), -INT8_C( 42), -INT8_C( 95) } }, + { { INT8_C( 80), INT8_C( 56), INT8_C( 100), -INT8_C( 66), -INT8_C( 31), INT8_C( 37), -INT8_C( 33), INT8_C( 74) }, + { -INT16_C( 14553), INT16_C( 5150), INT16_C( 28351), -INT16_C( 684), INT16_C( 26568), -INT16_C( 31096), INT16_C( 4472), INT16_C( 13012) }, + { INT16_C( 32286), -INT16_C( 10540), -INT16_C( 5164), INT16_C( 6273), INT16_C( 22632), INT16_C( 17748), INT16_C( 18038), -INT16_C( 21733) }, + { INT8_C( 80), INT8_C( 56), INT8_C( 100), -INT8_C( 66), -INT8_C( 31), INT8_C( 37), -INT8_C( 33), INT8_C( 74), INT8_C( 73), INT8_C( 61), -INT8_C( 125), -INT8_C( 27), INT8_C( 15), INT8_C( 65), -INT8_C( 53), -INT8_C( 120) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t r_ = simde_vld1_s8(test_vec[i].r_); + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + simde_int8x16_t r = simde_vrsubhn_high_s16(r_, a, b); + + simde_test_arm_neon_assert_equal_i8x16(r, simde_vld1q_s8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t r_ = simde_test_arm_neon_random_i8x8(); + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = 
simde_test_arm_neon_random_i16x8(); + simde_int8x16_t r = simde_vrsubhn_high_s16(r_, a, b); + + simde_test_arm_neon_write_i8x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_high_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int16_t r_[4]; + int32_t a[4]; + int32_t b[4]; + int16_t r[8]; + } test_vec[] = { + { { -INT16_C( 12977), -INT16_C( 2227), -INT16_C( 27768), INT16_C( 17924) }, + { -INT32_C(1395181711), -INT32_C(1240879307), -INT32_C( 637043430), -INT32_C( 687732233) }, + { -INT32_C( 514460743), -INT32_C( 645485430), -INT32_C( 941798700), INT32_C(1426294115) }, + { -INT16_C( 12977), -INT16_C( 2227), -INT16_C( 27768), INT16_C( 17924), -INT16_C( 13439), -INT16_C( 9085), INT16_C( 4650), -INT16_C( 32257) } }, + { { -INT16_C( 30965), -INT16_C( 14140), -INT16_C( 1197), INT16_C( 25568) }, + { -INT32_C( 475098007), INT32_C(1756822291), -INT32_C(1668735740), INT32_C(1597884770) }, + { -INT32_C(1092539856), -INT32_C(1644754626), -INT32_C( 452915460), -INT32_C( 267100233) }, + { -INT16_C( 30965), -INT16_C( 14140), -INT16_C( 1197), INT16_C( 25568), INT16_C( 9421), -INT16_C( 13632), -INT16_C( 18552), INT16_C( 28457) } }, + { { -INT16_C( 3586), -INT16_C( 25062), INT16_C( 2878), -INT16_C( 22179) }, + { -INT32_C(1144008554), -INT32_C( 383576581), INT32_C(2119783921), INT32_C(1440612031) }, + { INT32_C(1818261921), INT32_C( 254868399), INT32_C(2027737229), INT32_C( 557924416) }, + { -INT16_C( 3586), -INT16_C( 25062), INT16_C( 2878), -INT16_C( 22179), INT16_C( 20335), -INT16_C( 9742), INT16_C( 1405), INT16_C( 13469) } }, + { { INT16_C( 2568), INT16_C( 19580), INT16_C( 7584), INT16_C( 15845) }, + { -INT32_C(2058569671), -INT32_C(1945659170), -INT32_C(1893664429), -INT32_C( 686544847) }, + { INT32_C(1348487948), 
-INT32_C(1225141222), INT32_C( 722877109), INT32_C( 940184532) }, + { INT16_C( 2568), INT16_C( 19580), INT16_C( 7584), INT16_C( 15845), INT16_C( 13548), -INT16_C( 10994), INT16_C( 25611), -INT16_C( 24822) } }, + { { INT16_C( 30154), -INT16_C( 31961), -INT16_C( 2317), -INT16_C( 15814) }, + { INT32_C( 661319774), -INT32_C(1184345804), INT32_C( 907491779), -INT32_C(1143902803) }, + { INT32_C( 546102851), INT32_C(1867663720), INT32_C(1122486484), INT32_C( 279341413) }, + { INT16_C( 30154), -INT16_C( 31961), -INT16_C( 2317), -INT16_C( 15814), INT16_C( 1758), INT16_C( 18966), -INT16_C( 3281), -INT16_C( 21717) } }, + { { INT16_C( 13545), INT16_C( 25683), -INT16_C( 22237), -INT16_C( 1651) }, + { INT32_C(1179105308), INT32_C(1257466709), INT32_C(1971469630), INT32_C( 965297803) }, + { -INT32_C( 922124931), -INT32_C(1349774106), -INT32_C(1713687644), INT32_C(1629767843) }, + { INT16_C( 13545), INT16_C( 25683), -INT16_C( 22237), -INT16_C( 1651), INT16_C( 32062), -INT16_C( 25753), -INT16_C( 9305), -INT16_C( 10139) } }, + { { INT16_C( 16119), -INT16_C( 7381), INT16_C( 32043), -INT16_C( 21273) }, + { INT32_C(1112362391), -INT32_C( 222232724), INT32_C(1047777903), -INT32_C( 913499513) }, + { INT32_C(1767981731), -INT32_C(1115257806), INT32_C( 915094923), -INT32_C(2015328243) }, + { INT16_C( 16119), -INT16_C( 7381), INT16_C( 32043), -INT16_C( 21273), -INT16_C( 10004), INT16_C( 13626), INT16_C( 2025), INT16_C( 16813) } }, + { { -INT16_C( 32566), INT16_C( 5160), INT16_C( 17899), -INT16_C( 15831) }, + { -INT32_C(1954975313), -INT32_C(1920435371), INT32_C(1003706891), INT32_C( 541402975) }, + { INT32_C(1674438238), -INT32_C(1098473480), -INT32_C( 634716656), -INT32_C( 851524412) }, + { -INT16_C( 32566), INT16_C( 5160), INT16_C( 17899), -INT16_C( 15831), INT16_C( 10156), -INT16_C( 12542), INT16_C( 25000), INT16_C( 21254) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t r_ = simde_vld1_s16(test_vec[i].r_); + simde_int32x4_t a = 
simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + simde_int16x8_t r = simde_vrsubhn_high_s32(r_, a, b); + + simde_test_arm_neon_assert_equal_i16x8(r, simde_vld1q_s16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t r_ = simde_test_arm_neon_random_i16x4(); + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int16x8_t r = simde_vrsubhn_high_s32(r_, a, b); + + simde_test_arm_neon_write_i16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_high_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + int32_t r_[2]; + int64_t a[2]; + int64_t b[2]; + int32_t r[4]; + } test_vec[] = { + { { -INT32_C( 634794411), INT32_C( 1900921601) }, + { -INT64_C( 4981114222538011063), INT64_C( 6718889980534685553) }, + { -INT64_C( 9134035082168866882), -INT64_C( 8643474357928897387) }, + { -INT32_C( 634794411), INT32_C( 1900921601), INT32_C( 966927237), -INT32_C( 718138119) } }, + { { -INT32_C( 1252566501), INT32_C( 408004355) }, + { INT64_C( 1961598702296295413), INT64_C( 1789265077301493858) }, + { -INT64_C( 1992937627710137491), -INT64_C( 6606323933163750076) }, + { -INT32_C( 1252566501), INT32_C( 408004355), INT32_C( 920737239), INT32_C( 1954750393) } }, + { { -INT32_C( 1461648077), -INT32_C( 456589213) }, + { -INT64_C( 3805107682072796996), -INT64_C( 3551263502935852693) }, + { INT64_C( 2017632390811172449), -INT64_C( 811730804735164581) }, + { -INT32_C( 1461648077), -INT32_C( 456589213), -INT32_C( 1355712319), -INT32_C( 637847161) } }, + { { INT32_C( 1747874793), INT32_C( 2140696208) }, + { INT64_C( 937152159973332462), -INT64_C( 7415891501218868528) }, 
+ { -INT64_C( 6013839813684367117), -INT64_C( 5801899480449452360) }, + { INT32_C( 1747874793), INT32_C( 2140696208), INT32_C( 1618403935), -INT32_C( 375786801) } }, + { { -INT32_C( 1743771151), INT32_C( 2117269274) }, + { INT64_C( 565523750854915567), INT64_C( 6277698001984965823) }, + { -INT64_C( 7182780127885909828), INT64_C( 1628218994207521522) }, + { -INT32_C( 1743771151), INT32_C( 2117269274), INT32_C( 1804042579), INT32_C( 1082541190) } }, + { { INT32_C( 769461152), -INT32_C( 1065585759) }, + { -INT64_C( 8263077630275804176), INT64_C( 8146965252929535167) }, + { INT64_C( 6693203158242440733), -INT64_C( 5722247609551143928) }, + { INT32_C( 769461152), -INT32_C( 1065585759), INT32_C( 812686813), -INT32_C( 1065789538) } }, + { { -INT32_C( 779973276), -INT32_C( 89996234) }, + { INT64_C( 4328069067460272340), INT64_C( 8960946558766265296) }, + { -INT64_C( 5126182062046763683), -INT64_C( 6499324058686156033) }, + { -INT32_C( 779973276), -INT32_C( 89996234), -INT32_C( 2093727920), -INT32_C( 695342537) } }, + { { INT32_C( 1374451854), INT32_C( 1634665168) }, + { INT64_C( 933833112004457496), INT64_C( 3781945808152128338) }, + { INT64_C( 3556891504846685220), INT64_C( 7834410733044719669) }, + { INT32_C( 1374451854), INT32_C( 1634665168), -INT32_C( 610728374), -INT32_C( 943538017) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t r_ = simde_vld1_s32(test_vec[i].r_); + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + simde_int32x4_t r = simde_vrsubhn_high_s64(r_, a, b); + + simde_test_arm_neon_assert_equal_i32x4(r, simde_vld1q_s32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t r_ = simde_test_arm_neon_random_i32x2(); + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int32x4_t r = simde_vrsubhn_high_s64(r_, a, 
b); + + simde_test_arm_neon_write_i32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_high_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint8_t r_[8]; + uint16_t a[8]; + uint16_t b[8]; + uint8_t r[16]; + } test_vec[] = { + { { UINT8_C( 199), UINT8_C( 208), UINT8_C( 108), UINT8_C( 25), UINT8_C( 23), UINT8_C( 206), UINT8_C( 220), UINT8_C( 146) }, + { UINT16_C(20331), UINT16_C(63359), UINT16_C(61138), UINT16_C(22627), UINT16_C(27341), UINT16_C(46940), UINT16_C(59425), UINT16_C(62036) }, + { UINT16_C(23567), UINT16_C(23177), UINT16_C(44534), UINT16_C(26849), UINT16_C(19009), UINT16_C(31005), UINT16_C(31592), UINT16_C(62268) }, + { UINT8_C( 199), UINT8_C( 208), UINT8_C( 108), UINT8_C( 25), UINT8_C( 23), UINT8_C( 206), UINT8_C( 220), UINT8_C( 146), UINT8_C( 243), UINT8_C( 157), UINT8_C( 65), UINT8_C( 240), UINT8_C( 33), UINT8_C( 62), UINT8_C( 109), UINT8_MAX } }, + { { UINT8_C( 124), UINT8_C( 248), UINT8_C( 130), UINT8_C( 219), UINT8_C( 227), UINT8_C( 29), UINT8_C( 126), UINT8_C( 216) }, + { UINT16_C( 3413), UINT16_C(58604), UINT16_C(30450), UINT16_C(19146), UINT16_C(17020), UINT16_C(33098), UINT16_C(35964), UINT16_C(43515) }, + { UINT16_C(60215), UINT16_C(10159), UINT16_C(57097), UINT16_C( 3673), UINT16_C(11038), UINT16_C(27128), UINT16_C(41205), UINT16_C( 1394) }, + { UINT8_C( 124), UINT8_C( 248), UINT8_C( 130), UINT8_C( 219), UINT8_C( 227), UINT8_C( 29), UINT8_C( 126), UINT8_C( 216), UINT8_C( 34), UINT8_C( 189), UINT8_C( 152), UINT8_C( 60), UINT8_C( 23), UINT8_C( 23), UINT8_C( 236), UINT8_C( 165) } }, + { { UINT8_C( 203), UINT8_C( 165), UINT8_C( 122), UINT8_C( 152), UINT8_C( 11), UINT8_C( 126), UINT8_C( 17), UINT8_C( 127) }, + { UINT16_C(25287), UINT16_C(41941), UINT16_C(65006), UINT16_C(31570), 
UINT16_C(13092), UINT16_C(56629), UINT16_C(38097), UINT16_C(32785) }, + { UINT16_C(64937), UINT16_C(60072), UINT16_C(19706), UINT16_C(51741), UINT16_C(48358), UINT16_C(32600), UINT16_C(27603), UINT16_C(11365) }, + { UINT8_C( 203), UINT8_C( 165), UINT8_C( 122), UINT8_C( 152), UINT8_C( 11), UINT8_C( 126), UINT8_C( 17), UINT8_C( 127), UINT8_C( 101), UINT8_C( 185), UINT8_C( 177), UINT8_C( 177), UINT8_C( 118), UINT8_C( 94), UINT8_C( 41), UINT8_C( 84) } }, + { { UINT8_C( 18), UINT8_C( 254), UINT8_C( 215), UINT8_C( 80), UINT8_C( 143), UINT8_C( 172), UINT8_C( 69), UINT8_C( 82) }, + { UINT16_C(10014), UINT16_C(42189), UINT16_C(29852), UINT16_C( 9439), UINT16_C(39633), UINT16_C(55336), UINT16_C(44596), UINT16_C(37844) }, + { UINT16_C(61820), UINT16_C(52531), UINT16_C(15789), UINT16_C(65214), UINT16_C(47820), UINT16_C(60501), UINT16_C(63187), UINT16_C(26072) }, + { UINT8_C( 18), UINT8_C( 254), UINT8_C( 215), UINT8_C( 80), UINT8_C( 143), UINT8_C( 172), UINT8_C( 69), UINT8_C( 82), UINT8_C( 54), UINT8_C( 216), UINT8_C( 55), UINT8_C( 38), UINT8_C( 224), UINT8_C( 236), UINT8_C( 183), UINT8_C( 46) } }, + { { UINT8_C( 68), UINT8_C( 57), UINT8_C( 239), UINT8_C( 197), UINT8_C( 110), UINT8_C( 115), UINT8_C( 180), UINT8_C( 123) }, + { UINT16_C(46221), UINT16_C( 6188), UINT16_C(62049), UINT16_C(56738), UINT16_C(42307), UINT16_C(60741), UINT16_C(14810), UINT16_C( 2302) }, + { UINT16_C(24062), UINT16_C(63486), UINT16_C(19483), UINT16_C(35985), UINT16_C(61603), UINT16_C(28995), UINT16_C(38454), UINT16_C(46727) }, + { UINT8_C( 68), UINT8_C( 57), UINT8_C( 239), UINT8_C( 197), UINT8_C( 110), UINT8_C( 115), UINT8_C( 180), UINT8_C( 123), UINT8_C( 87), UINT8_C( 32), UINT8_C( 166), UINT8_C( 81), UINT8_C( 181), UINT8_C( 124), UINT8_C( 164), UINT8_C( 82) } }, + { { UINT8_C( 106), UINT8_C( 175), UINT8_C( 92), UINT8_C( 116), UINT8_C( 88), UINT8_C( 72), UINT8_C( 185), UINT8_C( 179) }, + { UINT16_C(12059), UINT16_C(64175), UINT16_C(62948), UINT16_C(45417), UINT16_C(10359), UINT16_C(22978), 
UINT16_C(40265), UINT16_C(15140) }, + { UINT16_C( 5905), UINT16_C(21144), UINT16_C(12969), UINT16_C(37068), UINT16_C(46022), UINT16_C(33245), UINT16_C(46817), UINT16_C(41344) }, + { UINT8_C( 106), UINT8_C( 175), UINT8_C( 92), UINT8_C( 116), UINT8_C( 88), UINT8_C( 72), UINT8_C( 185), UINT8_C( 179), UINT8_C( 24), UINT8_C( 168), UINT8_C( 195), UINT8_C( 33), UINT8_C( 117), UINT8_C( 216), UINT8_C( 230), UINT8_C( 154) } }, + { { UINT8_C( 95), UINT8_C( 48), UINT8_C( 246), UINT8_C( 49), UINT8_C( 184), UINT8_C( 192), UINT8_C( 82), UINT8_C( 247) }, + { UINT16_C( 9636), UINT16_C(60462), UINT16_C(55485), UINT16_C(38214), UINT16_C(40221), UINT16_C(42243), UINT16_C(63986), UINT16_C(29866) }, + { UINT16_C(20853), UINT16_C(39462), UINT16_C(47200), UINT16_C(55659), UINT16_C( 3178), UINT16_C(11400), UINT16_C(14681), UINT16_C(21092) }, + { UINT8_C( 95), UINT8_C( 48), UINT8_C( 246), UINT8_C( 49), UINT8_C( 184), UINT8_C( 192), UINT8_C( 82), UINT8_C( 247), UINT8_C( 212), UINT8_C( 82), UINT8_C( 32), UINT8_C( 188), UINT8_C( 145), UINT8_C( 120), UINT8_C( 193), UINT8_C( 34) } }, + { { UINT8_C( 20), UINT8_C( 220), UINT8_C( 10), UINT8_C( 32), UINT8_C( 231), UINT8_C( 213), UINT8_C( 17), UINT8_C( 207) }, + { UINT16_C(21531), UINT16_C(19493), UINT16_C( 5660), UINT16_C(23767), UINT16_C(40695), UINT16_C(19505), UINT16_C(47924), UINT16_C(53622) }, + { UINT16_C(60692), UINT16_C(20628), UINT16_C( 1724), UINT16_C(24276), UINT16_C(44233), UINT16_C(14776), UINT16_C(51369), UINT16_C(30125) }, + { UINT8_C( 20), UINT8_C( 220), UINT8_C( 10), UINT8_C( 32), UINT8_C( 231), UINT8_C( 213), UINT8_C( 17), UINT8_C( 207), UINT8_C( 103), UINT8_C( 252), UINT8_C( 15), UINT8_C( 254), UINT8_C( 242), UINT8_C( 18), UINT8_C( 243), UINT8_C( 92) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t r_ = simde_vld1_u8(test_vec[i].r_); + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + simde_uint8x16_t r = 
simde_vrsubhn_high_u16(r_, a, b); + + simde_test_arm_neon_assert_equal_u8x16(r, simde_vld1q_u8(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t r_ = simde_test_arm_neon_random_u8x8(); + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint8x16_t r = simde_vrsubhn_high_u16(r_, a, b); + + simde_test_arm_neon_write_u8x8(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_high_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint16_t r_[4]; + uint32_t a[4]; + uint32_t b[4]; + uint16_t r[8]; + } test_vec[] = { + { { UINT16_C( 41269), UINT16_C( 50638), UINT16_C( 61698), UINT16_C( 7807) }, + { UINT32_C(1741988329), UINT32_C(1606087118), UINT32_C( 700475097), UINT32_C(3688868886) }, + { UINT32_C(2806349967), UINT32_C(1854800487), UINT32_C( 21704828), UINT32_C(3997988195) }, + { UINT16_C( 41269), UINT16_C( 50638), UINT16_C( 61698), UINT16_C( 7807), UINT16_C( 49295), UINT16_C( 61741), UINT16_C( 10357), UINT16_C( 60819) } }, + { { UINT16_C( 34253), UINT16_C( 44792), UINT16_C( 7455), UINT16_C( 26942) }, + { UINT32_C(4283611204), UINT32_C(4280106541), UINT32_C(2458782660), UINT32_C(1830661178) }, + { UINT32_C(3706562521), UINT32_C(2205941448), UINT32_C(2302945884), UINT32_C(4150772654) }, + { UINT16_C( 34253), UINT16_C( 44792), UINT16_C( 7455), UINT16_C( 26942), UINT16_C( 8805), UINT16_C( 31649), UINT16_C( 2378), UINT16_C( 30134) } }, + { { UINT16_C( 11864), UINT16_C( 1970), UINT16_C( 6478), UINT16_C( 21710) }, + { UINT32_C(2673192528), UINT32_C(1952095768), UINT32_C(3214859988), UINT32_C( 343408403) }, + { UINT32_C(2243488234), UINT32_C( 883441011), UINT32_C(2409138063), 
UINT32_C(1276108507) }, + { UINT16_C( 11864), UINT16_C( 1970), UINT16_C( 6478), UINT16_C( 21710), UINT16_C( 6557), UINT16_C( 16306), UINT16_C( 12294), UINT16_C( 51304) } }, + { { UINT16_C( 50706), UINT16_C( 37166), UINT16_C( 14410), UINT16_C( 64394) }, + { UINT32_C(1360680546), UINT32_C(2100403524), UINT32_C(1682447994), UINT32_C(1336618539) }, + { UINT32_C( 973421663), UINT32_C(3133116068), UINT32_C(1237858822), UINT32_C( 382114510) }, + { UINT16_C( 50706), UINT16_C( 37166), UINT16_C( 14410), UINT16_C( 64394), UINT16_C( 5909), UINT16_C( 49778), UINT16_C( 6784), UINT16_C( 14565) } }, + { { UINT16_C( 28785), UINT16_C( 52939), UINT16_C( 4039), UINT16_C( 58633) }, + { UINT32_C(3940626557), UINT32_C( 638609814), UINT32_C(1551602896), UINT32_C(3076801985) }, + { UINT32_C(3455587449), UINT32_C(1587525949), UINT32_C(1767754029), UINT32_C( 941250066) }, + { UINT16_C( 28785), UINT16_C( 52939), UINT16_C( 4039), UINT16_C( 58633), UINT16_C( 7401), UINT16_C( 51057), UINT16_C( 62238), UINT16_C( 32586) } }, + { { UINT16_C( 19790), UINT16_C( 48601), UINT16_C( 34521), UINT16_C( 53777) }, + { UINT32_C(3399780476), UINT32_C(3128390483), UINT32_C(2661794473), UINT32_C(2711758066) }, + { UINT32_C(1783301814), UINT32_C(2700865479), UINT32_C(1792320211), UINT32_C(1346550133) }, + { UINT16_C( 19790), UINT16_C( 48601), UINT16_C( 34521), UINT16_C( 53777), UINT16_C( 24666), UINT16_C( 6524), UINT16_C( 13267), UINT16_C( 20831) } }, + { { UINT16_C( 30913), UINT16_C( 40483), UINT16_C( 13733), UINT16_C( 11348) }, + { UINT32_C(3982828149), UINT32_C(2078910112), UINT32_C( 826117185), UINT32_C(1511393051) }, + { UINT32_C(3523586226), UINT32_C(2698734480), UINT32_C(2154583641), UINT32_C( 757340238) }, + { UINT16_C( 30913), UINT16_C( 40483), UINT16_C( 13733), UINT16_C( 11348), UINT16_C( 7007), UINT16_C( 56078), UINT16_C( 45265), UINT16_C( 11506) } }, + { { UINT16_C( 24786), UINT16_C( 6944), UINT16_C( 14496), UINT16_C( 43106) }, + { UINT32_C(2903731845), UINT32_C( 303656794), UINT32_C(3826239729), 
UINT32_C( 525402713) }, + { UINT32_C(2544300481), UINT32_C( 361837913), UINT32_C( 649429955), UINT32_C(3783680799) }, + { UINT16_C( 24786), UINT16_C( 6944), UINT16_C( 14496), UINT16_C( 43106), UINT16_C( 5484), UINT16_C( 64648), UINT16_C( 48474), UINT16_C( 15819) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t r_ = simde_vld1_u16(test_vec[i].r_); + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + simde_uint16x8_t r = simde_vrsubhn_high_u32(r_, a, b); + + simde_test_arm_neon_assert_equal_u16x8(r, simde_vld1q_u16(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t r_ = simde_test_arm_neon_random_u16x4(); + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint16x8_t r = simde_vrsubhn_high_u32(r_, a, b); + + simde_test_arm_neon_write_u16x4(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vrsubhn_high_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + static const struct { + uint32_t r_[2]; + uint64_t a[2]; + uint64_t b[2]; + uint32_t r[4]; + } test_vec[] = { + { { UINT32_C( 1871412901), UINT32_C( 3393036411) }, + { UINT64_C(17731709460066361324), UINT64_C(11218132822946835667) }, + { UINT64_C( 3438642190895232954), UINT64_C(16745057506486448806) }, + { UINT32_C( 1871412901), UINT32_C( 3393036411), UINT32_C( 3327864052), UINT32_C( 3008129865) } }, + { { UINT32_C( 3345833697), UINT32_C( 2892487010) }, + { UINT64_C(15016907265298523829), UINT64_C( 3408669964119177856) }, + { UINT64_C( 5786146879377488931), UINT64_C(14643760497357938360) }, + { UINT32_C( 3345833697), UINT32_C( 2892487010), 
UINT32_C( 2149203882), UINT32_C( 1679093936) } }, + { { UINT32_C( 3628723145), UINT32_C( 897897657) }, + { UINT64_C(11792583140865019933), UINT64_C(16941015192646112791) }, + { UINT64_C(17692816430312995448), UINT64_C(16603442786833374610) }, + { UINT32_C( 3628723145), UINT32_C( 897897657), UINT32_C( 2921212182), UINT32_C( 78597201) } }, + { { UINT32_C( 1542968199), UINT32_C( 2506479180) }, + { UINT64_C( 9048986537022657802), UINT64_C( 9932019547326549171) }, + { UINT64_C(10268029266140439713), UINT64_C( 1637355560458647743) }, + { UINT32_C( 1542968199), UINT32_C( 2506479180), UINT32_C( 4011136793), UINT32_C( 1931251955) } }, + { { UINT32_C( 513868505), UINT32_C( 585247018) }, + { UINT64_C(11618880053111078913), UINT64_C(14007978403554808354) }, + { UINT64_C( 2066260072343106377), UINT64_C(17486312152245205426) }, + { UINT32_C( 513868505), UINT32_C( 585247018), UINT32_C( 2224142659), UINT32_C( 3485104610) } }, + { { UINT32_C( 105946740), UINT32_C( 3581109170) }, + { UINT64_C( 8517730612630495290), UINT64_C(15091637072261973633) }, + { UINT64_C( 8273527098162170189), UINT64_C( 6864551697442672809) }, + { UINT32_C( 105946740), UINT32_C( 3581109170), UINT32_C( 56858061), UINT32_C( 1915517583) } }, + { { UINT32_C( 2072729636), UINT32_C( 1142462021) }, + { UINT64_C( 6174206732082916876), UINT64_C( 6872713127752786924) }, + { UINT64_C( 4337958157275197741), UINT64_C(13402179920841493563) }, + { UINT32_C( 2072729636), UINT32_C( 1142462021), UINT32_C( 427534938), UINT32_C( 2774707340) } }, + { { UINT32_C( 3616339027), UINT32_C( 3569404306) }, + { UINT64_C( 1432544248088139574), UINT64_C( 3578587937909353029) }, + { UINT64_C( 8328472411672646838), UINT64_C( 5318579539293040351) }, + { UINT32_C( 3616339027), UINT32_C( 3569404306), UINT32_C( 2689383903), UINT32_C( 3889843931) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t r_ = simde_vld1_u32(test_vec[i].r_); + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + 
simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + simde_uint32x4_t r = simde_vrsubhn_high_u64(r_, a, b); + + simde_test_arm_neon_assert_equal_u32x4(r, simde_vld1q_u32(test_vec[i].r)); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t r_ = simde_test_arm_neon_random_u32x2(); + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint32x4_t r = simde_vrsubhn_high_u64(r_, a, b); + + simde_test_arm_neon_write_u32x2(2, r_, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + + +SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_s64) + +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vrsubhn_high_u64) +SIMDE_TEST_FUNC_LIST_END + +#include "test-neon-footer.h" diff --git a/test/arm/neon/sli_n.c b/test/arm/neon/sli_n.c new file mode 100644 index 000000000..887e19adb --- /dev/null +++ b/test/arm/neon/sli_n.c @@ -0,0 +1,1561 @@ +#define SIMDE_TEST_ARM_NEON_INSN sli_n + +#include +#include + +static int +test_simde_vsli_n_s8 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int8_t a[8]; + int8_t b[8]; + int8_t r0[8]; + int8_t r3[8]; + int8_t r5[8]; + int8_t r7[8]; + } test_vec[] = { + { { -INT8_C( 101), INT8_C( 26), INT8_C( 9), INT8_C( 26), INT8_C( 49), -INT8_C( 61), INT8_C( 126), INT8_C( 21) }, + { -INT8_C( 44), -INT8_C( 122), INT8_C( 93), INT8_C( 94), INT8_C( 54), -INT8_C( 71), -INT8_C( 21), INT8_C( 56) }, + { -INT8_C( 44), -INT8_C( 122), INT8_C( 93), INT8_C( 94), INT8_C( 54), -INT8_C( 71), -INT8_C( 21), INT8_C( 56) }, + { -INT8_C( 93), INT8_C( 50), -INT8_C( 23), -INT8_C( 14), 
-INT8_C( 79), -INT8_C( 53), INT8_C( 94), -INT8_C( 59) }, + { -INT8_C( 101), -INT8_C( 38), -INT8_C( 87), -INT8_C( 38), -INT8_C( 47), INT8_C( 35), INT8_C( 126), INT8_C( 21) }, + { INT8_C( 27), INT8_C( 26), -INT8_C( 119), INT8_C( 26), INT8_C( 49), -INT8_C( 61), -INT8_C( 2), INT8_C( 21) } }, + { { -INT8_C( 52), -INT8_C( 65), INT8_C( 2), INT8_C( 9), INT8_C( 115), -INT8_C( 114), INT8_C( 25), INT8_C( 51) }, + { INT8_C( 36), -INT8_C( 93), INT8_C( 125), -INT8_C( 57), -INT8_C( 97), INT8_C( 56), INT8_C( 7), -INT8_C( 86) }, + { INT8_C( 36), -INT8_C( 93), INT8_C( 125), -INT8_C( 57), -INT8_C( 97), INT8_C( 56), INT8_C( 7), -INT8_C( 86) }, + { INT8_C( 36), INT8_C( 31), -INT8_C( 22), INT8_C( 57), -INT8_C( 5), -INT8_C( 58), INT8_C( 57), INT8_C( 83) }, + { -INT8_C( 116), INT8_MAX, -INT8_C( 94), -INT8_C( 23), -INT8_C( 13), INT8_C( 14), -INT8_C( 7), INT8_C( 83) }, + { INT8_C( 76), -INT8_C( 65), -INT8_C( 126), -INT8_C( 119), -INT8_C( 13), INT8_C( 14), -INT8_C( 103), INT8_C( 51) } }, + { { INT8_C( 6), -INT8_C( 75), -INT8_C( 113), -INT8_C( 102), INT8_C( 91), INT8_C( 107), -INT8_C( 19), -INT8_C( 49) }, + { INT8_C( 125), INT8_C( 24), -INT8_C( 117), -INT8_C( 52), INT8_C( 56), -INT8_C( 96), -INT8_C( 78), -INT8_C( 125) }, + { INT8_C( 125), INT8_C( 24), -INT8_C( 117), -INT8_C( 52), INT8_C( 56), -INT8_C( 96), -INT8_C( 78), -INT8_C( 125) }, + { -INT8_C( 18), -INT8_C( 59), INT8_C( 95), INT8_C( 98), -INT8_C( 61), INT8_C( 3), -INT8_C( 107), INT8_C( 31) }, + { -INT8_C( 90), INT8_C( 21), INT8_C( 111), -INT8_C( 102), INT8_C( 27), INT8_C( 11), INT8_C( 77), INT8_C( 111) }, + { -INT8_C( 122), INT8_C( 53), -INT8_C( 113), INT8_C( 26), INT8_C( 91), INT8_C( 107), INT8_C( 109), -INT8_C( 49) } }, + { { INT8_C( 75), -INT8_C( 25), INT8_C( 53), INT8_C( 41), -INT8_C( 91), -INT8_C( 47), INT8_C( 11), INT8_C( 120) }, + { INT8_C( 26), -INT8_C( 111), -INT8_C( 51), -INT8_C( 50), -INT8_C( 100), INT8_C( 21), -INT8_C( 106), -INT8_C( 93) }, + { INT8_C( 26), -INT8_C( 111), -INT8_C( 51), -INT8_C( 50), -INT8_C( 100), INT8_C( 
21), -INT8_C( 106), -INT8_C( 93) }, + { -INT8_C( 45), -INT8_C( 113), INT8_C( 109), INT8_C( 113), -INT8_C( 27), -INT8_C( 87), -INT8_C( 77), INT8_C( 24) }, + { INT8_C( 75), INT8_C( 39), -INT8_C( 75), -INT8_C( 55), -INT8_C( 123), -INT8_C( 79), -INT8_C( 53), INT8_C( 120) }, + { INT8_C( 75), -INT8_C( 25), -INT8_C( 75), INT8_C( 41), INT8_C( 37), -INT8_C( 47), INT8_C( 11), -INT8_C( 8) } }, + { { -INT8_C( 87), -INT8_C( 95), -INT8_C( 8), -INT8_C( 55), -INT8_C( 109), -INT8_C( 83), -INT8_C( 16), -INT8_C( 60) }, + { INT8_C( 67), -INT8_C( 113), INT8_C( 90), -INT8_C( 126), -INT8_C( 75), -INT8_C( 92), INT8_C( 62), INT8_C( 117) }, + { INT8_C( 67), -INT8_C( 113), INT8_C( 90), -INT8_C( 126), -INT8_C( 75), -INT8_C( 92), INT8_C( 62), INT8_C( 117) }, + { INT8_C( 25), INT8_C( 121), -INT8_C( 48), INT8_C( 17), -INT8_C( 85), INT8_C( 37), -INT8_C( 16), -INT8_C( 84) }, + { INT8_C( 105), -INT8_C( 31), INT8_C( 88), INT8_C( 73), -INT8_C( 77), -INT8_C( 115), -INT8_C( 48), -INT8_C( 92) }, + { -INT8_C( 87), -INT8_C( 95), INT8_C( 120), INT8_C( 73), -INT8_C( 109), INT8_C( 45), INT8_C( 112), -INT8_C( 60) } }, + { { INT8_C( 123), INT8_C( 75), -INT8_C( 20), -INT8_C( 51), INT8_C( 120), INT8_C( 69), INT8_C( 48), INT8_C( 75) }, + { -INT8_C( 63), -INT8_C( 101), -INT8_C( 127), INT8_C( 26), -INT8_C( 50), -INT8_C( 85), INT8_C( 50), -INT8_C( 88) }, + { -INT8_C( 63), -INT8_C( 101), -INT8_C( 127), INT8_C( 26), -INT8_C( 50), -INT8_C( 85), INT8_C( 50), -INT8_C( 88) }, + { INT8_C( 11), -INT8_C( 37), INT8_C( 12), -INT8_C( 43), INT8_C( 112), INT8_C( 93), -INT8_C( 112), INT8_C( 67) }, + { INT8_C( 59), INT8_C( 107), INT8_C( 44), INT8_C( 77), -INT8_C( 40), INT8_C( 101), INT8_C( 80), INT8_C( 11) }, + { -INT8_C( 5), -INT8_C( 53), -INT8_C( 20), INT8_C( 77), INT8_C( 120), -INT8_C( 59), INT8_C( 48), INT8_C( 75) } }, + { { -INT8_C( 23), -INT8_C( 99), -INT8_C( 123), -INT8_C( 103), INT8_C( 63), -INT8_C( 97), -INT8_C( 22), INT8_C( 117) }, + { -INT8_C( 75), -INT8_C( 111), INT8_C( 98), -INT8_C( 17), -INT8_C( 5), INT8_C( 109), 
INT8_C( 78), INT8_C( 52) }, + { -INT8_C( 75), -INT8_C( 111), INT8_C( 98), -INT8_C( 17), -INT8_C( 5), INT8_C( 109), INT8_C( 78), INT8_C( 52) }, + { -INT8_C( 87), -INT8_C( 115), INT8_C( 21), INT8_C( 121), -INT8_C( 33), INT8_C( 111), INT8_C( 114), -INT8_C( 91) }, + { -INT8_C( 87), INT8_C( 61), INT8_C( 69), -INT8_C( 7), INT8_MAX, -INT8_C( 65), -INT8_C( 54), -INT8_C( 107) }, + { -INT8_C( 23), -INT8_C( 99), INT8_C( 5), -INT8_C( 103), -INT8_C( 65), -INT8_C( 97), INT8_C( 106), INT8_C( 117) } }, + { { INT8_C( 98), INT8_C( 62), INT8_C( 126), -INT8_C( 74), -INT8_C( 10), INT8_C( 124), INT8_C( 96), -INT8_C( 61) }, + { -INT8_C( 110), INT8_C( 111), INT8_C( 80), INT8_C( 107), INT8_C( 102), INT8_C( 8), INT8_C( 75), -INT8_C( 51) }, + { -INT8_C( 110), INT8_C( 111), INT8_C( 80), INT8_C( 107), INT8_C( 102), INT8_C( 8), INT8_C( 75), -INT8_C( 51) }, + { -INT8_C( 110), INT8_C( 126), -INT8_C( 122), INT8_C( 94), INT8_C( 54), INT8_C( 68), INT8_C( 88), INT8_C( 107) }, + { INT8_C( 66), -INT8_C( 2), INT8_C( 30), INT8_C( 118), -INT8_C( 42), INT8_C( 28), INT8_C( 96), -INT8_C( 93) }, + { INT8_C( 98), -INT8_C( 66), INT8_C( 126), -INT8_C( 74), INT8_C( 118), INT8_C( 124), -INT8_C( 32), -INT8_C( 61) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x8_t a = simde_vld1_s8(test_vec[i].a); + simde_int8x8_t b = simde_vld1_s8(test_vec[i].b); + + simde_int8x8_t r0 = simde_vsli_n_s8(a, b, 0); + simde_int8x8_t r3 = simde_vsli_n_s8(a, b, 3); + simde_int8x8_t r5 = simde_vsli_n_s8(a, b, 5); + simde_int8x8_t r7 = simde_vsli_n_s8(a, b, 7); + + simde_test_arm_neon_assert_equal_i8x8(r0, simde_vld1_s8(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i8x8(r3, simde_vld1_s8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i8x8(r5, simde_vld1_s8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i8x8(r7, simde_vld1_s8(test_vec[i].r7)); + } + return 0; +} + +static int +test_simde_vsli_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int16_t a[4]; + int16_t b[4]; + 
int16_t r3[4]; + int16_t r6[4]; + int16_t r10[4]; + int16_t r13[4]; + int16_t r15[4]; + } test_vec[] = { + { { INT16_C( 23715), -INT16_C( 5119), -INT16_C( 948), -INT16_C( 10596) }, + { -INT16_C( 10350), -INT16_C( 1795), -INT16_C( 14589), INT16_C( 23500) }, + { -INT16_C( 17261), -INT16_C( 14359), INT16_C( 14364), -INT16_C( 8604) }, + { -INT16_C( 7005), INT16_C( 16193), -INT16_C( 16180), -INT16_C( 3300) }, + { INT16_C( 18595), -INT16_C( 3071), INT16_C( 3148), INT16_C( 12956) }, + { INT16_C( 23715), -INT16_C( 21503), INT16_C( 31820), -INT16_C( 26980) }, + { INT16_C( 23715), -INT16_C( 5119), -INT16_C( 948), INT16_C( 22172) } }, + { { -INT16_C( 7934), INT16_C( 21731), -INT16_C( 14529), INT16_C( 7523) }, + { INT16_C( 19582), INT16_C( 30872), -INT16_C( 13482), INT16_C( 26450) }, + { INT16_C( 25586), -INT16_C( 15165), INT16_C( 23223), INT16_C( 14995) }, + { INT16_C( 8066), INT16_C( 9763), -INT16_C( 10817), -INT16_C( 11101) }, + { -INT16_C( 1790), INT16_C( 24803), INT16_C( 23359), INT16_C( 18787) }, + { -INT16_C( 16126), INT16_C( 5347), -INT16_C( 14529), INT16_C( 23907) }, + { INT16_C( 24834), INT16_C( 21731), INT16_C( 18239), INT16_C( 7523) } }, + { { -INT16_C( 24714), -INT16_C( 31480), -INT16_C( 32651), INT16_C( 11630) }, + { -INT16_C( 15138), INT16_C( 24046), INT16_C( 29437), INT16_C( 12656) }, + { INT16_C( 9974), -INT16_C( 4240), -INT16_C( 26643), -INT16_C( 29818) }, + { INT16_C( 14262), INT16_C( 31624), -INT16_C( 16523), INT16_C( 23598) }, + { INT16_C( 31606), -INT16_C( 18168), -INT16_C( 2955), -INT16_C( 16018) }, + { -INT16_C( 8330), -INT16_C( 15096), -INT16_C( 24459), INT16_C( 3438) }, + { INT16_C( 8054), INT16_C( 1288), -INT16_C( 32651), INT16_C( 11630) } }, + { { INT16_C( 16403), INT16_C( 27409), -INT16_C( 10162), INT16_C( 4136) }, + { -INT16_C( 19183), INT16_C( 17481), INT16_C( 8984), -INT16_C( 8279) }, + { -INT16_C( 22389), INT16_C( 8777), INT16_C( 6342), -INT16_C( 696) }, + { INT16_C( 17491), INT16_C( 4689), -INT16_C( 14834), -INT16_C( 5528) }, + { INT16_C( 
17427), INT16_C( 10001), INT16_C( 24654), -INT16_C( 23512) }, + { INT16_C( 8211), INT16_C( 11025), INT16_C( 6222), INT16_C( 12328) }, + { -INT16_C( 16365), -INT16_C( 5359), INT16_C( 22606), -INT16_C( 28632) } }, + { { -INT16_C( 13838), -INT16_C( 14285), -INT16_C( 31333), INT16_C( 13782) }, + { -INT16_C( 27229), -INT16_C( 5507), INT16_C( 956), INT16_C( 28420) }, + { -INT16_C( 21222), INT16_C( 21483), INT16_C( 7651), INT16_C( 30758) }, + { INT16_C( 26866), -INT16_C( 24717), -INT16_C( 4325), -INT16_C( 16106) }, + { -INT16_C( 29198), -INT16_C( 3021), -INT16_C( 3685), INT16_C( 4566) }, + { INT16_C( 27122), -INT16_C( 22477), -INT16_C( 31333), -INT16_C( 27178) }, + { -INT16_C( 13838), -INT16_C( 14285), INT16_C( 1435), INT16_C( 13782) } }, + { { -INT16_C( 18860), INT16_C( 10979), INT16_C( 2287), INT16_C( 27177) }, + { -INT16_C( 19616), -INT16_C( 31797), -INT16_C( 22340), -INT16_C( 25390) }, + { -INT16_C( 25852), INT16_C( 7771), INT16_C( 17895), -INT16_C( 6511) }, + { -INT16_C( 10220), -INT16_C( 3357), INT16_C( 12079), INT16_C( 13481) }, + { -INT16_C( 32172), INT16_C( 12003), -INT16_C( 3857), INT16_C( 18985) }, + { INT16_C( 5716), INT16_C( 27363), -INT16_C( 30481), INT16_C( 18985) }, + { INT16_C( 13908), -INT16_C( 21789), INT16_C( 2287), INT16_C( 27177) } }, + { { -INT16_C( 2519), -INT16_C( 16767), INT16_C( 1614), -INT16_C( 7018) }, + { -INT16_C( 20639), -INT16_C( 13532), INT16_C( 11903), INT16_C( 7823) }, + { INT16_C( 31497), INT16_C( 22817), INT16_C( 29694), -INT16_C( 2946) }, + { -INT16_C( 10135), -INT16_C( 14079), -INT16_C( 24626), -INT16_C( 23594) }, + { -INT16_C( 31191), -INT16_C( 28031), -INT16_C( 434), INT16_C( 15510) }, + { INT16_C( 13865), -INT16_C( 24959), -INT16_C( 6578), -INT16_C( 7018) }, + { -INT16_C( 2519), INT16_C( 16001), -INT16_C( 31154), -INT16_C( 7018) } }, + { { INT16_C( 23386), -INT16_C( 5076), INT16_C( 2178), -INT16_C( 19292) }, + { -INT16_C( 15822), INT16_C( 11790), -INT16_C( 26587), INT16_C( 12479) }, + { INT16_C( 4498), INT16_C( 28788), -INT16_C( 
16086), -INT16_C( 31236) }, + { -INT16_C( 29542), -INT16_C( 31828), INT16_C( 2370), INT16_C( 12260) }, + { -INT16_C( 13478), INT16_C( 14380), -INT16_C( 27518), -INT16_C( 860) }, + { INT16_C( 23386), -INT16_C( 13268), -INT16_C( 22398), -INT16_C( 2908) }, + { INT16_C( 23386), INT16_C( 27692), -INT16_C( 30590), -INT16_C( 19292) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x4_t a = simde_vld1_s16(test_vec[i].a); + simde_int16x4_t b = simde_vld1_s16(test_vec[i].b); + + simde_int16x4_t r3 = simde_vsli_n_s16(a, b, 3); + simde_int16x4_t r6 = simde_vsli_n_s16(a, b, 6); + simde_int16x4_t r10 = simde_vsli_n_s16(a, b, 10); + simde_int16x4_t r13 = simde_vsli_n_s16(a, b, 13); + simde_int16x4_t r15 = simde_vsli_n_s16(a, b, 15); + + simde_test_arm_neon_assert_equal_i16x4(r3, simde_vld1_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x4(r6, simde_vld1_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x4(r10, simde_vld1_s16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i16x4(r13, simde_vld1_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x4(r15, simde_vld1_s16(test_vec[i].r15)); + } + return 0; +} + +static int +test_simde_vsli_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int32_t a[2]; + int32_t b[2]; + int32_t r6[2]; + int32_t r13[2]; + int32_t r19[2]; + int32_t r26[2]; + int32_t r31[2]; + } test_vec[] = { + { { -INT32_C(1940115604), -INT32_C( 493831926) }, + { INT32_C(1869505583), -INT32_C( 899781217) }, + { -INT32_C( 610726932), -INT32_C(1751423030) }, + { -INT32_C( 863637652), -INT32_C( 843843318) }, + { INT32_C( 561786732), INT32_C(1828239626) }, + { -INT32_C(1134809236), INT32_C(2123413770) }, + { -INT32_C(1940115604), -INT32_C( 493831926) } }, + { { -INT32_C(1955947586), INT32_C(1978912402) }, + { -INT32_C(1236739145), INT32_C(1680908470) }, + { -INT32_C(1841893890), INT32_C( 203959698) }, + { INT32_C( 460782526), INT32_C( 337038994) }, + { -INT32_C( 574972994), INT32_C( 
95669906) }, + { -INT32_C( 546661442), -INT32_C( 638333294) }, + { -INT32_C(1955947586), INT32_C(1978912402) } }, + { { INT32_C( 280147815), -INT32_C( 796117516) }, + { INT32_C( 183449113), -INT32_C(1555283915) }, + { -INT32_C(1144158617), -INT32_C( 753922700) }, + { -INT32_C( 423413913), -INT32_C(2012826124) }, + { -INT32_C(1328892057), INT32_C( 28063220) }, + { INT32_C(1689433959), -INT32_C( 729008652) }, + { -INT32_C(1867335833), -INT32_C( 796117516) } }, + { { -INT32_C( 481956659), INT32_C(1163086735) }, + { -INT32_C( 907228244), -INT32_C( 499683881) }, + { INT32_C(2066934541), -INT32_C(1914997297) }, + { -INT32_C(1720349491), -INT32_C( 306517105) }, + { INT32_C(1566960845), INT32_C(1857768335) }, + { -INT32_C(1287263027), INT32_C(1565739919) }, + { INT32_C(1665526989), -INT32_C( 984396913) } }, + { { INT32_C( 866589805), -INT32_C(1170103474) }, + { -INT32_C( 619576881), -INT32_C(1677689277) }, + { -INT32_C( 998214675), INT32_C( 2068686) }, + { INT32_C(1077541997), INT32_C( 264790862) }, + { INT32_C( 243211373), -INT32_C( 233200818) }, + { INT32_C(1067916397), INT32_C( 239182670) }, + { -INT32_C(1280893843), -INT32_C(1170103474) } }, + { { -INT32_C(1782422731), INT32_C(1801404031) }, + { -INT32_C( 75297761), -INT32_C( 202030994) }, + { -INT32_C( 524089355), -INT32_C( 45081665) }, + { INT32_C(1636033333), -INT32_C(1475486081) }, + { INT32_C(1627022133), INT32_C( 58146431) }, + { INT32_C(2109891381), -INT32_C(1151385985) }, + { -INT32_C(1782422731), INT32_C(1801404031) } }, + { { -INT32_C(1145515122), -INT32_C(1205350463) }, + { -INT32_C(2014419578), -INT32_C( 605233209) }, + { -INT32_C( 73834098), -INT32_C( 80219711) }, + { -INT32_C( 860826738), -INT32_C(1678184511) }, + { INT32_C( 741397390), -INT32_C( 29372479) }, + { INT32_C( 465097614), INT32_C( 472371137) }, + { INT32_C(1001968526), -INT32_C(1205350463) } }, + { { INT32_C(1262095535), -INT32_C(1722331384) }, + { INT32_C(1969407398), -INT32_C(1230587853) }, + { INT32_C(1488021935), -INT32_C(1448211256) }, + 
{ INT32_C(1488243887), -INT32_C( 687444216) }, + { INT32_C( 758254767), -INT32_C(1046524152) }, + { -INT32_C(1690694481), -INT32_C( 849916152) }, + { INT32_C(1262095535), -INT32_C(1722331384) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x2_t a = simde_vld1_s32(test_vec[i].a); + simde_int32x2_t b = simde_vld1_s32(test_vec[i].b); + + simde_int32x2_t r6 = simde_vsli_n_s32(a, b, 6); + simde_int32x2_t r13 = simde_vsli_n_s32(a, b, 13); + simde_int32x2_t r19 = simde_vsli_n_s32(a, b, 19); + simde_int32x2_t r26 = simde_vsli_n_s32(a, b, 26); + simde_int32x2_t r31 = simde_vsli_n_s32(a, b, 31); + + simde_test_arm_neon_assert_equal_i32x2(r6, simde_vld1_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x2(r13, simde_vld1_s32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i32x2(r19, simde_vld1_s32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_i32x2(r26, simde_vld1_s32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i32x2(r31, simde_vld1_s32(test_vec[i].r31)); + } + return 0; +} + +static int +test_simde_vsli_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int64_t a[1]; + int64_t b[1]; + int64_t r13[1]; + int64_t r26[1]; + int64_t r39[1]; + int64_t r52[1]; + int64_t r63[1]; + } test_vec[] = { + { { INT64_C( 6231160190656283089) }, + { -INT64_C( 460160416367637488) }, + { -INT64_C( 6498339846937771567) }, + { INT64_C( 2903370611581485521) }, + { INT64_C( 6558938900547469777) }, + { -INT64_C( 4536946518386572847) }, + { INT64_C( 6231160190656283089) } }, + { { -INT64_C( 1382763905312428596) }, + { -INT64_C( 4253508470463334304) }, + { INT64_C( 1158165201708387788) }, + { INT64_C( 6062878508435041740) }, + { INT64_C( 8465694235828350412) }, + { -INT64_C( 8786681692709524020) }, + { INT64_C( 7840608131542347212) } }, + { { -INT64_C( 188675660009576721) }, + { INT64_C( 8862026640770314742) }, + { -INT64_C( 8662432930376792337) }, + { INT64_C( 1973885914004186863) }, + { -INT64_C( 7721145048391358737) }, 
+ { INT64_C( 2261282537279973103) }, + { INT64_C( 9034696376845199087) } }, + { { INT64_C( 8280825101553980221) }, + { -INT64_C( 5326939997036914826) }, + { INT64_C( 6704022670392873789) }, + { INT64_C( 3396608425067762493) }, + { INT64_C( 7326154889283982141) }, + { INT64_C( 8605084274724655933) }, + { INT64_C( 8280825101553980221) } }, + { { -INT64_C( 4186443145725907458) }, + { INT64_C( 8264653391654679543) }, + { INT64_C( 4489833921080389118) }, + { -INT64_C( 2088201486283346434) }, + { -INT64_C( 6414819513448829442) }, + { INT64_C( 4573058129509707262) }, + { -INT64_C( 4186443145725907458) } }, + { { INT64_C( 668356405945180429) }, + { -INT64_C( 5702155833444664254) }, + { -INT64_C( 4904592946104876787) }, + { -INT64_C( 1416821951687024371) }, + { -INT64_C( 3603406246906420979) }, + { -INT64_C( 4312624781926588147) }, + { INT64_C( 668356405945180429) } }, + { { -INT64_C( 7452140987469877822) }, + { -INT64_C( 6307661369476126878) }, + { -INT64_C( 3031788287977304638) }, + { -INT64_C( 7092131897022960190) }, + { INT64_C( 8499331815746219458) }, + { INT64_C( 8513119691558530498) }, + { INT64_C( 1771231049384897986) } }, + { { INT64_C( 8764810789801773550) }, + { -INT64_C( 8804733489023547712) }, + { -INT64_C( 1607413876556035602) }, + { INT64_C( 3040791881582987758) }, + { INT64_C( 7062594716434778606) }, + { INT64_C( 7783026071035005422) }, + { INT64_C( 8764810789801773550) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x1_t a = simde_vld1_s64(test_vec[i].a); + simde_int64x1_t b = simde_vld1_s64(test_vec[i].b); + + simde_int64x1_t r13 = simde_vsli_n_s64(a, b, 13); + simde_int64x1_t r26 = simde_vsli_n_s64(a, b, 26); + simde_int64x1_t r39 = simde_vsli_n_s64(a, b, 39); + simde_int64x1_t r52 = simde_vsli_n_s64(a, b, 52); + simde_int64x1_t r63 = simde_vsli_n_s64(a, b, 63); + + simde_test_arm_neon_assert_equal_i64x1(r13, simde_vld1_s64(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i64x1(r26, 
simde_vld1_s64(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i64x1(r39, simde_vld1_s64(test_vec[i].r39)); + simde_test_arm_neon_assert_equal_i64x1(r52, simde_vld1_s64(test_vec[i].r52)); + simde_test_arm_neon_assert_equal_i64x1(r63, simde_vld1_s64(test_vec[i].r63)); + } + return 0; +} + +static int +test_simde_vsli_n_u8 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint8_t a[8]; + uint8_t b[8]; + uint8_t r0[8]; + uint8_t r3[8]; + uint8_t r5[8]; + uint8_t r7[8]; + } test_vec[] = { + { { UINT8_C( 194), UINT8_C( 153), UINT8_C( 182), UINT8_C( 53), UINT8_C( 45), UINT8_C( 214), UINT8_C( 136), UINT8_C( 217) }, + { UINT8_C( 51), UINT8_C( 2), UINT8_C( 97), UINT8_C( 189), UINT8_C( 250), UINT8_C( 77), UINT8_C( 65), UINT8_C( 28) }, + { UINT8_C( 51), UINT8_C( 2), UINT8_C( 97), UINT8_C( 189), UINT8_C( 250), UINT8_C( 77), UINT8_C( 65), UINT8_C( 28) }, + { UINT8_C( 154), UINT8_C( 17), UINT8_C( 14), UINT8_C( 237), UINT8_C( 213), UINT8_C( 110), UINT8_C( 8), UINT8_C( 225) }, + { UINT8_C( 98), UINT8_C( 89), UINT8_C( 54), UINT8_C( 181), UINT8_C( 77), UINT8_C( 182), UINT8_C( 40), UINT8_C( 153) }, + { UINT8_C( 194), UINT8_C( 25), UINT8_C( 182), UINT8_C( 181), UINT8_C( 45), UINT8_C( 214), UINT8_C( 136), UINT8_C( 89) } }, + { { UINT8_C( 149), UINT8_C( 148), UINT8_C( 192), UINT8_C( 18), UINT8_C( 191), UINT8_C( 120), UINT8_C( 160), UINT8_C( 251) }, + { UINT8_C( 243), UINT8_C( 246), UINT8_C( 159), UINT8_C( 234), UINT8_C( 49), UINT8_C( 191), UINT8_C( 110), UINT8_C( 189) }, + { UINT8_C( 243), UINT8_C( 246), UINT8_C( 159), UINT8_C( 234), UINT8_C( 49), UINT8_C( 191), UINT8_C( 110), UINT8_C( 189) }, + { UINT8_C( 157), UINT8_C( 180), UINT8_C( 248), UINT8_C( 82), UINT8_C( 143), UINT8_C( 248), UINT8_C( 112), UINT8_C( 235) }, + { UINT8_C( 117), UINT8_C( 212), UINT8_C( 224), UINT8_C( 82), UINT8_C( 63), UINT8_C( 248), UINT8_C( 192), UINT8_C( 187) }, + { UINT8_C( 149), UINT8_C( 20), UINT8_C( 192), UINT8_C( 18), UINT8_C( 191), UINT8_C( 248), UINT8_C( 32), UINT8_C( 251) } }, + { { UINT8_C( 16), UINT8_C( 
99), UINT8_C( 9), UINT8_C( 145), UINT8_C( 219), UINT8_C( 91), UINT8_C( 218), UINT8_C( 77) }, + { UINT8_C( 145), UINT8_C( 92), UINT8_C( 7), UINT8_C( 26), UINT8_C( 192), UINT8_C( 34), UINT8_C( 2), UINT8_C( 133) }, + { UINT8_C( 145), UINT8_C( 92), UINT8_C( 7), UINT8_C( 26), UINT8_C( 192), UINT8_C( 34), UINT8_C( 2), UINT8_C( 133) }, + { UINT8_C( 136), UINT8_C( 227), UINT8_C( 57), UINT8_C( 209), UINT8_C( 3), UINT8_C( 19), UINT8_C( 18), UINT8_C( 45) }, + { UINT8_C( 48), UINT8_C( 131), UINT8_C( 233), UINT8_C( 81), UINT8_C( 27), UINT8_C( 91), UINT8_C( 90), UINT8_C( 173) }, + { UINT8_C( 144), UINT8_C( 99), UINT8_C( 137), UINT8_C( 17), UINT8_C( 91), UINT8_C( 91), UINT8_C( 90), UINT8_C( 205) } }, + { { UINT8_C( 102), UINT8_C( 103), UINT8_C( 7), UINT8_C( 20), UINT8_C( 115), UINT8_C( 205), UINT8_C( 70), UINT8_C( 133) }, + { UINT8_C( 41), UINT8_C( 89), UINT8_C( 182), UINT8_C( 231), UINT8_C( 117), UINT8_C( 92), UINT8_C( 234), UINT8_C( 177) }, + { UINT8_C( 41), UINT8_C( 89), UINT8_C( 182), UINT8_C( 231), UINT8_C( 117), UINT8_C( 92), UINT8_C( 234), UINT8_C( 177) }, + { UINT8_C( 78), UINT8_C( 207), UINT8_C( 183), UINT8_C( 60), UINT8_C( 171), UINT8_C( 229), UINT8_C( 86), UINT8_C( 141) }, + { UINT8_C( 38), UINT8_C( 39), UINT8_C( 199), UINT8_C( 244), UINT8_C( 179), UINT8_C( 141), UINT8_C( 70), UINT8_C( 37) }, + { UINT8_C( 230), UINT8_C( 231), UINT8_C( 7), UINT8_C( 148), UINT8_C( 243), UINT8_C( 77), UINT8_C( 70), UINT8_C( 133) } }, + { { UINT8_C( 51), UINT8_C( 28), UINT8_C( 95), UINT8_C( 70), UINT8_C( 30), UINT8_C( 241), UINT8_C( 5), UINT8_C( 134) }, + { UINT8_C( 85), UINT8_C( 174), UINT8_C( 194), UINT8_C( 192), UINT8_C( 222), UINT8_C( 190), UINT8_C( 203), UINT8_C( 97) }, + { UINT8_C( 85), UINT8_C( 174), UINT8_C( 194), UINT8_C( 192), UINT8_C( 222), UINT8_C( 190), UINT8_C( 203), UINT8_C( 97) }, + { UINT8_C( 171), UINT8_C( 116), UINT8_C( 23), UINT8_C( 6), UINT8_C( 246), UINT8_C( 241), UINT8_C( 93), UINT8_C( 14) }, + { UINT8_C( 179), UINT8_C( 220), UINT8_C( 95), UINT8_C( 6), UINT8_C( 222), 
UINT8_C( 209), UINT8_C( 101), UINT8_C( 38) }, + { UINT8_C( 179), UINT8_C( 28), UINT8_C( 95), UINT8_C( 70), UINT8_C( 30), UINT8_C( 113), UINT8_C( 133), UINT8_C( 134) } }, + { { UINT8_C( 167), UINT8_C( 158), UINT8_C( 137), UINT8_C( 37), UINT8_C( 155), UINT8_C( 179), UINT8_C( 172), UINT8_C( 255) }, + { UINT8_C( 33), UINT8_C( 236), UINT8_C( 241), UINT8_C( 226), UINT8_C( 155), UINT8_C( 240), UINT8_C( 33), UINT8_C( 64) }, + { UINT8_C( 33), UINT8_C( 236), UINT8_C( 241), UINT8_C( 226), UINT8_C( 155), UINT8_C( 240), UINT8_C( 33), UINT8_C( 64) }, + { UINT8_C( 15), UINT8_C( 102), UINT8_C( 137), UINT8_C( 21), UINT8_C( 219), UINT8_C( 131), UINT8_C( 12), UINT8_C( 7) }, + { UINT8_C( 39), UINT8_C( 158), UINT8_C( 41), UINT8_C( 69), UINT8_C( 123), UINT8_C( 19), UINT8_C( 44), UINT8_C( 31) }, + { UINT8_C( 167), UINT8_C( 30), UINT8_C( 137), UINT8_C( 37), UINT8_C( 155), UINT8_C( 51), UINT8_C( 172), UINT8_C( 127) } }, + { { UINT8_C( 5), UINT8_C( 77), UINT8_C( 86), UINT8_C( 188), UINT8_C( 22), UINT8_C( 139), UINT8_C( 165), UINT8_C( 74) }, + { UINT8_C( 27), UINT8_C( 100), UINT8_C( 63), UINT8_C( 133), UINT8_C( 173), UINT8_C( 182), UINT8_C( 69), UINT8_C( 152) }, + { UINT8_C( 27), UINT8_C( 100), UINT8_C( 63), UINT8_C( 133), UINT8_C( 173), UINT8_C( 182), UINT8_C( 69), UINT8_C( 152) }, + { UINT8_C( 221), UINT8_C( 37), UINT8_C( 254), UINT8_C( 44), UINT8_C( 110), UINT8_C( 179), UINT8_C( 45), UINT8_C( 194) }, + { UINT8_C( 101), UINT8_C( 141), UINT8_C( 246), UINT8_C( 188), UINT8_C( 182), UINT8_C( 203), UINT8_C( 165), UINT8_C( 10) }, + { UINT8_C( 133), UINT8_C( 77), UINT8_C( 214), UINT8_C( 188), UINT8_C( 150), UINT8_C( 11), UINT8_C( 165), UINT8_C( 74) } }, + { { UINT8_C( 250), UINT8_C( 205), UINT8_C( 140), UINT8_C( 192), UINT8_C( 13), UINT8_C( 95), UINT8_C( 19), UINT8_C( 34) }, + { UINT8_C( 170), UINT8_C( 18), UINT8_C( 194), UINT8_C( 134), UINT8_C( 116), UINT8_C( 119), UINT8_C( 217), UINT8_C( 7) }, + { UINT8_C( 170), UINT8_C( 18), UINT8_C( 194), UINT8_C( 134), UINT8_C( 116), UINT8_C( 119), UINT8_C( 
217), UINT8_C( 7) }, + { UINT8_C( 82), UINT8_C( 149), UINT8_C( 20), UINT8_C( 48), UINT8_C( 165), UINT8_C( 191), UINT8_C( 203), UINT8_C( 58) }, + { UINT8_C( 90), UINT8_C( 77), UINT8_C( 76), UINT8_C( 192), UINT8_C( 141), UINT8_MAX, UINT8_C( 51), UINT8_C( 226) }, + { UINT8_C( 122), UINT8_C( 77), UINT8_C( 12), UINT8_C( 64), UINT8_C( 13), UINT8_C( 223), UINT8_C( 147), UINT8_C( 162) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x8_t a = simde_vld1_u8(test_vec[i].a); + simde_uint8x8_t b = simde_vld1_u8(test_vec[i].b); + + simde_uint8x8_t r0 = simde_vsli_n_u8(a, b, 0); + simde_uint8x8_t r3 = simde_vsli_n_u8(a, b, 3); + simde_uint8x8_t r5 = simde_vsli_n_u8(a, b, 5); + simde_uint8x8_t r7 = simde_vsli_n_u8(a, b, 7); + + simde_test_arm_neon_assert_equal_u8x8(r0, simde_vld1_u8(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_u8x8(r3, simde_vld1_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x8(r5, simde_vld1_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x8(r7, simde_vld1_u8(test_vec[i].r7)); + } + return 0; +} + +static int +test_simde_vsli_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint16_t a[4]; + uint16_t b[4]; + uint16_t r3[4]; + uint16_t r6[4]; + uint16_t r10[4]; + uint16_t r13[4]; + uint16_t r15[4]; + } test_vec[] = { + { { UINT16_C( 15391), UINT16_C( 8013), UINT16_C( 1254), UINT16_C( 45654) }, + { UINT16_C( 35066), UINT16_C( 48548), UINT16_C( 14346), UINT16_C( 1393) }, + { UINT16_C( 18391), UINT16_C( 60709), UINT16_C( 49238), UINT16_C( 11150) }, + { UINT16_C( 16031), UINT16_C( 26893), UINT16_C( 678), UINT16_C( 23638) }, + { UINT16_C( 59423), UINT16_C( 37709), UINT16_C( 10470), UINT16_C( 50774) }, + { UINT16_C( 23583), UINT16_C( 40781), UINT16_C( 17638), UINT16_C( 12886) }, + { UINT16_C( 15391), UINT16_C( 8013), UINT16_C( 1254), UINT16_C( 45654) } }, + { { UINT16_C( 23736), UINT16_C( 7291), UINT16_C( 61578), UINT16_C( 8940) }, + { UINT16_C( 15242), UINT16_C( 20311), UINT16_C( 19232), 
UINT16_C( 4411) }, + { UINT16_C( 56400), UINT16_C( 31419), UINT16_C( 22786), UINT16_C( 35292) }, + { UINT16_C( 58040), UINT16_C( 54779), UINT16_C( 51210), UINT16_C( 20204) }, + { UINT16_C( 10424), UINT16_C( 23675), UINT16_C( 32906), UINT16_C( 61164) }, + { UINT16_C( 23736), UINT16_C( 64635), UINT16_C( 4234), UINT16_C( 25324) }, + { UINT16_C( 23736), UINT16_C( 40059), UINT16_C( 28810), UINT16_C( 41708) } }, + { { UINT16_C( 47025), UINT16_C( 52193), UINT16_C( 31710), UINT16_C( 14843) }, + { UINT16_C( 54022), UINT16_C( 30364), UINT16_C( 65371), UINT16_C( 28567) }, + { UINT16_C( 38961), UINT16_C( 46305), UINT16_C( 64222), UINT16_C( 31931) }, + { UINT16_C( 49585), UINT16_C( 42785), UINT16_C( 55006), UINT16_C( 58875) }, + { UINT16_C( 7089), UINT16_C( 29665), UINT16_C( 28638), UINT16_C( 24059) }, + { UINT16_C( 55217), UINT16_C( 35809), UINT16_C( 31710), UINT16_C( 63995) }, + { UINT16_C( 14257), UINT16_C( 19425), UINT16_C( 64478), UINT16_C( 47611) } }, + { { UINT16_C( 54568), UINT16_C( 15862), UINT16_C( 52509), UINT16_C( 47425) }, + { UINT16_C( 1779), UINT16_C( 31523), UINT16_C( 16426), UINT16_C( 60116) }, + { UINT16_C( 14232), UINT16_C( 55582), UINT16_C( 341), UINT16_C( 22177) }, + { UINT16_C( 48360), UINT16_C( 51446), UINT16_C( 2717), UINT16_C( 46337) }, + { UINT16_C( 52520), UINT16_C( 36342), UINT16_C( 43293), UINT16_C( 20801) }, + { UINT16_C( 29992), UINT16_C( 32246), UINT16_C( 19741), UINT16_C( 39233) }, + { UINT16_C( 54568), UINT16_C( 48630), UINT16_C( 19741), UINT16_C( 14657) } }, + { { UINT16_C( 19021), UINT16_C( 2955), UINT16_C( 23434), UINT16_C( 24652) }, + { UINT16_C( 64304), UINT16_C( 33123), UINT16_C( 28656), UINT16_C( 7738) }, + { UINT16_C( 55685), UINT16_C( 2843), UINT16_C( 32642), UINT16_C( 61908) }, + { UINT16_C( 52237), UINT16_C( 22731), UINT16_C( 64522), UINT16_C( 36492) }, + { UINT16_C( 49741), UINT16_C( 36747), UINT16_C( 50058), UINT16_C( 59468) }, + { UINT16_C( 2637), UINT16_C( 27531), UINT16_C( 7050), UINT16_C( 16460) }, + { UINT16_C( 19021), 
UINT16_C( 35723), UINT16_C( 23434), UINT16_C( 24652) } }, + { { UINT16_C( 35638), UINT16_C( 64110), UINT16_C( 55906), UINT16_C( 20848) }, + { UINT16_C( 27670), UINT16_C( 29508), UINT16_C( 17746), UINT16_C( 25076) }, + { UINT16_C( 24758), UINT16_C( 39462), UINT16_C( 10898), UINT16_C( 4000) }, + { UINT16_C( 1462), UINT16_C( 53550), UINT16_C( 21666), UINT16_C( 32048) }, + { UINT16_C( 23350), UINT16_C( 4718), UINT16_C( 19042), UINT16_C( 53616) }, + { UINT16_C( 52022), UINT16_C( 39534), UINT16_C( 23138), UINT16_C( 37232) }, + { UINT16_C( 2870), UINT16_C( 31342), UINT16_C( 23138), UINT16_C( 20848) } }, + { { UINT16_C( 23800), UINT16_C( 37832), UINT16_C( 34512), UINT16_C( 33446) }, + { UINT16_C( 19904), UINT16_C( 51885), UINT16_C( 10602), UINT16_C( 42147) }, + { UINT16_C( 28160), UINT16_C( 21864), UINT16_C( 19280), UINT16_C( 9502) }, + { UINT16_C( 28728), UINT16_C( 43848), UINT16_C( 23184), UINT16_C( 10470) }, + { UINT16_C( 248), UINT16_C( 47048), UINT16_C( 43728), UINT16_C( 36518) }, + { UINT16_C( 7416), UINT16_C( 46024), UINT16_C( 18128), UINT16_C( 25254) }, + { UINT16_C( 23800), UINT16_C( 37832), UINT16_C( 1744), UINT16_C( 33446) } }, + { { UINT16_C( 32311), UINT16_C( 55845), UINT16_C( 55299), UINT16_C( 15525) }, + { UINT16_C( 16152), UINT16_C( 36439), UINT16_C( 64155), UINT16_C( 41506) }, + { UINT16_C( 63687), UINT16_C( 29373), UINT16_C( 54491), UINT16_C( 4373) }, + { UINT16_C( 50743), UINT16_C( 38373), UINT16_C( 42691), UINT16_C( 34981) }, + { UINT16_C( 25143), UINT16_C( 24101), UINT16_C( 27651), UINT16_C( 34981) }, + { UINT16_C( 7735), UINT16_C( 64037), UINT16_C( 30723), UINT16_C( 23717) }, + { UINT16_C( 32311), UINT16_C( 55845), UINT16_C( 55299), UINT16_C( 15525) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x4_t a = simde_vld1_u16(test_vec[i].a); + simde_uint16x4_t b = simde_vld1_u16(test_vec[i].b); + + simde_uint16x4_t r3 = simde_vsli_n_u16(a, b, 3); + simde_uint16x4_t r6 = simde_vsli_n_u16(a, b, 6); + 
simde_uint16x4_t r10 = simde_vsli_n_u16(a, b, 10); + simde_uint16x4_t r13 = simde_vsli_n_u16(a, b, 13); + simde_uint16x4_t r15 = simde_vsli_n_u16(a, b, 15); + + simde_test_arm_neon_assert_equal_u16x4(r3, simde_vld1_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x4(r6, simde_vld1_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x4(r10, simde_vld1_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x4(r13, simde_vld1_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x4(r15, simde_vld1_u16(test_vec[i].r15)); + } + return 0; +} + +static int +test_simde_vsli_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint32_t a[2]; + uint32_t b[2]; + uint32_t r6[2]; + uint32_t r13[2]; + uint32_t r19[2]; + uint32_t r26[2]; + uint32_t r31[2]; + } test_vec[] = { + { { UINT32_C(3081230558), UINT32_C(1323792986) }, + { UINT32_C(1071965024), UINT32_C(2237917539) }, + { UINT32_C(4181252126), UINT32_C(1492801754) }, + { UINT32_C(2624329950), UINT32_C(2100066906) }, + { UINT32_C( 453499102), UINT32_C(1260354138) }, + { UINT32_C(2208815326), UINT32_C(2397534810) }, + { UINT32_C( 933746910), UINT32_C(3471276634) } }, + { { UINT32_C(1801753472), UINT32_C(3276321139) }, + { UINT32_C(3767665232), UINT32_C(1682889056) }, + { UINT32_C( 612406272), UINT32_C( 330717235) }, + { UINT32_C(1078596480), UINT32_C(3677098355) }, + { UINT32_C( 310678400), UINT32_C(3405820275) }, + { UINT32_C(1130664832), UINT32_C(2202579315) }, + { UINT32_C(1801753472), UINT32_C(1128837491) } }, + { { UINT32_C(1903242450), UINT32_C( 544175578) }, + { UINT32_C(1301025297), UINT32_C(2094177212) }, + { UINT32_C(1661240402), UINT32_C( 883355418) }, + { UINT32_C(2185374930), UINT32_C(1400346074) }, + { UINT32_C(2424909010), UINT32_C(3722933722) }, + { UINT32_C(1165044946), UINT32_C(4033836506) }, + { UINT32_C(4050726098), UINT32_C( 544175578) } }, + { { UINT32_C(2126941907), UINT32_C( 977695758) }, + { UINT32_C(1486134999), UINT32_C(1742360137) }, + { UINT32_C( 623359443), 
UINT32_C(4136866382) }, + { UINT32_C(2480598739), UINT32_C(1237922830) }, + { UINT32_C(4139683539), UINT32_C(1917744142) }, + { UINT32_C(1590070995), UINT32_C( 642151438) }, + { UINT32_C(4274425555), UINT32_C(3125179406) } }, + { { UINT32_C(2706916007), UINT32_C(1068692422) }, + { UINT32_C( 545324817), UINT32_C(1867710312) }, + { UINT32_C( 541049959), UINT32_C(3569342982) }, + { UINT32_C( 534913703), UINT32_C(1609372614) }, + { UINT32_C(4169679527), UINT32_C(4215468998) }, + { UINT32_C(1163412135), UINT32_C(2746414022) }, + { UINT32_C(2706916007), UINT32_C(1068692422) } }, + { { UINT32_C(4153598072), UINT32_C(1583301308) }, + { UINT32_C(1843033608), UINT32_C(3432411003) }, + { UINT32_C(1990033976), UINT32_C( 630972156) }, + { UINT32_C(1321271416), UINT32_C(3455017660) }, + { UINT32_C(2957172856), UINT32_C(2078229180) }, + { UINT32_C( 596828280), UINT32_C(3999220412) }, + { UINT32_C(2006114424), UINT32_C(3730784956) } }, + { { UINT32_C( 503922803), UINT32_C( 102391840) }, + { UINT32_C(1659267842), UINT32_C(3517337088) }, + { UINT32_C(3113926835), UINT32_C(1771274272) }, + { UINT32_C(3445637235), UINT32_C(3384803360) }, + { UINT32_C(1477525619), UINT32_C(1879203872) }, + { UINT32_C( 168378483), UINT32_C( 35282976) }, + { UINT32_C( 503922803), UINT32_C( 102391840) } }, + { { UINT32_C( 135317347), UINT32_C(3314568864) }, + { UINT32_C( 923437489), UINT32_C( 213085622) }, + { UINT32_C(3265424483), UINT32_C( 752577952) }, + { UINT32_C(1362503523), UINT32_C(1840696992) }, + { UINT32_C(1300809571), UINT32_C(1840271008) }, + { UINT32_C(3289433955), UINT32_C(3650113184) }, + { UINT32_C(2282800995), UINT32_C(1167085216) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x2_t a = simde_vld1_u32(test_vec[i].a); + simde_uint32x2_t b = simde_vld1_u32(test_vec[i].b); + + simde_uint32x2_t r6 = simde_vsli_n_u32(a, b, 6); + simde_uint32x2_t r13 = simde_vsli_n_u32(a, b, 13); + simde_uint32x2_t r19 = simde_vsli_n_u32(a, b, 19); + 
simde_uint32x2_t r26 = simde_vsli_n_u32(a, b, 26); + simde_uint32x2_t r31 = simde_vsli_n_u32(a, b, 31); + + simde_test_arm_neon_assert_equal_u32x2(r6, simde_vld1_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x2(r13, simde_vld1_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x2(r19, simde_vld1_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x2(r26, simde_vld1_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x2(r31, simde_vld1_u32(test_vec[i].r31)); + } + return 0; +} + +static int +test_simde_vsli_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint64_t a[1]; + uint64_t b[1]; + uint64_t r13[1]; + uint64_t r26[1]; + uint64_t r39[1]; + uint64_t r52[1]; + uint64_t r63[1]; + } test_vec[] = { + { { UINT64_C( 2367438358383478378) }, + { UINT64_C(10151069329549398822) }, + { UINT64_C(18084407459726018154) }, + { UINT64_C( 1664254114169931370) }, + { UINT64_C( 1425832543274714730) }, + { UINT64_C( 3632949853674587754) }, + { UINT64_C( 2367438358383478378) } }, + { { UINT64_C( 4289360236958653475) }, + { UINT64_C( 9003445777938185491) }, + { UINT64_C( 6145006178828188707) }, + { UINT64_C(17172783880833014819) }, + { UINT64_C( 4575245904564132899) }, + { UINT64_C(15075481344510991395) }, + { UINT64_C(13512732273813429283) } }, + { { UINT64_C( 4661840731346591672) }, + { UINT64_C(16008367045675390461) }, + { UINT64_C( 2639218171596226488) }, + { UINT64_C( 891207328682762168) }, + { UINT64_C(14306527482023272376) }, + { UINT64_C( 6904633345777098680) }, + { UINT64_C(13885212768201367480) } }, + { { UINT64_C( 560855986095228110) }, + { UINT64_C(12519105941898633195) }, + { UINT64_C(11065570282205701326) }, + { UINT64_C( 1851373620381320398) }, + { UINT64_C( 3229069440521569486) }, + { UINT64_C(18354578113836057806) }, + { UINT64_C( 9784228022950003918) } }, + { { UINT64_C(15210706800944352878) }, + { UINT64_C( 2343987333079752503) }, + { UINT64_C(17330395931398827630) }, + { UINT64_C( 4461078750471655022) }, + { UINT64_C( 
2157113990720261742) }, + { UINT64_C(17543571407922269806) }, + { UINT64_C(15210706800944352878) } }, + { { UINT64_C( 3102245097986106578) }, + { UINT64_C(11620068368410445742) }, + { UINT64_C( 6400653677085181138) }, + { UINT64_C( 8508265199309464786) }, + { UINT64_C( 7909402293599511762) }, + { UINT64_C( 8857845421765600466) }, + { UINT64_C( 3102245097986106578) } }, + { { UINT64_C(17038098246975051983) }, + { UINT64_C(18292780545233057134) }, + { UINT64_C(11556115806516271311) }, + { UINT64_C(17456844777586120911) }, + { UINT64_C( 7312358247877793999) }, + { UINT64_C( 6260984338677455055) }, + { UINT64_C( 7814726210120276175) } }, + { { UINT64_C( 1351949120307680797) }, + { UINT64_C(15139404553755458091) }, + { UINT64_C( 4541696815397172765) }, + { UINT64_C(16944259135176136221) }, + { UINT64_C(14068424518630069789) }, + { UINT64_C(11723739062141933085) }, + { UINT64_C(10575321157162456605) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x1_t a = simde_vld1_u64(test_vec[i].a); + simde_uint64x1_t b = simde_vld1_u64(test_vec[i].b); + + simde_uint64x1_t r13 = simde_vsli_n_u64(a, b, 13); + simde_uint64x1_t r26 = simde_vsli_n_u64(a, b, 26); + simde_uint64x1_t r39 = simde_vsli_n_u64(a, b, 39); + simde_uint64x1_t r52 = simde_vsli_n_u64(a, b, 52); + simde_uint64x1_t r63 = simde_vsli_n_u64(a, b, 63); + + simde_test_arm_neon_assert_equal_u64x1(r13, simde_vld1_u64(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u64x1(r26, simde_vld1_u64(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u64x1(r39, simde_vld1_u64(test_vec[i].r39)); + simde_test_arm_neon_assert_equal_u64x1(r52, simde_vld1_u64(test_vec[i].r52)); + simde_test_arm_neon_assert_equal_u64x1(r63, simde_vld1_u64(test_vec[i].r63)); + } + return 0; +} + +static int +test_simde_vsliq_n_s8 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int8_t a[16]; + int8_t b[16]; + int8_t r0[16]; + int8_t r3[16]; + int8_t r5[16]; + int8_t r7[16]; + } test_vec[] = { + { { INT8_C( 
107), INT8_C( 125), -INT8_C( 74), INT8_C( 13), -INT8_C( 11), INT8_C( 64), INT8_C( 106), -INT8_C( 95), -INT8_C( 9), -INT8_C( 95), INT8_C( 80), INT8_C( 86), INT8_C( 70), INT8_C( 72), INT8_C( 12), INT8_C( 60) }, + { INT8_C( 59), -INT8_C( 7), -INT8_C( 115), -INT8_C( 18), -INT8_C( 104), INT8_C( 105), INT8_C( 23), -INT8_C( 72), INT8_C( 48), INT8_C( 16), -INT8_C( 56), INT8_C( 12), INT8_C( 111), INT8_C( 13), -INT8_C( 111), INT8_C( 21) }, + { INT8_C( 59), -INT8_C( 7), -INT8_C( 115), -INT8_C( 18), -INT8_C( 104), INT8_C( 105), INT8_C( 23), -INT8_C( 72), INT8_C( 48), INT8_C( 16), -INT8_C( 56), INT8_C( 12), INT8_C( 111), INT8_C( 13), -INT8_C( 111), INT8_C( 21) }, + { -INT8_C( 37), -INT8_C( 51), INT8_C( 110), INT8_C( 117), -INT8_C( 59), INT8_C( 72), -INT8_C( 70), -INT8_C( 63), -INT8_C( 121), -INT8_C( 127), INT8_C( 64), INT8_C( 102), INT8_C( 126), INT8_C( 104), -INT8_C( 116), -INT8_C( 84) }, + { INT8_C( 107), INT8_C( 61), -INT8_C( 74), -INT8_C( 51), INT8_C( 21), INT8_C( 32), -INT8_C( 22), INT8_C( 1), INT8_C( 23), INT8_C( 1), INT8_C( 16), -INT8_C( 106), -INT8_C( 26), -INT8_C( 88), INT8_C( 44), -INT8_C( 68) }, + { -INT8_C( 21), -INT8_C( 3), -INT8_C( 74), INT8_C( 13), INT8_C( 117), -INT8_C( 64), -INT8_C( 22), INT8_C( 33), INT8_C( 119), INT8_C( 33), INT8_C( 80), INT8_C( 86), -INT8_C( 58), -INT8_C( 56), -INT8_C( 116), -INT8_C( 68) } }, + { { -INT8_C( 83), INT8_C( 48), -INT8_C( 92), INT8_C( 37), -INT8_C( 62), -INT8_C( 48), INT8_C( 120), -INT8_C( 43), -INT8_C( 96), INT8_C( 105), INT8_C( 127), INT8_C( 63), INT8_C( 110), INT8_C( 120), INT8_C( 10), -INT8_C( 86) }, + { INT8_C( 27), -INT8_C( 20), -INT8_C( 50), -INT8_C( 62), INT8_C( 1), INT8_C( 119), INT8_C( 75), -INT8_C( 28), INT8_C( 42), INT8_C( 27), INT8_C( 107), INT8_C( 0), -INT8_C( 101), INT8_C( 64), INT8_C( 104), -INT8_C( 83) }, + { INT8_C( 27), -INT8_C( 20), -INT8_C( 50), -INT8_C( 62), INT8_C( 1), INT8_C( 119), INT8_C( 75), -INT8_C( 28), INT8_C( 42), INT8_C( 27), INT8_C( 107), INT8_C( 0), -INT8_C( 101), INT8_C( 64), INT8_C( 104), 
-INT8_C( 83) }, + { -INT8_C( 35), INT8_C( 96), INT8_C( 116), INT8_C( 21), INT8_C( 10), -INT8_C( 72), INT8_C( 88), INT8_C( 37), INT8_C( 80), -INT8_C( 39), INT8_C( 95), INT8_C( 7), -INT8_C( 34), INT8_C( 0), INT8_C( 66), INT8_C( 106) }, + { INT8_C( 109), -INT8_C( 112), -INT8_C( 60), INT8_C( 69), INT8_C( 34), -INT8_C( 16), INT8_C( 120), -INT8_C( 107), INT8_C( 64), INT8_C( 105), INT8_MAX, INT8_C( 31), INT8_C( 110), INT8_C( 24), INT8_C( 10), -INT8_C( 86) }, + { -INT8_C( 83), INT8_C( 48), INT8_C( 36), INT8_C( 37), -INT8_C( 62), -INT8_C( 48), -INT8_C( 8), INT8_C( 85), INT8_C( 32), -INT8_C( 23), -INT8_C( 1), INT8_C( 63), -INT8_C( 18), INT8_C( 120), INT8_C( 10), -INT8_C( 86) } }, + { { -INT8_C( 19), -INT8_C( 90), INT8_C( 10), -INT8_C( 113), -INT8_C( 120), -INT8_C( 94), INT8_C( 87), INT8_C( 114), -INT8_C( 45), -INT8_C( 75), INT8_C( 120), -INT8_C( 16), INT8_C( 46), INT8_C( 102), -INT8_C( 116), -INT8_C( 68) }, + { INT8_C( 92), -INT8_C( 76), INT8_C( 70), -INT8_C( 37), INT8_C( 123), INT8_C( 52), -INT8_C( 87), INT8_C( 95), -INT8_C( 94), INT8_C( 15), INT8_C( 0), INT8_C( 4), INT8_C( 101), INT8_C( 63), -INT8_C( 71), -INT8_C( 97) }, + { INT8_C( 92), -INT8_C( 76), INT8_C( 70), -INT8_C( 37), INT8_C( 123), INT8_C( 52), -INT8_C( 87), INT8_C( 95), -INT8_C( 94), INT8_C( 15), INT8_C( 0), INT8_C( 4), INT8_C( 101), INT8_C( 63), -INT8_C( 71), -INT8_C( 97) }, + { -INT8_C( 27), -INT8_C( 90), INT8_C( 50), -INT8_C( 33), -INT8_C( 40), -INT8_C( 94), INT8_C( 79), -INT8_C( 6), INT8_C( 19), INT8_C( 125), INT8_C( 0), INT8_C( 32), INT8_C( 46), -INT8_C( 2), -INT8_C( 52), -INT8_C( 4) }, + { -INT8_C( 115), -INT8_C( 122), -INT8_C( 54), INT8_C( 111), INT8_C( 104), -INT8_C( 126), INT8_C( 55), -INT8_C( 14), INT8_C( 83), -INT8_C( 11), INT8_C( 24), -INT8_C( 112), -INT8_C( 82), -INT8_C( 26), INT8_C( 44), -INT8_C( 4) }, + { INT8_C( 109), INT8_C( 38), INT8_C( 10), -INT8_C( 113), -INT8_C( 120), INT8_C( 34), -INT8_C( 41), -INT8_C( 14), INT8_C( 83), -INT8_C( 75), INT8_C( 120), INT8_C( 112), -INT8_C( 82), -INT8_C( 26), 
-INT8_C( 116), -INT8_C( 68) } }, + { { -INT8_C( 15), INT8_C( 19), INT8_C( 64), -INT8_C( 114), INT8_C( 42), -INT8_C( 125), INT8_C( 89), -INT8_C( 12), -INT8_C( 7), -INT8_C( 83), INT8_C( 70), -INT8_C( 105), INT8_C( 44), -INT8_C( 31), INT8_C( 103), INT8_C( 51) }, + { INT8_C( 67), INT8_C( 4), -INT8_C( 27), -INT8_C( 70), INT8_C( 87), INT8_C( 38), -INT8_C( 25), -INT8_C( 12), INT8_C( 35), -INT8_C( 20), INT8_C( 122), INT8_C( 53), INT8_C( 17), INT8_C( 11), -INT8_C( 80), -INT8_C( 18) }, + { INT8_C( 67), INT8_C( 4), -INT8_C( 27), -INT8_C( 70), INT8_C( 87), INT8_C( 38), -INT8_C( 25), -INT8_C( 12), INT8_C( 35), -INT8_C( 20), INT8_C( 122), INT8_C( 53), INT8_C( 17), INT8_C( 11), -INT8_C( 80), -INT8_C( 18) }, + { INT8_C( 25), INT8_C( 35), INT8_C( 40), -INT8_C( 42), -INT8_C( 70), INT8_C( 51), INT8_C( 57), -INT8_C( 92), INT8_C( 25), INT8_C( 101), -INT8_C( 42), -INT8_C( 81), -INT8_C( 116), INT8_C( 89), -INT8_C( 121), INT8_C( 115) }, + { INT8_C( 113), -INT8_C( 109), -INT8_C( 96), INT8_C( 78), -INT8_C( 22), -INT8_C( 61), -INT8_C( 7), -INT8_C( 108), INT8_C( 121), -INT8_C( 115), INT8_C( 70), -INT8_C( 73), INT8_C( 44), INT8_C( 97), INT8_C( 7), -INT8_C( 45) }, + { -INT8_C( 15), INT8_C( 19), -INT8_C( 64), INT8_C( 14), -INT8_C( 86), INT8_C( 3), -INT8_C( 39), INT8_C( 116), -INT8_C( 7), INT8_C( 45), INT8_C( 70), -INT8_C( 105), -INT8_C( 84), -INT8_C( 31), INT8_C( 103), INT8_C( 51) } }, + { { -INT8_C( 117), INT8_C( 28), INT8_C( 31), INT8_C( 18), INT8_C( 49), INT8_C( 30), INT8_C( 101), -INT8_C( 25), -INT8_C( 21), INT8_C( 14), INT8_C( 3), -INT8_C( 42), -INT8_C( 6), INT8_C( 126), -INT8_C( 36), -INT8_C( 27) }, + { INT8_C( 105), INT8_C( 0), -INT8_C( 61), -INT8_C( 49), INT8_C( 81), INT8_C( 27), -INT8_C( 36), INT8_C( 79), INT8_C( 94), INT8_C( 43), -INT8_C( 22), INT8_C( 34), INT8_C( 55), INT8_C( 20), -INT8_C( 6), INT8_C( 19) }, + { INT8_C( 105), INT8_C( 0), -INT8_C( 61), -INT8_C( 49), INT8_C( 81), INT8_C( 27), -INT8_C( 36), INT8_C( 79), INT8_C( 94), INT8_C( 43), -INT8_C( 22), INT8_C( 34), INT8_C( 55), 
INT8_C( 20), -INT8_C( 6), INT8_C( 19) }, + { INT8_C( 75), INT8_C( 4), INT8_C( 31), INT8_C( 122), -INT8_C( 119), -INT8_C( 34), -INT8_C( 27), INT8_MAX, -INT8_C( 13), INT8_C( 94), INT8_C( 83), INT8_C( 22), -INT8_C( 70), -INT8_C( 90), -INT8_C( 44), -INT8_C( 99) }, + { INT8_C( 43), INT8_C( 28), INT8_MAX, -INT8_C( 14), INT8_C( 49), INT8_C( 126), -INT8_C( 123), -INT8_C( 25), -INT8_C( 53), INT8_C( 110), INT8_C( 67), INT8_C( 86), -INT8_C( 6), -INT8_C( 98), INT8_C( 92), INT8_C( 101) }, + { -INT8_C( 117), INT8_C( 28), -INT8_C( 97), -INT8_C( 110), -INT8_C( 79), -INT8_C( 98), INT8_C( 101), -INT8_C( 25), INT8_C( 107), -INT8_C( 114), INT8_C( 3), INT8_C( 86), -INT8_C( 6), INT8_C( 126), INT8_C( 92), -INT8_C( 27) } }, + { { INT8_C( 32), INT8_C( 1), -INT8_C( 82), INT8_C( 9), -INT8_C( 16), INT8_C( 104), INT8_C( 15), -INT8_C( 123), -INT8_C( 124), -INT8_C( 120), INT8_C( 24), INT8_C( 2), INT8_C( 70), INT8_C( 107), -INT8_C( 20), -INT8_C( 87) }, + { -INT8_C( 91), -INT8_C( 3), INT8_C( 66), -INT8_C( 82), -INT8_C( 60), -INT8_C( 88), INT8_C( 118), INT8_C( 92), INT8_C( 44), -INT8_C( 110), -INT8_C( 62), INT8_C( 8), INT8_C( 15), INT8_C( 24), INT8_C( 15), INT8_C( 22) }, + { -INT8_C( 91), -INT8_C( 3), INT8_C( 66), -INT8_C( 82), -INT8_C( 60), -INT8_C( 88), INT8_C( 118), INT8_C( 92), INT8_C( 44), -INT8_C( 110), -INT8_C( 62), INT8_C( 8), INT8_C( 15), INT8_C( 24), INT8_C( 15), INT8_C( 22) }, + { INT8_C( 40), -INT8_C( 23), INT8_C( 22), INT8_C( 113), INT8_C( 32), INT8_C( 64), -INT8_C( 73), -INT8_C( 27), INT8_C( 100), -INT8_C( 112), INT8_C( 16), INT8_C( 66), INT8_C( 126), -INT8_C( 61), INT8_C( 124), -INT8_C( 79) }, + { -INT8_C( 96), -INT8_C( 95), INT8_C( 78), -INT8_C( 55), -INT8_C( 112), INT8_C( 8), -INT8_C( 49), -INT8_C( 123), -INT8_C( 124), INT8_C( 72), INT8_C( 88), INT8_C( 2), -INT8_C( 26), INT8_C( 11), -INT8_C( 20), -INT8_C( 55) }, + { -INT8_C( 96), -INT8_C( 127), INT8_C( 46), INT8_C( 9), INT8_C( 112), INT8_C( 104), INT8_C( 15), INT8_C( 5), INT8_C( 4), INT8_C( 8), INT8_C( 24), INT8_C( 2), -INT8_C( 
58), INT8_C( 107), -INT8_C( 20), INT8_C( 41) } }, + { { INT8_C( 116), INT8_C( 19), INT8_C( 48), INT8_C( 76), -INT8_C( 11), -INT8_C( 16), INT8_C( 66), INT8_C( 14), -INT8_C( 56), -INT8_C( 32), INT8_C( 85), -INT8_C( 52), INT8_C( 90), INT8_C( 115), -INT8_C( 81), -INT8_C( 128) }, + { INT8_C( 38), -INT8_C( 126), -INT8_C( 53), INT8_C( 117), INT8_C( 74), -INT8_C( 43), INT8_C( 93), INT8_C( 63), INT8_C( 26), INT8_C( 0), INT8_C( 22), -INT8_C( 5), -INT8_C( 88), -INT8_C( 126), -INT8_C( 54), -INT8_C( 25) }, + { INT8_C( 38), -INT8_C( 126), -INT8_C( 53), INT8_C( 117), INT8_C( 74), -INT8_C( 43), INT8_C( 93), INT8_C( 63), INT8_C( 26), INT8_C( 0), INT8_C( 22), -INT8_C( 5), -INT8_C( 88), -INT8_C( 126), -INT8_C( 54), -INT8_C( 25) }, + { INT8_C( 52), INT8_C( 19), INT8_C( 88), -INT8_C( 84), INT8_C( 85), -INT8_C( 88), -INT8_C( 22), -INT8_C( 2), -INT8_C( 48), INT8_C( 0), -INT8_C( 75), -INT8_C( 36), INT8_C( 66), INT8_C( 19), INT8_C( 87), INT8_C( 56) }, + { -INT8_C( 44), INT8_C( 83), INT8_C( 112), -INT8_C( 84), INT8_C( 85), -INT8_C( 80), -INT8_C( 94), -INT8_C( 18), INT8_C( 72), INT8_C( 0), -INT8_C( 43), INT8_C( 108), INT8_C( 26), INT8_C( 83), INT8_C( 79), -INT8_C( 32) }, + { INT8_C( 116), INT8_C( 19), -INT8_C( 80), -INT8_C( 52), INT8_C( 117), -INT8_C( 16), -INT8_C( 62), -INT8_C( 114), INT8_C( 72), INT8_C( 96), INT8_C( 85), -INT8_C( 52), INT8_C( 90), INT8_C( 115), INT8_C( 47), INT8_MIN } }, + { { -INT8_C( 22), -INT8_C( 17), INT8_C( 28), INT8_C( 50), -INT8_C( 115), -INT8_C( 92), INT8_C( 125), -INT8_C( 70), -INT8_C( 83), -INT8_C( 95), INT8_C( 82), INT8_C( 105), -INT8_C( 105), INT8_C( 108), INT8_C( 59), -INT8_C( 115) }, + { -INT8_C( 23), -INT8_C( 45), -INT8_C( 89), -INT8_C( 112), -INT8_C( 31), -INT8_C( 92), INT8_C( 60), INT8_C( 70), -INT8_C( 8), INT8_C( 108), INT8_C( 110), INT8_C( 82), INT8_C( 62), -INT8_C( 116), INT8_C( 65), -INT8_C( 36) }, + { -INT8_C( 23), -INT8_C( 45), -INT8_C( 89), -INT8_C( 112), -INT8_C( 31), -INT8_C( 92), INT8_C( 60), INT8_C( 70), -INT8_C( 8), INT8_C( 108), INT8_C( 110), 
INT8_C( 82), INT8_C( 62), -INT8_C( 116), INT8_C( 65), -INT8_C( 36) }, + { INT8_C( 74), -INT8_C( 97), INT8_C( 60), -INT8_C( 126), INT8_C( 13), INT8_C( 36), -INT8_C( 27), INT8_C( 50), -INT8_C( 59), INT8_C( 97), INT8_C( 114), -INT8_C( 111), -INT8_C( 9), INT8_C( 100), INT8_C( 11), -INT8_C( 27) }, + { INT8_C( 42), INT8_C( 111), -INT8_C( 4), INT8_C( 18), INT8_C( 45), -INT8_C( 124), -INT8_C( 99), -INT8_C( 38), INT8_C( 13), -INT8_C( 127), -INT8_C( 46), INT8_C( 73), -INT8_C( 41), -INT8_C( 116), INT8_C( 59), -INT8_C( 115) }, + { -INT8_C( 22), -INT8_C( 17), -INT8_C( 100), INT8_C( 50), -INT8_C( 115), INT8_C( 36), INT8_C( 125), INT8_C( 58), INT8_C( 45), INT8_C( 33), INT8_C( 82), INT8_C( 105), INT8_C( 23), INT8_C( 108), -INT8_C( 69), INT8_C( 13) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int8x16_t a = simde_vld1q_s8(test_vec[i].a); + simde_int8x16_t b = simde_vld1q_s8(test_vec[i].b); + + simde_int8x16_t r0 = simde_vsliq_n_s8(a, b, 0); + simde_int8x16_t r3 = simde_vsliq_n_s8(a, b, 3); + simde_int8x16_t r5 = simde_vsliq_n_s8(a, b, 5); + simde_int8x16_t r7 = simde_vsliq_n_s8(a, b, 7); + + simde_test_arm_neon_assert_equal_i8x16(r0, simde_vld1q_s8(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_i8x16(r3, simde_vld1q_s8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i8x16(r5, simde_vld1q_s8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_i8x16(r7, simde_vld1q_s8(test_vec[i].r7)); + } + return 0; +} + +static int +test_simde_vsliq_n_s16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int16_t a[8]; + int16_t b[8]; + int16_t r3[8]; + int16_t r6[8]; + int16_t r10[8]; + int16_t r13[8]; + int16_t r15[8]; + } test_vec[] = { + { { -INT16_C( 29857), INT16_C( 2406), -INT16_C( 11751), INT16_C( 26063), -INT16_C( 23473), -INT16_C( 31823), INT16_C( 16138), -INT16_C( 20066) }, + { -INT16_C( 4911), INT16_C( 31239), -INT16_C( 17957), -INT16_C( 32668), -INT16_C( 25285), -INT16_C( 17755), INT16_C( 16852), -INT16_C( 29183) }, + { INT16_C( 
26255), -INT16_C( 12226), -INT16_C( 12583), INT16_C( 807), -INT16_C( 5665), -INT16_C( 10967), INT16_C( 3746), INT16_C( 28686) }, + { INT16_C( 13407), -INT16_C( 32282), INT16_C( 30425), INT16_C( 6415), INT16_C( 20175), -INT16_C( 22159), INT16_C( 29962), -INT16_C( 32674) }, + { INT16_C( 18271), INT16_C( 7526), INT16_C( 28185), -INT16_C( 28209), -INT16_C( 5041), -INT16_C( 26703), INT16_C( 21258), INT16_C( 1438) }, + { INT16_C( 11103), -INT16_C( 5786), INT16_C( 29209), -INT16_C( 31281), INT16_C( 25679), -INT16_C( 23631), -INT16_C( 24822), INT16_C( 12702) }, + { -INT16_C( 29857), -INT16_C( 30362), -INT16_C( 11751), INT16_C( 26063), -INT16_C( 23473), -INT16_C( 31823), INT16_C( 16138), -INT16_C( 20066) } }, + { { INT16_C( 22305), INT16_C( 2996), -INT16_C( 8798), INT16_C( 21771), INT16_C( 20048), -INT16_C( 17424), -INT16_C( 7311), -INT16_C( 13384) }, + { INT16_C( 3899), -INT16_C( 13236), INT16_C( 23899), INT16_C( 10311), INT16_C( 358), -INT16_C( 22886), -INT16_C( 4505), -INT16_C( 13942) }, + { INT16_C( 31193), INT16_C( 25188), -INT16_C( 5414), INT16_C( 16955), INT16_C( 2864), INT16_C( 13520), INT16_C( 29497), INT16_C( 19536) }, + { -INT16_C( 12575), INT16_C( 4916), INT16_C( 22242), INT16_C( 4555), INT16_C( 22928), -INT16_C( 22864), -INT16_C( 26127), INT16_C( 25272) }, + { -INT16_C( 4319), INT16_C( 13236), INT16_C( 28066), INT16_C( 7435), -INT16_C( 26032), INT16_C( 27632), -INT16_C( 24719), INT16_C( 11192) }, + { INT16_C( 30497), -INT16_C( 29772), INT16_C( 32162), -INT16_C( 2805), -INT16_C( 12720), INT16_C( 23536), -INT16_C( 7311), INT16_C( 19384) }, + { -INT16_C( 10463), INT16_C( 2996), -INT16_C( 8798), -INT16_C( 10997), INT16_C( 20048), INT16_C( 15344), -INT16_C( 7311), INT16_C( 19384) } }, + { { -INT16_C( 1299), -INT16_C( 23418), -INT16_C( 31396), INT16_C( 14574), INT16_C( 20335), INT16_C( 24078), -INT16_C( 16568), -INT16_C( 30262) }, + { -INT16_C( 9023), -INT16_C( 23614), -INT16_C( 28517), -INT16_C( 32122), INT16_C( 6327), INT16_C( 32095), -INT16_C( 8933), INT16_C( 
26767) }, + { -INT16_C( 6643), INT16_C( 7702), -INT16_C( 31524), INT16_C( 5174), -INT16_C( 14913), -INT16_C( 5378), -INT16_C( 5928), INT16_C( 17530) }, + { INT16_C( 12397), -INT16_C( 3962), INT16_C( 9948), -INT16_C( 24146), INT16_C( 11759), INT16_C( 22478), INT16_C( 18120), INT16_C( 9162) }, + { INT16_C( 1773), INT16_C( 2182), INT16_C( 27996), INT16_C( 6382), -INT16_C( 8337), INT16_C( 32270), INT16_C( 28488), INT16_C( 15818) }, + { INT16_C( 15085), INT16_C( 17542), INT16_C( 25948), -INT16_C( 10002), -INT16_C( 4241), -INT16_C( 498), INT16_C( 32584), -INT16_C( 5686) }, + { -INT16_C( 1299), INT16_C( 9350), -INT16_C( 31396), INT16_C( 14574), -INT16_C( 12433), -INT16_C( 8690), -INT16_C( 16568), -INT16_C( 30262) } }, + { { INT16_C( 7035), -INT16_C( 28128), -INT16_C( 27633), -INT16_C( 6964), INT16_C( 3245), INT16_C( 16127), -INT16_C( 23649), INT16_C( 10659) }, + { -INT16_C( 6328), INT16_C( 13357), -INT16_C( 9860), INT16_C( 5121), INT16_C( 2322), -INT16_C( 9202), -INT16_C( 5031), INT16_C( 2542) }, + { INT16_C( 14915), -INT16_C( 24216), -INT16_C( 13337), -INT16_C( 24564), INT16_C( 18581), -INT16_C( 8073), INT16_C( 25295), INT16_C( 20339) }, + { -INT16_C( 11717), INT16_C( 2912), INT16_C( 24335), INT16_C( 76), INT16_C( 17581), INT16_C( 959), INT16_C( 5727), INT16_C( 31651) }, + { INT16_C( 9083), -INT16_C( 18912), -INT16_C( 4081), INT16_C( 1228), INT16_C( 18605), INT16_C( 15103), INT16_C( 26527), -INT16_C( 18013) }, + { INT16_C( 7035), -INT16_C( 19936), -INT16_C( 27633), INT16_C( 9420), INT16_C( 19629), -INT16_C( 8449), INT16_C( 9119), -INT16_C( 13917) }, + { INT16_C( 7035), -INT16_C( 28128), INT16_C( 5135), -INT16_C( 6964), INT16_C( 3245), INT16_C( 16127), -INT16_C( 23649), INT16_C( 10659) } }, + { { -INT16_C( 14045), INT16_C( 6950), INT16_C( 25104), -INT16_C( 22985), -INT16_C( 559), -INT16_C( 28091), INT16_C( 4901), INT16_C( 28381) }, + { -INT16_C( 25976), -INT16_C( 15056), INT16_C( 10123), INT16_C( 5179), INT16_C( 9318), -INT16_C( 26683), -INT16_C( 29282), INT16_C( 29906) 
}, + { -INT16_C( 11197), INT16_C( 10630), INT16_C( 15448), -INT16_C( 24097), INT16_C( 9009), -INT16_C( 16851), INT16_C( 27893), -INT16_C( 22891) }, + { -INT16_C( 24029), INT16_C( 19494), -INT16_C( 7472), INT16_C( 3831), INT16_C( 6545), -INT16_C( 3771), INT16_C( 26533), INT16_C( 13469) }, + { INT16_C( 8483), -INT16_C( 15578), INT16_C( 11792), -INT16_C( 4553), -INT16_C( 26159), INT16_C( 5701), INT16_C( 31525), INT16_C( 19165) }, + { INT16_C( 2339), INT16_C( 6950), INT16_C( 25104), INT16_C( 26167), -INT16_C( 8751), -INT16_C( 19899), -INT16_C( 11483), INT16_C( 20189) }, + { INT16_C( 18723), INT16_C( 6950), -INT16_C( 7664), -INT16_C( 22985), INT16_C( 32209), -INT16_C( 28091), INT16_C( 4901), INT16_C( 28381) } }, + { { -INT16_C( 7004), -INT16_C( 1843), -INT16_C( 19583), INT16_C( 21568), INT16_C( 21139), INT16_C( 8147), INT16_C( 22889), -INT16_C( 8842) }, + { -INT16_C( 14802), -INT16_C( 15206), INT16_C( 30112), INT16_C( 8024), INT16_C( 23550), INT16_C( 30909), -INT16_C( 13817), INT16_C( 27018) }, + { INT16_C( 12660), INT16_C( 9429), -INT16_C( 21247), -INT16_C( 1344), -INT16_C( 8205), -INT16_C( 14869), INT16_C( 20537), INT16_C( 19542) }, + { -INT16_C( 29788), INT16_C( 9869), INT16_C( 26625), -INT16_C( 10752), -INT16_C( 109), INT16_C( 12115), -INT16_C( 32279), INT16_C( 25270) }, + { -INT16_C( 18268), INT16_C( 26829), -INT16_C( 31871), INT16_C( 24640), -INT16_C( 1389), -INT16_C( 2093), INT16_C( 7529), INT16_C( 10614) }, + { -INT16_C( 15196), INT16_C( 22733), INT16_C( 4993), INT16_C( 5184), -INT16_C( 11629), -INT16_C( 16429), -INT16_C( 1687), INT16_C( 23926) }, + { INT16_C( 25764), INT16_C( 30925), INT16_C( 13185), INT16_C( 21568), INT16_C( 21139), -INT16_C( 24621), -INT16_C( 9879), INT16_C( 23926) } }, + { { INT16_C( 17169), INT16_C( 24072), -INT16_C( 12994), -INT16_C( 8073), -INT16_C( 12777), INT16_C( 1825), -INT16_C( 5150), -INT16_C( 31600) }, + { -INT16_C( 7078), INT16_C( 21725), INT16_C( 27331), -INT16_C( 7935), -INT16_C( 9434), INT16_C( 29126), INT16_C( 21858), 
-INT16_C( 2211) }, + { INT16_C( 8913), -INT16_C( 22808), INT16_C( 22046), INT16_C( 2063), -INT16_C( 9929), -INT16_C( 29135), -INT16_C( 21742), -INT16_C( 17688) }, + { INT16_C( 5777), INT16_C( 14152), -INT16_C( 20226), INT16_C( 16503), -INT16_C( 13929), INT16_C( 29089), INT16_C( 22690), -INT16_C( 10416) }, + { INT16_C( 27409), INT16_C( 30216), INT16_C( 3390), INT16_C( 1143), -INT16_C( 26089), INT16_C( 6945), -INT16_C( 29726), INT16_C( 29840) }, + { INT16_C( 17169), -INT16_C( 16888), INT16_C( 27966), INT16_C( 8311), -INT16_C( 12777), -INT16_C( 14559), INT16_C( 19426), -INT16_C( 23408) }, + { INT16_C( 17169), -INT16_C( 8696), -INT16_C( 12994), -INT16_C( 8073), INT16_C( 19991), INT16_C( 1825), INT16_C( 27618), -INT16_C( 31600) } }, + { { -INT16_C( 23136), -INT16_C( 21049), INT16_C( 4129), -INT16_C( 8019), INT16_C( 25623), INT16_C( 9637), INT16_C( 8632), INT16_C( 32502) }, + { -INT16_C( 1800), INT16_C( 21742), INT16_C( 13572), -INT16_C( 19366), -INT16_C( 6353), INT16_C( 26854), INT16_C( 902), INT16_C( 24083) }, + { -INT16_C( 14400), -INT16_C( 22665), -INT16_C( 22495), -INT16_C( 23851), INT16_C( 14719), INT16_C( 18229), INT16_C( 7216), -INT16_C( 3938) }, + { INT16_C( 15904), INT16_C( 15239), INT16_C( 16673), INT16_C( 5805), -INT16_C( 13353), INT16_C( 14757), -INT16_C( 7752), -INT16_C( 31498) }, + { -INT16_C( 7776), -INT16_C( 17977), INT16_C( 4129), INT16_C( 26797), -INT16_C( 17385), -INT16_C( 26203), INT16_C( 6584), INT16_C( 20214) }, + { INT16_C( 1440), -INT16_C( 12857), -INT16_C( 28639), INT16_C( 16557), -INT16_C( 7145), -INT16_C( 14939), -INT16_C( 15944), INT16_C( 32502) }, + { INT16_C( 9632), INT16_C( 11719), INT16_C( 4129), INT16_C( 24749), -INT16_C( 7145), INT16_C( 9637), INT16_C( 8632), -INT16_C( 266) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int16x8_t a = simde_vld1q_s16(test_vec[i].a); + simde_int16x8_t b = simde_vld1q_s16(test_vec[i].b); + + simde_int16x8_t r3 = simde_vsliq_n_s16(a, b, 3); + simde_int16x8_t 
r6 = simde_vsliq_n_s16(a, b, 6); + simde_int16x8_t r10 = simde_vsliq_n_s16(a, b, 10); + simde_int16x8_t r13 = simde_vsliq_n_s16(a, b, 13); + simde_int16x8_t r15 = simde_vsliq_n_s16(a, b, 15); + + simde_test_arm_neon_assert_equal_i16x8(r3, simde_vld1q_s16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_i16x8(r6, simde_vld1q_s16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i16x8(r10, simde_vld1q_s16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_i16x8(r13, simde_vld1q_s16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i16x8(r15, simde_vld1q_s16(test_vec[i].r15)); + } + return 0; +} + +static int +test_simde_vsliq_n_s32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int32_t a[4]; + int32_t b[4]; + int32_t r6[4]; + int32_t r13[4]; + int32_t r19[4]; + int32_t r26[4]; + int32_t r31[4]; + } test_vec[] = { + { { -INT32_C( 2048672908), INT32_C( 482466515), -INT32_C( 375121982), INT32_C( 1919378555) }, + { -INT32_C( 92957730), INT32_C( 1419974623), INT32_C( 1820210245), -INT32_C( 127918576) }, + { -INT32_C( 1654327372), INT32_C( 684062675), INT32_C( 529338690), INT32_C( 403145787) }, + { -INT32_C( 1300505740), INT32_C( 1660680915), -INT32_C( 964118590), INT32_C( 63046779) }, + { -INT32_C( 1628193932), -INT32_C( 1090921773), -INT32_C( 1574168638), -INT32_C( 259562373) }, + { INT32_C( 2044967796), INT32_C( 2093079251), INT32_C( 363075522), INT32_C( 1114072187) }, + { INT32_C( 98810740), -INT32_C( 1665017133), -INT32_C( 375121982), INT32_C( 1919378555) } }, + { { INT32_C( 429638935), INT32_C( 807280638), INT32_C( 482365039), INT32_C( 1712721656) }, + { INT32_C( 396810675), INT32_C( 2144555430), -INT32_C( 705681075), INT32_C( 287557555) }, + { -INT32_C( 373920553), -INT32_C( 187405890), INT32_C( 2081051503), INT32_C( 1223814392) }, + { -INT32_C( 617192169), INT32_C( 1781850110), INT32_C( 86617711), INT32_C( 2029416184) }, + { -INT32_C( 845429481), -INT32_C( 1925832706), INT32_C( 1248349807), INT32_C( 1033768696) }, + { -INT32_C( 845429481), -INT32_C( 
1742856194), INT32_C( 885018223), -INT32_C( 837415176) }, + { -INT32_C( 1717844713), INT32_C( 807280638), -INT32_C( 1665118609), -INT32_C( 434761992) } }, + { { -INT32_C( 517994438), INT32_C( 1285438865), -INT32_C( 338029974), INT32_C( 40999571) }, + { INT32_C( 291353625), -INT32_C( 1038552292), -INT32_C( 2119572785), -INT32_C( 1620388047) }, + { INT32_C( 1466762874), -INT32_C( 2042837231), INT32_C( 1786295274), -INT32_C( 625619885) }, + { -INT32_C( 1232918470), INT32_C( 509844881), INT32_C( 1012527722), INT32_C( 1525037715) }, + { -INT32_C( 1597503430), -INT32_C( 1729741423), INT32_C( 377098858), -INT32_C( 1182164333) }, + { INT32_C( 1696598074), INT32_C( 1889418641), INT32_C( 1071256170), -INT32_C( 965633389) }, + { -INT32_C( 517994438), INT32_C( 1285438865), -INT32_C( 338029974), -INT32_C( 2106484077) } }, + { { -INT32_C( 61481464), INT32_C( 164801062), -INT32_C( 1442367081), INT32_C( 600232314) }, + { INT32_C( 268218714), -INT32_C( 64451073), -INT32_C( 735524508), -INT32_C( 1224314185) }, + { -INT32_C( 13871480), INT32_C( 170098662), INT32_C( 171071767), -INT32_C( 1046696454) }, + { -INT32_C( 1775542776), INT32_C( 297789990), INT32_C( 422353303), -INT32_C( 833162886) }, + { -INT32_C( 1965695480), INT32_C( 1878698534), INT32_C( 1260861847), -INT32_C( 1782656646) }, + { INT32_C( 1750457864), -INT32_C( 36525530), -INT32_C( 1845020265), -INT32_C( 540618374) }, + { INT32_C( 2086002184), -INT32_C( 1982682586), INT32_C( 705116567), -INT32_C( 1547251334) } }, + { { -INT32_C( 126851217), -INT32_C( 1366643457), -INT32_C( 1960710568), INT32_C( 1373905720) }, + { INT32_C( 491803898), INT32_C( 803598513), -INT32_C( 1851430137), -INT32_C( 750917567) }, + { INT32_C( 1410678447), -INT32_C( 109302657), INT32_C( 1767555544), -INT32_C( 814083976) }, + { INT32_C( 178210671), -INT32_C( 1105842945), -INT32_C( 1386156456), -INT32_C( 1123540168) }, + { -INT32_C( 1479514257), -INT32_C( 2054509313), INT32_C( 1480191576), INT32_C( 1108091704) }, + { -INT32_C( 395286673), -INT32_C( 
963990273), INT32_C( 522317400), INT32_C( 98837304) }, + { INT32_C( 2020632431), -INT32_C( 1366643457), -INT32_C( 1960710568), -INT32_C( 773577928) } }, + { { -INT32_C( 2137359464), -INT32_C( 615731445), INT32_C( 708318726), INT32_C( 944041089) }, + { INT32_C( 938801535), -INT32_C( 84228041), INT32_C( 353656812), INT32_C( 158358579) }, + { -INT32_C( 46243880), -INT32_C( 1095627317), INT32_C( 1159199494), INT32_C( 1545014465) }, + { -INT32_C( 1624245352), INT32_C( 1493626635), -INT32_C( 1946315258), INT32_C( 193358977) }, + { -INT32_C( 872776808), INT32_C( 1102884619), -INT32_C( 10480122), -INT32_C( 509809535) }, + { -INT32_C( 56984680), -INT32_C( 548622581), -INT32_C( 1304947194), -INT32_C( 867898239) }, + { -INT32_C( 2137359464), -INT32_C( 615731445), INT32_C( 708318726), -INT32_C( 1203442559) } }, + { { INT32_C( 1569927325), INT32_C( 1349793024), INT32_C( 1347867252), INT32_C( 583373983) }, + { INT32_C( 198226018), INT32_C( 1371438700), INT32_C( 322817713), INT32_C( 1605845413) }, + { -INT32_C( 198436707), INT32_C( 1872730880), -INT32_C( 814502796), -INT32_C( 305108641) }, + { INT32_C( 369905821), -INT32_C( 808610560), -INT32_C( 1177144716), -INT32_C( 399199073) }, + { -INT32_C( 2095894371), -INT32_C( 211536640), INT32_C( 1972294260), INT32_C( 221090975) }, + { -INT32_C( 1986842467), -INT32_C( 1334561536), -INT32_C( 1000942988), -INT32_C( 1765436257) }, + { INT32_C( 1569927325), INT32_C( 1349793024), -INT32_C( 799616396), -INT32_C( 1564109665) } }, + { { INT32_C( 2115085206), INT32_C( 1904667615), -INT32_C( 1478857215), -INT32_C( 1378658545) }, + { INT32_C( 738556760), INT32_C( 523005912), -INT32_C( 406383756), INT32_C( 1614439741) }, + { INT32_C( 22992406), -INT32_C( 887359969), -INT32_C( 238756607), INT32_C( 244928335) }, + { -INT32_C( 1351941226), -INT32_C( 1912927265), -INT32_C( 496071167), INT32_C( 1286059791) }, + { -INT32_C( 624843882), INT32_C( 2126965727), -INT32_C( 1683853823), INT32_C( 703289103) }, + { INT32_C( 1645323158), INT32_C( 1636232159), 
-INT32_C( 740659711), -INT32_C( 170698993) }, + { INT32_C( 2115085206), INT32_C( 1904667615), INT32_C( 668626433), -INT32_C( 1378658545) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int32x4_t a = simde_vld1q_s32(test_vec[i].a); + simde_int32x4_t b = simde_vld1q_s32(test_vec[i].b); + + simde_int32x4_t r6 = simde_vsliq_n_s32(a, b, 6); + simde_int32x4_t r13 = simde_vsliq_n_s32(a, b, 13); + simde_int32x4_t r19 = simde_vsliq_n_s32(a, b, 19); + simde_int32x4_t r26 = simde_vsliq_n_s32(a, b, 26); + simde_int32x4_t r31 = simde_vsliq_n_s32(a, b, 31); + + simde_test_arm_neon_assert_equal_i32x4(r6, simde_vld1q_s32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_i32x4(r13, simde_vld1q_s32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i32x4(r19, simde_vld1q_s32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_i32x4(r26, simde_vld1q_s32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i32x4(r31, simde_vld1q_s32(test_vec[i].r31)); + } + return 0; +} + +static int +test_simde_vsliq_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int64_t a[2]; + int64_t b[2]; + int64_t r13[2]; + int64_t r26[2]; + int64_t r39[2]; + int64_t r52[2]; + int64_t r63[2]; + } test_vec[] = { + { { -INT64_C( 5263643971819047857), INT64_C( 541502720996737267) }, + { INT64_C( 3946797069985802130), -INT64_C( 42445362033043898) }, + { -INT64_C( 4980763889152928689), INT64_C( 2775731625785868531) }, + { INT64_C( 1780111104754152527), -INT64_C( 6041964446011948813) }, + { -INT64_C( 8704392437862665137), -INT64_C( 3158391839620661005) }, + { INT64_C( 8729040070421083215), INT64_C( 7233851767269294323) }, + { INT64_C( 3959728065035727951), INT64_C( 541502720996737267) } }, + { { -INT64_C( 7912615021952819050), -INT64_C( 5293486681595043945) }, + { INT64_C( 6940172625691428387), INT64_C( 9115758130503661579) }, + { INT64_C( 1028914491343265942), INT64_C( 3870594709730721687) }, + { -INT64_C( 1294528601230335850), -INT64_C( 2041200592708952169) }, + 
{ INT64_C( 2099541159110320278), -INT64_C( 8765124277553586281) }, + { INT64_C( 7075364537936191638), -INT64_C( 9171085960761041001) }, + { -INT64_C( 7912615021952819050), -INT64_C( 5293486681595043945) } }, + { { INT64_C( 2180577019911891298), -INT64_C( 1541747926973591080) }, + { -INT64_C( 8065666064468080886), -INT64_C( 6826784933820909701) }, + { INT64_C( 2300871905095272802), INT64_C( 5505853626468233688) }, + { -INT64_C( 3829796790686029470), INT64_C( 1663647807938712024) }, + { INT64_C( 4216360182134499682), -INT64_C( 3541027852909850152) }, + { INT64_C( 8116321328786205026), INT64_C( 8627380031628988888) }, + { INT64_C( 2180577019911891298), -INT64_C( 1541747926973591080) } }, + { { -INT64_C( 1415110380634122788), INT64_C( 3816863978450722004) }, + { INT64_C( 6818280905982752686), INT64_C( 9098726142237371117) }, + { -INT64_C( 1383873381812283940), -INT64_C( 6528244651753882412) }, + { INT64_C( 8056861525103990236), -INT64_C( 2269117483815658284) }, + { -INT64_C( 640682009993816612), INT64_C( 5707598467225492692) }, + { INT64_C( 8857600369397978588), -INT64_C( 5847860821886362412) }, + { INT64_C( 7808261656220653020), -INT64_C( 5406508058404053804) } }, + { { -INT64_C( 1783781172000361495), INT64_C( 7705716969977077247) }, + { -INT64_C( 8358257093625578635), -INT64_C( 7084006825256911651) }, + { INT64_C( 3471890629115425769), INT64_C( 1272943385629146623) }, + { -INT64_C( 3151327946570763287), INT64_C( 5541813428062969343) }, + { -INT64_C( 8683578925376113687), INT64_C( 1098437138345549311) }, + { -INT64_C( 5233538486566161431), -INT64_C( 3616332493232349697) }, + { -INT64_C( 1783781172000361495), -INT64_C( 1517655066877698561) } }, + { { -INT64_C( 4154394067891526097), -INT64_C( 2189220270624961150) }, + { -INT64_C( 6795139893074132725), -INT64_C( 309058967878318537) }, + { INT64_C( 6487610392131495471), -INT64_C( 4607126760976880254) }, + { INT64_C( 1434655984018908719), INT64_C( 455948887150071170) }, + { INT64_C( 2125845925662680623), INT64_C( 
8890980614938856834) }, + { INT64_C( 1204889488679364143), INT64_C( 7169259755050929538) }, + { -INT64_C( 4154394067891526097), -INT64_C( 2189220270624961150) } }, + { { -INT64_C( 2333047277017444018), INT64_C( 7658204717489207402) }, + { -INT64_C( 6281729226854084346), INT64_C( 6339895830207820242) }, + { INT64_C( 6490139260990050638), INT64_C( 8842073570075629674) }, + { INT64_C( 3704405599550624078), -INT64_C( 6097291397849008022) }, + { INT64_C( 1596670502572577102), INT64_C( 4771820118565599338) }, + { INT64_C( 1184264031958913358), INT64_C( 2100762777314015338) }, + { INT64_C( 6890324759837331790), INT64_C( 7658204717489207402) } }, + { { INT64_C( 8524677170650393138), INT64_C( 3185460102790185058) }, + { -INT64_C( 4517026812874662391), INT64_C( 3366551090117233348) }, + { INT64_C( 684960792126237234), INT64_C( 904140044595920994) }, + { INT64_C( 3388610690435492402), -INT64_C( 8875872301439188894) }, + { -INT64_C( 2851054848273569230), INT64_C( 5919245132162203746) }, + { -INT64_C( 2261443936901944782), -INT64_C( 6033408334437220254) }, + { -INT64_C( 698694866204382670), INT64_C( 3185460102790185058) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_int64x2_t a = simde_vld1q_s64(test_vec[i].a); + simde_int64x2_t b = simde_vld1q_s64(test_vec[i].b); + + simde_int64x2_t r13 = simde_vsliq_n_s64(a, b, 13); + simde_int64x2_t r26 = simde_vsliq_n_s64(a, b, 26); + simde_int64x2_t r39 = simde_vsliq_n_s64(a, b, 39); + simde_int64x2_t r52 = simde_vsliq_n_s64(a, b, 52); + simde_int64x2_t r63 = simde_vsliq_n_s64(a, b, 63); + + simde_test_arm_neon_assert_equal_i64x2(r13, simde_vld1q_s64(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_i64x2(r26, simde_vld1q_s64(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_i64x2(r39, simde_vld1q_s64(test_vec[i].r39)); + simde_test_arm_neon_assert_equal_i64x2(r52, simde_vld1q_s64(test_vec[i].r52)); + simde_test_arm_neon_assert_equal_i64x2(r63, simde_vld1q_s64(test_vec[i].r63)); + } 
+ return 0; +} + +static int +test_simde_vsliq_n_u8 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint8_t a[16]; + uint8_t b[16]; + uint8_t r0[16]; + uint8_t r3[16]; + uint8_t r5[16]; + uint8_t r7[16]; + } test_vec[] = { + { { UINT8_C( 244), UINT8_C( 66), UINT8_C( 116), UINT8_C( 69), UINT8_C( 184), UINT8_C( 184), UINT8_C( 36), UINT8_C( 119), UINT8_C( 145), UINT8_C( 73), UINT8_C( 68), UINT8_C( 242), UINT8_C( 252), UINT8_C( 205), UINT8_C( 128), UINT8_C( 96) }, + { UINT8_C( 152), UINT8_C( 85), UINT8_C( 203), UINT8_C( 155), UINT8_C( 94), UINT8_C( 94), UINT8_C( 115), UINT8_C( 52), UINT8_C( 255), UINT8_C( 54), UINT8_C( 182), UINT8_C( 141), UINT8_C( 92), UINT8_C( 173), UINT8_C( 209), UINT8_C( 117) }, + { UINT8_C( 152), UINT8_C( 85), UINT8_C( 203), UINT8_C( 155), UINT8_C( 94), UINT8_C( 94), UINT8_C( 115), UINT8_C( 52), UINT8_MAX, UINT8_C( 54), UINT8_C( 182), UINT8_C( 141), UINT8_C( 92), UINT8_C( 173), UINT8_C( 209), UINT8_C( 117) }, + { UINT8_C( 196), UINT8_C( 170), UINT8_C( 92), UINT8_C( 221), UINT8_C( 240), UINT8_C( 240), UINT8_C( 156), UINT8_C( 167), UINT8_C( 249), UINT8_C( 177), UINT8_C( 180), UINT8_C( 106), UINT8_C( 228), UINT8_C( 109), UINT8_C( 136), UINT8_C( 168) }, + { UINT8_C( 20), UINT8_C( 162), UINT8_C( 116), UINT8_C( 101), UINT8_C( 216), UINT8_C( 216), UINT8_C( 100), UINT8_C( 151), UINT8_C( 241), UINT8_C( 201), UINT8_C( 196), UINT8_C( 178), UINT8_C( 156), UINT8_C( 173), UINT8_C( 32), UINT8_C( 160) }, + { UINT8_C( 116), UINT8_C( 194), UINT8_C( 244), UINT8_C( 197), UINT8_C( 56), UINT8_C( 56), UINT8_C( 164), UINT8_C( 119), UINT8_C( 145), UINT8_C( 73), UINT8_C( 68), UINT8_C( 242), UINT8_C( 124), UINT8_C( 205), UINT8_C( 128), UINT8_C( 224) } }, + { { UINT8_C( 196), UINT8_C( 145), UINT8_C( 235), UINT8_C( 187), UINT8_C( 234), UINT8_C( 71), UINT8_C( 208), UINT8_C( 39), UINT8_C( 217), UINT8_C( 216), UINT8_C( 187), UINT8_C( 169), UINT8_C( 88), UINT8_C( 246), UINT8_C( 218), UINT8_C( 150) }, + { UINT8_C( 139), UINT8_C( 183), UINT8_C( 107), UINT8_C( 236), UINT8_C( 111), UINT8_C( 
223), UINT8_C( 146), UINT8_C( 125), UINT8_C( 1), UINT8_C( 153), UINT8_C( 203), UINT8_C( 5), UINT8_C( 159), UINT8_C( 47), UINT8_C( 104), UINT8_C( 122) }, + { UINT8_C( 139), UINT8_C( 183), UINT8_C( 107), UINT8_C( 236), UINT8_C( 111), UINT8_C( 223), UINT8_C( 146), UINT8_C( 125), UINT8_C( 1), UINT8_C( 153), UINT8_C( 203), UINT8_C( 5), UINT8_C( 159), UINT8_C( 47), UINT8_C( 104), UINT8_C( 122) }, + { UINT8_C( 92), UINT8_C( 185), UINT8_C( 91), UINT8_C( 99), UINT8_C( 122), UINT8_MAX, UINT8_C( 144), UINT8_C( 239), UINT8_C( 9), UINT8_C( 200), UINT8_C( 91), UINT8_C( 41), UINT8_C( 248), UINT8_C( 126), UINT8_C( 66), UINT8_C( 214) }, + { UINT8_C( 100), UINT8_C( 241), UINT8_C( 107), UINT8_C( 155), UINT8_C( 234), UINT8_C( 231), UINT8_C( 80), UINT8_C( 167), UINT8_C( 57), UINT8_C( 56), UINT8_C( 123), UINT8_C( 169), UINT8_C( 248), UINT8_C( 246), UINT8_C( 26), UINT8_C( 86) }, + { UINT8_C( 196), UINT8_C( 145), UINT8_C( 235), UINT8_C( 59), UINT8_C( 234), UINT8_C( 199), UINT8_C( 80), UINT8_C( 167), UINT8_C( 217), UINT8_C( 216), UINT8_C( 187), UINT8_C( 169), UINT8_C( 216), UINT8_C( 246), UINT8_C( 90), UINT8_C( 22) } }, + { { UINT8_C( 99), UINT8_C( 213), UINT8_C( 98), UINT8_C( 126), UINT8_C( 100), UINT8_C( 135), UINT8_C( 177), UINT8_C( 214), UINT8_C( 144), UINT8_C( 59), UINT8_C( 153), UINT8_C( 4), UINT8_C( 22), UINT8_C( 60), UINT8_C( 89), UINT8_C( 55) }, + { UINT8_C( 110), UINT8_C( 86), UINT8_C( 122), UINT8_C( 11), UINT8_C( 21), UINT8_C( 130), UINT8_C( 209), UINT8_C( 56), UINT8_C( 211), UINT8_C( 181), UINT8_C( 9), UINT8_C( 238), UINT8_C( 204), UINT8_C( 88), UINT8_C( 131), UINT8_C( 141) }, + { UINT8_C( 110), UINT8_C( 86), UINT8_C( 122), UINT8_C( 11), UINT8_C( 21), UINT8_C( 130), UINT8_C( 209), UINT8_C( 56), UINT8_C( 211), UINT8_C( 181), UINT8_C( 9), UINT8_C( 238), UINT8_C( 204), UINT8_C( 88), UINT8_C( 131), UINT8_C( 141) }, + { UINT8_C( 115), UINT8_C( 181), UINT8_C( 210), UINT8_C( 94), UINT8_C( 172), UINT8_C( 23), UINT8_C( 137), UINT8_C( 198), UINT8_C( 152), UINT8_C( 171), UINT8_C( 73), 
UINT8_C( 116), UINT8_C( 102), UINT8_C( 196), UINT8_C( 25), UINT8_C( 111) }, + { UINT8_C( 195), UINT8_C( 213), UINT8_C( 66), UINT8_C( 126), UINT8_C( 164), UINT8_C( 71), UINT8_C( 49), UINT8_C( 22), UINT8_C( 112), UINT8_C( 187), UINT8_C( 57), UINT8_C( 196), UINT8_C( 150), UINT8_C( 28), UINT8_C( 121), UINT8_C( 183) }, + { UINT8_C( 99), UINT8_C( 85), UINT8_C( 98), UINT8_C( 254), UINT8_C( 228), UINT8_C( 7), UINT8_C( 177), UINT8_C( 86), UINT8_C( 144), UINT8_C( 187), UINT8_C( 153), UINT8_C( 4), UINT8_C( 22), UINT8_C( 60), UINT8_C( 217), UINT8_C( 183) } }, + { { UINT8_C( 180), UINT8_C( 141), UINT8_C( 233), UINT8_C( 117), UINT8_C( 209), UINT8_C( 84), UINT8_C( 61), UINT8_C( 212), UINT8_C( 173), UINT8_C( 50), UINT8_C( 112), UINT8_C( 18), UINT8_C( 188), UINT8_C( 32), UINT8_C( 189), UINT8_C( 127) }, + { UINT8_C( 190), UINT8_C( 224), UINT8_C( 111), UINT8_C( 107), UINT8_C( 92), UINT8_C( 61), UINT8_C( 107), UINT8_C( 194), UINT8_C( 193), UINT8_C( 92), UINT8_C( 169), UINT8_C( 27), UINT8_C( 120), UINT8_C( 102), UINT8_C( 237), UINT8_C( 120) }, + { UINT8_C( 190), UINT8_C( 224), UINT8_C( 111), UINT8_C( 107), UINT8_C( 92), UINT8_C( 61), UINT8_C( 107), UINT8_C( 194), UINT8_C( 193), UINT8_C( 92), UINT8_C( 169), UINT8_C( 27), UINT8_C( 120), UINT8_C( 102), UINT8_C( 237), UINT8_C( 120) }, + { UINT8_C( 244), UINT8_C( 5), UINT8_C( 121), UINT8_C( 93), UINT8_C( 225), UINT8_C( 236), UINT8_C( 93), UINT8_C( 20), UINT8_C( 13), UINT8_C( 226), UINT8_C( 72), UINT8_C( 218), UINT8_C( 196), UINT8_C( 48), UINT8_C( 109), UINT8_C( 199) }, + { UINT8_C( 212), UINT8_C( 13), UINT8_C( 233), UINT8_C( 117), UINT8_C( 145), UINT8_C( 180), UINT8_C( 125), UINT8_C( 84), UINT8_C( 45), UINT8_C( 146), UINT8_C( 48), UINT8_C( 114), UINT8_C( 28), UINT8_C( 192), UINT8_C( 189), UINT8_C( 31) }, + { UINT8_C( 52), UINT8_C( 13), UINT8_C( 233), UINT8_C( 245), UINT8_C( 81), UINT8_C( 212), UINT8_C( 189), UINT8_C( 84), UINT8_C( 173), UINT8_C( 50), UINT8_C( 240), UINT8_C( 146), UINT8_C( 60), UINT8_C( 32), UINT8_C( 189), UINT8_C( 127) } }, 
+ { { UINT8_C( 183), UINT8_C( 36), UINT8_C( 213), UINT8_C( 190), UINT8_C( 11), UINT8_C( 205), UINT8_C( 129), UINT8_C( 21), UINT8_C( 4), UINT8_C( 108), UINT8_C( 248), UINT8_C( 225), UINT8_C( 169), UINT8_C( 75), UINT8_C( 175), UINT8_C( 43) }, + { UINT8_C( 141), UINT8_C( 83), UINT8_C( 71), UINT8_C( 14), UINT8_C( 37), UINT8_C( 24), UINT8_C( 233), UINT8_C( 220), UINT8_C( 16), UINT8_C( 80), UINT8_C( 162), UINT8_C( 85), UINT8_C( 75), UINT8_C( 254), UINT8_C( 153), UINT8_C( 29) }, + { UINT8_C( 141), UINT8_C( 83), UINT8_C( 71), UINT8_C( 14), UINT8_C( 37), UINT8_C( 24), UINT8_C( 233), UINT8_C( 220), UINT8_C( 16), UINT8_C( 80), UINT8_C( 162), UINT8_C( 85), UINT8_C( 75), UINT8_C( 254), UINT8_C( 153), UINT8_C( 29) }, + { UINT8_C( 111), UINT8_C( 156), UINT8_C( 61), UINT8_C( 118), UINT8_C( 43), UINT8_C( 197), UINT8_C( 73), UINT8_C( 229), UINT8_C( 132), UINT8_C( 132), UINT8_C( 16), UINT8_C( 169), UINT8_C( 89), UINT8_C( 243), UINT8_C( 207), UINT8_C( 235) }, + { UINT8_C( 183), UINT8_C( 100), UINT8_C( 245), UINT8_C( 222), UINT8_C( 171), UINT8_C( 13), UINT8_C( 33), UINT8_C( 149), UINT8_C( 4), UINT8_C( 12), UINT8_C( 88), UINT8_C( 161), UINT8_C( 105), UINT8_C( 203), UINT8_C( 47), UINT8_C( 171) }, + { UINT8_C( 183), UINT8_C( 164), UINT8_C( 213), UINT8_C( 62), UINT8_C( 139), UINT8_C( 77), UINT8_C( 129), UINT8_C( 21), UINT8_C( 4), UINT8_C( 108), UINT8_C( 120), UINT8_C( 225), UINT8_C( 169), UINT8_C( 75), UINT8_C( 175), UINT8_C( 171) } }, + { { UINT8_C( 234), UINT8_C( 182), UINT8_C( 103), UINT8_C( 241), UINT8_C( 11), UINT8_C( 127), UINT8_C( 118), UINT8_C( 55), UINT8_C( 177), UINT8_C( 196), UINT8_C( 189), UINT8_C( 131), UINT8_C( 254), UINT8_C( 151), UINT8_C( 156), UINT8_C( 20) }, + { UINT8_C( 38), UINT8_C( 43), UINT8_C( 107), UINT8_C( 172), UINT8_C( 184), UINT8_C( 43), UINT8_C( 121), UINT8_C( 218), UINT8_C( 74), UINT8_C( 188), UINT8_C( 8), UINT8_C( 190), UINT8_C( 231), UINT8_C( 164), UINT8_C( 142), UINT8_C( 233) }, + { UINT8_C( 38), UINT8_C( 43), UINT8_C( 107), UINT8_C( 172), UINT8_C( 184), 
UINT8_C( 43), UINT8_C( 121), UINT8_C( 218), UINT8_C( 74), UINT8_C( 188), UINT8_C( 8), UINT8_C( 190), UINT8_C( 231), UINT8_C( 164), UINT8_C( 142), UINT8_C( 233) }, + { UINT8_C( 50), UINT8_C( 94), UINT8_C( 95), UINT8_C( 97), UINT8_C( 195), UINT8_C( 95), UINT8_C( 206), UINT8_C( 215), UINT8_C( 81), UINT8_C( 228), UINT8_C( 69), UINT8_C( 243), UINT8_C( 62), UINT8_C( 39), UINT8_C( 116), UINT8_C( 76) }, + { UINT8_C( 202), UINT8_C( 118), UINT8_C( 103), UINT8_C( 145), UINT8_C( 11), UINT8_C( 127), UINT8_C( 54), UINT8_C( 87), UINT8_C( 81), UINT8_C( 132), UINT8_C( 29), UINT8_C( 195), UINT8_C( 254), UINT8_C( 151), UINT8_C( 220), UINT8_C( 52) }, + { UINT8_C( 106), UINT8_C( 182), UINT8_C( 231), UINT8_C( 113), UINT8_C( 11), UINT8_MAX, UINT8_C( 246), UINT8_C( 55), UINT8_C( 49), UINT8_C( 68), UINT8_C( 61), UINT8_C( 3), UINT8_C( 254), UINT8_C( 23), UINT8_C( 28), UINT8_C( 148) } }, + { { UINT8_C( 46), UINT8_C( 219), UINT8_C( 53), UINT8_C( 4), UINT8_C( 130), UINT8_C( 212), UINT8_C( 30), UINT8_C( 85), UINT8_C( 206), UINT8_C( 67), UINT8_C( 71), UINT8_C( 68), UINT8_C( 104), UINT8_C( 195), UINT8_C( 15), UINT8_C( 107) }, + { UINT8_C( 125), UINT8_C( 115), UINT8_C( 253), UINT8_C( 164), UINT8_C( 131), UINT8_C( 60), UINT8_C( 54), UINT8_C( 18), UINT8_C( 160), UINT8_C( 87), UINT8_C( 144), UINT8_C( 99), UINT8_C( 146), UINT8_C( 139), UINT8_C( 180), UINT8_C( 90) }, + { UINT8_C( 125), UINT8_C( 115), UINT8_C( 253), UINT8_C( 164), UINT8_C( 131), UINT8_C( 60), UINT8_C( 54), UINT8_C( 18), UINT8_C( 160), UINT8_C( 87), UINT8_C( 144), UINT8_C( 99), UINT8_C( 146), UINT8_C( 139), UINT8_C( 180), UINT8_C( 90) }, + { UINT8_C( 238), UINT8_C( 155), UINT8_C( 237), UINT8_C( 36), UINT8_C( 26), UINT8_C( 228), UINT8_C( 182), UINT8_C( 149), UINT8_C( 6), UINT8_C( 187), UINT8_C( 135), UINT8_C( 28), UINT8_C( 144), UINT8_C( 91), UINT8_C( 167), UINT8_C( 211) }, + { UINT8_C( 174), UINT8_C( 123), UINT8_C( 181), UINT8_C( 132), UINT8_C( 98), UINT8_C( 148), UINT8_C( 222), UINT8_C( 85), UINT8_C( 14), UINT8_C( 227), UINT8_C( 7), 
UINT8_C( 100), UINT8_C( 72), UINT8_C( 99), UINT8_C( 143), UINT8_C( 75) }, + { UINT8_C( 174), UINT8_C( 219), UINT8_C( 181), UINT8_C( 4), UINT8_C( 130), UINT8_C( 84), UINT8_C( 30), UINT8_C( 85), UINT8_C( 78), UINT8_C( 195), UINT8_C( 71), UINT8_C( 196), UINT8_C( 104), UINT8_C( 195), UINT8_C( 15), UINT8_C( 107) } }, + { { UINT8_C( 234), UINT8_C( 41), UINT8_C( 8), UINT8_C( 239), UINT8_C( 215), UINT8_C( 216), UINT8_C( 100), UINT8_C( 174), UINT8_C( 83), UINT8_C( 100), UINT8_C( 240), UINT8_C( 31), UINT8_C( 72), UINT8_C( 42), UINT8_C( 180), UINT8_C( 12) }, + { UINT8_C( 87), UINT8_C( 103), UINT8_C( 201), UINT8_C( 233), UINT8_C( 29), UINT8_C( 172), UINT8_C( 138), UINT8_C( 78), UINT8_C( 179), UINT8_C( 66), UINT8_C( 163), UINT8_C( 203), UINT8_C( 132), UINT8_C( 33), UINT8_C( 173), UINT8_C( 89) }, + { UINT8_C( 87), UINT8_C( 103), UINT8_C( 201), UINT8_C( 233), UINT8_C( 29), UINT8_C( 172), UINT8_C( 138), UINT8_C( 78), UINT8_C( 179), UINT8_C( 66), UINT8_C( 163), UINT8_C( 203), UINT8_C( 132), UINT8_C( 33), UINT8_C( 173), UINT8_C( 89) }, + { UINT8_C( 186), UINT8_C( 57), UINT8_C( 72), UINT8_C( 79), UINT8_C( 239), UINT8_C( 96), UINT8_C( 84), UINT8_C( 118), UINT8_C( 155), UINT8_C( 20), UINT8_C( 24), UINT8_C( 95), UINT8_C( 32), UINT8_C( 10), UINT8_C( 108), UINT8_C( 204) }, + { UINT8_C( 234), UINT8_C( 233), UINT8_C( 40), UINT8_C( 47), UINT8_C( 183), UINT8_C( 152), UINT8_C( 68), UINT8_C( 206), UINT8_C( 115), UINT8_C( 68), UINT8_C( 112), UINT8_C( 127), UINT8_C( 136), UINT8_C( 42), UINT8_C( 180), UINT8_C( 44) }, + { UINT8_C( 234), UINT8_C( 169), UINT8_C( 136), UINT8_C( 239), UINT8_C( 215), UINT8_C( 88), UINT8_C( 100), UINT8_C( 46), UINT8_C( 211), UINT8_C( 100), UINT8_C( 240), UINT8_C( 159), UINT8_C( 72), UINT8_C( 170), UINT8_C( 180), UINT8_C( 140) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint8x16_t a = simde_vld1q_u8(test_vec[i].a); + simde_uint8x16_t b = simde_vld1q_u8(test_vec[i].b); + + simde_uint8x16_t r0 = simde_vsliq_n_u8(a, b, 0); 
+ simde_uint8x16_t r3 = simde_vsliq_n_u8(a, b, 3); + simde_uint8x16_t r5 = simde_vsliq_n_u8(a, b, 5); + simde_uint8x16_t r7 = simde_vsliq_n_u8(a, b, 7); + + simde_test_arm_neon_assert_equal_u8x16(r0, simde_vld1q_u8(test_vec[i].r0)); + simde_test_arm_neon_assert_equal_u8x16(r3, simde_vld1q_u8(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u8x16(r5, simde_vld1q_u8(test_vec[i].r5)); + simde_test_arm_neon_assert_equal_u8x16(r7, simde_vld1q_u8(test_vec[i].r7)); + } + return 0; +} + +static int +test_simde_vsliq_n_u16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint16_t a[8]; + uint16_t b[8]; + uint16_t r3[8]; + uint16_t r6[8]; + uint16_t r10[8]; + uint16_t r13[8]; + uint16_t r15[8]; + } test_vec[] = { + { { UINT16_C( 54932), UINT16_C( 2052), UINT16_C( 30900), UINT16_C( 7957), UINT16_C( 53988), UINT16_C( 54164), UINT16_C( 41244), UINT16_C( 57663) }, + { UINT16_C( 22926), UINT16_C( 51202), UINT16_C( 3783), UINT16_C( 62722), UINT16_C( 47770), UINT16_C( 44849), UINT16_C( 38117), UINT16_C( 19359) }, + { UINT16_C( 52340), UINT16_C( 16404), UINT16_C( 30268), UINT16_C( 43029), UINT16_C( 54484), UINT16_C( 31116), UINT16_C( 42796), UINT16_C( 23807) }, + { UINT16_C( 25492), UINT16_C( 132), UINT16_C( 45556), UINT16_C( 16533), UINT16_C( 42660), UINT16_C( 52308), UINT16_C( 14684), UINT16_C( 59391) }, + { UINT16_C( 14996), UINT16_C( 2052), UINT16_C( 7348), UINT16_C( 2837), UINT16_C( 27364), UINT16_C( 51092), UINT16_C( 38172), UINT16_C( 32063) }, + { UINT16_C( 54932), UINT16_C( 18436), UINT16_C( 63668), UINT16_C( 24341), UINT16_C( 21220), UINT16_C( 13204), UINT16_C( 41244), UINT16_C( 57663) }, + { UINT16_C( 22164), UINT16_C( 2052), UINT16_C( 63668), UINT16_C( 7957), UINT16_C( 21220), UINT16_C( 54164), UINT16_C( 41244), UINT16_C( 57663) } }, + { { UINT16_C( 55983), UINT16_C( 3761), UINT16_C( 17325), UINT16_C( 1501), UINT16_C( 20550), UINT16_C( 44898), UINT16_C( 2169), UINT16_C( 48672) }, + { UINT16_C( 59886), UINT16_C( 11886), UINT16_C( 50059), UINT16_C( 39013), UINT16_C( 4828), 
UINT16_C( 28010), UINT16_C( 2847), UINT16_C( 15306) }, + { UINT16_C( 20343), UINT16_C( 29553), UINT16_C( 7261), UINT16_C( 49965), UINT16_C( 38630), UINT16_C( 27474), UINT16_C( 22777), UINT16_C( 56912) }, + { UINT16_C( 31663), UINT16_C( 39857), UINT16_C( 58093), UINT16_C( 6493), UINT16_C( 46854), UINT16_C( 23202), UINT16_C( 51193), UINT16_C( 62112) }, + { UINT16_C( 47791), UINT16_C( 47793), UINT16_C( 12205), UINT16_C( 38365), UINT16_C( 28742), UINT16_C( 43874), UINT16_C( 31865), UINT16_C( 10784) }, + { UINT16_C( 55983), UINT16_C( 52913), UINT16_C( 25517), UINT16_C( 42461), UINT16_C( 36934), UINT16_C( 20322), UINT16_C( 59513), UINT16_C( 24096) }, + { UINT16_C( 23215), UINT16_C( 3761), UINT16_C( 50093), UINT16_C( 34269), UINT16_C( 20550), UINT16_C( 12130), UINT16_C( 34937), UINT16_C( 15904) } }, + { { UINT16_C( 17536), UINT16_C( 59891), UINT16_C( 42463), UINT16_C( 3164), UINT16_C( 10034), UINT16_C( 15752), UINT16_C( 13926), UINT16_C( 51179) }, + { UINT16_C( 16067), UINT16_C( 15431), UINT16_C( 15067), UINT16_C( 39231), UINT16_C( 52443), UINT16_C( 53059), UINT16_C( 35282), UINT16_C( 4686) }, + { UINT16_C( 63000), UINT16_C( 57915), UINT16_C( 55007), UINT16_C( 51708), UINT16_C( 26330), UINT16_C( 31256), UINT16_C( 20118), UINT16_C( 37491) }, + { UINT16_C( 45248), UINT16_C( 4595), UINT16_C( 46815), UINT16_C( 20444), UINT16_C( 14066), UINT16_C( 53448), UINT16_C( 29862), UINT16_C( 37803) }, + { UINT16_C( 3200), UINT16_C( 7667), UINT16_C( 28127), UINT16_C( 64604), UINT16_C( 28466), UINT16_C( 3464), UINT16_C( 19046), UINT16_C( 15339) }, + { UINT16_C( 25728), UINT16_C( 59891), UINT16_C( 26079), UINT16_C( 60508), UINT16_C( 26418), UINT16_C( 32136), UINT16_C( 22118), UINT16_C( 51179) }, + { UINT16_C( 50304), UINT16_C( 59891), UINT16_C( 42463), UINT16_C( 35932), UINT16_C( 42802), UINT16_C( 48520), UINT16_C( 13926), UINT16_C( 18411) } }, + { { UINT16_C( 55903), UINT16_C( 22857), UINT16_C( 39133), UINT16_C( 33408), UINT16_C( 16011), UINT16_C( 990), UINT16_C( 21485), UINT16_C( 53727) 
}, + { UINT16_C( 10331), UINT16_C( 5549), UINT16_C( 17252), UINT16_C( 48442), UINT16_C( 3492), UINT16_C( 61577), UINT16_C( 18795), UINT16_C( 38317) }, + { UINT16_C( 17119), UINT16_C( 44393), UINT16_C( 6949), UINT16_C( 59856), UINT16_C( 27939), UINT16_C( 33870), UINT16_C( 19293), UINT16_C( 44399) }, + { UINT16_C( 5855), UINT16_C( 27465), UINT16_C( 55581), UINT16_C( 20096), UINT16_C( 26891), UINT16_C( 8798), UINT16_C( 23277), UINT16_C( 27487) }, + { UINT16_C( 28255), UINT16_C( 46409), UINT16_C( 37085), UINT16_C( 60032), UINT16_C( 37515), UINT16_C( 10206), UINT16_C( 45037), UINT16_C( 46559) }, + { UINT16_C( 31327), UINT16_C( 47433), UINT16_C( 39133), UINT16_C( 17024), UINT16_C( 40587), UINT16_C( 9182), UINT16_C( 29677), UINT16_C( 45535) }, + { UINT16_C( 55903), UINT16_C( 55625), UINT16_C( 6365), UINT16_C( 640), UINT16_C( 16011), UINT16_C( 33758), UINT16_C( 54253), UINT16_C( 53727) } }, + { { UINT16_C( 30910), UINT16_C( 36057), UINT16_C( 28028), UINT16_C( 65285), UINT16_C( 30944), UINT16_C( 29925), UINT16_C( 64087), UINT16_C( 51797) }, + { UINT16_C( 21033), UINT16_C( 43881), UINT16_C( 63534), UINT16_C( 35658), UINT16_C( 41153), UINT16_C( 27208), UINT16_C( 27863), UINT16_C( 40336) }, + { UINT16_C( 37198), UINT16_C( 23369), UINT16_C( 49524), UINT16_C( 23125), UINT16_C( 1544), UINT16_C( 21061), UINT16_C( 26303), UINT16_C( 60549) }, + { UINT16_C( 35454), UINT16_C( 55897), UINT16_C( 3004), UINT16_C( 53893), UINT16_C( 12384), UINT16_C( 37413), UINT16_C( 13783), UINT16_C( 25621) }, + { UINT16_C( 42174), UINT16_C( 42201), UINT16_C( 47484), UINT16_C( 11013), UINT16_C( 1248), UINT16_C( 8421), UINT16_C( 24151), UINT16_C( 16981) }, + { UINT16_C( 14526), UINT16_C( 11481), UINT16_C( 52604), UINT16_C( 24325), UINT16_C( 14560), UINT16_C( 5349), UINT16_C( 64087), UINT16_C( 2645) }, + { UINT16_C( 63678), UINT16_C( 36057), UINT16_C( 28028), UINT16_C( 32517), UINT16_C( 63712), UINT16_C( 29925), UINT16_C( 64087), UINT16_C( 19029) } }, + { { UINT16_C( 2698), UINT16_C( 52795), UINT16_C( 
47428), UINT16_C( 57027), UINT16_C( 39964), UINT16_C( 36120), UINT16_C( 60475), UINT16_C( 45338) }, + { UINT16_C( 21185), UINT16_C( 2661), UINT16_C( 53801), UINT16_C( 24352), UINT16_C( 21787), UINT16_C( 30325), UINT16_C( 60200), UINT16_C( 19373) }, + { UINT16_C( 38410), UINT16_C( 21291), UINT16_C( 37196), UINT16_C( 63747), UINT16_C( 43228), UINT16_C( 45992), UINT16_C( 22851), UINT16_C( 23914) }, + { UINT16_C( 45130), UINT16_C( 39291), UINT16_C( 35396), UINT16_C( 51203), UINT16_C( 18140), UINT16_C( 40280), UINT16_C( 51771), UINT16_C( 60250) }, + { UINT16_C( 1674), UINT16_C( 38459), UINT16_C( 42308), UINT16_C( 33475), UINT16_C( 27676), UINT16_C( 54552), UINT16_C( 41019), UINT16_C( 46362) }, + { UINT16_C( 10890), UINT16_C( 44603), UINT16_C( 14660), UINT16_C( 7875), UINT16_C( 31772), UINT16_C( 44312), UINT16_C( 3131), UINT16_C( 45338) }, + { UINT16_C( 35466), UINT16_C( 52795), UINT16_C( 47428), UINT16_C( 24259), UINT16_C( 39964), UINT16_C( 36120), UINT16_C( 27707), UINT16_C( 45338) } }, + { { UINT16_C( 20622), UINT16_C( 35405), UINT16_C( 42684), UINT16_C( 64877), UINT16_C( 29368), UINT16_C( 10266), UINT16_C( 24647), UINT16_C( 64280) }, + { UINT16_C( 31576), UINT16_C( 19446), UINT16_C( 44288), UINT16_C( 56822), UINT16_C( 32174), UINT16_C( 17103), UINT16_C( 18667), UINT16_C( 20678) }, + { UINT16_C( 56006), UINT16_C( 24501), UINT16_C( 26628), UINT16_C( 61365), UINT16_C( 60784), UINT16_C( 5754), UINT16_C( 18271), UINT16_C( 34352) }, + { UINT16_C( 54798), UINT16_C( 64909), UINT16_C( 16444), UINT16_C( 32173), UINT16_C( 27576), UINT16_C( 46042), UINT16_C( 15047), UINT16_C( 12696) }, + { UINT16_C( 24718), UINT16_C( 55885), UINT16_C( 700), UINT16_C( 55661), UINT16_C( 47800), UINT16_C( 15386), UINT16_C( 44103), UINT16_C( 6936) }, + { UINT16_C( 4238), UINT16_C( 51789), UINT16_C( 1724), UINT16_C( 56685), UINT16_C( 53944), UINT16_C( 59418), UINT16_C( 24647), UINT16_C( 56088) }, + { UINT16_C( 20622), UINT16_C( 2637), UINT16_C( 9916), UINT16_C( 32109), UINT16_C( 29368), UINT16_C( 
43034), UINT16_C( 57415), UINT16_C( 31512) } }, + { { UINT16_C( 47232), UINT16_C( 355), UINT16_C( 64050), UINT16_C( 59843), UINT16_C( 9532), UINT16_C( 42900), UINT16_C( 36704), UINT16_C( 46935) }, + { UINT16_C( 18854), UINT16_C( 61891), UINT16_C( 57306), UINT16_C( 15820), UINT16_C( 62408), UINT16_C( 25140), UINT16_C( 4068), UINT16_C( 24825) }, + { UINT16_C( 19760), UINT16_C( 36379), UINT16_C( 65234), UINT16_C( 61027), UINT16_C( 40516), UINT16_C( 4516), UINT16_C( 32544), UINT16_C( 1999) }, + { UINT16_C( 27008), UINT16_C( 28899), UINT16_C( 63154), UINT16_C( 29443), UINT16_C( 62012), UINT16_C( 36116), UINT16_C( 63776), UINT16_C( 15959) }, + { UINT16_C( 39040), UINT16_C( 3427), UINT16_C( 27186), UINT16_C( 12739), UINT16_C( 8508), UINT16_C( 54164), UINT16_C( 37728), UINT16_C( 59223) }, + { UINT16_C( 55424), UINT16_C( 24931), UINT16_C( 23090), UINT16_C( 35267), UINT16_C( 1340), UINT16_C( 34708), UINT16_C( 36704), UINT16_C( 14167) }, + { UINT16_C( 14464), UINT16_C( 33123), UINT16_C( 31282), UINT16_C( 27075), UINT16_C( 9532), UINT16_C( 10132), UINT16_C( 3936), UINT16_C( 46935) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint16x8_t a = simde_vld1q_u16(test_vec[i].a); + simde_uint16x8_t b = simde_vld1q_u16(test_vec[i].b); + + simde_uint16x8_t r3 = simde_vsliq_n_u16(a, b, 3); + simde_uint16x8_t r6 = simde_vsliq_n_u16(a, b, 6); + simde_uint16x8_t r10 = simde_vsliq_n_u16(a, b, 10); + simde_uint16x8_t r13 = simde_vsliq_n_u16(a, b, 13); + simde_uint16x8_t r15 = simde_vsliq_n_u16(a, b, 15); + + simde_test_arm_neon_assert_equal_u16x8(r3, simde_vld1q_u16(test_vec[i].r3)); + simde_test_arm_neon_assert_equal_u16x8(r6, simde_vld1q_u16(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u16x8(r10, simde_vld1q_u16(test_vec[i].r10)); + simde_test_arm_neon_assert_equal_u16x8(r13, simde_vld1q_u16(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u16x8(r15, simde_vld1q_u16(test_vec[i].r15)); + } + return 0; +} + +static int 
+test_simde_vsliq_n_u32 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint32_t a[4]; + uint32_t b[4]; + uint32_t r6[4]; + uint32_t r13[4]; + uint32_t r19[4]; + uint32_t r26[4]; + uint32_t r31[4]; + } test_vec[] = { + { { UINT32_C(3314104844), UINT32_C( 896066203), UINT32_C(1274783480), UINT32_C(1259865211) }, + { UINT32_C(2546938802), UINT32_C(2309454007), UINT32_C(3677554301), UINT32_C(1529066540) }, + { UINT32_C(4090293388), UINT32_C(1776168411), UINT32_C(3435241336), UINT32_C(3370978107) }, + { UINT32_C(3866515980), UINT32_C(4011254427), UINT32_C(1624221432), UINT32_C(1988461691) }, + { UINT32_C(2643540492), UINT32_C(3317228187), UINT32_C( 871081720), UINT32_C(2707424379) }, + { UINT32_C(3381213708), UINT32_C(3714638491), UINT32_C(4160464632), UINT32_C(3004695675) }, + { UINT32_C(1166621196), UINT32_C(3043549851), UINT32_C(3422267128), UINT32_C(1259865211) } }, + { { UINT32_C(2519560084), UINT32_C(4091206232), UINT32_C(1216423209), UINT32_C(2044064088) }, + { UINT32_C(3403746096), UINT32_C( 188361982), UINT32_C(1027264752), UINT32_C( 342173962) }, + { UINT32_C(3091385364), UINT32_C(3465232280), UINT32_C(1320434729), UINT32_C( 424297112) }, + { UINT32_C( 560336788), UINT32_C(1168104024), UINT32_C(1511916841), UINT32_C(2770424152) }, + { UINT32_C(1501917076), UINT32_C(1743968856), UINT32_C(2273387817), UINT32_C(1213591896) }, + { UINT32_C(3257757588), UINT32_C(4225423960), UINT32_C(3229689129), UINT32_C( 701886808) }, + { UINT32_C( 372076436), UINT32_C(1943722584), UINT32_C(1216423209), UINT32_C(2044064088) } }, + { { UINT32_C( 442898144), UINT32_C(4213751943), UINT32_C(3937589646), UINT32_C(1647003042) }, + { UINT32_C(1900144466), UINT32_C(2941186414), UINT32_C( 465710967), UINT32_C(3138196476) }, + { UINT32_C(1350161568), UINT32_C(3552336775), UINT32_C(4035698126), UINT32_C(3276078882) }, + { UINT32_C(1021990624), UINT32_C(3727540359), UINT32_C(1173289358), UINT32_C(2726266274) }, + { UINT32_C( 982914784), UINT32_C(2339422343), UINT32_C(2075842958), 
UINT32_C(2682471842) }, + { UINT32_C(1248204512), UINT32_C(3140010119), UINT32_C(3736263054), UINT32_C(4062922146) }, + { UINT32_C( 442898144), UINT32_C(2066268295), UINT32_C(3937589646), UINT32_C(1647003042) } }, + { { UINT32_C(2089720617), UINT32_C(1970686998), UINT32_C( 778315975), UINT32_C(2964826108) }, + { UINT32_C( 394207440), UINT32_C(4261643434), UINT32_C(2206406389), UINT32_C(3380683517) }, + { UINT32_C(3754439721), UINT32_C(2162240150), UINT32_C(3771055431), UINT32_C(1615380348) }, + { UINT32_C(3826916137), UINT32_C(1888832534), UINT32_C(1658759367), UINT32_C( 610248700) }, + { UINT32_C( 109484841), UINT32_C( 626412566), UINT32_C(3081513159), UINT32_C( 401582076) }, + { UINT32_C(1083087657), UINT32_C(2843102230), UINT32_C(3596888263), UINT32_C(4105676796) }, + { UINT32_C(2089720617), UINT32_C(1970686998), UINT32_C(2925799623), UINT32_C(2964826108) } }, + { { UINT32_C(4032337033), UINT32_C(2722005257), UINT32_C(3284061714), UINT32_C(3943119414) }, + { UINT32_C(3606855119), UINT32_C(4011542875), UINT32_C(3628735594), UINT32_C(1545447947) }, + { UINT32_C(3205460937), UINT32_C(3335673545), UINT32_C( 310844050), UINT32_C( 124420854) }, + { UINT32_C(2277110921), UINT32_C(1764450569), UINT32_C(1133334034), UINT32_C(3040967222) }, + { UINT32_C(4000879753), UINT32_C(1256096009), UINT32_C(3814116882), UINT32_C(1348418102) }, + { UINT32_C(1012438153), UINT32_C(1849590025), UINT32_C(2881408530), UINT32_C( 789002806) }, + { UINT32_C(4032337033), UINT32_C(2722005257), UINT32_C(1136578066), UINT32_C(3943119414) } }, + { { UINT32_C(1857063657), UINT32_C( 398218107), UINT32_C(3397194281), UINT32_C(2589095246) }, + { UINT32_C( 57471507), UINT32_C(2562833446), UINT32_C(3648520257), UINT32_C(3804890585) }, + { UINT32_C(3678176489), UINT32_C( 812583355), UINT32_C(1577062505), UINT32_C(2994828878) }, + { UINT32_C(2655152873), UINT32_C( 931451771), UINT32_C( 537129), UINT32_C(1086010702) }, + { UINT32_C(2425916137), UINT32_C(3778302843), UINT32_C( 34411049), UINT32_C( 
785020238) }, + { UINT32_C(1320192745), UINT32_C(2612810619), UINT32_C( 108859945), UINT32_C(1716680014) }, + { UINT32_C(4004547305), UINT32_C( 398218107), UINT32_C(3397194281), UINT32_C(2589095246) } }, + { { UINT32_C(3089759757), UINT32_C( 293828492), UINT32_C(3325553807), UINT32_C( 877656139) }, + { UINT32_C(1892709601), UINT32_C(3829548036), UINT32_C( 730099765), UINT32_C(2664244870) }, + { UINT32_C( 874330189), UINT32_C( 277938444), UINT32_C(3776712015), UINT32_C(3007947147) }, + { UINT32_C( 245120525), UINT32_C(1216386956), UINT32_C(2382802063), UINT32_C(2765150283) }, + { UINT32_C(2802449933), UINT32_C( 539195276), UINT32_C(2175790223), UINT32_C( 876083275) }, + { UINT32_C(2217344525), UINT32_C( 293828492), UINT32_C(3593989263), UINT32_C( 407894091) }, + { UINT32_C(3089759757), UINT32_C( 293828492), UINT32_C(3325553807), UINT32_C( 877656139) } }, + { { UINT32_C(3617356505), UINT32_C(3366892416), UINT32_C(2443525970), UINT32_C(3678188135) }, + { UINT32_C(2074072819), UINT32_C(1284524143), UINT32_C(2431646596), UINT32_C(4048530367) }, + { UINT32_C(3891641561), UINT32_C( 605166528), UINT32_C(1006559506), UINT32_C(1407905767) }, + { UINT32_C(4208884441), UINT32_C( 151909248), UINT32_C(4285570898), UINT32_C(4118278759) }, + { UINT32_C(3080485593), UINT32_C(1132376960), UINT32_C(3693428562), UINT32_C(1576841831) }, + { UINT32_C(3483138777), UINT32_C(3165565824), UINT32_C( 296042322), UINT32_C(4282167911) }, + { UINT32_C(3617356505), UINT32_C(3366892416), UINT32_C( 296042322), UINT32_C(3678188135) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint32x4_t a = simde_vld1q_u32(test_vec[i].a); + simde_uint32x4_t b = simde_vld1q_u32(test_vec[i].b); + + simde_uint32x4_t r6 = simde_vsliq_n_u32(a, b, 6); + simde_uint32x4_t r13 = simde_vsliq_n_u32(a, b, 13); + simde_uint32x4_t r19 = simde_vsliq_n_u32(a, b, 19); + simde_uint32x4_t r26 = simde_vsliq_n_u32(a, b, 26); + simde_uint32x4_t r31 = simde_vsliq_n_u32(a, b, 31); + + 
simde_test_arm_neon_assert_equal_u32x4(r6, simde_vld1q_u32(test_vec[i].r6)); + simde_test_arm_neon_assert_equal_u32x4(r13, simde_vld1q_u32(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u32x4(r19, simde_vld1q_u32(test_vec[i].r19)); + simde_test_arm_neon_assert_equal_u32x4(r26, simde_vld1q_u32(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u32x4(r31, simde_vld1q_u32(test_vec[i].r31)); + } + return 0; +} + +static int +test_simde_vsliq_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint64_t a[2]; + uint64_t b[2]; + uint64_t r13[2]; + uint64_t r26[2]; + uint64_t r39[2]; + uint64_t r52[2]; + uint64_t r63[2]; + } test_vec[] = { + { { UINT64_C( 3326354101924670590), UINT64_C(15812941646549698216) }, + { UINT64_C(18151090002998169439), UINT64_C( 1380667824062628123) }, + { UINT64_C(12972070462018022526), UINT64_C( 2576697537094444712) }, + { UINT64_C(13955360284637015166), UINT64_C( 5231003553959724712) }, + { UINT64_C( 7838427115654622334), UINT64_C( 594630893316032168) }, + { UINT64_C(17724362110628146302), UINT64_C( 1275322049397737128) }, + { UINT64_C(12549726138779446398), UINT64_C(15812941646549698216) } }, + { { UINT64_C( 5869328896938516263), UINT64_C( 2222189733952536719) }, + { UINT64_C(12411628851339405478), UINT64_C( 942836641027284248) }, + { UINT64_C(16056959959070727975), UINT64_C(12978740484919984271) }, + { UINT64_C(13330739158301581095), UINT64_C(13255955676427108495) }, + { UINT64_C( 690267940543831847), UINT64_C(15253283237737447567) }, + { UINT64_C( 9972108157473038119), UINT64_C(15097981068604784783) }, + { UINT64_C( 5869328896938516263), UINT64_C( 2222189733952536719) } }, + { { UINT64_C(15272376107936314541), UINT64_C( 7392787741455680135) }, + { UINT64_C( 9420840874243141019), UINT64_C( 6038106548576691917) }, + { UINT64_C(12797981472756823213), UINT64_C( 8447984324952302215) }, + { UINT64_C( 8217653932499203245), UINT64_C(12150569524748616327) }, + { UINT64_C( 6851890001671943341), UINT64_C(17281269205359133319) }, + { 
UINT64_C(15686707273654400173), UINT64_C( 7843147704192729735) }, + { UINT64_C(15272376107936314541), UINT64_C(16616159778310455943) } }, + { { UINT64_C(17788547161097733072), UINT64_C(10303315733532817032) }, + { UINT64_C( 9530739437947880088), UINT64_C( 9539080515559423601) }, + { UINT64_C( 9196555730211242960), UINT64_C( 3739687229137499784) }, + { UINT64_C( 1681744860744483792), UINT64_C(13922618736527455880) }, + { UINT64_C(15582820183151182800), UINT64_C(16320825774362506888) }, + { UINT64_C( 2994222385185653712), UINT64_C(16653391208125216392) }, + { UINT64_C( 8565175124242957264), UINT64_C(10303315733532817032) } }, + { { UINT64_C(11269734078394947120), UINT64_C(11079511464515323691) }, + { UINT64_C( 4631258017608531081), UINT64_C( 2950484057785924872) }, + { UINT64_C(12759864702248498736), UINT64_C( 5130664822783936299) }, + { UINT64_C( 9559719181400167984), UINT64_C( 8723228335687976747) }, + { UINT64_C( 6790941082614519344), UINT64_C(16446728547236431659) }, + { UINT64_C(14453779014945887792), UINT64_C(15024664738091878187) }, + { UINT64_C(11269734078394947120), UINT64_C( 1856139427660547883) } }, + { { UINT64_C( 7998503011676121860), UINT64_C(12436442786690204518) }, + { UINT64_C(10507257933463575756), UINT64_C( 890983024616133747) }, + { UINT64_C( 2949143004844755716), UINT64_C(12469028540094767974) }, + { UINT64_C(12591503202443651844), UINT64_C( 6659864326580700006) }, + { UINT64_C(13848118176700230404), UINT64_C(10586337451585012582) }, + { UINT64_C(14753902452731865860), UINT64_C( 5131604191095260006) }, + { UINT64_C( 7998503011676121860), UINT64_C(12436442786690204518) } }, + { { UINT64_C(10515578012670966780), UINT64_C(17680525830101447828) }, + { UINT64_C(11288336922296194998), UINT64_C(16689682725274606802) }, + { UINT64_C( 528025944447179772), UINT64_C(13060555188091901076) }, + { UINT64_C( 9050423663222406140), UINT64_C( 952473333433152660) }, + { UINT64_C( 3606217256912632828), UINT64_C(18135548271896081556) }, + { UINT64_C( 
8894282146817588220), UINT64_C(14784711269702218900) }, + { UINT64_C( 1292205975816190972), UINT64_C( 8457153793246672020) } }, + { { UINT64_C( 2303077286822012679), UINT64_C( 138581218363046567) }, + { UINT64_C(18411670028825252116), UINT64_C( 8225054175440624625) }, + { UINT64_C( 7821329487171323655), UINT64_C(12134448022314433191) }, + { UINT64_C( 6788990914251187975), UINT64_C(14341129652734655143) }, + { UINT64_C(16926931031184631559), UINT64_C(13667854094844426919) }, + { UINT64_C( 1244731374389946119), UINT64_C(13770977290413537959) }, + { UINT64_C( 2303077286822012679), UINT64_C( 9361953255217822375) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_uint64x2_t a = simde_vld1q_u64(test_vec[i].a); + simde_uint64x2_t b = simde_vld1q_u64(test_vec[i].b); + + simde_uint64x2_t r13 = simde_vsliq_n_u64(a, b, 13); + simde_uint64x2_t r26 = simde_vsliq_n_u64(a, b, 26); + simde_uint64x2_t r39 = simde_vsliq_n_u64(a, b, 39); + simde_uint64x2_t r52 = simde_vsliq_n_u64(a, b, 52); + simde_uint64x2_t r63 = simde_vsliq_n_u64(a, b, 63); + + simde_test_arm_neon_assert_equal_u64x2(r13, simde_vld1q_u64(test_vec[i].r13)); + simde_test_arm_neon_assert_equal_u64x2(r26, simde_vld1q_u64(test_vec[i].r26)); + simde_test_arm_neon_assert_equal_u64x2(r39, simde_vld1q_u64(test_vec[i].r39)); + simde_test_arm_neon_assert_equal_u64x2(r52, simde_vld1q_u64(test_vec[i].r52)); + simde_test_arm_neon_assert_equal_u64x2(r63, simde_vld1q_u64(test_vec[i].r63)); + } + return 0; +} + +static int +test_simde_vslid_n_s64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + int64_t a; + int64_t b; + int64_t r13; + int64_t r26; + int64_t r39; + int64_t r52; + int64_t r63; + } test_vec[] = { + { -INT64_C( 4403672992202505812), + -INT64_C( 1230496034501682882), + -INT64_C( 8301250392370986580), + -INT64_C( 9144558609705090644), + INT64_C( 3552791725982124), + -INT64_C( 7790379911985118804), + INT64_C( 4819699044652269996) }, + { -INT64_C( 3907786404754124187), + INT64_C( 
7915678331130018259), + INT64_C( 4931469528035652197), + INT64_C( 228852244148621925), + -INT64_C( 6810311284617670043), + -INT64_C( 7118852939069287835), + -INT64_C( 3907786404754124187) }, + { -INT64_C( 7546561803657959031), + -INT64_C( 5215796123490493360), + -INT64_C( 5142568922800060023), + INT64_C( 4438848774528280969), + INT64_C( 4516591482021325193), + -INT64_C( 4249926876422755959), + INT64_C( 1676810233196816777) }, + { INT64_C( 3478031672116439332), + -INT64_C( 8079927811897769318), + -INT64_C( 3850898596655052508), + -INT64_C( 2628937754841519836), + -INT64_C( 8907753509524412124), + INT64_C( 3000650111615166756), + INT64_C( 3478031672116439332) }, + { INT64_C( 7801976851260586890), + INT64_C( 4148771054844773256), + INT64_C( 7829897515388443530), + INT64_C( 3191301773980941194), + INT64_C( 4107779959898323850), + INT64_C( 4072996359797816202), + INT64_C( 7801976851260586890) }, + { INT64_C( 5698065473092539665), + -INT64_C( 6783390212694855361), + -INT64_C( 7939472383085644527), + INT64_C( 3061841662278456593), + -INT64_C( 4965042823300033263), + INT64_C( 1437660225600050449), + -INT64_C( 3525306563762236143) }, + { INT64_C( 4286310312536331710), + INT64_C( 2689930727598008905), + -INT64_C( 7946647600025225794), + -INT64_C( 377303285623532098), + INT64_C( 8184488051327784382), + -INT64_C( 6585379187936045634), + -INT64_C( 4937061724318444098) }, + { INT64_C( 7214605044141401542), + INT64_C( 5142768677345030141), + -INT64_C( 2802459542128974394), + INT64_C( 8447802647868220870), + -INT64_C( 7784473535874193978), + -INT64_C( 9168758160874042), + -INT64_C( 2008766992713374266) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + int64_t r13 = simde_vslid_n_s64(test_vec[i].a, test_vec[i].b, 13); + int64_t r26 = simde_vslid_n_s64(test_vec[i].a, test_vec[i].b, 26); + int64_t r39 = simde_vslid_n_s64(test_vec[i].a, test_vec[i].b, 39); + int64_t r52 = simde_vslid_n_s64(test_vec[i].a, test_vec[i].b, 52); + int64_t r63 = 
simde_vslid_n_s64(test_vec[i].a, test_vec[i].b, 63); + + simde_assert_equal_i64(r13, test_vec[i].r13); + simde_assert_equal_i64(r26, test_vec[i].r26); + simde_assert_equal_i64(r39, test_vec[i].r39); + simde_assert_equal_i64(r52, test_vec[i].r52); + simde_assert_equal_i64(r63, test_vec[i].r63); + } + return 0; +} + /* Checks simde_vslid_n_u64 at shift amounts 13, 26, 39, 52 and 63 against the expected values stored in each test_vec entry; returns 0 once every entry has been checked. */ +static int +test_simde_vslid_n_u64 (SIMDE_MUNIT_TEST_ARGS) { + struct { + uint64_t a; /* first argument passed to simde_vslid_n_u64 */ + uint64_t b; /* second argument passed to simde_vslid_n_u64 */ + uint64_t r13; /* expected result for shift amount 13 */ + uint64_t r26; /* expected result for shift amount 26 */ + uint64_t r39; /* expected result for shift amount 39 */ + uint64_t r52; /* expected result for shift amount 52 */ + uint64_t r63; /* expected result for shift amount 63 */ + } test_vec[] = { + { UINT64_C(16047323941614028429), + UINT64_C( 5759110669390432380), + UINT64_C(10310007171098576525), + UINT64_C(10384376197257004685), + UINT64_C(10872883682835072653), + UINT64_C( 9782816859941668493), + UINT64_C( 6823951904759252621) }, + { UINT64_C( 6252747629803370157), + UINT64_C( 9603497943463504180), + UINT64_C(14938422555498157741), + UINT64_C(18304133725414782637), + UINT64_C(12327647779556456109), + UINT64_C(10612232069098010285), + UINT64_C( 6252747629803370157) }, + { UINT64_C( 9127216911202849255), + UINT64_C( 5473098671353457008), + UINT64_C(10036216613309387239), + UINT64_C(17994903780703801831), + UINT64_C( 6319878951979127271), + UINT64_C(10883620765877342695), + UINT64_C( 9127216911202849255) }, + { UINT64_C(17366484507887084631), + UINT64_C(13169929079093652159), + UINT64_C(11499672881740637271), + UINT64_C(16245006858353321047), + UINT64_C( 4284435786980336727), + UINT64_C(12390006919642686551), + UINT64_C(17366484507887084631) }, + { UINT64_C(11977100683987906951), + UINT64_C(15640438591829181941), + UINT64_C(13835352351822492039), + UINT64_C( 2410877258336039303), + UINT64_C(11890341519974986119), + UINT64_C( 6870018706549764487), + UINT64_C(11977100683987906951) }, + { UINT64_C(16622456104105027770), + UINT64_C(12751574988462635335), + UINT64_C(15437360142427416762), + UINT64_C(10423661486465624250), + UINT64_C( 656579698565064890), + UINT64_C(10700222594112825530), + UINT64_C(16622456104105027770) }, + { UINT64_C(
6364346099977397255), + UINT64_C( 5145566249440208637), + UINT64_C( 1668506987863712775), + UINT64_C(17818630034471756807), + UINT64_C( 1131387083589133319), + UINT64_C( 8057699559868703751), + UINT64_C(15587718136832173063) }, + { UINT64_C(14133761816967247706), + UINT64_C(14840253816082545319), + UINT64_C( 7315815602266109786), + UINT64_C(16136662355303987034), + UINT64_C( 2169982941962172250), + UINT64_C(12282782370117973850), + UINT64_C(14133761816967247706) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { /* one pass per test_vec entry; each shift amount below is written as a literal in its own call */ + uint64_t r13 = simde_vslid_n_u64(test_vec[i].a, test_vec[i].b, 13); + uint64_t r26 = simde_vslid_n_u64(test_vec[i].a, test_vec[i].b, 26); + uint64_t r39 = simde_vslid_n_u64(test_vec[i].a, test_vec[i].b, 39); + uint64_t r52 = simde_vslid_n_u64(test_vec[i].a, test_vec[i].b, 52); + uint64_t r63 = simde_vslid_n_u64(test_vec[i].a, test_vec[i].b, 63); + + simde_assert_equal_u64(r13, test_vec[i].r13); + simde_assert_equal_u64(r26, test_vec[i].r26); + simde_assert_equal_u64(r39, test_vec[i].r39); + simde_assert_equal_u64(r52, test_vec[i].r52); + simde_assert_equal_u64(r63, test_vec[i].r63); + } + return 0; +} + +SIMDE_TEST_FUNC_LIST_BEGIN /* each entry below is a test function name with its test_simde_ prefix removed */ +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vsli_n_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_s8) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_s16) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_s32) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_u8) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_u16) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vsliq_n_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vslid_n_s64) +SIMDE_TEST_FUNC_LIST_ENTRY(vslid_n_u64) +SIMDE_TEST_FUNC_LIST_END + +#include /* NOTE(review): no header name follows this directive in the text shown here; confirm against the original patch */ diff --git a/test/arm/neon/st1_lane.c
b/test/arm/neon/st1_lane.c index 0aa36837f..711360aab 100644 --- a/test/arm/neon/st1_lane.c +++ b/test/arm/neon/st1_lane.c @@ -6,6 +6,50 @@ HEDLEY_DIAGNOSTIC_PUSH SIMDE_DIAGNOSTIC_DISABLE_UNREACHABLE_ +static int +test_simde_vst1_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Stores one lane of a four-element float16 vector with simde_vst1_lane_f16 and checks the stored scalar against the expected value; returns 0 once every entry has been checked. */ + struct { + simde_float16 a; /* expected stored value; equals val[lane] in every entry below */ + simde_float16 val[4]; /* elements loaded into the source vector */ + int lane; /* lane index handed to the store */ + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( 85.175), + { SIMDE_FLOAT16_VALUE( 1.091), SIMDE_FLOAT16_VALUE( - 78.385), SIMDE_FLOAT16_VALUE( 23.855), SIMDE_FLOAT16_VALUE( 85.175) }, + INT8_C( 3) }, + { SIMDE_FLOAT16_VALUE( - 64.034), + { SIMDE_FLOAT16_VALUE( - 64.034), SIMDE_FLOAT16_VALUE( - 51.901), SIMDE_FLOAT16_VALUE( - 47.801), SIMDE_FLOAT16_VALUE( - 74.037) }, + INT8_C( 0) }, + { SIMDE_FLOAT16_VALUE( 99.952), + { SIMDE_FLOAT16_VALUE( 31.399), SIMDE_FLOAT16_VALUE( - 69.493), SIMDE_FLOAT16_VALUE( 39.299), SIMDE_FLOAT16_VALUE( 99.952) }, + INT8_C( 3) }, + { SIMDE_FLOAT16_VALUE( - 55.804), + { SIMDE_FLOAT16_VALUE( 31.333), SIMDE_FLOAT16_VALUE( - 55.804), SIMDE_FLOAT16_VALUE( - 16.597), SIMDE_FLOAT16_VALUE( - 83.723) }, + INT8_C( 1) }, + { SIMDE_FLOAT16_VALUE( - 89.293), + { SIMDE_FLOAT16_VALUE( - 29.785), SIMDE_FLOAT16_VALUE( - 89.293), SIMDE_FLOAT16_VALUE( - 65.019), SIMDE_FLOAT16_VALUE( - 74.124) }, + INT8_C( 1) }, + { SIMDE_FLOAT16_VALUE( 81.242), + { SIMDE_FLOAT16_VALUE( 26.817), SIMDE_FLOAT16_VALUE( - 9.372), SIMDE_FLOAT16_VALUE( 43.833), SIMDE_FLOAT16_VALUE( 81.242) }, + INT8_C( 3) }, + { SIMDE_FLOAT16_VALUE( - 59.960), + { SIMDE_FLOAT16_VALUE( 5.886), SIMDE_FLOAT16_VALUE( - 59.960), SIMDE_FLOAT16_VALUE( - 88.952), SIMDE_FLOAT16_VALUE( - 12.921) }, + INT8_C( 1) }, + { SIMDE_FLOAT16_VALUE( - 86.830), + { SIMDE_FLOAT16_VALUE( - 90.057), SIMDE_FLOAT16_VALUE( 75.838), SIMDE_FLOAT16_VALUE( - 86.830), SIMDE_FLOAT16_VALUE( 59.157) }, + INT8_C( 2) }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t val = simde_vld1_f16(test_vec[i].val); + simde_float16 a; /* destination; &a is passed to the store on the next line */ +
SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst1_lane_f16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); /* presumably turns the runtime lane value into a constant-lane call; the macro is defined outside this chunk, so confirm there */ + + simde_assert_equal_f16(a, test_vec[i].a, 1); + } + + return 0; +} + static int test_simde_vst1_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -578,6 +622,58 @@ test_simde_vst1_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst1q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Stores one lane of an eight-element float16 vector with simde_vst1q_lane_f16 and checks the stored scalar against the expected value; returns 0 once every entry has been checked. */ + struct { + simde_float16 a; /* expected stored value; equals val[lane] in every entry below */ + simde_float16 val[8]; /* elements loaded into the source vector */ + int lane; /* lane index handed to the store */ + } test_vec[] = { + { SIMDE_FLOAT16_VALUE( - 64.026), + { SIMDE_FLOAT16_VALUE( 77.636), SIMDE_FLOAT16_VALUE( - 64.026), SIMDE_FLOAT16_VALUE( - 11.443), SIMDE_FLOAT16_VALUE( - 39.884), + SIMDE_FLOAT16_VALUE( - 87.713), SIMDE_FLOAT16_VALUE( - 22.005), SIMDE_FLOAT16_VALUE( 71.493), SIMDE_FLOAT16_VALUE( 94.947) }, + INT8_C( 1) }, + { SIMDE_FLOAT16_VALUE( 63.436), + { SIMDE_FLOAT16_VALUE( 63.436), SIMDE_FLOAT16_VALUE( - 60.792), SIMDE_FLOAT16_VALUE( 3.439), SIMDE_FLOAT16_VALUE( 69.299), + SIMDE_FLOAT16_VALUE( 60.921), SIMDE_FLOAT16_VALUE( - 17.081), SIMDE_FLOAT16_VALUE( 12.053), SIMDE_FLOAT16_VALUE( - 66.784) }, + INT8_C( 0) }, + { SIMDE_FLOAT16_VALUE( 32.004), + { SIMDE_FLOAT16_VALUE( 49.966), SIMDE_FLOAT16_VALUE( 90.559), SIMDE_FLOAT16_VALUE( - 6.561), SIMDE_FLOAT16_VALUE( - 4.223), + SIMDE_FLOAT16_VALUE( 32.004), SIMDE_FLOAT16_VALUE( 31.066), SIMDE_FLOAT16_VALUE( 60.932), SIMDE_FLOAT16_VALUE( - 85.011) }, + INT8_C( 4) }, + { SIMDE_FLOAT16_VALUE( - 56.576), + { SIMDE_FLOAT16_VALUE( 18.785), SIMDE_FLOAT16_VALUE( - 14.403), SIMDE_FLOAT16_VALUE( 43.277), SIMDE_FLOAT16_VALUE( 82.202), + SIMDE_FLOAT16_VALUE( 72.540), SIMDE_FLOAT16_VALUE( 14.761), SIMDE_FLOAT16_VALUE( - 56.576), SIMDE_FLOAT16_VALUE( - 71.628) }, + INT8_C( 6) }, + { SIMDE_FLOAT16_VALUE( - 76.518), + { SIMDE_FLOAT16_VALUE( 96.881), SIMDE_FLOAT16_VALUE( - 76.518), SIMDE_FLOAT16_VALUE( 80.572), SIMDE_FLOAT16_VALUE( 23.751), + SIMDE_FLOAT16_VALUE( - 89.697), SIMDE_FLOAT16_VALUE( 42.687), SIMDE_FLOAT16_VALUE( - 55.036), SIMDE_FLOAT16_VALUE( 46.427) }, + INT8_C( 1) }, +
{ SIMDE_FLOAT16_VALUE( - 91.933), + { SIMDE_FLOAT16_VALUE( - 5.377), SIMDE_FLOAT16_VALUE( 33.082), SIMDE_FLOAT16_VALUE( 34.890), SIMDE_FLOAT16_VALUE( - 91.933), + SIMDE_FLOAT16_VALUE( 93.006), SIMDE_FLOAT16_VALUE( - 95.231), SIMDE_FLOAT16_VALUE( 89.548), SIMDE_FLOAT16_VALUE( 25.629) }, + INT8_C( 3) }, + { SIMDE_FLOAT16_VALUE( - 36.496), + { SIMDE_FLOAT16_VALUE( - 23.493), SIMDE_FLOAT16_VALUE( - 36.496), SIMDE_FLOAT16_VALUE( 87.220), SIMDE_FLOAT16_VALUE( 52.231), + SIMDE_FLOAT16_VALUE( 16.211), SIMDE_FLOAT16_VALUE( 42.745), SIMDE_FLOAT16_VALUE( - 69.936), SIMDE_FLOAT16_VALUE( - 27.078) }, + INT8_C( 1) }, + { SIMDE_FLOAT16_VALUE( - 88.096), + { SIMDE_FLOAT16_VALUE( - 89.050), SIMDE_FLOAT16_VALUE( - 75.299), SIMDE_FLOAT16_VALUE( - 44.416), SIMDE_FLOAT16_VALUE( - 88.096), + SIMDE_FLOAT16_VALUE( - 91.574), SIMDE_FLOAT16_VALUE( - 27.581), SIMDE_FLOAT16_VALUE( - 28.933), SIMDE_FLOAT16_VALUE( - 97.993) }, + INT8_C( 3) }, + + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t val = simde_vld1q_f16(test_vec[i].val); + simde_float16 a; /* destination; &a is passed to the store on the next line */ + SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst1q_lane_f16, HEDLEY_UNREACHABLE(), test_vec[i].lane, &a, val); + simde_assert_equal_f16(a, test_vec[i].a, 1); + } + + return 0; +} + static int test_simde_vst1q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1196,6 +1292,7 @@ test_simde_vst1q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { HEDLEY_DIAGNOSTIC_POP SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_f16) /* list entry for the new four-element float16 lane-store test */ SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_s8) @@ -1207,6 +1304,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_f16) /* list entry for the new eight-element float16 lane-store test */ SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_lane_s8) diff --git a/test/arm/neon/st1_x2.c
b/test/arm/neon/st1_x2.c index 327ab6614..05ee2daca 100644 --- a/test/arm/neon/st1_x2.c +++ b/test/arm/neon/st1_x2.c @@ -1,11 +1,68 @@ - -#include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1_x2 +#include "test/test.h" #include "test-neon.h" #include "../../../simde/arm/neon/st1_x2.h" #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst1_f16_x2 (SIMDE_MUNIT_TEST_ARGS) { /* Stores a pair of four-element float16 vectors with simde_vst1_f16_x2 and compares the 8 stored values bytewise (simde_memcmp) with the expected array r. */ +#if 1 + struct { + simde_float16 val[2][4]; /* contents of the two source vectors */ + simde_float16 r[8]; /* expected memory after the store: val[0] followed by val[1] in every entry below */ + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( - 49.565), SIMDE_FLOAT16_VALUE( - 3.779), SIMDE_FLOAT16_VALUE( - 4.526), SIMDE_FLOAT16_VALUE( 54.137) }, + { SIMDE_FLOAT16_VALUE( 93.243), SIMDE_FLOAT16_VALUE( 77.887), SIMDE_FLOAT16_VALUE( 67.064), SIMDE_FLOAT16_VALUE( - 88.528) } }, + { SIMDE_FLOAT16_VALUE( - 49.565), SIMDE_FLOAT16_VALUE( - 3.779), SIMDE_FLOAT16_VALUE( - 4.526), SIMDE_FLOAT16_VALUE( 54.137), SIMDE_FLOAT16_VALUE( 93.243), SIMDE_FLOAT16_VALUE( 77.887), SIMDE_FLOAT16_VALUE( 67.064), SIMDE_FLOAT16_VALUE( - 88.528) } }, + { { { SIMDE_FLOAT16_VALUE( - 20.795), SIMDE_FLOAT16_VALUE( 75.008), SIMDE_FLOAT16_VALUE( - 10.786), SIMDE_FLOAT16_VALUE( 61.695) }, + { SIMDE_FLOAT16_VALUE( - 13.619), SIMDE_FLOAT16_VALUE( 27.128), SIMDE_FLOAT16_VALUE( - 26.152), SIMDE_FLOAT16_VALUE( 86.900) } }, + { SIMDE_FLOAT16_VALUE( - 20.795), SIMDE_FLOAT16_VALUE( 75.008), SIMDE_FLOAT16_VALUE( - 10.786), SIMDE_FLOAT16_VALUE( 61.695), SIMDE_FLOAT16_VALUE( - 13.619), SIMDE_FLOAT16_VALUE( 27.128), SIMDE_FLOAT16_VALUE( - 26.152), SIMDE_FLOAT16_VALUE( 86.900) } }, + { { { SIMDE_FLOAT16_VALUE( 2.070), SIMDE_FLOAT16_VALUE( 76.065), SIMDE_FLOAT16_VALUE( - 55.688), SIMDE_FLOAT16_VALUE( 21.829) }, + { SIMDE_FLOAT16_VALUE( - 92.596), SIMDE_FLOAT16_VALUE( 28.101), SIMDE_FLOAT16_VALUE( - 53.959), SIMDE_FLOAT16_VALUE( - 93.775) } }, + { SIMDE_FLOAT16_VALUE( 2.070), SIMDE_FLOAT16_VALUE( 76.065), SIMDE_FLOAT16_VALUE( - 55.688), SIMDE_FLOAT16_VALUE( 21.829), SIMDE_FLOAT16_VALUE( - 92.596), SIMDE_FLOAT16_VALUE( 28.101), SIMDE_FLOAT16_VALUE( -
53.959), SIMDE_FLOAT16_VALUE( - 93.775) } }, + { { { SIMDE_FLOAT16_VALUE( 35.951), SIMDE_FLOAT16_VALUE( 62.015), SIMDE_FLOAT16_VALUE( 90.532), SIMDE_FLOAT16_VALUE( 31.925) }, + { SIMDE_FLOAT16_VALUE( - 46.722), SIMDE_FLOAT16_VALUE( - 97.917), SIMDE_FLOAT16_VALUE( 38.448), SIMDE_FLOAT16_VALUE( - 24.373) } }, + { SIMDE_FLOAT16_VALUE( 35.951), SIMDE_FLOAT16_VALUE( 62.015), SIMDE_FLOAT16_VALUE( 90.532), SIMDE_FLOAT16_VALUE( 31.925), SIMDE_FLOAT16_VALUE( - 46.722), SIMDE_FLOAT16_VALUE( - 97.917), SIMDE_FLOAT16_VALUE( 38.448), SIMDE_FLOAT16_VALUE( - 24.373) } }, + { { { SIMDE_FLOAT16_VALUE( - 51.687), SIMDE_FLOAT16_VALUE( 48.322), SIMDE_FLOAT16_VALUE( 69.086), SIMDE_FLOAT16_VALUE( - 54.351) }, + { SIMDE_FLOAT16_VALUE( - 64.082), SIMDE_FLOAT16_VALUE( - 98.147), SIMDE_FLOAT16_VALUE( - 46.114), SIMDE_FLOAT16_VALUE( - 18.794) } }, + { SIMDE_FLOAT16_VALUE( - 51.687), SIMDE_FLOAT16_VALUE( 48.322), SIMDE_FLOAT16_VALUE( 69.086), SIMDE_FLOAT16_VALUE( - 54.351), SIMDE_FLOAT16_VALUE( - 64.082), SIMDE_FLOAT16_VALUE( - 98.147), SIMDE_FLOAT16_VALUE( - 46.114), SIMDE_FLOAT16_VALUE( - 18.794) } }, + { { { SIMDE_FLOAT16_VALUE( 68.290), SIMDE_FLOAT16_VALUE( 85.741), SIMDE_FLOAT16_VALUE( - 41.817), SIMDE_FLOAT16_VALUE( - 53.220) }, + { SIMDE_FLOAT16_VALUE( - 75.047), SIMDE_FLOAT16_VALUE( 76.317), SIMDE_FLOAT16_VALUE( - 59.770), SIMDE_FLOAT16_VALUE( 34.185) } }, + { SIMDE_FLOAT16_VALUE( 68.290), SIMDE_FLOAT16_VALUE( 85.741), SIMDE_FLOAT16_VALUE( - 41.817), SIMDE_FLOAT16_VALUE( - 53.220), SIMDE_FLOAT16_VALUE( - 75.047), SIMDE_FLOAT16_VALUE( 76.317), SIMDE_FLOAT16_VALUE( - 59.770), SIMDE_FLOAT16_VALUE( 34.185) } }, + { { { SIMDE_FLOAT16_VALUE( - 15.653), SIMDE_FLOAT16_VALUE( - 73.326), SIMDE_FLOAT16_VALUE( - 11.883), SIMDE_FLOAT16_VALUE( - 37.580) }, + { SIMDE_FLOAT16_VALUE( - 25.858), SIMDE_FLOAT16_VALUE( - 61.674), SIMDE_FLOAT16_VALUE( 40.814), SIMDE_FLOAT16_VALUE( - 12.611) } }, + { SIMDE_FLOAT16_VALUE( - 15.653), SIMDE_FLOAT16_VALUE( - 73.326), SIMDE_FLOAT16_VALUE( - 11.883),
SIMDE_FLOAT16_VALUE( - 37.580), SIMDE_FLOAT16_VALUE( - 25.858), SIMDE_FLOAT16_VALUE( - 61.674), SIMDE_FLOAT16_VALUE( 40.814), SIMDE_FLOAT16_VALUE( - 12.611) } }, + { { { SIMDE_FLOAT16_VALUE( - 22.246), SIMDE_FLOAT16_VALUE( - 57.414), SIMDE_FLOAT16_VALUE( - 85.821), SIMDE_FLOAT16_VALUE( 94.244) }, + { SIMDE_FLOAT16_VALUE( - 24.985), SIMDE_FLOAT16_VALUE( 26.320), SIMDE_FLOAT16_VALUE( 45.671), SIMDE_FLOAT16_VALUE( 30.999) } }, + { SIMDE_FLOAT16_VALUE( - 22.246), SIMDE_FLOAT16_VALUE( - 57.414), SIMDE_FLOAT16_VALUE( - 85.821), SIMDE_FLOAT16_VALUE( 94.244), SIMDE_FLOAT16_VALUE( - 24.985), SIMDE_FLOAT16_VALUE( 26.320), SIMDE_FLOAT16_VALUE( 45.671), SIMDE_FLOAT16_VALUE( 30.999) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x2_t val = {{ + simde_vld1_f16(test_vec[i].val[0]), + simde_vld1_f16(test_vec[i].val[1]), + }}; + simde_float16 r_[8]; /* destination buffer for the store */ + simde_vst1_f16_x2(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else /* Not compiled while the "#if 1" above is in effect. NOTE(review): val[2] below indexes one past the two-element array declared in this branch; confirm before enabling it. */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x2_t val[2] = simde_test_arm_neon_random_f16x2(-100.0f, 100.0f); + simde_float16x8_t r = simde_vst1_f16_x2(val[2]); + + simde_test_arm_neon_write_f16x2(2, val[2], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1_f32_x2 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -726,6 +783,7 @@ test_simde_vst1_u64_x2 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f16_x2) /* list entry for the new float16 x2 store test */ SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f32_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f64_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_s8_x2) diff --git a/test/arm/neon/st1_x3.c b/test/arm/neon/st1_x3.c index 8165ae5bc..0f5441643 100644 --- a/test/arm/neon/st1_x3.c +++ b/test/arm/neon/st1_x3.c @@ -1,11 +1,93 @@ - -#include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1_x3
+#include "test/test.h" #include "test-neon.h" #include "../../../simde/arm/neon/st1_x3.h" #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst1_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { /* Stores a triple of four-element float16 vectors with simde_vst1_f16_x3 and compares the 12 stored values bytewise (simde_memcmp) with the expected array r. */ +#if 1 + struct { + simde_float16 val[3][4]; /* contents of the three source vectors */ + simde_float16 r[12]; /* expected memory after the store: val[0], val[1], val[2] back to back in every entry below */ + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( 29.597), SIMDE_FLOAT16_VALUE( - 97.375), SIMDE_FLOAT16_VALUE( - 37.290), SIMDE_FLOAT16_VALUE( 83.451) }, + { SIMDE_FLOAT16_VALUE( 58.071), SIMDE_FLOAT16_VALUE( 9.615), SIMDE_FLOAT16_VALUE( - 92.703), SIMDE_FLOAT16_VALUE( - 13.746) }, + { SIMDE_FLOAT16_VALUE( 82.251), SIMDE_FLOAT16_VALUE( 31.216), SIMDE_FLOAT16_VALUE( 5.814), SIMDE_FLOAT16_VALUE( 14.340) } }, + { SIMDE_FLOAT16_VALUE( 29.597), SIMDE_FLOAT16_VALUE( - 97.375), SIMDE_FLOAT16_VALUE( - 37.290), SIMDE_FLOAT16_VALUE( 83.451), + SIMDE_FLOAT16_VALUE( 58.071), SIMDE_FLOAT16_VALUE( 9.615), SIMDE_FLOAT16_VALUE( - 92.703), SIMDE_FLOAT16_VALUE( - 13.746), + SIMDE_FLOAT16_VALUE( 82.251), SIMDE_FLOAT16_VALUE( 31.216), SIMDE_FLOAT16_VALUE( 5.814), SIMDE_FLOAT16_VALUE( 14.340) } }, + { { { SIMDE_FLOAT16_VALUE( - 61.888), SIMDE_FLOAT16_VALUE( 47.875), SIMDE_FLOAT16_VALUE( - 89.222), SIMDE_FLOAT16_VALUE( 78.391) }, + { SIMDE_FLOAT16_VALUE( 7.891), SIMDE_FLOAT16_VALUE( - 86.814), SIMDE_FLOAT16_VALUE( 7.232), SIMDE_FLOAT16_VALUE( 87.625) }, + { SIMDE_FLOAT16_VALUE( - 59.898), SIMDE_FLOAT16_VALUE( 99.002), SIMDE_FLOAT16_VALUE( 38.867), SIMDE_FLOAT16_VALUE( - 11.673) } }, + { SIMDE_FLOAT16_VALUE( - 61.888), SIMDE_FLOAT16_VALUE( 47.875), SIMDE_FLOAT16_VALUE( - 89.222), SIMDE_FLOAT16_VALUE( 78.391), + SIMDE_FLOAT16_VALUE( 7.891), SIMDE_FLOAT16_VALUE( - 86.814), SIMDE_FLOAT16_VALUE( 7.232), SIMDE_FLOAT16_VALUE( 87.625), + SIMDE_FLOAT16_VALUE( - 59.898), SIMDE_FLOAT16_VALUE( 99.002), SIMDE_FLOAT16_VALUE( 38.867), SIMDE_FLOAT16_VALUE( - 11.673) } }, + { { { SIMDE_FLOAT16_VALUE( - 30.571), SIMDE_FLOAT16_VALUE( - 70.230), SIMDE_FLOAT16_VALUE( 40.374), SIMDE_FLOAT16_VALUE( - 20.532) }, + { SIMDE_FLOAT16_VALUE( - 53.791),
SIMDE_FLOAT16_VALUE( - 27.304), SIMDE_FLOAT16_VALUE( - 92.472), SIMDE_FLOAT16_VALUE( 61.997) }, + { SIMDE_FLOAT16_VALUE( - 84.356), SIMDE_FLOAT16_VALUE( 57.986), SIMDE_FLOAT16_VALUE( - 40.919), SIMDE_FLOAT16_VALUE( 59.208) } }, + { SIMDE_FLOAT16_VALUE( - 30.571), SIMDE_FLOAT16_VALUE( - 70.230), SIMDE_FLOAT16_VALUE( 40.374), SIMDE_FLOAT16_VALUE( - 20.532), + SIMDE_FLOAT16_VALUE( - 53.791), SIMDE_FLOAT16_VALUE( - 27.304), SIMDE_FLOAT16_VALUE( - 92.472), SIMDE_FLOAT16_VALUE( 61.997), + SIMDE_FLOAT16_VALUE( - 84.356), SIMDE_FLOAT16_VALUE( 57.986), SIMDE_FLOAT16_VALUE( - 40.919), SIMDE_FLOAT16_VALUE( 59.208) } }, + { { { SIMDE_FLOAT16_VALUE( 44.416), SIMDE_FLOAT16_VALUE( - 23.810), SIMDE_FLOAT16_VALUE( - 15.844), SIMDE_FLOAT16_VALUE( - 72.400) }, + { SIMDE_FLOAT16_VALUE( 39.332), SIMDE_FLOAT16_VALUE( 94.084), SIMDE_FLOAT16_VALUE( 69.027), SIMDE_FLOAT16_VALUE( 27.132) }, + { SIMDE_FLOAT16_VALUE( - 53.948), SIMDE_FLOAT16_VALUE( - 6.196), SIMDE_FLOAT16_VALUE( 93.667), SIMDE_FLOAT16_VALUE( - 14.595) } }, + { SIMDE_FLOAT16_VALUE( 44.416), SIMDE_FLOAT16_VALUE( - 23.810), SIMDE_FLOAT16_VALUE( - 15.844), SIMDE_FLOAT16_VALUE( - 72.400), + SIMDE_FLOAT16_VALUE( 39.332), SIMDE_FLOAT16_VALUE( 94.084), SIMDE_FLOAT16_VALUE( 69.027), SIMDE_FLOAT16_VALUE( 27.132), + SIMDE_FLOAT16_VALUE( - 53.948), SIMDE_FLOAT16_VALUE( - 6.196), SIMDE_FLOAT16_VALUE( 93.667), SIMDE_FLOAT16_VALUE( - 14.595) } }, + { { { SIMDE_FLOAT16_VALUE( - 82.639), SIMDE_FLOAT16_VALUE( - 31.640), SIMDE_FLOAT16_VALUE( 38.749), SIMDE_FLOAT16_VALUE( - 86.594) }, + { SIMDE_FLOAT16_VALUE( 49.953), SIMDE_FLOAT16_VALUE( 35.426), SIMDE_FLOAT16_VALUE( 78.108), SIMDE_FLOAT16_VALUE( 30.009) }, + { SIMDE_FLOAT16_VALUE( 15.917), SIMDE_FLOAT16_VALUE( - 35.664), SIMDE_FLOAT16_VALUE( - 94.160), SIMDE_FLOAT16_VALUE( - 24.487) } }, + { SIMDE_FLOAT16_VALUE( - 82.639), SIMDE_FLOAT16_VALUE( - 31.640), SIMDE_FLOAT16_VALUE( 38.749), SIMDE_FLOAT16_VALUE( - 86.594), + SIMDE_FLOAT16_VALUE( 49.953), SIMDE_FLOAT16_VALUE( 35.426),
SIMDE_FLOAT16_VALUE( 78.108), SIMDE_FLOAT16_VALUE( 30.009), + SIMDE_FLOAT16_VALUE( 15.917), SIMDE_FLOAT16_VALUE( - 35.664), SIMDE_FLOAT16_VALUE( - 94.160), SIMDE_FLOAT16_VALUE( - 24.487) } }, + { { { SIMDE_FLOAT16_VALUE( - 53.177), SIMDE_FLOAT16_VALUE( 32.005), SIMDE_FLOAT16_VALUE( 86.184), SIMDE_FLOAT16_VALUE( 76.077) }, + { SIMDE_FLOAT16_VALUE( 6.572), SIMDE_FLOAT16_VALUE( - 52.085), SIMDE_FLOAT16_VALUE( 48.290), SIMDE_FLOAT16_VALUE( 75.600) }, + { SIMDE_FLOAT16_VALUE( 81.474), SIMDE_FLOAT16_VALUE( - 76.981), SIMDE_FLOAT16_VALUE( - 76.144), SIMDE_FLOAT16_VALUE( 91.779) } }, + { SIMDE_FLOAT16_VALUE( - 53.177), SIMDE_FLOAT16_VALUE( 32.005), SIMDE_FLOAT16_VALUE( 86.184), SIMDE_FLOAT16_VALUE( 76.077), + SIMDE_FLOAT16_VALUE( 6.572), SIMDE_FLOAT16_VALUE( - 52.085), SIMDE_FLOAT16_VALUE( 48.290), SIMDE_FLOAT16_VALUE( 75.600), + SIMDE_FLOAT16_VALUE( 81.474), SIMDE_FLOAT16_VALUE( - 76.981), SIMDE_FLOAT16_VALUE( - 76.144), SIMDE_FLOAT16_VALUE( 91.779) } }, + { { { SIMDE_FLOAT16_VALUE( 52.159), SIMDE_FLOAT16_VALUE( 24.957), SIMDE_FLOAT16_VALUE( 64.016), SIMDE_FLOAT16_VALUE( 36.756) }, + { SIMDE_FLOAT16_VALUE( 65.380), SIMDE_FLOAT16_VALUE( - 22.729), SIMDE_FLOAT16_VALUE( 0.877), SIMDE_FLOAT16_VALUE( 71.132) }, + { SIMDE_FLOAT16_VALUE( 84.184), SIMDE_FLOAT16_VALUE( 1.688), SIMDE_FLOAT16_VALUE( 13.012), SIMDE_FLOAT16_VALUE( - 96.029) } }, + { SIMDE_FLOAT16_VALUE( 52.159), SIMDE_FLOAT16_VALUE( 24.957), SIMDE_FLOAT16_VALUE( 64.016), SIMDE_FLOAT16_VALUE( 36.756), + SIMDE_FLOAT16_VALUE( 65.380), SIMDE_FLOAT16_VALUE( - 22.729), SIMDE_FLOAT16_VALUE( 0.877), SIMDE_FLOAT16_VALUE( 71.132), + SIMDE_FLOAT16_VALUE( 84.184), SIMDE_FLOAT16_VALUE( 1.688), SIMDE_FLOAT16_VALUE( 13.012), SIMDE_FLOAT16_VALUE( - 96.029) } }, + { { { SIMDE_FLOAT16_VALUE( - 42.841), SIMDE_FLOAT16_VALUE( - 57.189), SIMDE_FLOAT16_VALUE( 82.362), SIMDE_FLOAT16_VALUE( - 76.770) }, + { SIMDE_FLOAT16_VALUE( 44.180), SIMDE_FLOAT16_VALUE( 81.552), SIMDE_FLOAT16_VALUE( 36.064), SIMDE_FLOAT16_VALUE( 37.831) }, + {
SIMDE_FLOAT16_VALUE( - 86.197), SIMDE_FLOAT16_VALUE( 7.055), SIMDE_FLOAT16_VALUE( - 65.114), SIMDE_FLOAT16_VALUE( - 27.135) } }, + { SIMDE_FLOAT16_VALUE( - 42.841), SIMDE_FLOAT16_VALUE( - 57.189), SIMDE_FLOAT16_VALUE( 82.362), SIMDE_FLOAT16_VALUE( - 76.770), + SIMDE_FLOAT16_VALUE( 44.180), SIMDE_FLOAT16_VALUE( 81.552), SIMDE_FLOAT16_VALUE( 36.064), SIMDE_FLOAT16_VALUE( 37.831), + SIMDE_FLOAT16_VALUE( - 86.197), SIMDE_FLOAT16_VALUE( 7.055), SIMDE_FLOAT16_VALUE( - 65.114), SIMDE_FLOAT16_VALUE( - 27.135) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x3_t val = {{ + simde_vld1_f16(test_vec[i].val[0]), + simde_vld1_f16(test_vec[i].val[1]), + simde_vld1_f16(test_vec[i].val[2]), + }}; + simde_float16 r_[12]; /* destination buffer for the store */ + simde_vst1_f16_x3(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else /* Not compiled while the "#if 1" above is in effect. NOTE(review): val[3] below indexes one past the three-element array declared in this branch; confirm before enabling it. */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x3_t val[3] = simde_test_arm_neon_random_f16x3(-100.0f, 100.0f); + simde_float16x12_t r = simde_vst1_f16_x3(val[3]); + + simde_test_arm_neon_write_f16x3(2, val[3], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x12(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1_f32_x3 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -866,6 +948,7 @@ test_simde_vst1_u64_x3 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f16_x3) /* list entry for the new float16 x3 store test */ SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f32_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f64_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_s8_x3) diff --git a/test/arm/neon/st1_x4.c b/test/arm/neon/st1_x4.c index 4acefb6e2..03c438a4d 100644 --- a/test/arm/neon/st1_x4.c +++ b/test/arm/neon/st1_x4.c @@ -1,11 +1,110 @@ - -#include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1_x4 +#include "test/test.h" #include "test-neon.h" #include "../../../simde/arm/neon/st1_x4.h" #if !defined(SIMDE_BUG_INTEL_857088) +static
int +test_simde_vst1_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { /* Stores four four-element float16 vectors with simde_vst1_f16_x4 and compares the 16 stored values bytewise (simde_memcmp) with the expected array r. */ +#if 1 + struct { + simde_float16 val[4][4]; /* contents of the four source vectors */ + simde_float16 r[16]; /* expected memory after the store: val[0] through val[3] back to back in every entry below */ + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( 82.328), SIMDE_FLOAT16_VALUE( 70.920), SIMDE_FLOAT16_VALUE( - 2.164), SIMDE_FLOAT16_VALUE( 20.248) }, + { SIMDE_FLOAT16_VALUE( 17.827), SIMDE_FLOAT16_VALUE( - 14.465), SIMDE_FLOAT16_VALUE( - 88.806), SIMDE_FLOAT16_VALUE( 79.077) }, + { SIMDE_FLOAT16_VALUE( 1.568), SIMDE_FLOAT16_VALUE( - 20.954), SIMDE_FLOAT16_VALUE( - 34.479), SIMDE_FLOAT16_VALUE( - 40.339) }, + { SIMDE_FLOAT16_VALUE( 35.388), SIMDE_FLOAT16_VALUE( 7.745), SIMDE_FLOAT16_VALUE( 48.873), SIMDE_FLOAT16_VALUE( - 89.095) } }, + { SIMDE_FLOAT16_VALUE( 82.328), SIMDE_FLOAT16_VALUE( 70.920), SIMDE_FLOAT16_VALUE( - 2.164), SIMDE_FLOAT16_VALUE( 20.248), + SIMDE_FLOAT16_VALUE( 17.827), SIMDE_FLOAT16_VALUE( - 14.465), SIMDE_FLOAT16_VALUE( - 88.806), SIMDE_FLOAT16_VALUE( 79.077), + SIMDE_FLOAT16_VALUE( 1.568), SIMDE_FLOAT16_VALUE( - 20.954), SIMDE_FLOAT16_VALUE( - 34.479), SIMDE_FLOAT16_VALUE( - 40.339), + SIMDE_FLOAT16_VALUE( 35.388), SIMDE_FLOAT16_VALUE( 7.745), SIMDE_FLOAT16_VALUE( 48.873), SIMDE_FLOAT16_VALUE( - 89.095) } }, + { { { SIMDE_FLOAT16_VALUE( - 85.226), SIMDE_FLOAT16_VALUE( 68.050), SIMDE_FLOAT16_VALUE( - 79.246), SIMDE_FLOAT16_VALUE( - 6.999) }, + { SIMDE_FLOAT16_VALUE( - 57.697), SIMDE_FLOAT16_VALUE( - 37.357), SIMDE_FLOAT16_VALUE( - 77.997), SIMDE_FLOAT16_VALUE( - 49.541) }, + { SIMDE_FLOAT16_VALUE( - 97.845), SIMDE_FLOAT16_VALUE( - 58.594), SIMDE_FLOAT16_VALUE( 49.242), SIMDE_FLOAT16_VALUE( 38.316) }, + { SIMDE_FLOAT16_VALUE( - 4.874), SIMDE_FLOAT16_VALUE( 31.136), SIMDE_FLOAT16_VALUE( 51.676), SIMDE_FLOAT16_VALUE( - 89.351) } }, + { SIMDE_FLOAT16_VALUE( - 85.226), SIMDE_FLOAT16_VALUE( 68.050), SIMDE_FLOAT16_VALUE( - 79.246), SIMDE_FLOAT16_VALUE( - 6.999), + SIMDE_FLOAT16_VALUE( - 57.697), SIMDE_FLOAT16_VALUE( - 37.357), SIMDE_FLOAT16_VALUE( - 77.997), SIMDE_FLOAT16_VALUE( - 49.541), + SIMDE_FLOAT16_VALUE( - 97.845),
SIMDE_FLOAT16_VALUE( - 58.594), SIMDE_FLOAT16_VALUE( 49.242), SIMDE_FLOAT16_VALUE( 38.316), + SIMDE_FLOAT16_VALUE( - 4.874), SIMDE_FLOAT16_VALUE( 31.136), SIMDE_FLOAT16_VALUE( 51.676), SIMDE_FLOAT16_VALUE( - 89.351) } }, + { { { SIMDE_FLOAT16_VALUE( - 30.265), SIMDE_FLOAT16_VALUE( - 14.175), SIMDE_FLOAT16_VALUE( 36.709), SIMDE_FLOAT16_VALUE( 19.621) }, + { SIMDE_FLOAT16_VALUE( 43.887), SIMDE_FLOAT16_VALUE( - 72.313), SIMDE_FLOAT16_VALUE( 75.465), SIMDE_FLOAT16_VALUE( - 60.500) }, + { SIMDE_FLOAT16_VALUE( - 4.007), SIMDE_FLOAT16_VALUE( 97.670), SIMDE_FLOAT16_VALUE( - 48.236), SIMDE_FLOAT16_VALUE( 41.558) }, + { SIMDE_FLOAT16_VALUE( - 87.828), SIMDE_FLOAT16_VALUE( 55.865), SIMDE_FLOAT16_VALUE( 40.049), SIMDE_FLOAT16_VALUE( - 99.926) } }, + { SIMDE_FLOAT16_VALUE( - 30.265), SIMDE_FLOAT16_VALUE( - 14.175), SIMDE_FLOAT16_VALUE( 36.709), SIMDE_FLOAT16_VALUE( 19.621), + SIMDE_FLOAT16_VALUE( 43.887), SIMDE_FLOAT16_VALUE( - 72.313), SIMDE_FLOAT16_VALUE( 75.465), SIMDE_FLOAT16_VALUE( - 60.500), + SIMDE_FLOAT16_VALUE( - 4.007), SIMDE_FLOAT16_VALUE( 97.670), SIMDE_FLOAT16_VALUE( - 48.236), SIMDE_FLOAT16_VALUE( 41.558), + SIMDE_FLOAT16_VALUE( - 87.828), SIMDE_FLOAT16_VALUE( 55.865), SIMDE_FLOAT16_VALUE( 40.049), SIMDE_FLOAT16_VALUE( - 99.926) } }, + { { { SIMDE_FLOAT16_VALUE( - 44.272), SIMDE_FLOAT16_VALUE( 10.085), SIMDE_FLOAT16_VALUE( - 12.450), SIMDE_FLOAT16_VALUE( 58.234) }, + { SIMDE_FLOAT16_VALUE( - 92.323), SIMDE_FLOAT16_VALUE( - 72.811), SIMDE_FLOAT16_VALUE( - 75.942), SIMDE_FLOAT16_VALUE( - 33.709) }, + { SIMDE_FLOAT16_VALUE( - 54.556), SIMDE_FLOAT16_VALUE( - 96.713), SIMDE_FLOAT16_VALUE( - 44.482), SIMDE_FLOAT16_VALUE( 86.965) }, + { SIMDE_FLOAT16_VALUE( 91.324), SIMDE_FLOAT16_VALUE( - 40.202), SIMDE_FLOAT16_VALUE( - 14.814), SIMDE_FLOAT16_VALUE( 88.549) } }, + { SIMDE_FLOAT16_VALUE( - 44.272), SIMDE_FLOAT16_VALUE( 10.085), SIMDE_FLOAT16_VALUE( - 12.450), SIMDE_FLOAT16_VALUE( 58.234), + SIMDE_FLOAT16_VALUE( - 92.323), SIMDE_FLOAT16_VALUE( - 72.811),
SIMDE_FLOAT16_VALUE( - 75.942), SIMDE_FLOAT16_VALUE( - 33.709), + SIMDE_FLOAT16_VALUE( - 54.556), SIMDE_FLOAT16_VALUE( - 96.713), SIMDE_FLOAT16_VALUE( - 44.482), SIMDE_FLOAT16_VALUE( 86.965), + SIMDE_FLOAT16_VALUE( 91.324), SIMDE_FLOAT16_VALUE( - 40.202), SIMDE_FLOAT16_VALUE( - 14.814), SIMDE_FLOAT16_VALUE( 88.549) } }, + { { { SIMDE_FLOAT16_VALUE( 2.562), SIMDE_FLOAT16_VALUE( - 38.287), SIMDE_FLOAT16_VALUE( - 35.816), SIMDE_FLOAT16_VALUE( 15.466) }, + { SIMDE_FLOAT16_VALUE( 18.376), SIMDE_FLOAT16_VALUE( - 90.001), SIMDE_FLOAT16_VALUE( 64.324), SIMDE_FLOAT16_VALUE( - 33.166) }, + { SIMDE_FLOAT16_VALUE( - 69.347), SIMDE_FLOAT16_VALUE( 26.212), SIMDE_FLOAT16_VALUE( - 90.466), SIMDE_FLOAT16_VALUE( - 27.484) }, + { SIMDE_FLOAT16_VALUE( - 55.925), SIMDE_FLOAT16_VALUE( - 34.864), SIMDE_FLOAT16_VALUE( - 97.656), SIMDE_FLOAT16_VALUE( - 7.985) } }, + { SIMDE_FLOAT16_VALUE( 2.562), SIMDE_FLOAT16_VALUE( - 38.287), SIMDE_FLOAT16_VALUE( - 35.816), SIMDE_FLOAT16_VALUE( 15.466), + SIMDE_FLOAT16_VALUE( 18.376), SIMDE_FLOAT16_VALUE( - 90.001), SIMDE_FLOAT16_VALUE( 64.324), SIMDE_FLOAT16_VALUE( - 33.166), + SIMDE_FLOAT16_VALUE( - 69.347), SIMDE_FLOAT16_VALUE( 26.212), SIMDE_FLOAT16_VALUE( - 90.466), SIMDE_FLOAT16_VALUE( - 27.484), + SIMDE_FLOAT16_VALUE( - 55.925), SIMDE_FLOAT16_VALUE( - 34.864), SIMDE_FLOAT16_VALUE( - 97.656), SIMDE_FLOAT16_VALUE( - 7.985) } }, + { { { SIMDE_FLOAT16_VALUE( 52.345), SIMDE_FLOAT16_VALUE( - 67.483), SIMDE_FLOAT16_VALUE( 35.701), SIMDE_FLOAT16_VALUE( 91.753) }, + { SIMDE_FLOAT16_VALUE( 19.358), SIMDE_FLOAT16_VALUE( - 88.690), SIMDE_FLOAT16_VALUE( 47.635), SIMDE_FLOAT16_VALUE( 8.845) }, + { SIMDE_FLOAT16_VALUE( - 20.974), SIMDE_FLOAT16_VALUE( 4.303), SIMDE_FLOAT16_VALUE( - 69.975), SIMDE_FLOAT16_VALUE( - 29.454) }, + { SIMDE_FLOAT16_VALUE( 18.539), SIMDE_FLOAT16_VALUE( 11.611), SIMDE_FLOAT16_VALUE( - 30.865), SIMDE_FLOAT16_VALUE( 45.777) } }, + { SIMDE_FLOAT16_VALUE( 52.345), SIMDE_FLOAT16_VALUE( - 67.483), SIMDE_FLOAT16_VALUE( 35.701),
SIMDE_FLOAT16_VALUE( 91.753), + SIMDE_FLOAT16_VALUE( 19.358), SIMDE_FLOAT16_VALUE( - 88.690), SIMDE_FLOAT16_VALUE( 47.635), SIMDE_FLOAT16_VALUE( 8.845), + SIMDE_FLOAT16_VALUE( - 20.974), SIMDE_FLOAT16_VALUE( 4.303), SIMDE_FLOAT16_VALUE( - 69.975), SIMDE_FLOAT16_VALUE( - 29.454), + SIMDE_FLOAT16_VALUE( 18.539), SIMDE_FLOAT16_VALUE( 11.611), SIMDE_FLOAT16_VALUE( - 30.865), SIMDE_FLOAT16_VALUE( 45.777) } }, + { { { SIMDE_FLOAT16_VALUE( - 6.544), SIMDE_FLOAT16_VALUE( 72.545), SIMDE_FLOAT16_VALUE( - 38.100), SIMDE_FLOAT16_VALUE( 66.147) }, + { SIMDE_FLOAT16_VALUE( - 39.990), SIMDE_FLOAT16_VALUE( 59.395), SIMDE_FLOAT16_VALUE( 58.133), SIMDE_FLOAT16_VALUE( 62.227) }, + { SIMDE_FLOAT16_VALUE( 17.801), SIMDE_FLOAT16_VALUE( 50.002), SIMDE_FLOAT16_VALUE( 92.224), SIMDE_FLOAT16_VALUE( - 44.924) }, + { SIMDE_FLOAT16_VALUE( 11.046), SIMDE_FLOAT16_VALUE( 54.927), SIMDE_FLOAT16_VALUE( - 65.414), SIMDE_FLOAT16_VALUE( - 73.599) } }, + { SIMDE_FLOAT16_VALUE( - 6.544), SIMDE_FLOAT16_VALUE( 72.545), SIMDE_FLOAT16_VALUE( - 38.100), SIMDE_FLOAT16_VALUE( 66.147), + SIMDE_FLOAT16_VALUE( - 39.990), SIMDE_FLOAT16_VALUE( 59.395), SIMDE_FLOAT16_VALUE( 58.133), SIMDE_FLOAT16_VALUE( 62.227), + SIMDE_FLOAT16_VALUE( 17.801), SIMDE_FLOAT16_VALUE( 50.002), SIMDE_FLOAT16_VALUE( 92.224), SIMDE_FLOAT16_VALUE( - 44.924), + SIMDE_FLOAT16_VALUE( 11.046), SIMDE_FLOAT16_VALUE( 54.927), SIMDE_FLOAT16_VALUE( - 65.414), SIMDE_FLOAT16_VALUE( - 73.599) } }, + { { { SIMDE_FLOAT16_VALUE( - 96.039), SIMDE_FLOAT16_VALUE( 20.111), SIMDE_FLOAT16_VALUE( - 49.906), SIMDE_FLOAT16_VALUE( - 73.928) }, + { SIMDE_FLOAT16_VALUE( 70.164), SIMDE_FLOAT16_VALUE( 52.349), SIMDE_FLOAT16_VALUE( 46.465), SIMDE_FLOAT16_VALUE( 59.387) }, + { SIMDE_FLOAT16_VALUE( - 74.522), SIMDE_FLOAT16_VALUE( - 64.090), SIMDE_FLOAT16_VALUE( - 64.099), SIMDE_FLOAT16_VALUE( 19.591) }, + { SIMDE_FLOAT16_VALUE( 39.551), SIMDE_FLOAT16_VALUE( - 75.560), SIMDE_FLOAT16_VALUE( - 12.722), SIMDE_FLOAT16_VALUE( - 45.531) } }, + { SIMDE_FLOAT16_VALUE( - 96.039),
SIMDE_FLOAT16_VALUE( 20.111), SIMDE_FLOAT16_VALUE( - 49.906), SIMDE_FLOAT16_VALUE( - 73.928), + SIMDE_FLOAT16_VALUE( 70.164), SIMDE_FLOAT16_VALUE( 52.349), SIMDE_FLOAT16_VALUE( 46.465), SIMDE_FLOAT16_VALUE( 59.387), + SIMDE_FLOAT16_VALUE( - 74.522), SIMDE_FLOAT16_VALUE( - 64.090), SIMDE_FLOAT16_VALUE( - 64.099), SIMDE_FLOAT16_VALUE( 19.591), + SIMDE_FLOAT16_VALUE( 39.551), SIMDE_FLOAT16_VALUE( - 75.560), SIMDE_FLOAT16_VALUE( - 12.722), SIMDE_FLOAT16_VALUE( - 45.531) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x4_t val = {{ + simde_vld1_f16(test_vec[i].val[0]), + simde_vld1_f16(test_vec[i].val[1]), + simde_vld1_f16(test_vec[i].val[2]), + simde_vld1_f16(test_vec[i].val[3]), + }}; + simde_float16 r_[16]; /* destination buffer for the store */ + simde_vst1_f16_x4(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else /* Not compiled while the "#if 1" above is in effect. NOTE(review): val[4] below indexes one past the four-element array declared in this branch; confirm before enabling it. */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t val[4] = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x16_t r = simde_vst1_f16_x4(val[4]); + + simde_test_arm_neon_write_f16x4(2, val[4], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1_f32_x4 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -982,6 +1081,7 @@ test_simde_vst1_u64_x4 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f16_x4) /* list entry for the new float16 x4 store test */ SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f32_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_f64_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vst1_s8_x4) diff --git a/test/arm/neon/st1q_x2.c b/test/arm/neon/st1q_x2.c index d6228ece9..0b0694cca 100644 --- a/test/arm/neon/st1q_x2.c +++ b/test/arm/neon/st1q_x2.c @@ -1,4 +1,3 @@ - #include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1q_x2 #include "test-neon.h" @@ -6,6 +5,104 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst1q_f16_x2
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 val[2][8]; + simde_float16 r[16]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( - 20.956), SIMDE_FLOAT16_VALUE( - 58.964), SIMDE_FLOAT16_VALUE( - 74.395), SIMDE_FLOAT16_VALUE( - 58.069), + SIMDE_FLOAT16_VALUE( - 73.918), SIMDE_FLOAT16_VALUE( - 88.889), SIMDE_FLOAT16_VALUE( - 89.387), SIMDE_FLOAT16_VALUE( 26.229) }, + { SIMDE_FLOAT16_VALUE( 67.698), SIMDE_FLOAT16_VALUE( 18.105), SIMDE_FLOAT16_VALUE( 25.021), SIMDE_FLOAT16_VALUE( 34.669), + SIMDE_FLOAT16_VALUE( - 49.404), SIMDE_FLOAT16_VALUE( - 56.913), SIMDE_FLOAT16_VALUE( 84.184), SIMDE_FLOAT16_VALUE( 59.094) } }, + { SIMDE_FLOAT16_VALUE( - 20.956), SIMDE_FLOAT16_VALUE( - 58.964), SIMDE_FLOAT16_VALUE( - 74.395), SIMDE_FLOAT16_VALUE( - 58.069), + SIMDE_FLOAT16_VALUE( - 73.918), SIMDE_FLOAT16_VALUE( - 88.889), SIMDE_FLOAT16_VALUE( - 89.387), SIMDE_FLOAT16_VALUE( 26.229), + SIMDE_FLOAT16_VALUE( 67.698), SIMDE_FLOAT16_VALUE( 18.105), SIMDE_FLOAT16_VALUE( 25.021), SIMDE_FLOAT16_VALUE( 34.669), + SIMDE_FLOAT16_VALUE( - 49.404), SIMDE_FLOAT16_VALUE( - 56.913), SIMDE_FLOAT16_VALUE( 84.184), SIMDE_FLOAT16_VALUE( 59.094) } }, + { { { SIMDE_FLOAT16_VALUE( 2.598), SIMDE_FLOAT16_VALUE( - 70.000), SIMDE_FLOAT16_VALUE( 41.157), SIMDE_FLOAT16_VALUE( - 83.059), + SIMDE_FLOAT16_VALUE( - 90.002), SIMDE_FLOAT16_VALUE( 21.783), SIMDE_FLOAT16_VALUE( 70.489), SIMDE_FLOAT16_VALUE( 17.433) }, + { SIMDE_FLOAT16_VALUE( - 86.074), SIMDE_FLOAT16_VALUE( 41.189), SIMDE_FLOAT16_VALUE( 63.234), SIMDE_FLOAT16_VALUE( 12.204), + SIMDE_FLOAT16_VALUE( 7.910), SIMDE_FLOAT16_VALUE( - 15.107), SIMDE_FLOAT16_VALUE( - 16.136), SIMDE_FLOAT16_VALUE( 13.310) } }, + { SIMDE_FLOAT16_VALUE( 2.598), SIMDE_FLOAT16_VALUE( - 70.000), SIMDE_FLOAT16_VALUE( 41.157), SIMDE_FLOAT16_VALUE( - 83.059), + SIMDE_FLOAT16_VALUE( - 90.002), SIMDE_FLOAT16_VALUE( 21.783), SIMDE_FLOAT16_VALUE( 70.489), SIMDE_FLOAT16_VALUE( 17.433), + SIMDE_FLOAT16_VALUE( - 86.074), SIMDE_FLOAT16_VALUE( 41.189), SIMDE_FLOAT16_VALUE( 
63.234), SIMDE_FLOAT16_VALUE( 12.204), + SIMDE_FLOAT16_VALUE( 7.910), SIMDE_FLOAT16_VALUE( - 15.107), SIMDE_FLOAT16_VALUE( - 16.136), SIMDE_FLOAT16_VALUE( 13.310) } }, + { { { SIMDE_FLOAT16_VALUE( - 58.406), SIMDE_FLOAT16_VALUE( - 64.859), SIMDE_FLOAT16_VALUE( - 31.395), SIMDE_FLOAT16_VALUE( 12.432), + SIMDE_FLOAT16_VALUE( - 14.707), SIMDE_FLOAT16_VALUE( 84.786), SIMDE_FLOAT16_VALUE( - 39.961), SIMDE_FLOAT16_VALUE( 98.967) }, + { SIMDE_FLOAT16_VALUE( 28.677), SIMDE_FLOAT16_VALUE( 63.490), SIMDE_FLOAT16_VALUE( 74.828), SIMDE_FLOAT16_VALUE( 21.706), + SIMDE_FLOAT16_VALUE( 22.304), SIMDE_FLOAT16_VALUE( 71.547), SIMDE_FLOAT16_VALUE( 80.978), SIMDE_FLOAT16_VALUE( 16.394) } }, + { SIMDE_FLOAT16_VALUE( - 58.406), SIMDE_FLOAT16_VALUE( - 64.859), SIMDE_FLOAT16_VALUE( - 31.395), SIMDE_FLOAT16_VALUE( 12.432), + SIMDE_FLOAT16_VALUE( - 14.707), SIMDE_FLOAT16_VALUE( 84.786), SIMDE_FLOAT16_VALUE( - 39.961), SIMDE_FLOAT16_VALUE( 98.967), + SIMDE_FLOAT16_VALUE( 28.677), SIMDE_FLOAT16_VALUE( 63.490), SIMDE_FLOAT16_VALUE( 74.828), SIMDE_FLOAT16_VALUE( 21.706), + SIMDE_FLOAT16_VALUE( 22.304), SIMDE_FLOAT16_VALUE( 71.547), SIMDE_FLOAT16_VALUE( 80.978), SIMDE_FLOAT16_VALUE( 16.394) } }, + { { { SIMDE_FLOAT16_VALUE( 97.804), SIMDE_FLOAT16_VALUE( 73.823), SIMDE_FLOAT16_VALUE( - 8.059), SIMDE_FLOAT16_VALUE( 93.096), + SIMDE_FLOAT16_VALUE( 90.504), SIMDE_FLOAT16_VALUE( - 2.398), SIMDE_FLOAT16_VALUE( 30.759), SIMDE_FLOAT16_VALUE( - 94.855) }, + { SIMDE_FLOAT16_VALUE( - 5.801), SIMDE_FLOAT16_VALUE( - 68.562), SIMDE_FLOAT16_VALUE( - 54.678), SIMDE_FLOAT16_VALUE( 22.048), + SIMDE_FLOAT16_VALUE( - 71.055), SIMDE_FLOAT16_VALUE( - 30.977), SIMDE_FLOAT16_VALUE( 18.382), SIMDE_FLOAT16_VALUE( 11.761) } }, + { SIMDE_FLOAT16_VALUE( 97.804), SIMDE_FLOAT16_VALUE( 73.823), SIMDE_FLOAT16_VALUE( - 8.059), SIMDE_FLOAT16_VALUE( 93.096), + SIMDE_FLOAT16_VALUE( 90.504), SIMDE_FLOAT16_VALUE( - 2.398), SIMDE_FLOAT16_VALUE( 30.759), SIMDE_FLOAT16_VALUE( - 94.855), + SIMDE_FLOAT16_VALUE( - 5.801), 
SIMDE_FLOAT16_VALUE( - 68.562), SIMDE_FLOAT16_VALUE( - 54.678), SIMDE_FLOAT16_VALUE( 22.048), + SIMDE_FLOAT16_VALUE( - 71.055), SIMDE_FLOAT16_VALUE( - 30.977), SIMDE_FLOAT16_VALUE( 18.382), SIMDE_FLOAT16_VALUE( 11.761) } }, + { { { SIMDE_FLOAT16_VALUE( 55.035), SIMDE_FLOAT16_VALUE( 94.504), SIMDE_FLOAT16_VALUE( 21.867), SIMDE_FLOAT16_VALUE( 60.556), + SIMDE_FLOAT16_VALUE( - 21.374), SIMDE_FLOAT16_VALUE( 82.466), SIMDE_FLOAT16_VALUE( 84.824), SIMDE_FLOAT16_VALUE( 31.885) }, + { SIMDE_FLOAT16_VALUE( 17.599), SIMDE_FLOAT16_VALUE( - 18.858), SIMDE_FLOAT16_VALUE( - 97.376), SIMDE_FLOAT16_VALUE( - 85.414), + SIMDE_FLOAT16_VALUE( - 37.173), SIMDE_FLOAT16_VALUE( 36.083), SIMDE_FLOAT16_VALUE( 51.414), SIMDE_FLOAT16_VALUE( 42.445) } }, + { SIMDE_FLOAT16_VALUE( 55.035), SIMDE_FLOAT16_VALUE( 94.504), SIMDE_FLOAT16_VALUE( 21.867), SIMDE_FLOAT16_VALUE( 60.556), + SIMDE_FLOAT16_VALUE( - 21.374), SIMDE_FLOAT16_VALUE( 82.466), SIMDE_FLOAT16_VALUE( 84.824), SIMDE_FLOAT16_VALUE( 31.885), + SIMDE_FLOAT16_VALUE( 17.599), SIMDE_FLOAT16_VALUE( - 18.858), SIMDE_FLOAT16_VALUE( - 97.376), SIMDE_FLOAT16_VALUE( - 85.414), + SIMDE_FLOAT16_VALUE( - 37.173), SIMDE_FLOAT16_VALUE( 36.083), SIMDE_FLOAT16_VALUE( 51.414), SIMDE_FLOAT16_VALUE( 42.445) } }, + { { { SIMDE_FLOAT16_VALUE( - 58.353), SIMDE_FLOAT16_VALUE( 94.351), SIMDE_FLOAT16_VALUE( - 72.718), SIMDE_FLOAT16_VALUE( 44.738), + SIMDE_FLOAT16_VALUE( 88.331), SIMDE_FLOAT16_VALUE( 93.544), SIMDE_FLOAT16_VALUE( 79.940), SIMDE_FLOAT16_VALUE( - 29.844) }, + { SIMDE_FLOAT16_VALUE( - 64.679), SIMDE_FLOAT16_VALUE( - 31.234), SIMDE_FLOAT16_VALUE( 25.402), SIMDE_FLOAT16_VALUE( 61.429), + SIMDE_FLOAT16_VALUE( - 80.532), SIMDE_FLOAT16_VALUE( 19.659), SIMDE_FLOAT16_VALUE( - 67.325), SIMDE_FLOAT16_VALUE( 54.634) } }, + { SIMDE_FLOAT16_VALUE( - 58.353), SIMDE_FLOAT16_VALUE( 94.351), SIMDE_FLOAT16_VALUE( - 72.718), SIMDE_FLOAT16_VALUE( 44.738), + SIMDE_FLOAT16_VALUE( 88.331), SIMDE_FLOAT16_VALUE( 93.544), SIMDE_FLOAT16_VALUE( 79.940), SIMDE_FLOAT16_VALUE( - 
29.844), + SIMDE_FLOAT16_VALUE( - 64.679), SIMDE_FLOAT16_VALUE( - 31.234), SIMDE_FLOAT16_VALUE( 25.402), SIMDE_FLOAT16_VALUE( 61.429), + SIMDE_FLOAT16_VALUE( - 80.532), SIMDE_FLOAT16_VALUE( 19.659), SIMDE_FLOAT16_VALUE( - 67.325), SIMDE_FLOAT16_VALUE( 54.634) } }, + { { { SIMDE_FLOAT16_VALUE( - 99.773), SIMDE_FLOAT16_VALUE( 27.004), SIMDE_FLOAT16_VALUE( 53.708), SIMDE_FLOAT16_VALUE( 12.664), + SIMDE_FLOAT16_VALUE( 66.275), SIMDE_FLOAT16_VALUE( - 0.836), SIMDE_FLOAT16_VALUE( - 22.661), SIMDE_FLOAT16_VALUE( 72.602) }, + { SIMDE_FLOAT16_VALUE( 25.260), SIMDE_FLOAT16_VALUE( - 83.782), SIMDE_FLOAT16_VALUE( - 61.087), SIMDE_FLOAT16_VALUE( - 38.948), + SIMDE_FLOAT16_VALUE( - 78.380), SIMDE_FLOAT16_VALUE( 6.817), SIMDE_FLOAT16_VALUE( - 82.373), SIMDE_FLOAT16_VALUE( - 60.011) } }, + { SIMDE_FLOAT16_VALUE( - 99.773), SIMDE_FLOAT16_VALUE( 27.004), SIMDE_FLOAT16_VALUE( 53.708), SIMDE_FLOAT16_VALUE( 12.664), + SIMDE_FLOAT16_VALUE( 66.275), SIMDE_FLOAT16_VALUE( - 0.836), SIMDE_FLOAT16_VALUE( - 22.661), SIMDE_FLOAT16_VALUE( 72.602), + SIMDE_FLOAT16_VALUE( 25.260), SIMDE_FLOAT16_VALUE( - 83.782), SIMDE_FLOAT16_VALUE( - 61.087), SIMDE_FLOAT16_VALUE( - 38.948), + SIMDE_FLOAT16_VALUE( - 78.380), SIMDE_FLOAT16_VALUE( 6.817), SIMDE_FLOAT16_VALUE( - 82.373), SIMDE_FLOAT16_VALUE( - 60.011) } }, + { { { SIMDE_FLOAT16_VALUE( - 53.476), SIMDE_FLOAT16_VALUE( 8.724), SIMDE_FLOAT16_VALUE( - 88.674), SIMDE_FLOAT16_VALUE( - 48.309), + SIMDE_FLOAT16_VALUE( - 3.562), SIMDE_FLOAT16_VALUE( 14.309), SIMDE_FLOAT16_VALUE( - 99.801), SIMDE_FLOAT16_VALUE( - 11.703) }, + { SIMDE_FLOAT16_VALUE( - 62.913), SIMDE_FLOAT16_VALUE( - 43.056), SIMDE_FLOAT16_VALUE( - 65.766), SIMDE_FLOAT16_VALUE( 92.099), + SIMDE_FLOAT16_VALUE( 73.066), SIMDE_FLOAT16_VALUE( 29.509), SIMDE_FLOAT16_VALUE( - 77.995), SIMDE_FLOAT16_VALUE( - 81.816) } }, + { SIMDE_FLOAT16_VALUE( - 53.476), SIMDE_FLOAT16_VALUE( 8.724), SIMDE_FLOAT16_VALUE( - 88.674), SIMDE_FLOAT16_VALUE( - 48.309), + SIMDE_FLOAT16_VALUE( - 3.562), SIMDE_FLOAT16_VALUE( 
14.309), SIMDE_FLOAT16_VALUE( - 99.801), SIMDE_FLOAT16_VALUE( - 11.703), + SIMDE_FLOAT16_VALUE( - 62.913), SIMDE_FLOAT16_VALUE( - 43.056), SIMDE_FLOAT16_VALUE( - 65.766), SIMDE_FLOAT16_VALUE( 92.099), + SIMDE_FLOAT16_VALUE( 73.066), SIMDE_FLOAT16_VALUE( 29.509), SIMDE_FLOAT16_VALUE( - 77.995), SIMDE_FLOAT16_VALUE( - 81.816) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x2_t val = {{ + simde_vld1q_f16(test_vec[i].val[0]), + simde_vld1q_f16(test_vec[i].val[1]), + }}; + simde_float16 r_[16]; + simde_vst1q_f16_x2(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x2_t val[2] = simde_test_arm_neon_random_f16x2(-100.0f, 100.0f); + simde_float16x16_t r = simde_vst1q_f16_x2(val[2]); + + simde_test_arm_neon_write_f16x2(2, val[2], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1q_f32_x2 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -814,6 +911,7 @@ test_simde_vst1q_u64_x2 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f16_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f32_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f64_x2) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_s8_x2) diff --git a/test/arm/neon/st1q_x3.c b/test/arm/neon/st1q_x3.c index 81f7e1968..1c04f327e 100644 --- a/test/arm/neon/st1q_x3.c +++ b/test/arm/neon/st1q_x3.c @@ -1,11 +1,141 @@ - -#include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1q_x3 +#include "test/test.h" #include "test-neon.h" #include "../../../simde/arm/neon/st1q_x3.h" #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst1q_f16_x3 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 val[3][8]; + simde_float16 r[24]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( - 78.006), SIMDE_FLOAT16_VALUE( 
- 86.214), SIMDE_FLOAT16_VALUE( 79.768), SIMDE_FLOAT16_VALUE( - 58.640), + SIMDE_FLOAT16_VALUE( 95.290), SIMDE_FLOAT16_VALUE( - 58.694), SIMDE_FLOAT16_VALUE( - 66.494), SIMDE_FLOAT16_VALUE( 14.402) }, + { SIMDE_FLOAT16_VALUE( - 73.147), SIMDE_FLOAT16_VALUE( 86.011), SIMDE_FLOAT16_VALUE( - 57.821), SIMDE_FLOAT16_VALUE( 39.575), + SIMDE_FLOAT16_VALUE( - 71.723), SIMDE_FLOAT16_VALUE( - 56.950), SIMDE_FLOAT16_VALUE( - 64.715), SIMDE_FLOAT16_VALUE( 57.900) }, + { SIMDE_FLOAT16_VALUE( 61.685), SIMDE_FLOAT16_VALUE( - 81.983), SIMDE_FLOAT16_VALUE( 15.196), SIMDE_FLOAT16_VALUE( - 22.643), + SIMDE_FLOAT16_VALUE( 73.963), SIMDE_FLOAT16_VALUE( 69.945), SIMDE_FLOAT16_VALUE( - 18.252), SIMDE_FLOAT16_VALUE( 6.208) } }, + { SIMDE_FLOAT16_VALUE( - 78.006), SIMDE_FLOAT16_VALUE( - 86.214), SIMDE_FLOAT16_VALUE( 79.768), SIMDE_FLOAT16_VALUE( - 58.640), + SIMDE_FLOAT16_VALUE( 95.290), SIMDE_FLOAT16_VALUE( - 58.694), SIMDE_FLOAT16_VALUE( - 66.494), SIMDE_FLOAT16_VALUE( 14.402), + SIMDE_FLOAT16_VALUE( - 73.147), SIMDE_FLOAT16_VALUE( 86.011), SIMDE_FLOAT16_VALUE( - 57.821), SIMDE_FLOAT16_VALUE( 39.575), + SIMDE_FLOAT16_VALUE( - 71.723), SIMDE_FLOAT16_VALUE( - 56.950), SIMDE_FLOAT16_VALUE( - 64.715), SIMDE_FLOAT16_VALUE( 57.900), + SIMDE_FLOAT16_VALUE( 61.685), SIMDE_FLOAT16_VALUE( - 81.983), SIMDE_FLOAT16_VALUE( 15.196), SIMDE_FLOAT16_VALUE( - 22.643), + SIMDE_FLOAT16_VALUE( 73.963), SIMDE_FLOAT16_VALUE( 69.945), SIMDE_FLOAT16_VALUE( - 18.252), SIMDE_FLOAT16_VALUE( 6.208) } }, + { { { SIMDE_FLOAT16_VALUE( 75.124), SIMDE_FLOAT16_VALUE( 63.765), SIMDE_FLOAT16_VALUE( 93.445), SIMDE_FLOAT16_VALUE( - 2.808), + SIMDE_FLOAT16_VALUE( 21.150), SIMDE_FLOAT16_VALUE( - 27.804), SIMDE_FLOAT16_VALUE( 96.600), SIMDE_FLOAT16_VALUE( 49.484) }, + { SIMDE_FLOAT16_VALUE( - 21.286), SIMDE_FLOAT16_VALUE( 6.385), SIMDE_FLOAT16_VALUE( 58.966), SIMDE_FLOAT16_VALUE( - 25.964), + SIMDE_FLOAT16_VALUE( 81.303), SIMDE_FLOAT16_VALUE( 55.133), SIMDE_FLOAT16_VALUE( 59.277), SIMDE_FLOAT16_VALUE( 90.767) }, + { 
SIMDE_FLOAT16_VALUE( - 9.851), SIMDE_FLOAT16_VALUE( - 51.010), SIMDE_FLOAT16_VALUE( - 57.457), SIMDE_FLOAT16_VALUE( 76.293), + SIMDE_FLOAT16_VALUE( - 36.636), SIMDE_FLOAT16_VALUE( 71.898), SIMDE_FLOAT16_VALUE( 98.888), SIMDE_FLOAT16_VALUE( - 38.156) } }, + { SIMDE_FLOAT16_VALUE( 75.124), SIMDE_FLOAT16_VALUE( 63.765), SIMDE_FLOAT16_VALUE( 93.445), SIMDE_FLOAT16_VALUE( - 2.808), + SIMDE_FLOAT16_VALUE( 21.150), SIMDE_FLOAT16_VALUE( - 27.804), SIMDE_FLOAT16_VALUE( 96.600), SIMDE_FLOAT16_VALUE( 49.484), + SIMDE_FLOAT16_VALUE( - 21.286), SIMDE_FLOAT16_VALUE( 6.385), SIMDE_FLOAT16_VALUE( 58.966), SIMDE_FLOAT16_VALUE( - 25.964), + SIMDE_FLOAT16_VALUE( 81.303), SIMDE_FLOAT16_VALUE( 55.133), SIMDE_FLOAT16_VALUE( 59.277), SIMDE_FLOAT16_VALUE( 90.767), + SIMDE_FLOAT16_VALUE( - 9.851), SIMDE_FLOAT16_VALUE( - 51.010), SIMDE_FLOAT16_VALUE( - 57.457), SIMDE_FLOAT16_VALUE( 76.293), + SIMDE_FLOAT16_VALUE( - 36.636), SIMDE_FLOAT16_VALUE( 71.898), SIMDE_FLOAT16_VALUE( 98.888), SIMDE_FLOAT16_VALUE( - 38.156) } }, + { { { SIMDE_FLOAT16_VALUE( 26.535), SIMDE_FLOAT16_VALUE( - 91.027), SIMDE_FLOAT16_VALUE( - 79.934), SIMDE_FLOAT16_VALUE( - 46.283), + SIMDE_FLOAT16_VALUE( - 50.743), SIMDE_FLOAT16_VALUE( - 31.430), SIMDE_FLOAT16_VALUE( - 40.474), SIMDE_FLOAT16_VALUE( 52.962) }, + { SIMDE_FLOAT16_VALUE( 54.007), SIMDE_FLOAT16_VALUE( - 51.316), SIMDE_FLOAT16_VALUE( - 70.698), SIMDE_FLOAT16_VALUE( 59.510), + SIMDE_FLOAT16_VALUE( 60.522), SIMDE_FLOAT16_VALUE( - 83.540), SIMDE_FLOAT16_VALUE( 72.171), SIMDE_FLOAT16_VALUE( - 94.419) }, + { SIMDE_FLOAT16_VALUE( - 73.925), SIMDE_FLOAT16_VALUE( - 73.200), SIMDE_FLOAT16_VALUE( - 12.444), SIMDE_FLOAT16_VALUE( 72.493), + SIMDE_FLOAT16_VALUE( 27.042), SIMDE_FLOAT16_VALUE( 81.525), SIMDE_FLOAT16_VALUE( - 92.143), SIMDE_FLOAT16_VALUE( - 50.921) } }, + { SIMDE_FLOAT16_VALUE( 26.535), SIMDE_FLOAT16_VALUE( - 91.027), SIMDE_FLOAT16_VALUE( - 79.934), SIMDE_FLOAT16_VALUE( - 46.283), + SIMDE_FLOAT16_VALUE( - 50.743), SIMDE_FLOAT16_VALUE( - 31.430), 
SIMDE_FLOAT16_VALUE( - 40.474), SIMDE_FLOAT16_VALUE( 52.962), + SIMDE_FLOAT16_VALUE( 54.007), SIMDE_FLOAT16_VALUE( - 51.316), SIMDE_FLOAT16_VALUE( - 70.698), SIMDE_FLOAT16_VALUE( 59.510), + SIMDE_FLOAT16_VALUE( 60.522), SIMDE_FLOAT16_VALUE( - 83.540), SIMDE_FLOAT16_VALUE( 72.171), SIMDE_FLOAT16_VALUE( - 94.419), + SIMDE_FLOAT16_VALUE( - 73.925), SIMDE_FLOAT16_VALUE( - 73.200), SIMDE_FLOAT16_VALUE( - 12.444), SIMDE_FLOAT16_VALUE( 72.493), + SIMDE_FLOAT16_VALUE( 27.042), SIMDE_FLOAT16_VALUE( 81.525), SIMDE_FLOAT16_VALUE( - 92.143), SIMDE_FLOAT16_VALUE( - 50.921) } }, + { { { SIMDE_FLOAT16_VALUE( - 45.682), SIMDE_FLOAT16_VALUE( 52.798), SIMDE_FLOAT16_VALUE( - 80.990), SIMDE_FLOAT16_VALUE( - 61.788), + SIMDE_FLOAT16_VALUE( 54.221), SIMDE_FLOAT16_VALUE( 99.819), SIMDE_FLOAT16_VALUE( - 53.166), SIMDE_FLOAT16_VALUE( 62.212) }, + { SIMDE_FLOAT16_VALUE( - 26.140), SIMDE_FLOAT16_VALUE( - 44.258), SIMDE_FLOAT16_VALUE( - 63.797), SIMDE_FLOAT16_VALUE( 21.345), + SIMDE_FLOAT16_VALUE( 43.284), SIMDE_FLOAT16_VALUE( 49.659), SIMDE_FLOAT16_VALUE( 83.024), SIMDE_FLOAT16_VALUE( - 93.541) }, + { SIMDE_FLOAT16_VALUE( - 41.983), SIMDE_FLOAT16_VALUE( 72.431), SIMDE_FLOAT16_VALUE( - 72.624), SIMDE_FLOAT16_VALUE( - 68.362), + SIMDE_FLOAT16_VALUE( - 3.439), SIMDE_FLOAT16_VALUE( 57.763), SIMDE_FLOAT16_VALUE( 53.732), SIMDE_FLOAT16_VALUE( 23.477) } }, + { SIMDE_FLOAT16_VALUE( - 45.682), SIMDE_FLOAT16_VALUE( 52.798), SIMDE_FLOAT16_VALUE( - 80.990), SIMDE_FLOAT16_VALUE( - 61.788), + SIMDE_FLOAT16_VALUE( 54.221), SIMDE_FLOAT16_VALUE( 99.819), SIMDE_FLOAT16_VALUE( - 53.166), SIMDE_FLOAT16_VALUE( 62.212), + SIMDE_FLOAT16_VALUE( - 26.140), SIMDE_FLOAT16_VALUE( - 44.258), SIMDE_FLOAT16_VALUE( - 63.797), SIMDE_FLOAT16_VALUE( 21.345), + SIMDE_FLOAT16_VALUE( 43.284), SIMDE_FLOAT16_VALUE( 49.659), SIMDE_FLOAT16_VALUE( 83.024), SIMDE_FLOAT16_VALUE( - 93.541), + SIMDE_FLOAT16_VALUE( - 41.983), SIMDE_FLOAT16_VALUE( 72.431), SIMDE_FLOAT16_VALUE( - 72.624), SIMDE_FLOAT16_VALUE( - 68.362), + 
SIMDE_FLOAT16_VALUE( - 3.439), SIMDE_FLOAT16_VALUE( 57.763), SIMDE_FLOAT16_VALUE( 53.732), SIMDE_FLOAT16_VALUE( 23.477) } }, + { { { SIMDE_FLOAT16_VALUE( - 82.965), SIMDE_FLOAT16_VALUE( - 70.111), SIMDE_FLOAT16_VALUE( - 31.957), SIMDE_FLOAT16_VALUE( 61.018), + SIMDE_FLOAT16_VALUE( - 27.870), SIMDE_FLOAT16_VALUE( 71.621), SIMDE_FLOAT16_VALUE( - 77.867), SIMDE_FLOAT16_VALUE( - 30.557) }, + { SIMDE_FLOAT16_VALUE( 58.953), SIMDE_FLOAT16_VALUE( - 20.614), SIMDE_FLOAT16_VALUE( - 60.571), SIMDE_FLOAT16_VALUE( 63.967), + SIMDE_FLOAT16_VALUE( - 12.960), SIMDE_FLOAT16_VALUE( - 61.378), SIMDE_FLOAT16_VALUE( - 20.681), SIMDE_FLOAT16_VALUE( 73.740) }, + { SIMDE_FLOAT16_VALUE( 77.996), SIMDE_FLOAT16_VALUE( 4.822), SIMDE_FLOAT16_VALUE( - 39.992), SIMDE_FLOAT16_VALUE( 50.289), + SIMDE_FLOAT16_VALUE( - 37.043), SIMDE_FLOAT16_VALUE( - 89.635), SIMDE_FLOAT16_VALUE( 33.091), SIMDE_FLOAT16_VALUE( - 39.668) } }, + { SIMDE_FLOAT16_VALUE( - 82.965), SIMDE_FLOAT16_VALUE( - 70.111), SIMDE_FLOAT16_VALUE( - 31.957), SIMDE_FLOAT16_VALUE( 61.018), + SIMDE_FLOAT16_VALUE( - 27.870), SIMDE_FLOAT16_VALUE( 71.621), SIMDE_FLOAT16_VALUE( - 77.867), SIMDE_FLOAT16_VALUE( - 30.557), + SIMDE_FLOAT16_VALUE( 58.953), SIMDE_FLOAT16_VALUE( - 20.614), SIMDE_FLOAT16_VALUE( - 60.571), SIMDE_FLOAT16_VALUE( 63.967), + SIMDE_FLOAT16_VALUE( - 12.960), SIMDE_FLOAT16_VALUE( - 61.378), SIMDE_FLOAT16_VALUE( - 20.681), SIMDE_FLOAT16_VALUE( 73.740), + SIMDE_FLOAT16_VALUE( 77.996), SIMDE_FLOAT16_VALUE( 4.822), SIMDE_FLOAT16_VALUE( - 39.992), SIMDE_FLOAT16_VALUE( 50.289), + SIMDE_FLOAT16_VALUE( - 37.043), SIMDE_FLOAT16_VALUE( - 89.635), SIMDE_FLOAT16_VALUE( 33.091), SIMDE_FLOAT16_VALUE( - 39.668) } }, + { { { SIMDE_FLOAT16_VALUE( 3.917), SIMDE_FLOAT16_VALUE( 67.525), SIMDE_FLOAT16_VALUE( - 40.144), SIMDE_FLOAT16_VALUE( 98.412), + SIMDE_FLOAT16_VALUE( - 5.628), SIMDE_FLOAT16_VALUE( - 91.834), SIMDE_FLOAT16_VALUE( 66.132), SIMDE_FLOAT16_VALUE( - 44.698) }, + { SIMDE_FLOAT16_VALUE( 3.461), SIMDE_FLOAT16_VALUE( - 1.903), 
SIMDE_FLOAT16_VALUE( - 7.486), SIMDE_FLOAT16_VALUE( - 5.095), + SIMDE_FLOAT16_VALUE( - 4.045), SIMDE_FLOAT16_VALUE( 76.155), SIMDE_FLOAT16_VALUE( 45.573), SIMDE_FLOAT16_VALUE( - 17.845) }, + { SIMDE_FLOAT16_VALUE( - 69.007), SIMDE_FLOAT16_VALUE( - 91.001), SIMDE_FLOAT16_VALUE( 47.281), SIMDE_FLOAT16_VALUE( - 6.930), + SIMDE_FLOAT16_VALUE( - 31.826), SIMDE_FLOAT16_VALUE( - 9.605), SIMDE_FLOAT16_VALUE( - 8.507), SIMDE_FLOAT16_VALUE( - 69.550) } }, + { SIMDE_FLOAT16_VALUE( 3.917), SIMDE_FLOAT16_VALUE( 67.525), SIMDE_FLOAT16_VALUE( - 40.144), SIMDE_FLOAT16_VALUE( 98.412), + SIMDE_FLOAT16_VALUE( - 5.628), SIMDE_FLOAT16_VALUE( - 91.834), SIMDE_FLOAT16_VALUE( 66.132), SIMDE_FLOAT16_VALUE( - 44.698), + SIMDE_FLOAT16_VALUE( 3.461), SIMDE_FLOAT16_VALUE( - 1.903), SIMDE_FLOAT16_VALUE( - 7.486), SIMDE_FLOAT16_VALUE( - 5.095), + SIMDE_FLOAT16_VALUE( - 4.045), SIMDE_FLOAT16_VALUE( 76.155), SIMDE_FLOAT16_VALUE( 45.573), SIMDE_FLOAT16_VALUE( - 17.845), + SIMDE_FLOAT16_VALUE( - 69.007), SIMDE_FLOAT16_VALUE( - 91.001), SIMDE_FLOAT16_VALUE( 47.281), SIMDE_FLOAT16_VALUE( - 6.930), + SIMDE_FLOAT16_VALUE( - 31.826), SIMDE_FLOAT16_VALUE( - 9.605), SIMDE_FLOAT16_VALUE( - 8.507), SIMDE_FLOAT16_VALUE( - 69.550) } }, + { { { SIMDE_FLOAT16_VALUE( - 83.740), SIMDE_FLOAT16_VALUE( - 6.331), SIMDE_FLOAT16_VALUE( - 89.045), SIMDE_FLOAT16_VALUE( - 94.435), + SIMDE_FLOAT16_VALUE( - 65.111), SIMDE_FLOAT16_VALUE( - 70.456), SIMDE_FLOAT16_VALUE( 11.289), SIMDE_FLOAT16_VALUE( 7.177) }, + { SIMDE_FLOAT16_VALUE( - 90.849), SIMDE_FLOAT16_VALUE( - 95.953), SIMDE_FLOAT16_VALUE( - 42.247), SIMDE_FLOAT16_VALUE( 14.309), + SIMDE_FLOAT16_VALUE( - 95.825), SIMDE_FLOAT16_VALUE( - 22.270), SIMDE_FLOAT16_VALUE( 83.373), SIMDE_FLOAT16_VALUE( - 79.802) }, + { SIMDE_FLOAT16_VALUE( - 22.035), SIMDE_FLOAT16_VALUE( - 92.921), SIMDE_FLOAT16_VALUE( 43.906), SIMDE_FLOAT16_VALUE( 26.805), + SIMDE_FLOAT16_VALUE( 51.738), SIMDE_FLOAT16_VALUE( - 42.852), SIMDE_FLOAT16_VALUE( 5.828), SIMDE_FLOAT16_VALUE( 44.079) } }, + { 
SIMDE_FLOAT16_VALUE( - 83.740), SIMDE_FLOAT16_VALUE( - 6.331), SIMDE_FLOAT16_VALUE( - 89.045), SIMDE_FLOAT16_VALUE( - 94.435), + SIMDE_FLOAT16_VALUE( - 65.111), SIMDE_FLOAT16_VALUE( - 70.456), SIMDE_FLOAT16_VALUE( 11.289), SIMDE_FLOAT16_VALUE( 7.177), + SIMDE_FLOAT16_VALUE( - 90.849), SIMDE_FLOAT16_VALUE( - 95.953), SIMDE_FLOAT16_VALUE( - 42.247), SIMDE_FLOAT16_VALUE( 14.309), + SIMDE_FLOAT16_VALUE( - 95.825), SIMDE_FLOAT16_VALUE( - 22.270), SIMDE_FLOAT16_VALUE( 83.373), SIMDE_FLOAT16_VALUE( - 79.802), + SIMDE_FLOAT16_VALUE( - 22.035), SIMDE_FLOAT16_VALUE( - 92.921), SIMDE_FLOAT16_VALUE( 43.906), SIMDE_FLOAT16_VALUE( 26.805), + SIMDE_FLOAT16_VALUE( 51.738), SIMDE_FLOAT16_VALUE( - 42.852), SIMDE_FLOAT16_VALUE( 5.828), SIMDE_FLOAT16_VALUE( 44.079) } }, + { { { SIMDE_FLOAT16_VALUE( - 16.629), SIMDE_FLOAT16_VALUE( 51.044), SIMDE_FLOAT16_VALUE( 19.393), SIMDE_FLOAT16_VALUE( 55.131), + SIMDE_FLOAT16_VALUE( - 43.901), SIMDE_FLOAT16_VALUE( 70.204), SIMDE_FLOAT16_VALUE( 97.396), SIMDE_FLOAT16_VALUE( 81.774) }, + { SIMDE_FLOAT16_VALUE( - 65.652), SIMDE_FLOAT16_VALUE( 84.250), SIMDE_FLOAT16_VALUE( - 73.434), SIMDE_FLOAT16_VALUE( 83.066), + SIMDE_FLOAT16_VALUE( 98.140), SIMDE_FLOAT16_VALUE( 10.456), SIMDE_FLOAT16_VALUE( - 74.473), SIMDE_FLOAT16_VALUE( 36.574) }, + { SIMDE_FLOAT16_VALUE( - 30.153), SIMDE_FLOAT16_VALUE( 52.521), SIMDE_FLOAT16_VALUE( - 21.877), SIMDE_FLOAT16_VALUE( 99.308), + SIMDE_FLOAT16_VALUE( - 78.712), SIMDE_FLOAT16_VALUE( - 45.652), SIMDE_FLOAT16_VALUE( - 23.870), SIMDE_FLOAT16_VALUE( 33.651) } }, + { SIMDE_FLOAT16_VALUE( - 16.629), SIMDE_FLOAT16_VALUE( 51.044), SIMDE_FLOAT16_VALUE( 19.393), SIMDE_FLOAT16_VALUE( 55.131), + SIMDE_FLOAT16_VALUE( - 43.901), SIMDE_FLOAT16_VALUE( 70.204), SIMDE_FLOAT16_VALUE( 97.396), SIMDE_FLOAT16_VALUE( 81.774), + SIMDE_FLOAT16_VALUE( - 65.652), SIMDE_FLOAT16_VALUE( 84.250), SIMDE_FLOAT16_VALUE( - 73.434), SIMDE_FLOAT16_VALUE( 83.066), + SIMDE_FLOAT16_VALUE( 98.140), SIMDE_FLOAT16_VALUE( 10.456), SIMDE_FLOAT16_VALUE( - 
74.473), SIMDE_FLOAT16_VALUE( 36.574), + SIMDE_FLOAT16_VALUE( - 30.153), SIMDE_FLOAT16_VALUE( 52.521), SIMDE_FLOAT16_VALUE( - 21.877), SIMDE_FLOAT16_VALUE( 99.308), + SIMDE_FLOAT16_VALUE( - 78.712), SIMDE_FLOAT16_VALUE( - 45.652), SIMDE_FLOAT16_VALUE( - 23.870), SIMDE_FLOAT16_VALUE( 33.651) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x3_t val = {{ + simde_vld1q_f16(test_vec[i].val[0]), + simde_vld1q_f16(test_vec[i].val[1]), + simde_vld1q_f16(test_vec[i].val[2]), + }}; + simde_float16 r_[24]; + simde_vst1q_f16_x3(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x3_t val[3] = simde_test_arm_neon_random_f16x3(-100.0f, 100.0f); + simde_float16x24_t r = simde_vst1q_f16_x3(val[3]); + + simde_test_arm_neon_write_f16x3(2, val[3], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x24(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1q_f32_x3 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1026,6 +1156,7 @@ test_simde_vst1q_u64_x3 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f16_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f32_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f64_x3) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_s8_x3) diff --git a/test/arm/neon/st1q_x4.c b/test/arm/neon/st1q_x4.c index c0e180c76..3cc890423 100644 --- a/test/arm/neon/st1q_x4.c +++ b/test/arm/neon/st1q_x4.c @@ -1,11 +1,174 @@ - -#include "test/test.h" #define SIMDE_TEST_ARM_NEON_INSN st1q_x4 +#include "test/test.h" #include "test-neon.h" #include "../../../simde/arm/neon/st1q_x4.h" #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst1q_f16_x4 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 val[4][8]; + simde_float16 r[32]; + } test_vec[] = { + { { { SIMDE_FLOAT16_VALUE( - 22.750), 
SIMDE_FLOAT16_VALUE( 61.096), SIMDE_FLOAT16_VALUE( 69.465), SIMDE_FLOAT16_VALUE( 61.036), + SIMDE_FLOAT16_VALUE( 97.235), SIMDE_FLOAT16_VALUE( 49.615), SIMDE_FLOAT16_VALUE( - 97.607), SIMDE_FLOAT16_VALUE( - 78.562) }, + { SIMDE_FLOAT16_VALUE( - 60.626), SIMDE_FLOAT16_VALUE( - 21.392), SIMDE_FLOAT16_VALUE( 32.623), SIMDE_FLOAT16_VALUE( - 22.547), + SIMDE_FLOAT16_VALUE( 99.897), SIMDE_FLOAT16_VALUE( 65.091), SIMDE_FLOAT16_VALUE( - 5.726), SIMDE_FLOAT16_VALUE( 95.620) }, + { SIMDE_FLOAT16_VALUE( - 23.061), SIMDE_FLOAT16_VALUE( - 37.099), SIMDE_FLOAT16_VALUE( - 36.446), SIMDE_FLOAT16_VALUE( - 62.836), + SIMDE_FLOAT16_VALUE( 11.655), SIMDE_FLOAT16_VALUE( - 63.088), SIMDE_FLOAT16_VALUE( 11.587), SIMDE_FLOAT16_VALUE( 29.929) }, + { SIMDE_FLOAT16_VALUE( - 71.852), SIMDE_FLOAT16_VALUE( 31.326), SIMDE_FLOAT16_VALUE( - 83.956), SIMDE_FLOAT16_VALUE( - 18.453), + SIMDE_FLOAT16_VALUE( 97.022), SIMDE_FLOAT16_VALUE( - 67.287), SIMDE_FLOAT16_VALUE( 14.241), SIMDE_FLOAT16_VALUE( 27.963) } }, + { SIMDE_FLOAT16_VALUE( - 22.750), SIMDE_FLOAT16_VALUE( 61.096), SIMDE_FLOAT16_VALUE( 69.465), SIMDE_FLOAT16_VALUE( 61.036), + SIMDE_FLOAT16_VALUE( 97.235), SIMDE_FLOAT16_VALUE( 49.615), SIMDE_FLOAT16_VALUE( - 97.607), SIMDE_FLOAT16_VALUE( - 78.562), + SIMDE_FLOAT16_VALUE( - 60.626), SIMDE_FLOAT16_VALUE( - 21.392), SIMDE_FLOAT16_VALUE( 32.623), SIMDE_FLOAT16_VALUE( - 22.547), + SIMDE_FLOAT16_VALUE( 99.897), SIMDE_FLOAT16_VALUE( 65.091), SIMDE_FLOAT16_VALUE( - 5.726), SIMDE_FLOAT16_VALUE( 95.620), + SIMDE_FLOAT16_VALUE( - 23.061), SIMDE_FLOAT16_VALUE( - 37.099), SIMDE_FLOAT16_VALUE( - 36.446), SIMDE_FLOAT16_VALUE( - 62.836), + SIMDE_FLOAT16_VALUE( 11.655), SIMDE_FLOAT16_VALUE( - 63.088), SIMDE_FLOAT16_VALUE( 11.587), SIMDE_FLOAT16_VALUE( 29.929), + SIMDE_FLOAT16_VALUE( - 71.852), SIMDE_FLOAT16_VALUE( 31.326), SIMDE_FLOAT16_VALUE( - 83.956), SIMDE_FLOAT16_VALUE( - 18.453), + SIMDE_FLOAT16_VALUE( 97.022), SIMDE_FLOAT16_VALUE( - 67.287), SIMDE_FLOAT16_VALUE( 14.241), SIMDE_FLOAT16_VALUE( 27.963) } 
}, + { { { SIMDE_FLOAT16_VALUE( 29.494), SIMDE_FLOAT16_VALUE( - 56.304), SIMDE_FLOAT16_VALUE( 67.254), SIMDE_FLOAT16_VALUE( - 54.309), + SIMDE_FLOAT16_VALUE( 34.087), SIMDE_FLOAT16_VALUE( 62.655), SIMDE_FLOAT16_VALUE( 4.600), SIMDE_FLOAT16_VALUE( 82.017) }, + { SIMDE_FLOAT16_VALUE( 78.796), SIMDE_FLOAT16_VALUE( 85.632), SIMDE_FLOAT16_VALUE( 35.246), SIMDE_FLOAT16_VALUE( - 37.467), + SIMDE_FLOAT16_VALUE( - 62.638), SIMDE_FLOAT16_VALUE( 42.445), SIMDE_FLOAT16_VALUE( 81.230), SIMDE_FLOAT16_VALUE( - 93.667) }, + { SIMDE_FLOAT16_VALUE( - 90.629), SIMDE_FLOAT16_VALUE( 12.454), SIMDE_FLOAT16_VALUE( - 62.277), SIMDE_FLOAT16_VALUE( 44.996), + SIMDE_FLOAT16_VALUE( - 70.130), SIMDE_FLOAT16_VALUE( 62.518), SIMDE_FLOAT16_VALUE( - 77.188), SIMDE_FLOAT16_VALUE( 84.320) }, + { SIMDE_FLOAT16_VALUE( - 71.327), SIMDE_FLOAT16_VALUE( 15.545), SIMDE_FLOAT16_VALUE( - 81.861), SIMDE_FLOAT16_VALUE( - 5.166), + SIMDE_FLOAT16_VALUE( 77.928), SIMDE_FLOAT16_VALUE( 24.877), SIMDE_FLOAT16_VALUE( 90.124), SIMDE_FLOAT16_VALUE( 77.249) } }, + { SIMDE_FLOAT16_VALUE( 29.494), SIMDE_FLOAT16_VALUE( - 56.304), SIMDE_FLOAT16_VALUE( 67.254), SIMDE_FLOAT16_VALUE( - 54.309), + SIMDE_FLOAT16_VALUE( 34.087), SIMDE_FLOAT16_VALUE( 62.655), SIMDE_FLOAT16_VALUE( 4.600), SIMDE_FLOAT16_VALUE( 82.017), + SIMDE_FLOAT16_VALUE( 78.796), SIMDE_FLOAT16_VALUE( 85.632), SIMDE_FLOAT16_VALUE( 35.246), SIMDE_FLOAT16_VALUE( - 37.467), + SIMDE_FLOAT16_VALUE( - 62.638), SIMDE_FLOAT16_VALUE( 42.445), SIMDE_FLOAT16_VALUE( 81.230), SIMDE_FLOAT16_VALUE( - 93.667), + SIMDE_FLOAT16_VALUE( - 90.629), SIMDE_FLOAT16_VALUE( 12.454), SIMDE_FLOAT16_VALUE( - 62.277), SIMDE_FLOAT16_VALUE( 44.996), + SIMDE_FLOAT16_VALUE( - 70.130), SIMDE_FLOAT16_VALUE( 62.518), SIMDE_FLOAT16_VALUE( - 77.188), SIMDE_FLOAT16_VALUE( 84.320), + SIMDE_FLOAT16_VALUE( - 71.327), SIMDE_FLOAT16_VALUE( 15.545), SIMDE_FLOAT16_VALUE( - 81.861), SIMDE_FLOAT16_VALUE( - 5.166), + SIMDE_FLOAT16_VALUE( 77.928), SIMDE_FLOAT16_VALUE( 24.877), SIMDE_FLOAT16_VALUE( 90.124), 
SIMDE_FLOAT16_VALUE( 77.249) } }, + { { { SIMDE_FLOAT16_VALUE( 68.706), SIMDE_FLOAT16_VALUE( 43.754), SIMDE_FLOAT16_VALUE( 33.900), SIMDE_FLOAT16_VALUE( - 54.873), + SIMDE_FLOAT16_VALUE( 32.555), SIMDE_FLOAT16_VALUE( - 92.535), SIMDE_FLOAT16_VALUE( 86.201), SIMDE_FLOAT16_VALUE( - 78.143) }, + { SIMDE_FLOAT16_VALUE( 89.252), SIMDE_FLOAT16_VALUE( 0.337), SIMDE_FLOAT16_VALUE( - 2.470), SIMDE_FLOAT16_VALUE( 87.685), + SIMDE_FLOAT16_VALUE( 1.848), SIMDE_FLOAT16_VALUE( - 82.342), SIMDE_FLOAT16_VALUE( 39.805), SIMDE_FLOAT16_VALUE( - 50.227) }, + { SIMDE_FLOAT16_VALUE( 68.212), SIMDE_FLOAT16_VALUE( - 71.435), SIMDE_FLOAT16_VALUE( - 92.714), SIMDE_FLOAT16_VALUE( - 1.382), + SIMDE_FLOAT16_VALUE( - 97.050), SIMDE_FLOAT16_VALUE( 5.653), SIMDE_FLOAT16_VALUE( 73.429), SIMDE_FLOAT16_VALUE( - 41.560) }, + { SIMDE_FLOAT16_VALUE( - 89.561), SIMDE_FLOAT16_VALUE( - 97.387), SIMDE_FLOAT16_VALUE( - 37.457), SIMDE_FLOAT16_VALUE( 97.624), + SIMDE_FLOAT16_VALUE( 54.484), SIMDE_FLOAT16_VALUE( - 60.017), SIMDE_FLOAT16_VALUE( 37.871), SIMDE_FLOAT16_VALUE( - 10.636) } }, + { SIMDE_FLOAT16_VALUE( 68.706), SIMDE_FLOAT16_VALUE( 43.754), SIMDE_FLOAT16_VALUE( 33.900), SIMDE_FLOAT16_VALUE( - 54.873), + SIMDE_FLOAT16_VALUE( 32.555), SIMDE_FLOAT16_VALUE( - 92.535), SIMDE_FLOAT16_VALUE( 86.201), SIMDE_FLOAT16_VALUE( - 78.143), + SIMDE_FLOAT16_VALUE( 89.252), SIMDE_FLOAT16_VALUE( 0.337), SIMDE_FLOAT16_VALUE( - 2.470), SIMDE_FLOAT16_VALUE( 87.685), + SIMDE_FLOAT16_VALUE( 1.848), SIMDE_FLOAT16_VALUE( - 82.342), SIMDE_FLOAT16_VALUE( 39.805), SIMDE_FLOAT16_VALUE( - 50.227), + SIMDE_FLOAT16_VALUE( 68.212), SIMDE_FLOAT16_VALUE( - 71.435), SIMDE_FLOAT16_VALUE( - 92.714), SIMDE_FLOAT16_VALUE( - 1.382), + SIMDE_FLOAT16_VALUE( - 97.050), SIMDE_FLOAT16_VALUE( 5.653), SIMDE_FLOAT16_VALUE( 73.429), SIMDE_FLOAT16_VALUE( - 41.560), + SIMDE_FLOAT16_VALUE( - 89.561), SIMDE_FLOAT16_VALUE( - 97.387), SIMDE_FLOAT16_VALUE( - 37.457), SIMDE_FLOAT16_VALUE( 97.624), + SIMDE_FLOAT16_VALUE( 54.484), SIMDE_FLOAT16_VALUE( - 
60.017), SIMDE_FLOAT16_VALUE( 37.871), SIMDE_FLOAT16_VALUE( - 10.636) } }, + { { { SIMDE_FLOAT16_VALUE( - 16.854), SIMDE_FLOAT16_VALUE( - 55.538), SIMDE_FLOAT16_VALUE( 51.151), SIMDE_FLOAT16_VALUE( 14.679), + SIMDE_FLOAT16_VALUE( 66.667), SIMDE_FLOAT16_VALUE( - 91.282), SIMDE_FLOAT16_VALUE( - 61.208), SIMDE_FLOAT16_VALUE( - 94.220) }, + { SIMDE_FLOAT16_VALUE( 49.161), SIMDE_FLOAT16_VALUE( - 46.703), SIMDE_FLOAT16_VALUE( 2.994), SIMDE_FLOAT16_VALUE( 49.628), + SIMDE_FLOAT16_VALUE( - 12.308), SIMDE_FLOAT16_VALUE( - 25.366), SIMDE_FLOAT16_VALUE( 65.516), SIMDE_FLOAT16_VALUE( 69.138) }, + { SIMDE_FLOAT16_VALUE( 97.529), SIMDE_FLOAT16_VALUE( 3.577), SIMDE_FLOAT16_VALUE( - 60.264), SIMDE_FLOAT16_VALUE( 14.990), + SIMDE_FLOAT16_VALUE( - 75.640), SIMDE_FLOAT16_VALUE( 40.417), SIMDE_FLOAT16_VALUE( 85.723), SIMDE_FLOAT16_VALUE( - 53.491) }, + { SIMDE_FLOAT16_VALUE( 98.333), SIMDE_FLOAT16_VALUE( - 14.058), SIMDE_FLOAT16_VALUE( - 92.582), SIMDE_FLOAT16_VALUE( 76.277), + SIMDE_FLOAT16_VALUE( 28.806), SIMDE_FLOAT16_VALUE( 9.371), SIMDE_FLOAT16_VALUE( - 61.503), SIMDE_FLOAT16_VALUE( 26.682) } }, + { SIMDE_FLOAT16_VALUE( - 16.854), SIMDE_FLOAT16_VALUE( - 55.538), SIMDE_FLOAT16_VALUE( 51.151), SIMDE_FLOAT16_VALUE( 14.679), + SIMDE_FLOAT16_VALUE( 66.667), SIMDE_FLOAT16_VALUE( - 91.282), SIMDE_FLOAT16_VALUE( - 61.208), SIMDE_FLOAT16_VALUE( - 94.220), + SIMDE_FLOAT16_VALUE( 49.161), SIMDE_FLOAT16_VALUE( - 46.703), SIMDE_FLOAT16_VALUE( 2.994), SIMDE_FLOAT16_VALUE( 49.628), + SIMDE_FLOAT16_VALUE( - 12.308), SIMDE_FLOAT16_VALUE( - 25.366), SIMDE_FLOAT16_VALUE( 65.516), SIMDE_FLOAT16_VALUE( 69.138), + SIMDE_FLOAT16_VALUE( 97.529), SIMDE_FLOAT16_VALUE( 3.577), SIMDE_FLOAT16_VALUE( - 60.264), SIMDE_FLOAT16_VALUE( 14.990), + SIMDE_FLOAT16_VALUE( - 75.640), SIMDE_FLOAT16_VALUE( 40.417), SIMDE_FLOAT16_VALUE( 85.723), SIMDE_FLOAT16_VALUE( - 53.491), + SIMDE_FLOAT16_VALUE( 98.333), SIMDE_FLOAT16_VALUE( - 14.058), SIMDE_FLOAT16_VALUE( - 92.582), SIMDE_FLOAT16_VALUE( 76.277), + 
SIMDE_FLOAT16_VALUE( 28.806), SIMDE_FLOAT16_VALUE( 9.371), SIMDE_FLOAT16_VALUE( - 61.503), SIMDE_FLOAT16_VALUE( 26.682) } }, + { { { SIMDE_FLOAT16_VALUE( - 64.940), SIMDE_FLOAT16_VALUE( 10.855), SIMDE_FLOAT16_VALUE( 13.588), SIMDE_FLOAT16_VALUE( - 42.166), + SIMDE_FLOAT16_VALUE( 37.677), SIMDE_FLOAT16_VALUE( 54.544), SIMDE_FLOAT16_VALUE( - 41.338), SIMDE_FLOAT16_VALUE( 54.156) }, + { SIMDE_FLOAT16_VALUE( - 96.818), SIMDE_FLOAT16_VALUE( 38.055), SIMDE_FLOAT16_VALUE( - 67.285), SIMDE_FLOAT16_VALUE( 34.219), + SIMDE_FLOAT16_VALUE( 31.020), SIMDE_FLOAT16_VALUE( 7.521), SIMDE_FLOAT16_VALUE( - 27.190), SIMDE_FLOAT16_VALUE( - 93.042) }, + { SIMDE_FLOAT16_VALUE( - 35.381), SIMDE_FLOAT16_VALUE( 50.805), SIMDE_FLOAT16_VALUE( - 16.495), SIMDE_FLOAT16_VALUE( - 19.288), + SIMDE_FLOAT16_VALUE( 36.437), SIMDE_FLOAT16_VALUE( - 75.663), SIMDE_FLOAT16_VALUE( - 62.035), SIMDE_FLOAT16_VALUE( 94.778) }, + { SIMDE_FLOAT16_VALUE( - 77.789), SIMDE_FLOAT16_VALUE( 86.230), SIMDE_FLOAT16_VALUE( 21.458), SIMDE_FLOAT16_VALUE( - 76.976), + SIMDE_FLOAT16_VALUE( 59.077), SIMDE_FLOAT16_VALUE( - 13.310), SIMDE_FLOAT16_VALUE( - 38.537), SIMDE_FLOAT16_VALUE( 85.016) } }, + { SIMDE_FLOAT16_VALUE( - 64.940), SIMDE_FLOAT16_VALUE( 10.855), SIMDE_FLOAT16_VALUE( 13.588), SIMDE_FLOAT16_VALUE( - 42.166), + SIMDE_FLOAT16_VALUE( 37.677), SIMDE_FLOAT16_VALUE( 54.544), SIMDE_FLOAT16_VALUE( - 41.338), SIMDE_FLOAT16_VALUE( 54.156), + SIMDE_FLOAT16_VALUE( - 96.818), SIMDE_FLOAT16_VALUE( 38.055), SIMDE_FLOAT16_VALUE( - 67.285), SIMDE_FLOAT16_VALUE( 34.219), + SIMDE_FLOAT16_VALUE( 31.020), SIMDE_FLOAT16_VALUE( 7.521), SIMDE_FLOAT16_VALUE( - 27.190), SIMDE_FLOAT16_VALUE( - 93.042), + SIMDE_FLOAT16_VALUE( - 35.381), SIMDE_FLOAT16_VALUE( 50.805), SIMDE_FLOAT16_VALUE( - 16.495), SIMDE_FLOAT16_VALUE( - 19.288), + SIMDE_FLOAT16_VALUE( 36.437), SIMDE_FLOAT16_VALUE( - 75.663), SIMDE_FLOAT16_VALUE( - 62.035), SIMDE_FLOAT16_VALUE( 94.778), + SIMDE_FLOAT16_VALUE( - 77.789), SIMDE_FLOAT16_VALUE( 86.230), SIMDE_FLOAT16_VALUE( 
21.458), SIMDE_FLOAT16_VALUE( - 76.976), + SIMDE_FLOAT16_VALUE( 59.077), SIMDE_FLOAT16_VALUE( - 13.310), SIMDE_FLOAT16_VALUE( - 38.537), SIMDE_FLOAT16_VALUE( 85.016) } }, + { { { SIMDE_FLOAT16_VALUE( - 16.344), SIMDE_FLOAT16_VALUE( 61.588), SIMDE_FLOAT16_VALUE( 49.037), SIMDE_FLOAT16_VALUE( 45.929), + SIMDE_FLOAT16_VALUE( 40.688), SIMDE_FLOAT16_VALUE( - 27.802), SIMDE_FLOAT16_VALUE( 27.455), SIMDE_FLOAT16_VALUE( 93.667) }, + { SIMDE_FLOAT16_VALUE( - 65.739), SIMDE_FLOAT16_VALUE( 98.667), SIMDE_FLOAT16_VALUE( - 77.640), SIMDE_FLOAT16_VALUE( 19.827), + SIMDE_FLOAT16_VALUE( 11.358), SIMDE_FLOAT16_VALUE( - 21.520), SIMDE_FLOAT16_VALUE( - 71.888), SIMDE_FLOAT16_VALUE( 29.715) }, + { SIMDE_FLOAT16_VALUE( 3.421), SIMDE_FLOAT16_VALUE( - 85.829), SIMDE_FLOAT16_VALUE( 42.144), SIMDE_FLOAT16_VALUE( 52.252), + SIMDE_FLOAT16_VALUE( - 34.443), SIMDE_FLOAT16_VALUE( - 81.291), SIMDE_FLOAT16_VALUE( - 35.565), SIMDE_FLOAT16_VALUE( - 95.472) }, + { SIMDE_FLOAT16_VALUE( - 51.527), SIMDE_FLOAT16_VALUE( - 14.435), SIMDE_FLOAT16_VALUE( 81.290), SIMDE_FLOAT16_VALUE( 98.495), + SIMDE_FLOAT16_VALUE( 41.063), SIMDE_FLOAT16_VALUE( - 8.915), SIMDE_FLOAT16_VALUE( - 77.131), SIMDE_FLOAT16_VALUE( - 97.806) } }, + { SIMDE_FLOAT16_VALUE( - 16.344), SIMDE_FLOAT16_VALUE( 61.588), SIMDE_FLOAT16_VALUE( 49.037), SIMDE_FLOAT16_VALUE( 45.929), + SIMDE_FLOAT16_VALUE( 40.688), SIMDE_FLOAT16_VALUE( - 27.802), SIMDE_FLOAT16_VALUE( 27.455), SIMDE_FLOAT16_VALUE( 93.667), + SIMDE_FLOAT16_VALUE( - 65.739), SIMDE_FLOAT16_VALUE( 98.667), SIMDE_FLOAT16_VALUE( - 77.640), SIMDE_FLOAT16_VALUE( 19.827), + SIMDE_FLOAT16_VALUE( 11.358), SIMDE_FLOAT16_VALUE( - 21.520), SIMDE_FLOAT16_VALUE( - 71.888), SIMDE_FLOAT16_VALUE( 29.715), + SIMDE_FLOAT16_VALUE( 3.421), SIMDE_FLOAT16_VALUE( - 85.829), SIMDE_FLOAT16_VALUE( 42.144), SIMDE_FLOAT16_VALUE( 52.252), + SIMDE_FLOAT16_VALUE( - 34.443), SIMDE_FLOAT16_VALUE( - 81.291), SIMDE_FLOAT16_VALUE( - 35.565), SIMDE_FLOAT16_VALUE( - 95.472), + SIMDE_FLOAT16_VALUE( - 51.527), 
SIMDE_FLOAT16_VALUE( - 14.435), SIMDE_FLOAT16_VALUE( 81.290), SIMDE_FLOAT16_VALUE( 98.495), + SIMDE_FLOAT16_VALUE( 41.063), SIMDE_FLOAT16_VALUE( - 8.915), SIMDE_FLOAT16_VALUE( - 77.131), SIMDE_FLOAT16_VALUE( - 97.806) } }, + { { { SIMDE_FLOAT16_VALUE( - 38.560), SIMDE_FLOAT16_VALUE( - 8.666), SIMDE_FLOAT16_VALUE( - 83.413), SIMDE_FLOAT16_VALUE( 36.428), + SIMDE_FLOAT16_VALUE( 95.115), SIMDE_FLOAT16_VALUE( - 54.473), SIMDE_FLOAT16_VALUE( - 54.310), SIMDE_FLOAT16_VALUE( 9.862) }, + { SIMDE_FLOAT16_VALUE( - 43.918), SIMDE_FLOAT16_VALUE( 65.519), SIMDE_FLOAT16_VALUE( 33.255), SIMDE_FLOAT16_VALUE( - 76.838), + SIMDE_FLOAT16_VALUE( 32.376), SIMDE_FLOAT16_VALUE( 88.754), SIMDE_FLOAT16_VALUE( - 99.571), SIMDE_FLOAT16_VALUE( 97.359) }, + { SIMDE_FLOAT16_VALUE( 0.759), SIMDE_FLOAT16_VALUE( 63.569), SIMDE_FLOAT16_VALUE( - 74.335), SIMDE_FLOAT16_VALUE( - 84.602), + SIMDE_FLOAT16_VALUE( 72.495), SIMDE_FLOAT16_VALUE( - 60.829), SIMDE_FLOAT16_VALUE( - 29.063), SIMDE_FLOAT16_VALUE( - 66.901) }, + { SIMDE_FLOAT16_VALUE( - 68.887), SIMDE_FLOAT16_VALUE( - 41.060), SIMDE_FLOAT16_VALUE( 36.083), SIMDE_FLOAT16_VALUE( - 79.810), + SIMDE_FLOAT16_VALUE( 51.686), SIMDE_FLOAT16_VALUE( - 67.010), SIMDE_FLOAT16_VALUE( 29.773), SIMDE_FLOAT16_VALUE( 23.022) } }, + { SIMDE_FLOAT16_VALUE( - 38.560), SIMDE_FLOAT16_VALUE( - 8.666), SIMDE_FLOAT16_VALUE( - 83.413), SIMDE_FLOAT16_VALUE( 36.428), + SIMDE_FLOAT16_VALUE( 95.115), SIMDE_FLOAT16_VALUE( - 54.473), SIMDE_FLOAT16_VALUE( - 54.310), SIMDE_FLOAT16_VALUE( 9.862), + SIMDE_FLOAT16_VALUE( - 43.918), SIMDE_FLOAT16_VALUE( 65.519), SIMDE_FLOAT16_VALUE( 33.255), SIMDE_FLOAT16_VALUE( - 76.838), + SIMDE_FLOAT16_VALUE( 32.376), SIMDE_FLOAT16_VALUE( 88.754), SIMDE_FLOAT16_VALUE( - 99.571), SIMDE_FLOAT16_VALUE( 97.359), + SIMDE_FLOAT16_VALUE( 0.759), SIMDE_FLOAT16_VALUE( 63.569), SIMDE_FLOAT16_VALUE( - 74.335), SIMDE_FLOAT16_VALUE( - 84.602), + SIMDE_FLOAT16_VALUE( 72.495), SIMDE_FLOAT16_VALUE( - 60.829), SIMDE_FLOAT16_VALUE( - 29.063), SIMDE_FLOAT16_VALUE( - 
66.901), + SIMDE_FLOAT16_VALUE( - 68.887), SIMDE_FLOAT16_VALUE( - 41.060), SIMDE_FLOAT16_VALUE( 36.083), SIMDE_FLOAT16_VALUE( - 79.810), + SIMDE_FLOAT16_VALUE( 51.686), SIMDE_FLOAT16_VALUE( - 67.010), SIMDE_FLOAT16_VALUE( 29.773), SIMDE_FLOAT16_VALUE( 23.022) } }, + { { { SIMDE_FLOAT16_VALUE( - 99.229), SIMDE_FLOAT16_VALUE( 33.337), SIMDE_FLOAT16_VALUE( - 34.145), SIMDE_FLOAT16_VALUE( - 18.231), + SIMDE_FLOAT16_VALUE( - 6.066), SIMDE_FLOAT16_VALUE( 62.125), SIMDE_FLOAT16_VALUE( - 30.190), SIMDE_FLOAT16_VALUE( 93.279) }, + { SIMDE_FLOAT16_VALUE( - 4.053), SIMDE_FLOAT16_VALUE( - 16.454), SIMDE_FLOAT16_VALUE( 43.954), SIMDE_FLOAT16_VALUE( - 0.170), + SIMDE_FLOAT16_VALUE( 20.261), SIMDE_FLOAT16_VALUE( 4.405), SIMDE_FLOAT16_VALUE( 67.841), SIMDE_FLOAT16_VALUE( - 94.517) }, + { SIMDE_FLOAT16_VALUE( - 12.337), SIMDE_FLOAT16_VALUE( 41.336), SIMDE_FLOAT16_VALUE( - 44.534), SIMDE_FLOAT16_VALUE( - 99.256), + SIMDE_FLOAT16_VALUE( 33.547), SIMDE_FLOAT16_VALUE( - 71.183), SIMDE_FLOAT16_VALUE( - 76.108), SIMDE_FLOAT16_VALUE( - 9.130) }, + { SIMDE_FLOAT16_VALUE( 36.006), SIMDE_FLOAT16_VALUE( 42.039), SIMDE_FLOAT16_VALUE( 31.550), SIMDE_FLOAT16_VALUE( 33.704), + SIMDE_FLOAT16_VALUE( - 41.349), SIMDE_FLOAT16_VALUE( - 86.501), SIMDE_FLOAT16_VALUE( 55.249), SIMDE_FLOAT16_VALUE( 80.615) } }, + { SIMDE_FLOAT16_VALUE( - 99.229), SIMDE_FLOAT16_VALUE( 33.337), SIMDE_FLOAT16_VALUE( - 34.145), SIMDE_FLOAT16_VALUE( - 18.231), + SIMDE_FLOAT16_VALUE( - 6.066), SIMDE_FLOAT16_VALUE( 62.125), SIMDE_FLOAT16_VALUE( - 30.190), SIMDE_FLOAT16_VALUE( 93.279), + SIMDE_FLOAT16_VALUE( - 4.053), SIMDE_FLOAT16_VALUE( - 16.454), SIMDE_FLOAT16_VALUE( 43.954), SIMDE_FLOAT16_VALUE( - 0.170), + SIMDE_FLOAT16_VALUE( 20.261), SIMDE_FLOAT16_VALUE( 4.405), SIMDE_FLOAT16_VALUE( 67.841), SIMDE_FLOAT16_VALUE( - 94.517), + SIMDE_FLOAT16_VALUE( - 12.337), SIMDE_FLOAT16_VALUE( 41.336), SIMDE_FLOAT16_VALUE( - 44.534), SIMDE_FLOAT16_VALUE( - 99.256), + SIMDE_FLOAT16_VALUE( 33.547), SIMDE_FLOAT16_VALUE( - 71.183), 
SIMDE_FLOAT16_VALUE( - 76.108), SIMDE_FLOAT16_VALUE( - 9.130), + SIMDE_FLOAT16_VALUE( 36.006), SIMDE_FLOAT16_VALUE( 42.039), SIMDE_FLOAT16_VALUE( 31.550), SIMDE_FLOAT16_VALUE( 33.704), + SIMDE_FLOAT16_VALUE( - 41.349), SIMDE_FLOAT16_VALUE( - 86.501), SIMDE_FLOAT16_VALUE( 55.249), SIMDE_FLOAT16_VALUE( 80.615) } }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x4_t val = {{ + simde_vld1q_f16(test_vec[i].val[0]), + simde_vld1q_f16(test_vec[i].val[1]), + simde_vld1q_f16(test_vec[i].val[2]), + simde_vld1q_f16(test_vec[i].val[3]), + }}; + simde_float16 r_[32]; + simde_vst1q_f16_x4(r_, val); + simde_assert_equal_i(0, simde_memcmp(r_, test_vec[i].r, sizeof(test_vec[i].r))); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t val[4] = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x32_t r = simde_vst1q_f16_x4(val[4]); + + simde_test_arm_neon_write_f16x4(2, val[4], SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x32(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst1q_f32_x4 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1198,6 +1361,7 @@ test_simde_vst1q_u64_x4 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f16_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f32_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_f64_x4) SIMDE_TEST_FUNC_LIST_ENTRY(vst1q_s8_x4) diff --git a/test/arm/neon/st2_lane.c b/test/arm/neon/st2_lane.c index 4409d3abb..6be2d73a5 100644 --- a/test/arm/neon/st2_lane.c +++ b/test/arm/neon/st2_lane.c @@ -427,6 +427,60 @@ test_simde_vst2_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst2_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Test for simde_vst2_lane_f16: for each vector, stores one lane from each of two 4-element f16 vectors into a 2-element buffer and compares it with the expected pair. Returns 0 after all vectors have been checked. */ + struct { + simde_float16_t a[2]; /* expected stored pair; in every vector below a[k] equals val[k][lane] */ + simde_float16_t val[2][4]; /* the two input vectors, four elements each */ + int lane; /* lane index handed to the store */ + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 96.699), SIMDE_FLOAT16_VALUE( 96.081) }, + { { SIMDE_FLOAT16_VALUE( - 86.059),
SIMDE_FLOAT16_VALUE( 10.093), SIMDE_FLOAT16_VALUE( - 96.699), SIMDE_FLOAT16_VALUE( 92.682) }, + { SIMDE_FLOAT16_VALUE( 94.074), SIMDE_FLOAT16_VALUE( - 25.535), SIMDE_FLOAT16_VALUE( 96.081), SIMDE_FLOAT16_VALUE( 79.156) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 24.214), SIMDE_FLOAT16_VALUE( 72.416) }, + { { SIMDE_FLOAT16_VALUE( 56.403), SIMDE_FLOAT16_VALUE( - 41.539), SIMDE_FLOAT16_VALUE( 24.214), SIMDE_FLOAT16_VALUE( 0.721) }, + { SIMDE_FLOAT16_VALUE( 11.102), SIMDE_FLOAT16_VALUE( 82.747), SIMDE_FLOAT16_VALUE( 72.416), SIMDE_FLOAT16_VALUE( 25.862) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( - 41.215), SIMDE_FLOAT16_VALUE( - 10.097) }, + { { SIMDE_FLOAT16_VALUE( 8.693), SIMDE_FLOAT16_VALUE( - 45.955), SIMDE_FLOAT16_VALUE( - 41.215), SIMDE_FLOAT16_VALUE( 47.533) }, + { SIMDE_FLOAT16_VALUE( 26.757), SIMDE_FLOAT16_VALUE( - 10.266), SIMDE_FLOAT16_VALUE( - 10.097), SIMDE_FLOAT16_VALUE( - 73.201) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( - 14.688), SIMDE_FLOAT16_VALUE( 26.052) }, + { { SIMDE_FLOAT16_VALUE( - 13.883), SIMDE_FLOAT16_VALUE( 25.378), SIMDE_FLOAT16_VALUE( 82.439), SIMDE_FLOAT16_VALUE( - 14.688) }, + { SIMDE_FLOAT16_VALUE( 75.364), SIMDE_FLOAT16_VALUE( - 98.234), SIMDE_FLOAT16_VALUE( 21.632), SIMDE_FLOAT16_VALUE( 26.052) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( - 56.396), SIMDE_FLOAT16_VALUE( - 13.528) }, + { { SIMDE_FLOAT16_VALUE( 53.363), SIMDE_FLOAT16_VALUE( 54.407), SIMDE_FLOAT16_VALUE( - 56.396), SIMDE_FLOAT16_VALUE( - 23.778) }, + { SIMDE_FLOAT16_VALUE( 46.186), SIMDE_FLOAT16_VALUE( - 25.159), SIMDE_FLOAT16_VALUE( - 13.528), SIMDE_FLOAT16_VALUE( - 47.334) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( - 68.946), SIMDE_FLOAT16_VALUE( - 82.937) }, + { { SIMDE_FLOAT16_VALUE( 30.751), SIMDE_FLOAT16_VALUE( 12.829), SIMDE_FLOAT16_VALUE( - 68.946), SIMDE_FLOAT16_VALUE( 10.480) }, + { SIMDE_FLOAT16_VALUE( 58.238), SIMDE_FLOAT16_VALUE( 3.858), SIMDE_FLOAT16_VALUE( - 82.937), SIMDE_FLOAT16_VALUE( - 2.434) } }, + INT8_C( 2) }, + { {
SIMDE_FLOAT16_VALUE( 4.407), SIMDE_FLOAT16_VALUE( - 66.972) }, + { { SIMDE_FLOAT16_VALUE( - 94.165), SIMDE_FLOAT16_VALUE( 4.407), SIMDE_FLOAT16_VALUE( - 78.247), SIMDE_FLOAT16_VALUE( 4.932) }, + { SIMDE_FLOAT16_VALUE( 57.296), SIMDE_FLOAT16_VALUE( - 66.972), SIMDE_FLOAT16_VALUE( - 76.884), SIMDE_FLOAT16_VALUE( - 1.070) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 49.253), SIMDE_FLOAT16_VALUE( - 68.363) }, + { { SIMDE_FLOAT16_VALUE( - 42.453), SIMDE_FLOAT16_VALUE( 76.770), SIMDE_FLOAT16_VALUE( - 49.253), SIMDE_FLOAT16_VALUE( 42.268) }, + { SIMDE_FLOAT16_VALUE( - 25.787), SIMDE_FLOAT16_VALUE( - 28.102), SIMDE_FLOAT16_VALUE( - 68.363), SIMDE_FLOAT16_VALUE( - 67.189) } }, + INT8_C( 2) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* test_simde_vst2_lane_f16 driver: one iteration per test vector */ + simde_float16x4x2_t val = { + {simde_vld1_f16(test_vec[i].val[0]), simde_vld1_f16(test_vec[i].val[1])}}; /* load both input rows into a two-vector struct */ + simde_float16_t a[2]; /* output buffer filled by the store below */ + SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst2_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); /* NOTE(review): the macro presumably turns the runtime lane (0..3) into a constant argument and uses HEDLEY_UNREACHABLE() for any other value - confirm against its definition */ + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); /* NOTE(review): the trailing 1 is presumably the comparison precision - confirm */ + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + } + + return 0; +} + static int test_simde_vst2_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1067,6 +1121,76 @@ test_simde_vst2q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst2q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[2]; + simde_float16_t val[2][8]; + int lane; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 90.393), SIMDE_FLOAT16_VALUE( 63.104) }, + { { SIMDE_FLOAT16_VALUE( - 38.257), SIMDE_FLOAT16_VALUE( - 31.638), SIMDE_FLOAT16_VALUE( - 97.258), SIMDE_FLOAT16_VALUE( - 70.618), + SIMDE_FLOAT16_VALUE( - 11.335), SIMDE_FLOAT16_VALUE( - 8.280), SIMDE_FLOAT16_VALUE( 23.083), SIMDE_FLOAT16_VALUE( - 90.393) }, + { SIMDE_FLOAT16_VALUE( 74.360), SIMDE_FLOAT16_VALUE( 51.715), SIMDE_FLOAT16_VALUE( - 79.402), SIMDE_FLOAT16_VALUE( 71.628), + SIMDE_FLOAT16_VALUE( 50.051), SIMDE_FLOAT16_VALUE( - 51.089), SIMDE_FLOAT16_VALUE( -
33.431), SIMDE_FLOAT16_VALUE( 63.104) } }, + INT8_C( 7) }, + { { SIMDE_FLOAT16_VALUE( 64.040), SIMDE_FLOAT16_VALUE( - 67.280) }, + { { SIMDE_FLOAT16_VALUE( 41.175), SIMDE_FLOAT16_VALUE( 72.644), SIMDE_FLOAT16_VALUE( 64.040), SIMDE_FLOAT16_VALUE( - 54.404), + SIMDE_FLOAT16_VALUE( 97.772), SIMDE_FLOAT16_VALUE( 58.590), SIMDE_FLOAT16_VALUE( - 28.472), SIMDE_FLOAT16_VALUE( - 67.625) }, + { SIMDE_FLOAT16_VALUE( - 80.611), SIMDE_FLOAT16_VALUE( 99.705), SIMDE_FLOAT16_VALUE( - 67.280), SIMDE_FLOAT16_VALUE( 5.574), + SIMDE_FLOAT16_VALUE( - 20.029), SIMDE_FLOAT16_VALUE( - 35.226), SIMDE_FLOAT16_VALUE( - 1.819), SIMDE_FLOAT16_VALUE( - 62.990) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 23.024), SIMDE_FLOAT16_VALUE( - 77.316) }, + { { SIMDE_FLOAT16_VALUE( 94.862), SIMDE_FLOAT16_VALUE( - 53.113), SIMDE_FLOAT16_VALUE( 76.880), SIMDE_FLOAT16_VALUE( - 24.592), + SIMDE_FLOAT16_VALUE( 23.024), SIMDE_FLOAT16_VALUE( - 86.319), SIMDE_FLOAT16_VALUE( - 49.922), SIMDE_FLOAT16_VALUE( 74.371) }, + { SIMDE_FLOAT16_VALUE( - 35.182), SIMDE_FLOAT16_VALUE( 91.375), SIMDE_FLOAT16_VALUE( - 30.442), SIMDE_FLOAT16_VALUE( 62.763), + SIMDE_FLOAT16_VALUE( - 77.316), SIMDE_FLOAT16_VALUE( - 98.725), SIMDE_FLOAT16_VALUE( 77.730), SIMDE_FLOAT16_VALUE( - 6.752) } }, + INT8_C( 4) }, + { { SIMDE_FLOAT16_VALUE( - 89.098), SIMDE_FLOAT16_VALUE( - 47.386) }, + { { SIMDE_FLOAT16_VALUE( - 37.183), SIMDE_FLOAT16_VALUE( - 99.478), SIMDE_FLOAT16_VALUE( - 72.315), SIMDE_FLOAT16_VALUE( 60.865), + SIMDE_FLOAT16_VALUE( 90.711), SIMDE_FLOAT16_VALUE( 33.609), SIMDE_FLOAT16_VALUE( 22.229), SIMDE_FLOAT16_VALUE( - 89.098) }, + { SIMDE_FLOAT16_VALUE( 18.834), SIMDE_FLOAT16_VALUE( - 38.890), SIMDE_FLOAT16_VALUE( 1.151), SIMDE_FLOAT16_VALUE( - 50.932), + SIMDE_FLOAT16_VALUE( 16.869), SIMDE_FLOAT16_VALUE( 72.770), SIMDE_FLOAT16_VALUE( 43.215), SIMDE_FLOAT16_VALUE( - 47.386) } }, + INT8_C( 7) }, + { { SIMDE_FLOAT16_VALUE( - 74.604), SIMDE_FLOAT16_VALUE( 83.519) }, + { { SIMDE_FLOAT16_VALUE( - 87.245), SIMDE_FLOAT16_VALUE( -
66.241), SIMDE_FLOAT16_VALUE( - 49.725), SIMDE_FLOAT16_VALUE( - 74.604), + SIMDE_FLOAT16_VALUE( 52.094), SIMDE_FLOAT16_VALUE( - 95.107), SIMDE_FLOAT16_VALUE( 95.169), SIMDE_FLOAT16_VALUE( 80.442) }, + { SIMDE_FLOAT16_VALUE( 4.494), SIMDE_FLOAT16_VALUE( 93.210), SIMDE_FLOAT16_VALUE( 18.867), SIMDE_FLOAT16_VALUE( 83.519), + SIMDE_FLOAT16_VALUE( - 54.371), SIMDE_FLOAT16_VALUE( - 96.968), SIMDE_FLOAT16_VALUE( 31.350), SIMDE_FLOAT16_VALUE( 84.978) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( 11.757), SIMDE_FLOAT16_VALUE( - 92.199) }, + { { SIMDE_FLOAT16_VALUE( 11.757), SIMDE_FLOAT16_VALUE( 28.988), SIMDE_FLOAT16_VALUE( - 67.095), SIMDE_FLOAT16_VALUE( - 58.193), + SIMDE_FLOAT16_VALUE( 45.038), SIMDE_FLOAT16_VALUE( - 42.819), SIMDE_FLOAT16_VALUE( - 88.284), SIMDE_FLOAT16_VALUE( 53.437) }, + { SIMDE_FLOAT16_VALUE( - 92.199), SIMDE_FLOAT16_VALUE( 12.165), SIMDE_FLOAT16_VALUE( 3.097), SIMDE_FLOAT16_VALUE( 69.414), + SIMDE_FLOAT16_VALUE( - 22.656), SIMDE_FLOAT16_VALUE( 5.711), SIMDE_FLOAT16_VALUE( - 39.886), SIMDE_FLOAT16_VALUE( 37.784) } }, + INT8_C( 0) }, + { { SIMDE_FLOAT16_VALUE( 19.626), SIMDE_FLOAT16_VALUE( 24.723) }, + { { SIMDE_FLOAT16_VALUE( - 4.744), SIMDE_FLOAT16_VALUE( 15.391), SIMDE_FLOAT16_VALUE( 19.626), SIMDE_FLOAT16_VALUE( 64.213), + SIMDE_FLOAT16_VALUE( 92.541), SIMDE_FLOAT16_VALUE( 24.957), SIMDE_FLOAT16_VALUE( 42.707), SIMDE_FLOAT16_VALUE( - 42.789) }, + { SIMDE_FLOAT16_VALUE( - 35.790), SIMDE_FLOAT16_VALUE( 98.414), SIMDE_FLOAT16_VALUE( 24.723), SIMDE_FLOAT16_VALUE( - 2.908), + SIMDE_FLOAT16_VALUE( 8.083), SIMDE_FLOAT16_VALUE( - 46.904), SIMDE_FLOAT16_VALUE( - 37.831), SIMDE_FLOAT16_VALUE( - 59.362) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 64.222), SIMDE_FLOAT16_VALUE( - 33.836) }, + { { SIMDE_FLOAT16_VALUE( - 31.643), SIMDE_FLOAT16_VALUE( - 36.586), SIMDE_FLOAT16_VALUE( 64.222), SIMDE_FLOAT16_VALUE( - 73.531), + SIMDE_FLOAT16_VALUE( - 64.852), SIMDE_FLOAT16_VALUE( 22.974), SIMDE_FLOAT16_VALUE( - 27.821), SIMDE_FLOAT16_VALUE( - 62.560) }, + {
SIMDE_FLOAT16_VALUE( - 78.721), SIMDE_FLOAT16_VALUE( 35.577), SIMDE_FLOAT16_VALUE( - 33.836), SIMDE_FLOAT16_VALUE( 54.532), + SIMDE_FLOAT16_VALUE( - 39.138), SIMDE_FLOAT16_VALUE( - 98.477), SIMDE_FLOAT16_VALUE( - 49.646), SIMDE_FLOAT16_VALUE( - 72.783) } }, + INT8_C( 2) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { /* test_simde_vst2q_lane_f16 driver: for each vector, load the two 8-element rows, store lane test_vec[i].lane of each into a[], and compare with the expected pair; in the table, a[k] equals val[k][lane] */ + simde_float16x8x2_t val = { + {simde_vld1q_f16(test_vec[i].val[0]), simde_vld1q_f16(test_vec[i].val[1])}}; + simde_float16_t a[2]; /* output buffer filled by the store below */ + SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst2q_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); /* NOTE(review): the macro presumably turns the runtime lane (0..7) into a constant argument and uses HEDLEY_UNREACHABLE() for any other value - confirm against its definition */ + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); /* NOTE(review): the trailing 1 is presumably the comparison precision - confirm */ + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + } + + return 0; +} + static int test_simde_vst2q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1169,6 +1293,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst2_lane_f64) @@ -1180,6 +1305,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst2q_lane_f64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ diff --git a/test/arm/neon/st3.c b/test/arm/neon/st3.c index 616bf5ac5..e54bf69ec 100644 --- a/test/arm/neon/st3.c +++ b/test/arm/neon/st3.c @@ -17,6 +17,100 @@ */ #if !defined(SIMDE_BUG_INTEL_857088) + +static int +test_simde_vst3_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 r0[4]; + simde_float16 r1[4]; + simde_float16 r2[4]; + simde_float16 a[12]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 96.588), SIMDE_FLOAT16_VALUE( - 1.777), SIMDE_FLOAT16_VALUE( 46.463), SIMDE_FLOAT16_VALUE( 56.300) }, + {
SIMDE_FLOAT16_VALUE( 56.067), SIMDE_FLOAT16_VALUE( 76.113), SIMDE_FLOAT16_VALUE( - 80.190), SIMDE_FLOAT16_VALUE( - 59.487) }, + { SIMDE_FLOAT16_VALUE( - 85.440), SIMDE_FLOAT16_VALUE( 11.955), SIMDE_FLOAT16_VALUE( 31.790), SIMDE_FLOAT16_VALUE( - 31.309) }, + { SIMDE_FLOAT16_VALUE( 96.588), SIMDE_FLOAT16_VALUE( 56.067), SIMDE_FLOAT16_VALUE( - 85.440), SIMDE_FLOAT16_VALUE( - 1.777), + SIMDE_FLOAT16_VALUE( 76.113), SIMDE_FLOAT16_VALUE( 11.955), SIMDE_FLOAT16_VALUE( 46.463), SIMDE_FLOAT16_VALUE( - 80.190), + SIMDE_FLOAT16_VALUE( 31.790), SIMDE_FLOAT16_VALUE( 56.300), SIMDE_FLOAT16_VALUE( - 59.487), SIMDE_FLOAT16_VALUE( - 31.309) } }, + { { SIMDE_FLOAT16_VALUE( - 63.610), SIMDE_FLOAT16_VALUE( 8.056), SIMDE_FLOAT16_VALUE( - 12.504), SIMDE_FLOAT16_VALUE( 76.738) }, + { SIMDE_FLOAT16_VALUE( - 48.058), SIMDE_FLOAT16_VALUE( - 5.714), SIMDE_FLOAT16_VALUE( - 3.043), SIMDE_FLOAT16_VALUE( 60.984) }, + { SIMDE_FLOAT16_VALUE( - 90.940), SIMDE_FLOAT16_VALUE( 77.403), SIMDE_FLOAT16_VALUE( - 57.046), SIMDE_FLOAT16_VALUE( 37.511) }, + { SIMDE_FLOAT16_VALUE( - 63.610), SIMDE_FLOAT16_VALUE( - 48.058), SIMDE_FLOAT16_VALUE( - 90.940), SIMDE_FLOAT16_VALUE( 8.056), + SIMDE_FLOAT16_VALUE( - 5.714), SIMDE_FLOAT16_VALUE( 77.403), SIMDE_FLOAT16_VALUE( - 12.504), SIMDE_FLOAT16_VALUE( - 3.043), + SIMDE_FLOAT16_VALUE( - 57.046), SIMDE_FLOAT16_VALUE( 76.738), SIMDE_FLOAT16_VALUE( 60.984), SIMDE_FLOAT16_VALUE( 37.511) } }, + { { SIMDE_FLOAT16_VALUE( 93.849), SIMDE_FLOAT16_VALUE( 43.255), SIMDE_FLOAT16_VALUE( 95.638), SIMDE_FLOAT16_VALUE( - 54.607) }, + { SIMDE_FLOAT16_VALUE( 99.776), SIMDE_FLOAT16_VALUE( 59.886), SIMDE_FLOAT16_VALUE( 53.777), SIMDE_FLOAT16_VALUE( - 64.378) }, + { SIMDE_FLOAT16_VALUE( - 44.354), SIMDE_FLOAT16_VALUE( 90.343), SIMDE_FLOAT16_VALUE( - 22.346), SIMDE_FLOAT16_VALUE( - 70.759) }, + { SIMDE_FLOAT16_VALUE( 93.849), SIMDE_FLOAT16_VALUE( 99.776), SIMDE_FLOAT16_VALUE( - 44.354), SIMDE_FLOAT16_VALUE( 43.255), + SIMDE_FLOAT16_VALUE( 59.886), SIMDE_FLOAT16_VALUE( 90.343),
SIMDE_FLOAT16_VALUE( 95.638), SIMDE_FLOAT16_VALUE( 53.777), + SIMDE_FLOAT16_VALUE( - 22.346), SIMDE_FLOAT16_VALUE( - 54.607), SIMDE_FLOAT16_VALUE( - 64.378), SIMDE_FLOAT16_VALUE( - 70.759) } }, + { { SIMDE_FLOAT16_VALUE( 82.593), SIMDE_FLOAT16_VALUE( - 78.757), SIMDE_FLOAT16_VALUE( - 89.472), SIMDE_FLOAT16_VALUE( - 65.210) }, + { SIMDE_FLOAT16_VALUE( 71.127), SIMDE_FLOAT16_VALUE( 81.667), SIMDE_FLOAT16_VALUE( - 51.422), SIMDE_FLOAT16_VALUE( 71.658) }, + { SIMDE_FLOAT16_VALUE( - 37.830), SIMDE_FLOAT16_VALUE( - 63.633), SIMDE_FLOAT16_VALUE( 47.933), SIMDE_FLOAT16_VALUE( 6.764) }, + { SIMDE_FLOAT16_VALUE( 82.593), SIMDE_FLOAT16_VALUE( 71.127), SIMDE_FLOAT16_VALUE( - 37.830), SIMDE_FLOAT16_VALUE( - 78.757), + SIMDE_FLOAT16_VALUE( 81.667), SIMDE_FLOAT16_VALUE( - 63.633), SIMDE_FLOAT16_VALUE( - 89.472), SIMDE_FLOAT16_VALUE( - 51.422), + SIMDE_FLOAT16_VALUE( 47.933), SIMDE_FLOAT16_VALUE( - 65.210), SIMDE_FLOAT16_VALUE( 71.658), SIMDE_FLOAT16_VALUE( 6.764) } }, + { { SIMDE_FLOAT16_VALUE( - 18.391), SIMDE_FLOAT16_VALUE( 36.835), SIMDE_FLOAT16_VALUE( 22.395), SIMDE_FLOAT16_VALUE( 6.510) }, + { SIMDE_FLOAT16_VALUE( 44.811), SIMDE_FLOAT16_VALUE( - 12.385), SIMDE_FLOAT16_VALUE( - 7.166), SIMDE_FLOAT16_VALUE( - 93.173) }, + { SIMDE_FLOAT16_VALUE( 14.253), SIMDE_FLOAT16_VALUE( 90.608), SIMDE_FLOAT16_VALUE( - 83.326), SIMDE_FLOAT16_VALUE( - 35.862) }, + { SIMDE_FLOAT16_VALUE( - 18.391), SIMDE_FLOAT16_VALUE( 44.811), SIMDE_FLOAT16_VALUE( 14.253), SIMDE_FLOAT16_VALUE( 36.835), + SIMDE_FLOAT16_VALUE( - 12.385), SIMDE_FLOAT16_VALUE( 90.608), SIMDE_FLOAT16_VALUE( 22.395), SIMDE_FLOAT16_VALUE( - 7.166), + SIMDE_FLOAT16_VALUE( - 83.326), SIMDE_FLOAT16_VALUE( 6.510), SIMDE_FLOAT16_VALUE( - 93.173), SIMDE_FLOAT16_VALUE( - 35.862) } }, + { { SIMDE_FLOAT16_VALUE( 29.457), SIMDE_FLOAT16_VALUE( 71.643), SIMDE_FLOAT16_VALUE( - 73.821), SIMDE_FLOAT16_VALUE( - 57.149) }, + { SIMDE_FLOAT16_VALUE( 53.561), SIMDE_FLOAT16_VALUE( 52.606), SIMDE_FLOAT16_VALUE( - 93.208), SIMDE_FLOAT16_VALUE( 61.772) },
+ { SIMDE_FLOAT16_VALUE( 29.595), SIMDE_FLOAT16_VALUE( - 0.911), SIMDE_FLOAT16_VALUE( - 25.184), SIMDE_FLOAT16_VALUE( - 63.522) }, + { SIMDE_FLOAT16_VALUE( 29.457), SIMDE_FLOAT16_VALUE( 53.561), SIMDE_FLOAT16_VALUE( 29.595), SIMDE_FLOAT16_VALUE( 71.643), + SIMDE_FLOAT16_VALUE( 52.606), SIMDE_FLOAT16_VALUE( - 0.911), SIMDE_FLOAT16_VALUE( - 73.821), SIMDE_FLOAT16_VALUE( - 93.208), + SIMDE_FLOAT16_VALUE( - 25.184), SIMDE_FLOAT16_VALUE( - 57.149), SIMDE_FLOAT16_VALUE( 61.772), SIMDE_FLOAT16_VALUE( - 63.522) } }, + { { SIMDE_FLOAT16_VALUE( - 25.263), SIMDE_FLOAT16_VALUE( - 21.565), SIMDE_FLOAT16_VALUE( 30.769), SIMDE_FLOAT16_VALUE( 63.051) }, + { SIMDE_FLOAT16_VALUE( 9.949), SIMDE_FLOAT16_VALUE( 3.818), SIMDE_FLOAT16_VALUE( 91.706), SIMDE_FLOAT16_VALUE( - 56.254) }, + { SIMDE_FLOAT16_VALUE( - 62.534), SIMDE_FLOAT16_VALUE( 15.090), SIMDE_FLOAT16_VALUE( 33.667), SIMDE_FLOAT16_VALUE( - 11.503) }, + { SIMDE_FLOAT16_VALUE( - 25.263), SIMDE_FLOAT16_VALUE( 9.949), SIMDE_FLOAT16_VALUE( - 62.534), SIMDE_FLOAT16_VALUE( - 21.565), + SIMDE_FLOAT16_VALUE( 3.818), SIMDE_FLOAT16_VALUE( 15.090), SIMDE_FLOAT16_VALUE( 30.769), SIMDE_FLOAT16_VALUE( 91.706), + SIMDE_FLOAT16_VALUE( 33.667), SIMDE_FLOAT16_VALUE( 63.051), SIMDE_FLOAT16_VALUE( - 56.254), SIMDE_FLOAT16_VALUE( - 11.503) } }, + { { SIMDE_FLOAT16_VALUE( 8.202), SIMDE_FLOAT16_VALUE( - 16.555), SIMDE_FLOAT16_VALUE( 92.084), SIMDE_FLOAT16_VALUE( 72.240) }, + { SIMDE_FLOAT16_VALUE( - 95.149), SIMDE_FLOAT16_VALUE( - 39.732), SIMDE_FLOAT16_VALUE( - 56.675), SIMDE_FLOAT16_VALUE( - 0.718) }, + { SIMDE_FLOAT16_VALUE( 73.858), SIMDE_FLOAT16_VALUE( - 78.753), SIMDE_FLOAT16_VALUE( - 19.882), SIMDE_FLOAT16_VALUE( 17.845) }, + { SIMDE_FLOAT16_VALUE( 8.202), SIMDE_FLOAT16_VALUE( - 95.149), SIMDE_FLOAT16_VALUE( 73.858), SIMDE_FLOAT16_VALUE( - 16.555), + SIMDE_FLOAT16_VALUE( - 39.732), SIMDE_FLOAT16_VALUE( - 78.753), SIMDE_FLOAT16_VALUE( 92.084), SIMDE_FLOAT16_VALUE( - 56.675), + SIMDE_FLOAT16_VALUE( - 19.882), SIMDE_FLOAT16_VALUE( 72.240),
SIMDE_FLOAT16_VALUE( - 0.718), SIMDE_FLOAT16_VALUE( 17.845) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { /* test_simde_vst3_f16 driver: store r0, r1, r2 with simde_vst3_f16 and require the 12 stored values to byte-match test_vec[i].a, which in the table holds r0[j], r1[j], r2[j] interleaved for j = 0..3 */ + simde_float16x4x3_t r_ = { { simde_vld1_f16(test_vec[i].r0), + simde_vld1_f16(test_vec[i].r1), + simde_vld1_f16(test_vec[i].r2), } }; + + simde_float16 a_[12]; /* destination buffer for the store */ + simde_vst3_f16(a_, r_); + simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); /* byte-wise comparison, not a tolerance-based float comparison */ + + r_ = simde_vld3_f16(a_); /* round trip: loading the stored buffer back must reproduce r0, r1, r2 */ + simde_test_arm_neon_assert_equal_f16x4(r_.val[0], simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r_.val[1], simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r_.val[2], simde_vld1_f16(test_vec[i].r2), 1); + } + + return 0; + +#else /* NOTE(review): this branch is disabled by the #if 1 at the top of the function; it calls simde_vst3_f16(r0, r1, r2) and uses a return value, unlike the (buffer, three-vector struct) call in the live branch above - looks stale, confirm before enabling */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r0 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r1 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r2 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x12_t a = simde_vst3_f16(r0, r1, r2); + + simde_test_arm_neon_write_f16x4(2, r0, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x12(2, a, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst3_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1798,6 +1892,147 @@ test_simde_vst3_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst3q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 r0[8]; + simde_float16 r1[8]; + simde_float16 r2[8]; + simde_float16 a[24]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 58.181), SIMDE_FLOAT16_VALUE( - 74.070), SIMDE_FLOAT16_VALUE( 2.770), SIMDE_FLOAT16_VALUE( 89.824), + SIMDE_FLOAT16_VALUE( - 73.116), SIMDE_FLOAT16_VALUE( 77.533), SIMDE_FLOAT16_VALUE( - 71.575), SIMDE_FLOAT16_VALUE( 68.580) }, + { SIMDE_FLOAT16_VALUE(
27.343), SIMDE_FLOAT16_VALUE( 89.326), SIMDE_FLOAT16_VALUE( - 80.249), SIMDE_FLOAT16_VALUE( - 44.763), + SIMDE_FLOAT16_VALUE( 75.289), SIMDE_FLOAT16_VALUE( - 8.705), SIMDE_FLOAT16_VALUE( 79.289), SIMDE_FLOAT16_VALUE( - 12.997) }, + { SIMDE_FLOAT16_VALUE( 88.201), SIMDE_FLOAT16_VALUE( - 99.297), SIMDE_FLOAT16_VALUE( 94.888), SIMDE_FLOAT16_VALUE( - 94.830), + SIMDE_FLOAT16_VALUE( - 82.408), SIMDE_FLOAT16_VALUE( - 38.270), SIMDE_FLOAT16_VALUE( 33.263), SIMDE_FLOAT16_VALUE( 12.688) }, + { SIMDE_FLOAT16_VALUE( 58.181), SIMDE_FLOAT16_VALUE( 27.343), SIMDE_FLOAT16_VALUE( 88.201), SIMDE_FLOAT16_VALUE( - 74.070), + SIMDE_FLOAT16_VALUE( 89.326), SIMDE_FLOAT16_VALUE( - 99.297), SIMDE_FLOAT16_VALUE( 2.770), SIMDE_FLOAT16_VALUE( - 80.249), + SIMDE_FLOAT16_VALUE( 94.888), SIMDE_FLOAT16_VALUE( 89.824), SIMDE_FLOAT16_VALUE( - 44.763), SIMDE_FLOAT16_VALUE( - 94.830), + SIMDE_FLOAT16_VALUE( - 73.116), SIMDE_FLOAT16_VALUE( 75.289), SIMDE_FLOAT16_VALUE( - 82.408), SIMDE_FLOAT16_VALUE( 77.533), + SIMDE_FLOAT16_VALUE( - 8.705), SIMDE_FLOAT16_VALUE( - 38.270), SIMDE_FLOAT16_VALUE( - 71.575), SIMDE_FLOAT16_VALUE( 79.289), + SIMDE_FLOAT16_VALUE( 33.263), SIMDE_FLOAT16_VALUE( 68.580), SIMDE_FLOAT16_VALUE( - 12.997), SIMDE_FLOAT16_VALUE( 12.688) } }, + { { SIMDE_FLOAT16_VALUE( - 68.228), SIMDE_FLOAT16_VALUE( - 30.795), SIMDE_FLOAT16_VALUE( - 93.060), SIMDE_FLOAT16_VALUE( 88.656), + SIMDE_FLOAT16_VALUE( 71.174), SIMDE_FLOAT16_VALUE( - 83.014), SIMDE_FLOAT16_VALUE( 34.681), SIMDE_FLOAT16_VALUE( - 81.096) }, + { SIMDE_FLOAT16_VALUE( 80.371), SIMDE_FLOAT16_VALUE( 94.314), SIMDE_FLOAT16_VALUE( - 28.329), SIMDE_FLOAT16_VALUE( - 76.339), + SIMDE_FLOAT16_VALUE( 13.986), SIMDE_FLOAT16_VALUE( 15.222), SIMDE_FLOAT16_VALUE( 76.136), SIMDE_FLOAT16_VALUE( - 47.982) }, + { SIMDE_FLOAT16_VALUE( - 71.178), SIMDE_FLOAT16_VALUE( - 44.770), SIMDE_FLOAT16_VALUE( - 67.133), SIMDE_FLOAT16_VALUE( 84.421), + SIMDE_FLOAT16_VALUE( - 74.326), SIMDE_FLOAT16_VALUE( - 0.009), SIMDE_FLOAT16_VALUE( - 15.213), 
SIMDE_FLOAT16_VALUE( 78.007) }, + { SIMDE_FLOAT16_VALUE( - 68.228), SIMDE_FLOAT16_VALUE( 80.371), SIMDE_FLOAT16_VALUE( - 71.178), SIMDE_FLOAT16_VALUE( - 30.795), + SIMDE_FLOAT16_VALUE( 94.314), SIMDE_FLOAT16_VALUE( - 44.770), SIMDE_FLOAT16_VALUE( - 93.060), SIMDE_FLOAT16_VALUE( - 28.329), + SIMDE_FLOAT16_VALUE( - 67.133), SIMDE_FLOAT16_VALUE( 88.656), SIMDE_FLOAT16_VALUE( - 76.339), SIMDE_FLOAT16_VALUE( 84.421), + SIMDE_FLOAT16_VALUE( 71.174), SIMDE_FLOAT16_VALUE( 13.986), SIMDE_FLOAT16_VALUE( - 74.326), SIMDE_FLOAT16_VALUE( - 83.014), + SIMDE_FLOAT16_VALUE( 15.222), SIMDE_FLOAT16_VALUE( - 0.009), SIMDE_FLOAT16_VALUE( 34.681), SIMDE_FLOAT16_VALUE( 76.136), + SIMDE_FLOAT16_VALUE( - 15.213), SIMDE_FLOAT16_VALUE( - 81.096), SIMDE_FLOAT16_VALUE( - 47.982), SIMDE_FLOAT16_VALUE( 78.007) } }, + { { SIMDE_FLOAT16_VALUE( - 99.134), SIMDE_FLOAT16_VALUE( - 13.860), SIMDE_FLOAT16_VALUE( - 92.713), SIMDE_FLOAT16_VALUE( - 86.987), + SIMDE_FLOAT16_VALUE( - 46.071), SIMDE_FLOAT16_VALUE( - 46.428), SIMDE_FLOAT16_VALUE( - 55.358), SIMDE_FLOAT16_VALUE( 14.468) }, + { SIMDE_FLOAT16_VALUE( - 16.248), SIMDE_FLOAT16_VALUE( - 50.317), SIMDE_FLOAT16_VALUE( - 57.890), SIMDE_FLOAT16_VALUE( 13.081), + SIMDE_FLOAT16_VALUE( 58.363), SIMDE_FLOAT16_VALUE( 4.390), SIMDE_FLOAT16_VALUE( 3.684), SIMDE_FLOAT16_VALUE( - 18.959) }, + { SIMDE_FLOAT16_VALUE( 85.928), SIMDE_FLOAT16_VALUE( 31.159), SIMDE_FLOAT16_VALUE( 77.301), SIMDE_FLOAT16_VALUE( 10.011), + SIMDE_FLOAT16_VALUE( - 90.228), SIMDE_FLOAT16_VALUE( - 85.132), SIMDE_FLOAT16_VALUE( 25.086), SIMDE_FLOAT16_VALUE( - 86.977) }, + { SIMDE_FLOAT16_VALUE( - 99.134), SIMDE_FLOAT16_VALUE( - 16.248), SIMDE_FLOAT16_VALUE( 85.928), SIMDE_FLOAT16_VALUE( - 13.860), + SIMDE_FLOAT16_VALUE( - 50.317), SIMDE_FLOAT16_VALUE( 31.159), SIMDE_FLOAT16_VALUE( - 92.713), SIMDE_FLOAT16_VALUE( - 57.890), + SIMDE_FLOAT16_VALUE( 77.301), SIMDE_FLOAT16_VALUE( - 86.987), SIMDE_FLOAT16_VALUE( 13.081), SIMDE_FLOAT16_VALUE( 10.011), + SIMDE_FLOAT16_VALUE( - 46.071), 
SIMDE_FLOAT16_VALUE( 58.363), SIMDE_FLOAT16_VALUE( - 90.228), SIMDE_FLOAT16_VALUE( - 46.428), + SIMDE_FLOAT16_VALUE( 4.390), SIMDE_FLOAT16_VALUE( - 85.132), SIMDE_FLOAT16_VALUE( - 55.358), SIMDE_FLOAT16_VALUE( 3.684), + SIMDE_FLOAT16_VALUE( 25.086), SIMDE_FLOAT16_VALUE( 14.468), SIMDE_FLOAT16_VALUE( - 18.959), SIMDE_FLOAT16_VALUE( - 86.977) } }, + { { SIMDE_FLOAT16_VALUE( 30.458), SIMDE_FLOAT16_VALUE( - 38.116), SIMDE_FLOAT16_VALUE( - 23.410), SIMDE_FLOAT16_VALUE( - 76.968), + SIMDE_FLOAT16_VALUE( 94.449), SIMDE_FLOAT16_VALUE( - 25.593), SIMDE_FLOAT16_VALUE( - 22.524), SIMDE_FLOAT16_VALUE( 61.018) }, + { SIMDE_FLOAT16_VALUE( 52.150), SIMDE_FLOAT16_VALUE( 3.559), SIMDE_FLOAT16_VALUE( - 11.531), SIMDE_FLOAT16_VALUE( - 16.763), + SIMDE_FLOAT16_VALUE( 58.053), SIMDE_FLOAT16_VALUE( 59.411), SIMDE_FLOAT16_VALUE( - 87.074), SIMDE_FLOAT16_VALUE( 76.655) }, + { SIMDE_FLOAT16_VALUE( 31.620), SIMDE_FLOAT16_VALUE( - 8.034), SIMDE_FLOAT16_VALUE( 77.915), SIMDE_FLOAT16_VALUE( 90.009), + SIMDE_FLOAT16_VALUE( 40.231), SIMDE_FLOAT16_VALUE( - 28.817), SIMDE_FLOAT16_VALUE( - 12.330), SIMDE_FLOAT16_VALUE( - 39.982) }, + { SIMDE_FLOAT16_VALUE( 30.458), SIMDE_FLOAT16_VALUE( 52.150), SIMDE_FLOAT16_VALUE( 31.620), SIMDE_FLOAT16_VALUE( - 38.116), + SIMDE_FLOAT16_VALUE( 3.559), SIMDE_FLOAT16_VALUE( - 8.034), SIMDE_FLOAT16_VALUE( - 23.410), SIMDE_FLOAT16_VALUE( - 11.531), + SIMDE_FLOAT16_VALUE( 77.915), SIMDE_FLOAT16_VALUE( - 76.968), SIMDE_FLOAT16_VALUE( - 16.763), SIMDE_FLOAT16_VALUE( 90.009), + SIMDE_FLOAT16_VALUE( 94.449), SIMDE_FLOAT16_VALUE( 58.053), SIMDE_FLOAT16_VALUE( 40.231), SIMDE_FLOAT16_VALUE( - 25.593), + SIMDE_FLOAT16_VALUE( 59.411), SIMDE_FLOAT16_VALUE( - 28.817), SIMDE_FLOAT16_VALUE( - 22.524), SIMDE_FLOAT16_VALUE( - 87.074), + SIMDE_FLOAT16_VALUE( - 12.330), SIMDE_FLOAT16_VALUE( 61.018), SIMDE_FLOAT16_VALUE( 76.655), SIMDE_FLOAT16_VALUE( - 39.982) } }, + { { SIMDE_FLOAT16_VALUE( 3.549), SIMDE_FLOAT16_VALUE( - 62.385), SIMDE_FLOAT16_VALUE( - 33.808), SIMDE_FLOAT16_VALUE( 
25.597), + SIMDE_FLOAT16_VALUE( - 26.574), SIMDE_FLOAT16_VALUE( 78.316), SIMDE_FLOAT16_VALUE( 6.102), SIMDE_FLOAT16_VALUE( 23.670) }, + { SIMDE_FLOAT16_VALUE( - 9.134), SIMDE_FLOAT16_VALUE( 38.254), SIMDE_FLOAT16_VALUE( 59.581), SIMDE_FLOAT16_VALUE( 25.105), + SIMDE_FLOAT16_VALUE( 24.218), SIMDE_FLOAT16_VALUE( - 76.984), SIMDE_FLOAT16_VALUE( - 55.415), SIMDE_FLOAT16_VALUE( 57.498) }, + { SIMDE_FLOAT16_VALUE( 75.669), SIMDE_FLOAT16_VALUE( 51.566), SIMDE_FLOAT16_VALUE( 76.383), SIMDE_FLOAT16_VALUE( - 88.090), + SIMDE_FLOAT16_VALUE( - 39.212), SIMDE_FLOAT16_VALUE( - 12.840), SIMDE_FLOAT16_VALUE( - 66.692), SIMDE_FLOAT16_VALUE( - 49.509) }, + { SIMDE_FLOAT16_VALUE( 3.549), SIMDE_FLOAT16_VALUE( - 9.134), SIMDE_FLOAT16_VALUE( 75.669), SIMDE_FLOAT16_VALUE( - 62.385), + SIMDE_FLOAT16_VALUE( 38.254), SIMDE_FLOAT16_VALUE( 51.566), SIMDE_FLOAT16_VALUE( - 33.808), SIMDE_FLOAT16_VALUE( 59.581), + SIMDE_FLOAT16_VALUE( 76.383), SIMDE_FLOAT16_VALUE( 25.597), SIMDE_FLOAT16_VALUE( 25.105), SIMDE_FLOAT16_VALUE( - 88.090), + SIMDE_FLOAT16_VALUE( - 26.574), SIMDE_FLOAT16_VALUE( 24.218), SIMDE_FLOAT16_VALUE( - 39.212), SIMDE_FLOAT16_VALUE( 78.316), + SIMDE_FLOAT16_VALUE( - 76.984), SIMDE_FLOAT16_VALUE( - 12.840), SIMDE_FLOAT16_VALUE( 6.102), SIMDE_FLOAT16_VALUE( - 55.415), + SIMDE_FLOAT16_VALUE( - 66.692), SIMDE_FLOAT16_VALUE( 23.670), SIMDE_FLOAT16_VALUE( 57.498), SIMDE_FLOAT16_VALUE( - 49.509) } }, + { { SIMDE_FLOAT16_VALUE( 77.925), SIMDE_FLOAT16_VALUE( 25.899), SIMDE_FLOAT16_VALUE( 36.333), SIMDE_FLOAT16_VALUE( 28.714), + SIMDE_FLOAT16_VALUE( - 36.012), SIMDE_FLOAT16_VALUE( - 29.170), SIMDE_FLOAT16_VALUE( - 11.929), SIMDE_FLOAT16_VALUE( 80.628) }, + { SIMDE_FLOAT16_VALUE( - 83.488), SIMDE_FLOAT16_VALUE( 9.307), SIMDE_FLOAT16_VALUE( 58.801), SIMDE_FLOAT16_VALUE( 94.273), + SIMDE_FLOAT16_VALUE( - 86.784), SIMDE_FLOAT16_VALUE( 18.524), SIMDE_FLOAT16_VALUE( 82.621), SIMDE_FLOAT16_VALUE( - 81.155) }, + { SIMDE_FLOAT16_VALUE( 4.254), SIMDE_FLOAT16_VALUE( 15.849), SIMDE_FLOAT16_VALUE( 
96.241), SIMDE_FLOAT16_VALUE( - 5.090), + SIMDE_FLOAT16_VALUE( 47.936), SIMDE_FLOAT16_VALUE( - 88.844), SIMDE_FLOAT16_VALUE( 93.898), SIMDE_FLOAT16_VALUE( - 39.333) }, + { SIMDE_FLOAT16_VALUE( 77.925), SIMDE_FLOAT16_VALUE( - 83.488), SIMDE_FLOAT16_VALUE( 4.254), SIMDE_FLOAT16_VALUE( 25.899), + SIMDE_FLOAT16_VALUE( 9.307), SIMDE_FLOAT16_VALUE( 15.849), SIMDE_FLOAT16_VALUE( 36.333), SIMDE_FLOAT16_VALUE( 58.801), + SIMDE_FLOAT16_VALUE( 96.241), SIMDE_FLOAT16_VALUE( 28.714), SIMDE_FLOAT16_VALUE( 94.273), SIMDE_FLOAT16_VALUE( - 5.090), + SIMDE_FLOAT16_VALUE( - 36.012), SIMDE_FLOAT16_VALUE( - 86.784), SIMDE_FLOAT16_VALUE( 47.936), SIMDE_FLOAT16_VALUE( - 29.170), + SIMDE_FLOAT16_VALUE( 18.524), SIMDE_FLOAT16_VALUE( - 88.844), SIMDE_FLOAT16_VALUE( - 11.929), SIMDE_FLOAT16_VALUE( 82.621), + SIMDE_FLOAT16_VALUE( 93.898), SIMDE_FLOAT16_VALUE( 80.628), SIMDE_FLOAT16_VALUE( - 81.155), SIMDE_FLOAT16_VALUE( - 39.333) } }, + { { SIMDE_FLOAT16_VALUE( - 60.634), SIMDE_FLOAT16_VALUE( 75.923), SIMDE_FLOAT16_VALUE( - 88.231), SIMDE_FLOAT16_VALUE( - 87.014), + SIMDE_FLOAT16_VALUE( 31.833), SIMDE_FLOAT16_VALUE( - 64.054), SIMDE_FLOAT16_VALUE( 59.268), SIMDE_FLOAT16_VALUE( - 10.180) }, + { SIMDE_FLOAT16_VALUE( 89.023), SIMDE_FLOAT16_VALUE( - 72.712), SIMDE_FLOAT16_VALUE( 78.505), SIMDE_FLOAT16_VALUE( 11.950), + SIMDE_FLOAT16_VALUE( 37.332), SIMDE_FLOAT16_VALUE( - 36.100), SIMDE_FLOAT16_VALUE( - 81.490), SIMDE_FLOAT16_VALUE( 70.182) }, + { SIMDE_FLOAT16_VALUE( 83.870), SIMDE_FLOAT16_VALUE( - 68.922), SIMDE_FLOAT16_VALUE( - 62.692), SIMDE_FLOAT16_VALUE( - 32.504), + SIMDE_FLOAT16_VALUE( - 81.255), SIMDE_FLOAT16_VALUE( 91.033), SIMDE_FLOAT16_VALUE( - 71.163), SIMDE_FLOAT16_VALUE( - 22.896) }, + { SIMDE_FLOAT16_VALUE( - 60.634), SIMDE_FLOAT16_VALUE( 89.023), SIMDE_FLOAT16_VALUE( 83.870), SIMDE_FLOAT16_VALUE( 75.923), + SIMDE_FLOAT16_VALUE( - 72.712), SIMDE_FLOAT16_VALUE( - 68.922), SIMDE_FLOAT16_VALUE( - 88.231), SIMDE_FLOAT16_VALUE( 78.505), + SIMDE_FLOAT16_VALUE( - 62.692), 
SIMDE_FLOAT16_VALUE( - 87.014), SIMDE_FLOAT16_VALUE( 11.950), SIMDE_FLOAT16_VALUE( - 32.504), + SIMDE_FLOAT16_VALUE( 31.833), SIMDE_FLOAT16_VALUE( 37.332), SIMDE_FLOAT16_VALUE( - 81.255), SIMDE_FLOAT16_VALUE( - 64.054), + SIMDE_FLOAT16_VALUE( - 36.100), SIMDE_FLOAT16_VALUE( 91.033), SIMDE_FLOAT16_VALUE( 59.268), SIMDE_FLOAT16_VALUE( - 81.490), + SIMDE_FLOAT16_VALUE( - 71.163), SIMDE_FLOAT16_VALUE( - 10.180), SIMDE_FLOAT16_VALUE( 70.182), SIMDE_FLOAT16_VALUE( - 22.896) } }, + { { SIMDE_FLOAT16_VALUE( 30.680), SIMDE_FLOAT16_VALUE( 70.181), SIMDE_FLOAT16_VALUE( 61.927), SIMDE_FLOAT16_VALUE( 89.100), + SIMDE_FLOAT16_VALUE( - 41.719), SIMDE_FLOAT16_VALUE( 79.129), SIMDE_FLOAT16_VALUE( 27.325), SIMDE_FLOAT16_VALUE( - 19.893) }, + { SIMDE_FLOAT16_VALUE( - 95.074), SIMDE_FLOAT16_VALUE( 39.687), SIMDE_FLOAT16_VALUE( - 65.504), SIMDE_FLOAT16_VALUE( 87.309), + SIMDE_FLOAT16_VALUE( - 63.357), SIMDE_FLOAT16_VALUE( 37.794), SIMDE_FLOAT16_VALUE( 30.688), SIMDE_FLOAT16_VALUE( 3.602) }, + { SIMDE_FLOAT16_VALUE( - 90.304), SIMDE_FLOAT16_VALUE( - 57.865), SIMDE_FLOAT16_VALUE( 1.237), SIMDE_FLOAT16_VALUE( 95.036), + SIMDE_FLOAT16_VALUE( - 46.211), SIMDE_FLOAT16_VALUE( 17.337), SIMDE_FLOAT16_VALUE( 24.755), SIMDE_FLOAT16_VALUE( 20.183) }, + { SIMDE_FLOAT16_VALUE( 30.680), SIMDE_FLOAT16_VALUE( - 95.074), SIMDE_FLOAT16_VALUE( - 90.304), SIMDE_FLOAT16_VALUE( 70.181), + SIMDE_FLOAT16_VALUE( 39.687), SIMDE_FLOAT16_VALUE( - 57.865), SIMDE_FLOAT16_VALUE( 61.927), SIMDE_FLOAT16_VALUE( - 65.504), + SIMDE_FLOAT16_VALUE( 1.237), SIMDE_FLOAT16_VALUE( 89.100), SIMDE_FLOAT16_VALUE( 87.309), SIMDE_FLOAT16_VALUE( 95.036), + SIMDE_FLOAT16_VALUE( - 41.719), SIMDE_FLOAT16_VALUE( - 63.357), SIMDE_FLOAT16_VALUE( - 46.211), SIMDE_FLOAT16_VALUE( 79.129), + SIMDE_FLOAT16_VALUE( 37.794), SIMDE_FLOAT16_VALUE( 17.337), SIMDE_FLOAT16_VALUE( 27.325), SIMDE_FLOAT16_VALUE( 30.688), + SIMDE_FLOAT16_VALUE( 24.755), SIMDE_FLOAT16_VALUE( - 19.893), SIMDE_FLOAT16_VALUE( 3.602), SIMDE_FLOAT16_VALUE( 20.183) } }, + }; + + 
for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x3_t r_ = { { simde_vld1q_f16(test_vec[i].r0), + simde_vld1q_f16(test_vec[i].r1), + simde_vld1q_f16(test_vec[i].r2), } }; + + simde_float16 a_[24]; + simde_vst3q_f16(a_, r_); + simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); + + r_ = simde_vld3q_f16(a_); + simde_test_arm_neon_assert_equal_f16x8(r_.val[0], simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r_.val[1], simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r_.val[2], simde_vld1q_f16(test_vec[i].r2), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r0 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r1 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r2 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x24_t a = simde_vst3q_f16(r0, r1, r2); + + simde_test_arm_neon_write_f16x8(2, r0, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x24(2, a, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst3q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -4427,6 +4662,7 @@ test_simde_vst3q_u64 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst3_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_s8) @@ -4438,6 +4674,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_s8) diff --git a/test/arm/neon/st3_lane.c 
b/test/arm/neon/st3_lane.c index 47efbbabc..d870e17a0 100644 --- a/test/arm/neon/st3_lane.c +++ b/test/arm/neon/st3_lane.c @@ -483,6 +483,71 @@ test_simde_vst3_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst3_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[3]; + simde_float16_t val[3][4]; + int lane; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 94.290), SIMDE_FLOAT16_VALUE( - 50.509), SIMDE_FLOAT16_VALUE( 23.704) }, + { { SIMDE_FLOAT16_VALUE( - 0.571), SIMDE_FLOAT16_VALUE( - 94.290), SIMDE_FLOAT16_VALUE( 93.093), SIMDE_FLOAT16_VALUE( 89.891) }, + { SIMDE_FLOAT16_VALUE( - 59.956), SIMDE_FLOAT16_VALUE( - 50.509), SIMDE_FLOAT16_VALUE( 35.194), SIMDE_FLOAT16_VALUE( 22.787) }, + { SIMDE_FLOAT16_VALUE( - 24.560), SIMDE_FLOAT16_VALUE( 23.704), SIMDE_FLOAT16_VALUE( - 11.110), SIMDE_FLOAT16_VALUE( - 19.691) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 6.814), SIMDE_FLOAT16_VALUE( - 31.127), SIMDE_FLOAT16_VALUE( - 34.824) }, + { { SIMDE_FLOAT16_VALUE( - 17.965), SIMDE_FLOAT16_VALUE( - 6.814), SIMDE_FLOAT16_VALUE( - 65.421), SIMDE_FLOAT16_VALUE( 26.500) }, + { SIMDE_FLOAT16_VALUE( 84.515), SIMDE_FLOAT16_VALUE( - 31.127), SIMDE_FLOAT16_VALUE( 85.025), SIMDE_FLOAT16_VALUE( - 51.272) }, + { SIMDE_FLOAT16_VALUE( - 38.529), SIMDE_FLOAT16_VALUE( - 34.824), SIMDE_FLOAT16_VALUE( - 81.647), SIMDE_FLOAT16_VALUE( 47.194) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 73.789), SIMDE_FLOAT16_VALUE( - 3.383), SIMDE_FLOAT16_VALUE( - 13.801) }, + { { SIMDE_FLOAT16_VALUE( - 94.005), SIMDE_FLOAT16_VALUE( - 35.738), SIMDE_FLOAT16_VALUE( - 73.789), SIMDE_FLOAT16_VALUE( - 45.230) }, + { SIMDE_FLOAT16_VALUE( 12.330), SIMDE_FLOAT16_VALUE( 74.051), SIMDE_FLOAT16_VALUE( - 3.383), SIMDE_FLOAT16_VALUE( - 10.054) }, + { SIMDE_FLOAT16_VALUE( - 64.841), SIMDE_FLOAT16_VALUE( 8.563), SIMDE_FLOAT16_VALUE( - 13.801), SIMDE_FLOAT16_VALUE( 10.504) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 0.435), SIMDE_FLOAT16_VALUE( 62.466), SIMDE_FLOAT16_VALUE( 
94.721) }, + { { SIMDE_FLOAT16_VALUE( 0.435), SIMDE_FLOAT16_VALUE( - 13.638), SIMDE_FLOAT16_VALUE( 0.530), SIMDE_FLOAT16_VALUE( - 61.496) }, + { SIMDE_FLOAT16_VALUE( 62.466), SIMDE_FLOAT16_VALUE( 24.828), SIMDE_FLOAT16_VALUE( 9.968), SIMDE_FLOAT16_VALUE( - 50.721) }, + { SIMDE_FLOAT16_VALUE( 94.721), SIMDE_FLOAT16_VALUE( 44.136), SIMDE_FLOAT16_VALUE( 78.905), SIMDE_FLOAT16_VALUE( - 55.458) } }, + INT8_C( 0) }, + { { SIMDE_FLOAT16_VALUE( - 22.144), SIMDE_FLOAT16_VALUE( 90.977), SIMDE_FLOAT16_VALUE( - 5.890) }, + { { SIMDE_FLOAT16_VALUE( 26.172), SIMDE_FLOAT16_VALUE( - 89.305), SIMDE_FLOAT16_VALUE( - 59.818), SIMDE_FLOAT16_VALUE( - 22.144) }, + { SIMDE_FLOAT16_VALUE( - 44.719), SIMDE_FLOAT16_VALUE( - 60.282), SIMDE_FLOAT16_VALUE( 94.935), SIMDE_FLOAT16_VALUE( 90.977) }, + { SIMDE_FLOAT16_VALUE( 80.867), SIMDE_FLOAT16_VALUE( - 45.339), SIMDE_FLOAT16_VALUE( - 19.747), SIMDE_FLOAT16_VALUE( - 5.890) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( 23.224), SIMDE_FLOAT16_VALUE( 49.585), SIMDE_FLOAT16_VALUE( 78.018) }, + { { SIMDE_FLOAT16_VALUE( - 48.686), SIMDE_FLOAT16_VALUE( 23.224), SIMDE_FLOAT16_VALUE( 40.332), SIMDE_FLOAT16_VALUE( 74.959) }, + { SIMDE_FLOAT16_VALUE( - 65.641), SIMDE_FLOAT16_VALUE( 49.585), SIMDE_FLOAT16_VALUE( 6.886), SIMDE_FLOAT16_VALUE( 98.862) }, + { SIMDE_FLOAT16_VALUE( - 29.125), SIMDE_FLOAT16_VALUE( 78.018), SIMDE_FLOAT16_VALUE( - 4.487), SIMDE_FLOAT16_VALUE( 50.278) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 90.627), SIMDE_FLOAT16_VALUE( - 1.372), SIMDE_FLOAT16_VALUE( - 87.387) }, + { { SIMDE_FLOAT16_VALUE( - 58.435), SIMDE_FLOAT16_VALUE( 67.021), SIMDE_FLOAT16_VALUE( - 90.627), SIMDE_FLOAT16_VALUE( 69.960) }, + { SIMDE_FLOAT16_VALUE( 62.852), SIMDE_FLOAT16_VALUE( 29.780), SIMDE_FLOAT16_VALUE( - 1.372), SIMDE_FLOAT16_VALUE( 41.058) }, + { SIMDE_FLOAT16_VALUE( 43.350), SIMDE_FLOAT16_VALUE( - 69.931), SIMDE_FLOAT16_VALUE( - 87.387), SIMDE_FLOAT16_VALUE( 98.569) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 33.021), SIMDE_FLOAT16_VALUE( 
22.025), SIMDE_FLOAT16_VALUE( 69.528) }, + { { SIMDE_FLOAT16_VALUE( 40.864), SIMDE_FLOAT16_VALUE( 81.943), SIMDE_FLOAT16_VALUE( - 40.609), SIMDE_FLOAT16_VALUE( 33.021) }, + { SIMDE_FLOAT16_VALUE( 93.909), SIMDE_FLOAT16_VALUE( 69.201), SIMDE_FLOAT16_VALUE( 43.541), SIMDE_FLOAT16_VALUE( 22.025) }, + { SIMDE_FLOAT16_VALUE( 45.748), SIMDE_FLOAT16_VALUE( - 89.541), SIMDE_FLOAT16_VALUE( 49.879), SIMDE_FLOAT16_VALUE( 69.528) } }, + INT8_C( 3) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x3_t val = {{simde_vld1_f16(test_vec[i].val[0]), + simde_vld1_f16(test_vec[i].val[1]), + simde_vld1_f16(test_vec[i].val[2])}}; + simde_float16_t a[3]; + SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst3_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); + + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + simde_assert_equal_f16(a[2], test_vec[i].a[2], 1); + } + + return 0; +} + static int test_simde_vst3_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1108,6 +1173,95 @@ test_simde_vst3q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst3q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { + struct { + simde_float16_t a[3]; + simde_float16_t val[3][8]; + int lane; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 56.833), SIMDE_FLOAT16_VALUE( 73.442), SIMDE_FLOAT16_VALUE( - 22.618) }, + { { SIMDE_FLOAT16_VALUE( 90.228), SIMDE_FLOAT16_VALUE( 1.415), SIMDE_FLOAT16_VALUE( - 54.694), SIMDE_FLOAT16_VALUE( 41.258), + SIMDE_FLOAT16_VALUE( 74.577), SIMDE_FLOAT16_VALUE( 13.483), SIMDE_FLOAT16_VALUE( 56.833), SIMDE_FLOAT16_VALUE( 68.200) }, + { SIMDE_FLOAT16_VALUE( - 65.650), SIMDE_FLOAT16_VALUE( - 68.388), SIMDE_FLOAT16_VALUE( - 13.278), SIMDE_FLOAT16_VALUE( - 65.890), + SIMDE_FLOAT16_VALUE( 16.926), SIMDE_FLOAT16_VALUE( - 26.315), SIMDE_FLOAT16_VALUE( 73.442), SIMDE_FLOAT16_VALUE( 26.641) }, + { SIMDE_FLOAT16_VALUE( - 42.649), SIMDE_FLOAT16_VALUE( 80.336), SIMDE_FLOAT16_VALUE( 27.756), 
SIMDE_FLOAT16_VALUE( 8.267), + SIMDE_FLOAT16_VALUE( 24.942), SIMDE_FLOAT16_VALUE( 96.066), SIMDE_FLOAT16_VALUE( - 22.618), SIMDE_FLOAT16_VALUE( - 58.346) } }, + INT8_C( 6) }, + { { SIMDE_FLOAT16_VALUE( - 74.185), SIMDE_FLOAT16_VALUE( - 78.069), SIMDE_FLOAT16_VALUE( - 86.364) }, + { { SIMDE_FLOAT16_VALUE( - 69.189), SIMDE_FLOAT16_VALUE( - 74.185), SIMDE_FLOAT16_VALUE( 5.043), SIMDE_FLOAT16_VALUE( - 18.088), + SIMDE_FLOAT16_VALUE( 14.723), SIMDE_FLOAT16_VALUE( 9.202), SIMDE_FLOAT16_VALUE( 78.389), SIMDE_FLOAT16_VALUE( - 87.305) }, + { SIMDE_FLOAT16_VALUE( - 12.507), SIMDE_FLOAT16_VALUE( - 78.069), SIMDE_FLOAT16_VALUE( - 96.953), SIMDE_FLOAT16_VALUE( - 96.401), + SIMDE_FLOAT16_VALUE( 42.165), SIMDE_FLOAT16_VALUE( 75.353), SIMDE_FLOAT16_VALUE( - 82.128), SIMDE_FLOAT16_VALUE( 17.448) }, + { SIMDE_FLOAT16_VALUE( 57.653), SIMDE_FLOAT16_VALUE( - 86.364), SIMDE_FLOAT16_VALUE( 30.786), SIMDE_FLOAT16_VALUE( - 50.101), + SIMDE_FLOAT16_VALUE( 60.669), SIMDE_FLOAT16_VALUE( - 26.173), SIMDE_FLOAT16_VALUE( 10.148), SIMDE_FLOAT16_VALUE( - 92.619) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 30.210), SIMDE_FLOAT16_VALUE( - 13.667), SIMDE_FLOAT16_VALUE( - 47.603) }, + { { SIMDE_FLOAT16_VALUE( - 96.908), SIMDE_FLOAT16_VALUE( - 14.691), SIMDE_FLOAT16_VALUE( 67.605), SIMDE_FLOAT16_VALUE( 52.785), + SIMDE_FLOAT16_VALUE( 10.770), SIMDE_FLOAT16_VALUE( 49.923), SIMDE_FLOAT16_VALUE( 79.174), SIMDE_FLOAT16_VALUE( - 30.210) }, + { SIMDE_FLOAT16_VALUE( - 86.230), SIMDE_FLOAT16_VALUE( - 40.180), SIMDE_FLOAT16_VALUE( 81.322), SIMDE_FLOAT16_VALUE( - 4.303), + SIMDE_FLOAT16_VALUE( - 25.810), SIMDE_FLOAT16_VALUE( 19.169), SIMDE_FLOAT16_VALUE( - 7.542), SIMDE_FLOAT16_VALUE( - 13.667) }, + { SIMDE_FLOAT16_VALUE( 48.690), SIMDE_FLOAT16_VALUE( - 84.400), SIMDE_FLOAT16_VALUE( - 42.388), SIMDE_FLOAT16_VALUE( - 71.989), + SIMDE_FLOAT16_VALUE( 81.668), SIMDE_FLOAT16_VALUE( 61.074), SIMDE_FLOAT16_VALUE( 68.365), SIMDE_FLOAT16_VALUE( - 47.603) } }, + INT8_C( 7) }, + { { SIMDE_FLOAT16_VALUE( - 74.022), 
SIMDE_FLOAT16_VALUE( 72.818), SIMDE_FLOAT16_VALUE( 75.403) }, + { { SIMDE_FLOAT16_VALUE( 65.684), SIMDE_FLOAT16_VALUE( 92.561), SIMDE_FLOAT16_VALUE( - 76.038), SIMDE_FLOAT16_VALUE( 75.659), + SIMDE_FLOAT16_VALUE( - 74.022), SIMDE_FLOAT16_VALUE( 74.037), SIMDE_FLOAT16_VALUE( - 67.037), SIMDE_FLOAT16_VALUE( 61.026) }, + { SIMDE_FLOAT16_VALUE( 43.862), SIMDE_FLOAT16_VALUE( - 42.260), SIMDE_FLOAT16_VALUE( - 8.530), SIMDE_FLOAT16_VALUE( 15.388), + SIMDE_FLOAT16_VALUE( 72.818), SIMDE_FLOAT16_VALUE( - 3.493), SIMDE_FLOAT16_VALUE( 29.692), SIMDE_FLOAT16_VALUE( 78.968) }, + { SIMDE_FLOAT16_VALUE( - 91.120), SIMDE_FLOAT16_VALUE( 22.041), SIMDE_FLOAT16_VALUE( - 61.013), SIMDE_FLOAT16_VALUE( - 71.187), + SIMDE_FLOAT16_VALUE( 75.403), SIMDE_FLOAT16_VALUE( 95.292), SIMDE_FLOAT16_VALUE( 21.707), SIMDE_FLOAT16_VALUE( - 88.034) } }, + INT8_C( 4) }, + { { SIMDE_FLOAT16_VALUE( 66.250), SIMDE_FLOAT16_VALUE( 86.996), SIMDE_FLOAT16_VALUE( - 40.809) }, + { { SIMDE_FLOAT16_VALUE( 56.098), SIMDE_FLOAT16_VALUE( - 69.469), SIMDE_FLOAT16_VALUE( - 38.141), SIMDE_FLOAT16_VALUE( 95.730), + SIMDE_FLOAT16_VALUE( 66.250), SIMDE_FLOAT16_VALUE( 88.246), SIMDE_FLOAT16_VALUE( - 73.861), SIMDE_FLOAT16_VALUE( 88.929) }, + { SIMDE_FLOAT16_VALUE( 91.342), SIMDE_FLOAT16_VALUE( 25.976), SIMDE_FLOAT16_VALUE( 27.770), SIMDE_FLOAT16_VALUE( 57.030), + SIMDE_FLOAT16_VALUE( 86.996), SIMDE_FLOAT16_VALUE( 24.429), SIMDE_FLOAT16_VALUE( 36.571), SIMDE_FLOAT16_VALUE( 25.024) }, + { SIMDE_FLOAT16_VALUE( - 51.937), SIMDE_FLOAT16_VALUE( 0.024), SIMDE_FLOAT16_VALUE( 26.121), SIMDE_FLOAT16_VALUE( 82.133), + SIMDE_FLOAT16_VALUE( - 40.809), SIMDE_FLOAT16_VALUE( - 89.179), SIMDE_FLOAT16_VALUE( - 45.771), SIMDE_FLOAT16_VALUE( - 76.817) } }, + INT8_C( 4) }, + { { SIMDE_FLOAT16_VALUE( 41.869), SIMDE_FLOAT16_VALUE( - 27.827), SIMDE_FLOAT16_VALUE( - 92.856) }, + { { SIMDE_FLOAT16_VALUE( - 35.175), SIMDE_FLOAT16_VALUE( 47.619), SIMDE_FLOAT16_VALUE( 41.869), SIMDE_FLOAT16_VALUE( 17.848), + SIMDE_FLOAT16_VALUE( - 56.188), 
SIMDE_FLOAT16_VALUE( - 50.409), SIMDE_FLOAT16_VALUE( 65.876), SIMDE_FLOAT16_VALUE( - 32.688) }, + { SIMDE_FLOAT16_VALUE( 3.311), SIMDE_FLOAT16_VALUE( - 49.604), SIMDE_FLOAT16_VALUE( - 27.827), SIMDE_FLOAT16_VALUE( - 2.760), + SIMDE_FLOAT16_VALUE( 20.012), SIMDE_FLOAT16_VALUE( 66.946), SIMDE_FLOAT16_VALUE( 31.788), SIMDE_FLOAT16_VALUE( - 52.736) }, + { SIMDE_FLOAT16_VALUE( - 93.576), SIMDE_FLOAT16_VALUE( 4.057), SIMDE_FLOAT16_VALUE( - 92.856), SIMDE_FLOAT16_VALUE( 68.167), + SIMDE_FLOAT16_VALUE( - 17.311), SIMDE_FLOAT16_VALUE( - 69.405), SIMDE_FLOAT16_VALUE( 82.245), SIMDE_FLOAT16_VALUE( - 7.897) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 84.635), SIMDE_FLOAT16_VALUE( - 34.646), SIMDE_FLOAT16_VALUE( - 57.874) }, + { { SIMDE_FLOAT16_VALUE( 81.549), SIMDE_FLOAT16_VALUE( - 2.028), SIMDE_FLOAT16_VALUE( 84.635), SIMDE_FLOAT16_VALUE( - 29.533), + SIMDE_FLOAT16_VALUE( - 38.208), SIMDE_FLOAT16_VALUE( 73.008), SIMDE_FLOAT16_VALUE( 35.951), SIMDE_FLOAT16_VALUE( - 30.779) }, + { SIMDE_FLOAT16_VALUE( 42.215), SIMDE_FLOAT16_VALUE( - 27.247), SIMDE_FLOAT16_VALUE( - 34.646), SIMDE_FLOAT16_VALUE( - 1.564), + SIMDE_FLOAT16_VALUE( 71.796), SIMDE_FLOAT16_VALUE( - 51.547), SIMDE_FLOAT16_VALUE( 32.523), SIMDE_FLOAT16_VALUE( - 43.421) }, + { SIMDE_FLOAT16_VALUE( 0.439), SIMDE_FLOAT16_VALUE( 13.866), SIMDE_FLOAT16_VALUE( - 57.874), SIMDE_FLOAT16_VALUE( - 87.206), + SIMDE_FLOAT16_VALUE( - 61.660), SIMDE_FLOAT16_VALUE( 80.530), SIMDE_FLOAT16_VALUE( - 56.810), SIMDE_FLOAT16_VALUE( - 20.126) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( - 78.288), SIMDE_FLOAT16_VALUE( 36.942), SIMDE_FLOAT16_VALUE( 21.455) }, + { { SIMDE_FLOAT16_VALUE( - 1.116), SIMDE_FLOAT16_VALUE( 88.525), SIMDE_FLOAT16_VALUE( - 54.747), SIMDE_FLOAT16_VALUE( 70.086), + SIMDE_FLOAT16_VALUE( - 0.981), SIMDE_FLOAT16_VALUE( - 78.288), SIMDE_FLOAT16_VALUE( - 64.328), SIMDE_FLOAT16_VALUE( 72.247) }, + { SIMDE_FLOAT16_VALUE( 50.505), SIMDE_FLOAT16_VALUE( 62.460), SIMDE_FLOAT16_VALUE( 55.021), SIMDE_FLOAT16_VALUE( 63.023), + 
SIMDE_FLOAT16_VALUE( 46.784), SIMDE_FLOAT16_VALUE( 36.942), SIMDE_FLOAT16_VALUE( - 96.105), SIMDE_FLOAT16_VALUE( - 30.818) }, + { SIMDE_FLOAT16_VALUE( - 28.408), SIMDE_FLOAT16_VALUE( - 39.921), SIMDE_FLOAT16_VALUE( 8.839), SIMDE_FLOAT16_VALUE( - 70.832), + SIMDE_FLOAT16_VALUE( 33.768), SIMDE_FLOAT16_VALUE( 21.455), SIMDE_FLOAT16_VALUE( - 89.062), SIMDE_FLOAT16_VALUE( 97.215) } }, + INT8_C( 5) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x3_t val = {{simde_vld1q_f16(test_vec[i].val[0]), + simde_vld1q_f16(test_vec[i].val[1]), + simde_vld1q_f16(test_vec[i].val[2])}}; + simde_float16_t a[3]; + SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst3q_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); + + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + simde_assert_equal_f16(a[2], test_vec[i].a[2], 1); + } + + return 0; +} + static int test_simde_vst3q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1221,6 +1375,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3_lane_f64) @@ -1232,6 +1387,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst3q_lane_f64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ diff --git a/test/arm/neon/st4.c b/test/arm/neon/st4.c index 54daa5288..d1f7af4db 100644 --- a/test/arm/neon/st4.c +++ b/test/arm/neon/st4.c @@ -18,6 +18,120 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vst4_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 r0[4]; + 
simde_float16 r1[4]; + simde_float16 r2[4]; + simde_float16 r3[4]; + simde_float16 a[16]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 47.024), SIMDE_FLOAT16_VALUE( 6.719), SIMDE_FLOAT16_VALUE( 41.219), SIMDE_FLOAT16_VALUE( 13.593) }, + { SIMDE_FLOAT16_VALUE( - 94.191), SIMDE_FLOAT16_VALUE( 54.699), SIMDE_FLOAT16_VALUE( 93.339), SIMDE_FLOAT16_VALUE( - 70.910) }, + { SIMDE_FLOAT16_VALUE( - 57.135), SIMDE_FLOAT16_VALUE( - 74.250), SIMDE_FLOAT16_VALUE( 44.190), SIMDE_FLOAT16_VALUE( - 50.049) }, + { SIMDE_FLOAT16_VALUE( 4.436), SIMDE_FLOAT16_VALUE( 57.399), SIMDE_FLOAT16_VALUE( 71.170), SIMDE_FLOAT16_VALUE( - 90.745) }, + { SIMDE_FLOAT16_VALUE( - 47.024), SIMDE_FLOAT16_VALUE( - 94.191), SIMDE_FLOAT16_VALUE( - 57.135), SIMDE_FLOAT16_VALUE( 4.436), + SIMDE_FLOAT16_VALUE( 6.719), SIMDE_FLOAT16_VALUE( 54.699), SIMDE_FLOAT16_VALUE( - 74.250), SIMDE_FLOAT16_VALUE( 57.399), + SIMDE_FLOAT16_VALUE( 41.219), SIMDE_FLOAT16_VALUE( 93.339), SIMDE_FLOAT16_VALUE( 44.190), SIMDE_FLOAT16_VALUE( 71.170), + SIMDE_FLOAT16_VALUE( 13.593), SIMDE_FLOAT16_VALUE( - 70.910), SIMDE_FLOAT16_VALUE( - 50.049), SIMDE_FLOAT16_VALUE( - 90.745) } }, + { { SIMDE_FLOAT16_VALUE( - 57.272), SIMDE_FLOAT16_VALUE( - 89.707), SIMDE_FLOAT16_VALUE( 23.428), SIMDE_FLOAT16_VALUE( - 9.662) }, + { SIMDE_FLOAT16_VALUE( - 66.994), SIMDE_FLOAT16_VALUE( - 28.587), SIMDE_FLOAT16_VALUE( - 24.075), SIMDE_FLOAT16_VALUE( 72.816) }, + { SIMDE_FLOAT16_VALUE( 87.704), SIMDE_FLOAT16_VALUE( 11.476), SIMDE_FLOAT16_VALUE( - 9.653), SIMDE_FLOAT16_VALUE( 93.611) }, + { SIMDE_FLOAT16_VALUE( 96.922), SIMDE_FLOAT16_VALUE( 35.294), SIMDE_FLOAT16_VALUE( 9.081), SIMDE_FLOAT16_VALUE( 56.455) }, + { SIMDE_FLOAT16_VALUE( - 57.272), SIMDE_FLOAT16_VALUE( - 66.994), SIMDE_FLOAT16_VALUE( 87.704), SIMDE_FLOAT16_VALUE( 96.922), + SIMDE_FLOAT16_VALUE( - 89.707), SIMDE_FLOAT16_VALUE( - 28.587), SIMDE_FLOAT16_VALUE( 11.476), SIMDE_FLOAT16_VALUE( 35.294), + SIMDE_FLOAT16_VALUE( 23.428), SIMDE_FLOAT16_VALUE( - 24.075), SIMDE_FLOAT16_VALUE( - 9.653), 
SIMDE_FLOAT16_VALUE( 9.081), + SIMDE_FLOAT16_VALUE( - 9.662), SIMDE_FLOAT16_VALUE( 72.816), SIMDE_FLOAT16_VALUE( 93.611), SIMDE_FLOAT16_VALUE( 56.455) } }, + { { SIMDE_FLOAT16_VALUE( - 93.105), SIMDE_FLOAT16_VALUE( - 3.401), SIMDE_FLOAT16_VALUE( 38.259), SIMDE_FLOAT16_VALUE( - 77.939) }, + { SIMDE_FLOAT16_VALUE( - 61.605), SIMDE_FLOAT16_VALUE( - 53.766), SIMDE_FLOAT16_VALUE( - 79.385), SIMDE_FLOAT16_VALUE( 51.701) }, + { SIMDE_FLOAT16_VALUE( - 13.690), SIMDE_FLOAT16_VALUE( 61.359), SIMDE_FLOAT16_VALUE( - 48.410), SIMDE_FLOAT16_VALUE( 83.577) }, + { SIMDE_FLOAT16_VALUE( - 31.493), SIMDE_FLOAT16_VALUE( 67.547), SIMDE_FLOAT16_VALUE( - 49.486), SIMDE_FLOAT16_VALUE( - 70.863) }, + { SIMDE_FLOAT16_VALUE( - 93.105), SIMDE_FLOAT16_VALUE( - 61.605), SIMDE_FLOAT16_VALUE( - 13.690), SIMDE_FLOAT16_VALUE( - 31.493), + SIMDE_FLOAT16_VALUE( - 3.401), SIMDE_FLOAT16_VALUE( - 53.766), SIMDE_FLOAT16_VALUE( 61.359), SIMDE_FLOAT16_VALUE( 67.547), + SIMDE_FLOAT16_VALUE( 38.259), SIMDE_FLOAT16_VALUE( - 79.385), SIMDE_FLOAT16_VALUE( - 48.410), SIMDE_FLOAT16_VALUE( - 49.486), + SIMDE_FLOAT16_VALUE( - 77.939), SIMDE_FLOAT16_VALUE( 51.701), SIMDE_FLOAT16_VALUE( 83.577), SIMDE_FLOAT16_VALUE( - 70.863) } }, + { { SIMDE_FLOAT16_VALUE( - 59.131), SIMDE_FLOAT16_VALUE( - 86.919), SIMDE_FLOAT16_VALUE( 79.472), SIMDE_FLOAT16_VALUE( - 8.816) }, + { SIMDE_FLOAT16_VALUE( 1.504), SIMDE_FLOAT16_VALUE( - 19.699), SIMDE_FLOAT16_VALUE( - 54.045), SIMDE_FLOAT16_VALUE( 80.818) }, + { SIMDE_FLOAT16_VALUE( 96.502), SIMDE_FLOAT16_VALUE( 74.467), SIMDE_FLOAT16_VALUE( - 62.320), SIMDE_FLOAT16_VALUE( - 13.596) }, + { SIMDE_FLOAT16_VALUE( 93.177), SIMDE_FLOAT16_VALUE( - 66.739), SIMDE_FLOAT16_VALUE( - 38.338), SIMDE_FLOAT16_VALUE( 99.656) }, + { SIMDE_FLOAT16_VALUE( - 59.131), SIMDE_FLOAT16_VALUE( 1.504), SIMDE_FLOAT16_VALUE( 96.502), SIMDE_FLOAT16_VALUE( 93.177), + SIMDE_FLOAT16_VALUE( - 86.919), SIMDE_FLOAT16_VALUE( - 19.699), SIMDE_FLOAT16_VALUE( 74.467), SIMDE_FLOAT16_VALUE( - 66.739), + SIMDE_FLOAT16_VALUE( 
79.472), SIMDE_FLOAT16_VALUE( - 54.045), SIMDE_FLOAT16_VALUE( - 62.320), SIMDE_FLOAT16_VALUE( - 38.338), + SIMDE_FLOAT16_VALUE( - 8.816), SIMDE_FLOAT16_VALUE( 80.818), SIMDE_FLOAT16_VALUE( - 13.596), SIMDE_FLOAT16_VALUE( 99.656) } }, + { { SIMDE_FLOAT16_VALUE( - 37.162), SIMDE_FLOAT16_VALUE( 16.071), SIMDE_FLOAT16_VALUE( 53.822), SIMDE_FLOAT16_VALUE( 26.764) }, + { SIMDE_FLOAT16_VALUE( 12.408), SIMDE_FLOAT16_VALUE( 83.245), SIMDE_FLOAT16_VALUE( - 72.788), SIMDE_FLOAT16_VALUE( 33.763) }, + { SIMDE_FLOAT16_VALUE( 62.702), SIMDE_FLOAT16_VALUE( 47.064), SIMDE_FLOAT16_VALUE( - 22.343), SIMDE_FLOAT16_VALUE( - 73.798) }, + { SIMDE_FLOAT16_VALUE( 47.563), SIMDE_FLOAT16_VALUE( - 15.230), SIMDE_FLOAT16_VALUE( - 20.486), SIMDE_FLOAT16_VALUE( 9.512) }, + { SIMDE_FLOAT16_VALUE( - 37.162), SIMDE_FLOAT16_VALUE( 12.408), SIMDE_FLOAT16_VALUE( 62.702), SIMDE_FLOAT16_VALUE( 47.563), + SIMDE_FLOAT16_VALUE( 16.071), SIMDE_FLOAT16_VALUE( 83.245), SIMDE_FLOAT16_VALUE( 47.064), SIMDE_FLOAT16_VALUE( - 15.230), + SIMDE_FLOAT16_VALUE( 53.822), SIMDE_FLOAT16_VALUE( - 72.788), SIMDE_FLOAT16_VALUE( - 22.343), SIMDE_FLOAT16_VALUE( - 20.486), + SIMDE_FLOAT16_VALUE( 26.764), SIMDE_FLOAT16_VALUE( 33.763), SIMDE_FLOAT16_VALUE( - 73.798), SIMDE_FLOAT16_VALUE( 9.512) } }, + { { SIMDE_FLOAT16_VALUE( 81.267), SIMDE_FLOAT16_VALUE( - 14.419), SIMDE_FLOAT16_VALUE( - 86.910), SIMDE_FLOAT16_VALUE( 96.117) }, + { SIMDE_FLOAT16_VALUE( 63.922), SIMDE_FLOAT16_VALUE( - 13.784), SIMDE_FLOAT16_VALUE( 60.900), SIMDE_FLOAT16_VALUE( - 46.465) }, + { SIMDE_FLOAT16_VALUE( - 8.848), SIMDE_FLOAT16_VALUE( - 38.930), SIMDE_FLOAT16_VALUE( - 83.013), SIMDE_FLOAT16_VALUE( - 82.394) }, + { SIMDE_FLOAT16_VALUE( - 76.512), SIMDE_FLOAT16_VALUE( 98.430), SIMDE_FLOAT16_VALUE( - 36.632), SIMDE_FLOAT16_VALUE( 72.951) }, + { SIMDE_FLOAT16_VALUE( 81.267), SIMDE_FLOAT16_VALUE( 63.922), SIMDE_FLOAT16_VALUE( - 8.848), SIMDE_FLOAT16_VALUE( - 76.512), + SIMDE_FLOAT16_VALUE( - 14.419), SIMDE_FLOAT16_VALUE( - 13.784), SIMDE_FLOAT16_VALUE( - 
38.930), SIMDE_FLOAT16_VALUE( 98.430), + SIMDE_FLOAT16_VALUE( - 86.910), SIMDE_FLOAT16_VALUE( 60.900), SIMDE_FLOAT16_VALUE( - 83.013), SIMDE_FLOAT16_VALUE( - 36.632), + SIMDE_FLOAT16_VALUE( 96.117), SIMDE_FLOAT16_VALUE( - 46.465), SIMDE_FLOAT16_VALUE( - 82.394), SIMDE_FLOAT16_VALUE( 72.951) } }, + { { SIMDE_FLOAT16_VALUE( - 59.414), SIMDE_FLOAT16_VALUE( - 0.156), SIMDE_FLOAT16_VALUE( 25.048), SIMDE_FLOAT16_VALUE( 72.088) }, + { SIMDE_FLOAT16_VALUE( - 6.793), SIMDE_FLOAT16_VALUE( 3.483), SIMDE_FLOAT16_VALUE( - 70.169), SIMDE_FLOAT16_VALUE( - 17.526) }, + { SIMDE_FLOAT16_VALUE( - 45.584), SIMDE_FLOAT16_VALUE( 48.819), SIMDE_FLOAT16_VALUE( 99.172), SIMDE_FLOAT16_VALUE( - 28.748) }, + { SIMDE_FLOAT16_VALUE( 87.589), SIMDE_FLOAT16_VALUE( 24.842), SIMDE_FLOAT16_VALUE( 68.343), SIMDE_FLOAT16_VALUE( 86.487) }, + { SIMDE_FLOAT16_VALUE( - 59.414), SIMDE_FLOAT16_VALUE( - 6.793), SIMDE_FLOAT16_VALUE( - 45.584), SIMDE_FLOAT16_VALUE( 87.589), + SIMDE_FLOAT16_VALUE( - 0.156), SIMDE_FLOAT16_VALUE( 3.483), SIMDE_FLOAT16_VALUE( 48.819), SIMDE_FLOAT16_VALUE( 24.842), + SIMDE_FLOAT16_VALUE( 25.048), SIMDE_FLOAT16_VALUE( - 70.169), SIMDE_FLOAT16_VALUE( 99.172), SIMDE_FLOAT16_VALUE( 68.343), + SIMDE_FLOAT16_VALUE( 72.088), SIMDE_FLOAT16_VALUE( - 17.526), SIMDE_FLOAT16_VALUE( - 28.748), SIMDE_FLOAT16_VALUE( 86.487) } }, + { { SIMDE_FLOAT16_VALUE( - 20.140), SIMDE_FLOAT16_VALUE( - 42.975), SIMDE_FLOAT16_VALUE( 95.423), SIMDE_FLOAT16_VALUE( 44.578) }, + { SIMDE_FLOAT16_VALUE( - 89.956), SIMDE_FLOAT16_VALUE( 30.266), SIMDE_FLOAT16_VALUE( - 96.275), SIMDE_FLOAT16_VALUE( - 86.081) }, + { SIMDE_FLOAT16_VALUE( - 51.282), SIMDE_FLOAT16_VALUE( 69.208), SIMDE_FLOAT16_VALUE( 62.760), SIMDE_FLOAT16_VALUE( 60.084) }, + { SIMDE_FLOAT16_VALUE( 50.762), SIMDE_FLOAT16_VALUE( 51.328), SIMDE_FLOAT16_VALUE( - 5.349), SIMDE_FLOAT16_VALUE( 8.335) }, + { SIMDE_FLOAT16_VALUE( - 20.140), SIMDE_FLOAT16_VALUE( - 89.956), SIMDE_FLOAT16_VALUE( - 51.282), SIMDE_FLOAT16_VALUE( 50.762), + SIMDE_FLOAT16_VALUE( - 
42.975), SIMDE_FLOAT16_VALUE( 30.266), SIMDE_FLOAT16_VALUE( 69.208), SIMDE_FLOAT16_VALUE( 51.328), + SIMDE_FLOAT16_VALUE( 95.423), SIMDE_FLOAT16_VALUE( - 96.275), SIMDE_FLOAT16_VALUE( 62.760), SIMDE_FLOAT16_VALUE( - 5.349), + SIMDE_FLOAT16_VALUE( 44.578), SIMDE_FLOAT16_VALUE( - 86.081), SIMDE_FLOAT16_VALUE( 60.084), SIMDE_FLOAT16_VALUE( 8.335) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4x4_t r_ = { { simde_vld1_f16(test_vec[i].r0), + simde_vld1_f16(test_vec[i].r1), + simde_vld1_f16(test_vec[i].r2), + simde_vld1_f16(test_vec[i].r3), } }; + + simde_float16 a_[16]; + simde_vst4_f16(a_, r_); + simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); + + r_ = simde_vld4_f16(a_); + simde_test_arm_neon_assert_equal_f16x4(r_.val[0], simde_vld1_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x4(r_.val[1], simde_vld1_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x4(r_.val[2], simde_vld1_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x4(r_.val[3], simde_vld1_f16(test_vec[i].r3), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t r0 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r1 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r2 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r3 = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x16_t a = simde_vst4_f16(r0, r1, r2, r3); + + simde_test_arm_neon_write_f16x4(2, r0, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x16(2, a, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst4_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 
1 @@ -2185,6 +2299,184 @@ test_simde_vst4_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst4q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 r0[8]; + simde_float16 r1[8]; + simde_float16 r2[8]; + simde_float16 r3[8]; + simde_float16 a[32]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 14.743), SIMDE_FLOAT16_VALUE( 96.869), SIMDE_FLOAT16_VALUE( 35.732), SIMDE_FLOAT16_VALUE( - 11.204), + SIMDE_FLOAT16_VALUE( - 36.507), SIMDE_FLOAT16_VALUE( - 87.762), SIMDE_FLOAT16_VALUE( - 30.140), SIMDE_FLOAT16_VALUE( 40.553) }, + { SIMDE_FLOAT16_VALUE( 90.636), SIMDE_FLOAT16_VALUE( - 74.102), SIMDE_FLOAT16_VALUE( - 11.935), SIMDE_FLOAT16_VALUE( 76.173), + SIMDE_FLOAT16_VALUE( - 60.016), SIMDE_FLOAT16_VALUE( - 12.468), SIMDE_FLOAT16_VALUE( - 42.341), SIMDE_FLOAT16_VALUE( - 48.188) }, + { SIMDE_FLOAT16_VALUE( - 65.064), SIMDE_FLOAT16_VALUE( - 77.067), SIMDE_FLOAT16_VALUE( - 3.798), SIMDE_FLOAT16_VALUE( 78.248), + SIMDE_FLOAT16_VALUE( - 75.705), SIMDE_FLOAT16_VALUE( - 81.300), SIMDE_FLOAT16_VALUE( 41.755), SIMDE_FLOAT16_VALUE( 0.822) }, + { SIMDE_FLOAT16_VALUE( 45.696), SIMDE_FLOAT16_VALUE( - 91.718), SIMDE_FLOAT16_VALUE( 81.433), SIMDE_FLOAT16_VALUE( 42.239), + SIMDE_FLOAT16_VALUE( 70.791), SIMDE_FLOAT16_VALUE( 21.361), SIMDE_FLOAT16_VALUE( 51.896), SIMDE_FLOAT16_VALUE( - 38.175) }, + { SIMDE_FLOAT16_VALUE( - 14.743), SIMDE_FLOAT16_VALUE( 90.636), SIMDE_FLOAT16_VALUE( - 65.064), SIMDE_FLOAT16_VALUE( 45.696), + SIMDE_FLOAT16_VALUE( 96.869), SIMDE_FLOAT16_VALUE( - 74.102), SIMDE_FLOAT16_VALUE( - 77.067), SIMDE_FLOAT16_VALUE( - 91.718), + SIMDE_FLOAT16_VALUE( 35.732), SIMDE_FLOAT16_VALUE( - 11.935), SIMDE_FLOAT16_VALUE( - 3.798), SIMDE_FLOAT16_VALUE( 81.433), + SIMDE_FLOAT16_VALUE( - 11.204), SIMDE_FLOAT16_VALUE( 76.173), SIMDE_FLOAT16_VALUE( 78.248), SIMDE_FLOAT16_VALUE( 42.239), + SIMDE_FLOAT16_VALUE( - 36.507), SIMDE_FLOAT16_VALUE( - 60.016), SIMDE_FLOAT16_VALUE( - 75.705), SIMDE_FLOAT16_VALUE( 70.791), + SIMDE_FLOAT16_VALUE( - 87.762), 
SIMDE_FLOAT16_VALUE( - 12.468), SIMDE_FLOAT16_VALUE( - 81.300), SIMDE_FLOAT16_VALUE( 21.361), + SIMDE_FLOAT16_VALUE( - 30.140), SIMDE_FLOAT16_VALUE( - 42.341), SIMDE_FLOAT16_VALUE( 41.755), SIMDE_FLOAT16_VALUE( 51.896), + SIMDE_FLOAT16_VALUE( 40.553), SIMDE_FLOAT16_VALUE( - 48.188), SIMDE_FLOAT16_VALUE( 0.822), SIMDE_FLOAT16_VALUE( - 38.175) } }, + { { SIMDE_FLOAT16_VALUE( 20.836), SIMDE_FLOAT16_VALUE( - 6.255), SIMDE_FLOAT16_VALUE( 79.168), SIMDE_FLOAT16_VALUE( 29.039), + SIMDE_FLOAT16_VALUE( 96.927), SIMDE_FLOAT16_VALUE( 41.696), SIMDE_FLOAT16_VALUE( - 42.924), SIMDE_FLOAT16_VALUE( - 7.066) }, + { SIMDE_FLOAT16_VALUE( 14.370), SIMDE_FLOAT16_VALUE( 7.192), SIMDE_FLOAT16_VALUE( - 88.540), SIMDE_FLOAT16_VALUE( 13.116), + SIMDE_FLOAT16_VALUE( 64.603), SIMDE_FLOAT16_VALUE( - 63.088), SIMDE_FLOAT16_VALUE( - 11.758), SIMDE_FLOAT16_VALUE( 14.850) }, + { SIMDE_FLOAT16_VALUE( 27.807), SIMDE_FLOAT16_VALUE( 11.530), SIMDE_FLOAT16_VALUE( - 32.331), SIMDE_FLOAT16_VALUE( - 44.759), + SIMDE_FLOAT16_VALUE( 24.258), SIMDE_FLOAT16_VALUE( - 86.786), SIMDE_FLOAT16_VALUE( - 94.826), SIMDE_FLOAT16_VALUE( 48.908) }, + { SIMDE_FLOAT16_VALUE( 20.925), SIMDE_FLOAT16_VALUE( 99.983), SIMDE_FLOAT16_VALUE( - 59.823), SIMDE_FLOAT16_VALUE( 71.958), + SIMDE_FLOAT16_VALUE( 63.444), SIMDE_FLOAT16_VALUE( 71.547), SIMDE_FLOAT16_VALUE( 68.404), SIMDE_FLOAT16_VALUE( 32.576) }, + { SIMDE_FLOAT16_VALUE( 20.836), SIMDE_FLOAT16_VALUE( 14.370), SIMDE_FLOAT16_VALUE( 27.807), SIMDE_FLOAT16_VALUE( 20.925), + SIMDE_FLOAT16_VALUE( - 6.255), SIMDE_FLOAT16_VALUE( 7.192), SIMDE_FLOAT16_VALUE( 11.530), SIMDE_FLOAT16_VALUE( 99.983), + SIMDE_FLOAT16_VALUE( 79.168), SIMDE_FLOAT16_VALUE( - 88.540), SIMDE_FLOAT16_VALUE( - 32.331), SIMDE_FLOAT16_VALUE( - 59.823), + SIMDE_FLOAT16_VALUE( 29.039), SIMDE_FLOAT16_VALUE( 13.116), SIMDE_FLOAT16_VALUE( - 44.759), SIMDE_FLOAT16_VALUE( 71.958), + SIMDE_FLOAT16_VALUE( 96.927), SIMDE_FLOAT16_VALUE( 64.603), SIMDE_FLOAT16_VALUE( 24.258), SIMDE_FLOAT16_VALUE( 63.444), + 
SIMDE_FLOAT16_VALUE( 41.696), SIMDE_FLOAT16_VALUE( - 63.088), SIMDE_FLOAT16_VALUE( - 86.786), SIMDE_FLOAT16_VALUE( 71.547), + SIMDE_FLOAT16_VALUE( - 42.924), SIMDE_FLOAT16_VALUE( - 11.758), SIMDE_FLOAT16_VALUE( - 94.826), SIMDE_FLOAT16_VALUE( 68.404), + SIMDE_FLOAT16_VALUE( - 7.066), SIMDE_FLOAT16_VALUE( 14.850), SIMDE_FLOAT16_VALUE( 48.908), SIMDE_FLOAT16_VALUE( 32.576) } }, + { { SIMDE_FLOAT16_VALUE( - 98.651), SIMDE_FLOAT16_VALUE( 8.370), SIMDE_FLOAT16_VALUE( 5.212), SIMDE_FLOAT16_VALUE( - 39.002), + SIMDE_FLOAT16_VALUE( - 50.855), SIMDE_FLOAT16_VALUE( 9.832), SIMDE_FLOAT16_VALUE( 64.681), SIMDE_FLOAT16_VALUE( 53.110) }, + { SIMDE_FLOAT16_VALUE( 56.471), SIMDE_FLOAT16_VALUE( 66.543), SIMDE_FLOAT16_VALUE( - 19.108), SIMDE_FLOAT16_VALUE( 57.884), + SIMDE_FLOAT16_VALUE( 8.069), SIMDE_FLOAT16_VALUE( - 92.092), SIMDE_FLOAT16_VALUE( 21.120), SIMDE_FLOAT16_VALUE( - 28.746) }, + { SIMDE_FLOAT16_VALUE( - 33.477), SIMDE_FLOAT16_VALUE( 94.776), SIMDE_FLOAT16_VALUE( 39.423), SIMDE_FLOAT16_VALUE( - 14.126), + SIMDE_FLOAT16_VALUE( - 86.375), SIMDE_FLOAT16_VALUE( - 35.066), SIMDE_FLOAT16_VALUE( 93.908), SIMDE_FLOAT16_VALUE( - 70.598) }, + { SIMDE_FLOAT16_VALUE( - 47.494), SIMDE_FLOAT16_VALUE( - 73.328), SIMDE_FLOAT16_VALUE( - 6.515), SIMDE_FLOAT16_VALUE( 64.596), + SIMDE_FLOAT16_VALUE( - 85.478), SIMDE_FLOAT16_VALUE( - 93.767), SIMDE_FLOAT16_VALUE( - 96.092), SIMDE_FLOAT16_VALUE( - 43.609) }, + { SIMDE_FLOAT16_VALUE( - 98.651), SIMDE_FLOAT16_VALUE( 56.471), SIMDE_FLOAT16_VALUE( - 33.477), SIMDE_FLOAT16_VALUE( - 47.494), + SIMDE_FLOAT16_VALUE( 8.370), SIMDE_FLOAT16_VALUE( 66.543), SIMDE_FLOAT16_VALUE( 94.776), SIMDE_FLOAT16_VALUE( - 73.328), + SIMDE_FLOAT16_VALUE( 5.212), SIMDE_FLOAT16_VALUE( - 19.108), SIMDE_FLOAT16_VALUE( 39.423), SIMDE_FLOAT16_VALUE( - 6.515), + SIMDE_FLOAT16_VALUE( - 39.002), SIMDE_FLOAT16_VALUE( 57.884), SIMDE_FLOAT16_VALUE( - 14.126), SIMDE_FLOAT16_VALUE( 64.596), + SIMDE_FLOAT16_VALUE( - 50.855), SIMDE_FLOAT16_VALUE( 8.069), SIMDE_FLOAT16_VALUE( - 
86.375), SIMDE_FLOAT16_VALUE( - 85.478), + SIMDE_FLOAT16_VALUE( 9.832), SIMDE_FLOAT16_VALUE( - 92.092), SIMDE_FLOAT16_VALUE( - 35.066), SIMDE_FLOAT16_VALUE( - 93.767), + SIMDE_FLOAT16_VALUE( 64.681), SIMDE_FLOAT16_VALUE( 21.120), SIMDE_FLOAT16_VALUE( 93.908), SIMDE_FLOAT16_VALUE( - 96.092), + SIMDE_FLOAT16_VALUE( 53.110), SIMDE_FLOAT16_VALUE( - 28.746), SIMDE_FLOAT16_VALUE( - 70.598), SIMDE_FLOAT16_VALUE( - 43.609) } }, + { { SIMDE_FLOAT16_VALUE( 32.009), SIMDE_FLOAT16_VALUE( - 70.333), SIMDE_FLOAT16_VALUE( 5.538), SIMDE_FLOAT16_VALUE( 82.520), + SIMDE_FLOAT16_VALUE( - 40.966), SIMDE_FLOAT16_VALUE( - 11.607), SIMDE_FLOAT16_VALUE( - 54.581), SIMDE_FLOAT16_VALUE( 51.136) }, + { SIMDE_FLOAT16_VALUE( - 80.277), SIMDE_FLOAT16_VALUE( 70.486), SIMDE_FLOAT16_VALUE( - 49.720), SIMDE_FLOAT16_VALUE( 84.405), + SIMDE_FLOAT16_VALUE( - 38.234), SIMDE_FLOAT16_VALUE( 47.061), SIMDE_FLOAT16_VALUE( - 27.953), SIMDE_FLOAT16_VALUE( - 35.272) }, + { SIMDE_FLOAT16_VALUE( 37.433), SIMDE_FLOAT16_VALUE( 84.527), SIMDE_FLOAT16_VALUE( 41.358), SIMDE_FLOAT16_VALUE( - 69.610), + SIMDE_FLOAT16_VALUE( 51.678), SIMDE_FLOAT16_VALUE( - 94.404), SIMDE_FLOAT16_VALUE( - 16.678), SIMDE_FLOAT16_VALUE( 76.803) }, + { SIMDE_FLOAT16_VALUE( - 82.082), SIMDE_FLOAT16_VALUE( - 90.814), SIMDE_FLOAT16_VALUE( - 62.857), SIMDE_FLOAT16_VALUE( - 73.589), + SIMDE_FLOAT16_VALUE( 77.514), SIMDE_FLOAT16_VALUE( - 67.143), SIMDE_FLOAT16_VALUE( - 66.907), SIMDE_FLOAT16_VALUE( 66.753) }, + { SIMDE_FLOAT16_VALUE( 32.009), SIMDE_FLOAT16_VALUE( - 80.277), SIMDE_FLOAT16_VALUE( 37.433), SIMDE_FLOAT16_VALUE( - 82.082), + SIMDE_FLOAT16_VALUE( - 70.333), SIMDE_FLOAT16_VALUE( 70.486), SIMDE_FLOAT16_VALUE( 84.527), SIMDE_FLOAT16_VALUE( - 90.814), + SIMDE_FLOAT16_VALUE( 5.538), SIMDE_FLOAT16_VALUE( - 49.720), SIMDE_FLOAT16_VALUE( 41.358), SIMDE_FLOAT16_VALUE( - 62.857), + SIMDE_FLOAT16_VALUE( 82.520), SIMDE_FLOAT16_VALUE( 84.405), SIMDE_FLOAT16_VALUE( - 69.610), SIMDE_FLOAT16_VALUE( - 73.589), + SIMDE_FLOAT16_VALUE( - 40.966), 
SIMDE_FLOAT16_VALUE( - 38.234), SIMDE_FLOAT16_VALUE( 51.678), SIMDE_FLOAT16_VALUE( 77.514), + SIMDE_FLOAT16_VALUE( - 11.607), SIMDE_FLOAT16_VALUE( 47.061), SIMDE_FLOAT16_VALUE( - 94.404), SIMDE_FLOAT16_VALUE( - 67.143), + SIMDE_FLOAT16_VALUE( - 54.581), SIMDE_FLOAT16_VALUE( - 27.953), SIMDE_FLOAT16_VALUE( - 16.678), SIMDE_FLOAT16_VALUE( - 66.907), + SIMDE_FLOAT16_VALUE( 51.136), SIMDE_FLOAT16_VALUE( - 35.272), SIMDE_FLOAT16_VALUE( 76.803), SIMDE_FLOAT16_VALUE( 66.753) } }, + { { SIMDE_FLOAT16_VALUE( 13.527), SIMDE_FLOAT16_VALUE( 15.879), SIMDE_FLOAT16_VALUE( - 25.658), SIMDE_FLOAT16_VALUE( 64.222), + SIMDE_FLOAT16_VALUE( 72.758), SIMDE_FLOAT16_VALUE( 17.152), SIMDE_FLOAT16_VALUE( 61.595), SIMDE_FLOAT16_VALUE( - 25.744) }, + { SIMDE_FLOAT16_VALUE( 73.965), SIMDE_FLOAT16_VALUE( 66.305), SIMDE_FLOAT16_VALUE( - 82.679), SIMDE_FLOAT16_VALUE( - 48.114), + SIMDE_FLOAT16_VALUE( 35.804), SIMDE_FLOAT16_VALUE( 11.885), SIMDE_FLOAT16_VALUE( - 98.287), SIMDE_FLOAT16_VALUE( 61.933) }, + { SIMDE_FLOAT16_VALUE( - 79.049), SIMDE_FLOAT16_VALUE( 99.270), SIMDE_FLOAT16_VALUE( - 46.729), SIMDE_FLOAT16_VALUE( 30.358), + SIMDE_FLOAT16_VALUE( 46.057), SIMDE_FLOAT16_VALUE( - 87.368), SIMDE_FLOAT16_VALUE( 82.236), SIMDE_FLOAT16_VALUE( - 81.438) }, + { SIMDE_FLOAT16_VALUE( 31.502), SIMDE_FLOAT16_VALUE( 72.068), SIMDE_FLOAT16_VALUE( - 3.689), SIMDE_FLOAT16_VALUE( 42.230), + SIMDE_FLOAT16_VALUE( 9.827), SIMDE_FLOAT16_VALUE( 27.830), SIMDE_FLOAT16_VALUE( - 62.111), SIMDE_FLOAT16_VALUE( - 46.971) }, + { SIMDE_FLOAT16_VALUE( 13.527), SIMDE_FLOAT16_VALUE( 73.965), SIMDE_FLOAT16_VALUE( - 79.049), SIMDE_FLOAT16_VALUE( 31.502), + SIMDE_FLOAT16_VALUE( 15.879), SIMDE_FLOAT16_VALUE( 66.305), SIMDE_FLOAT16_VALUE( 99.270), SIMDE_FLOAT16_VALUE( 72.068), + SIMDE_FLOAT16_VALUE( - 25.658), SIMDE_FLOAT16_VALUE( - 82.679), SIMDE_FLOAT16_VALUE( - 46.729), SIMDE_FLOAT16_VALUE( - 3.689), + SIMDE_FLOAT16_VALUE( 64.222), SIMDE_FLOAT16_VALUE( - 48.114), SIMDE_FLOAT16_VALUE( 30.358), SIMDE_FLOAT16_VALUE( 42.230), + 
SIMDE_FLOAT16_VALUE( 72.758), SIMDE_FLOAT16_VALUE( 35.804), SIMDE_FLOAT16_VALUE( 46.057), SIMDE_FLOAT16_VALUE( 9.827), + SIMDE_FLOAT16_VALUE( 17.152), SIMDE_FLOAT16_VALUE( 11.885), SIMDE_FLOAT16_VALUE( - 87.368), SIMDE_FLOAT16_VALUE( 27.830), + SIMDE_FLOAT16_VALUE( 61.595), SIMDE_FLOAT16_VALUE( - 98.287), SIMDE_FLOAT16_VALUE( 82.236), SIMDE_FLOAT16_VALUE( - 62.111), + SIMDE_FLOAT16_VALUE( - 25.744), SIMDE_FLOAT16_VALUE( 61.933), SIMDE_FLOAT16_VALUE( - 81.438), SIMDE_FLOAT16_VALUE( - 46.971) } }, + { { SIMDE_FLOAT16_VALUE( - 98.937), SIMDE_FLOAT16_VALUE( - 75.185), SIMDE_FLOAT16_VALUE( 51.461), SIMDE_FLOAT16_VALUE( 34.040), + SIMDE_FLOAT16_VALUE( - 18.490), SIMDE_FLOAT16_VALUE( 2.435), SIMDE_FLOAT16_VALUE( - 31.936), SIMDE_FLOAT16_VALUE( - 44.440) }, + { SIMDE_FLOAT16_VALUE( 51.617), SIMDE_FLOAT16_VALUE( 92.309), SIMDE_FLOAT16_VALUE( - 32.479), SIMDE_FLOAT16_VALUE( 64.487), + SIMDE_FLOAT16_VALUE( 33.205), SIMDE_FLOAT16_VALUE( 26.250), SIMDE_FLOAT16_VALUE( - 8.956), SIMDE_FLOAT16_VALUE( 43.765) }, + { SIMDE_FLOAT16_VALUE( - 80.764), SIMDE_FLOAT16_VALUE( - 42.445), SIMDE_FLOAT16_VALUE( - 77.677), SIMDE_FLOAT16_VALUE( 54.417), + SIMDE_FLOAT16_VALUE( - 58.892), SIMDE_FLOAT16_VALUE( 67.912), SIMDE_FLOAT16_VALUE( - 19.769), SIMDE_FLOAT16_VALUE( - 94.018) }, + { SIMDE_FLOAT16_VALUE( 32.228), SIMDE_FLOAT16_VALUE( - 45.404), SIMDE_FLOAT16_VALUE( - 11.380), SIMDE_FLOAT16_VALUE( 85.317), + SIMDE_FLOAT16_VALUE( 32.872), SIMDE_FLOAT16_VALUE( - 10.946), SIMDE_FLOAT16_VALUE( 93.337), SIMDE_FLOAT16_VALUE( 94.934) }, + { SIMDE_FLOAT16_VALUE( - 98.937), SIMDE_FLOAT16_VALUE( 51.617), SIMDE_FLOAT16_VALUE( - 80.764), SIMDE_FLOAT16_VALUE( 32.228), + SIMDE_FLOAT16_VALUE( - 75.185), SIMDE_FLOAT16_VALUE( 92.309), SIMDE_FLOAT16_VALUE( - 42.445), SIMDE_FLOAT16_VALUE( - 45.404), + SIMDE_FLOAT16_VALUE( 51.461), SIMDE_FLOAT16_VALUE( - 32.479), SIMDE_FLOAT16_VALUE( - 77.677), SIMDE_FLOAT16_VALUE( - 11.380), + SIMDE_FLOAT16_VALUE( 34.040), SIMDE_FLOAT16_VALUE( 64.487), SIMDE_FLOAT16_VALUE( 
54.417), SIMDE_FLOAT16_VALUE( 85.317), + SIMDE_FLOAT16_VALUE( - 18.490), SIMDE_FLOAT16_VALUE( 33.205), SIMDE_FLOAT16_VALUE( - 58.892), SIMDE_FLOAT16_VALUE( 32.872), + SIMDE_FLOAT16_VALUE( 2.435), SIMDE_FLOAT16_VALUE( 26.250), SIMDE_FLOAT16_VALUE( 67.912), SIMDE_FLOAT16_VALUE( - 10.946), + SIMDE_FLOAT16_VALUE( - 31.936), SIMDE_FLOAT16_VALUE( - 8.956), SIMDE_FLOAT16_VALUE( - 19.769), SIMDE_FLOAT16_VALUE( 93.337), + SIMDE_FLOAT16_VALUE( - 44.440), SIMDE_FLOAT16_VALUE( 43.765), SIMDE_FLOAT16_VALUE( - 94.018), SIMDE_FLOAT16_VALUE( 94.934) } }, + { { SIMDE_FLOAT16_VALUE( - 82.721), SIMDE_FLOAT16_VALUE( - 5.083), SIMDE_FLOAT16_VALUE( 28.116), SIMDE_FLOAT16_VALUE( - 2.078), + SIMDE_FLOAT16_VALUE( - 8.045), SIMDE_FLOAT16_VALUE( 98.088), SIMDE_FLOAT16_VALUE( 77.205), SIMDE_FLOAT16_VALUE( - 86.702) }, + { SIMDE_FLOAT16_VALUE( - 75.686), SIMDE_FLOAT16_VALUE( - 67.078), SIMDE_FLOAT16_VALUE( 38.647), SIMDE_FLOAT16_VALUE( - 7.111), + SIMDE_FLOAT16_VALUE( - 74.933), SIMDE_FLOAT16_VALUE( - 54.055), SIMDE_FLOAT16_VALUE( - 51.905), SIMDE_FLOAT16_VALUE( 53.734) }, + { SIMDE_FLOAT16_VALUE( - 39.474), SIMDE_FLOAT16_VALUE( 89.193), SIMDE_FLOAT16_VALUE( - 72.210), SIMDE_FLOAT16_VALUE( - 42.032), + SIMDE_FLOAT16_VALUE( 61.321), SIMDE_FLOAT16_VALUE( - 12.021), SIMDE_FLOAT16_VALUE( - 35.830), SIMDE_FLOAT16_VALUE( - 85.851) }, + { SIMDE_FLOAT16_VALUE( 64.970), SIMDE_FLOAT16_VALUE( 85.757), SIMDE_FLOAT16_VALUE( 28.273), SIMDE_FLOAT16_VALUE( 77.789), + SIMDE_FLOAT16_VALUE( 50.825), SIMDE_FLOAT16_VALUE( 95.876), SIMDE_FLOAT16_VALUE( 44.735), SIMDE_FLOAT16_VALUE( - 28.570) }, + { SIMDE_FLOAT16_VALUE( - 82.721), SIMDE_FLOAT16_VALUE( - 75.686), SIMDE_FLOAT16_VALUE( - 39.474), SIMDE_FLOAT16_VALUE( 64.970), + SIMDE_FLOAT16_VALUE( - 5.083), SIMDE_FLOAT16_VALUE( - 67.078), SIMDE_FLOAT16_VALUE( 89.193), SIMDE_FLOAT16_VALUE( 85.757), + SIMDE_FLOAT16_VALUE( 28.116), SIMDE_FLOAT16_VALUE( 38.647), SIMDE_FLOAT16_VALUE( - 72.210), SIMDE_FLOAT16_VALUE( 28.273), + SIMDE_FLOAT16_VALUE( - 2.078), 
SIMDE_FLOAT16_VALUE( - 7.111), SIMDE_FLOAT16_VALUE( - 42.032), SIMDE_FLOAT16_VALUE( 77.789), + SIMDE_FLOAT16_VALUE( - 8.045), SIMDE_FLOAT16_VALUE( - 74.933), SIMDE_FLOAT16_VALUE( 61.321), SIMDE_FLOAT16_VALUE( 50.825), + SIMDE_FLOAT16_VALUE( 98.088), SIMDE_FLOAT16_VALUE( - 54.055), SIMDE_FLOAT16_VALUE( - 12.021), SIMDE_FLOAT16_VALUE( 95.876), + SIMDE_FLOAT16_VALUE( 77.205), SIMDE_FLOAT16_VALUE( - 51.905), SIMDE_FLOAT16_VALUE( - 35.830), SIMDE_FLOAT16_VALUE( 44.735), + SIMDE_FLOAT16_VALUE( - 86.702), SIMDE_FLOAT16_VALUE( 53.734), SIMDE_FLOAT16_VALUE( - 85.851), SIMDE_FLOAT16_VALUE( - 28.570) } }, + { { SIMDE_FLOAT16_VALUE( 56.021), SIMDE_FLOAT16_VALUE( - 47.189), SIMDE_FLOAT16_VALUE( 34.633), SIMDE_FLOAT16_VALUE( - 48.482), + SIMDE_FLOAT16_VALUE( - 13.024), SIMDE_FLOAT16_VALUE( - 68.164), SIMDE_FLOAT16_VALUE( - 38.493), SIMDE_FLOAT16_VALUE( - 96.972) }, + { SIMDE_FLOAT16_VALUE( 14.297), SIMDE_FLOAT16_VALUE( 60.328), SIMDE_FLOAT16_VALUE( - 55.132), SIMDE_FLOAT16_VALUE( 61.549), + SIMDE_FLOAT16_VALUE( 25.753), SIMDE_FLOAT16_VALUE( 72.855), SIMDE_FLOAT16_VALUE( 28.394), SIMDE_FLOAT16_VALUE( 58.315) }, + { SIMDE_FLOAT16_VALUE( - 52.297), SIMDE_FLOAT16_VALUE( - 59.309), SIMDE_FLOAT16_VALUE( 39.064), SIMDE_FLOAT16_VALUE( 92.546), + SIMDE_FLOAT16_VALUE( 29.222), SIMDE_FLOAT16_VALUE( 18.830), SIMDE_FLOAT16_VALUE( 78.006), SIMDE_FLOAT16_VALUE( - 34.477) }, + { SIMDE_FLOAT16_VALUE( 48.515), SIMDE_FLOAT16_VALUE( 6.778), SIMDE_FLOAT16_VALUE( 83.115), SIMDE_FLOAT16_VALUE( - 15.075), + SIMDE_FLOAT16_VALUE( - 64.544), SIMDE_FLOAT16_VALUE( - 5.615), SIMDE_FLOAT16_VALUE( - 79.529), SIMDE_FLOAT16_VALUE( - 20.122) }, + { SIMDE_FLOAT16_VALUE( 56.021), SIMDE_FLOAT16_VALUE( 14.297), SIMDE_FLOAT16_VALUE( - 52.297), SIMDE_FLOAT16_VALUE( 48.515), + SIMDE_FLOAT16_VALUE( - 47.189), SIMDE_FLOAT16_VALUE( 60.328), SIMDE_FLOAT16_VALUE( - 59.309), SIMDE_FLOAT16_VALUE( 6.778), + SIMDE_FLOAT16_VALUE( 34.633), SIMDE_FLOAT16_VALUE( - 55.132), SIMDE_FLOAT16_VALUE( 39.064), SIMDE_FLOAT16_VALUE( 83.115), 
+ SIMDE_FLOAT16_VALUE( - 48.482), SIMDE_FLOAT16_VALUE( 61.549), SIMDE_FLOAT16_VALUE( 92.546), SIMDE_FLOAT16_VALUE( - 15.075), + SIMDE_FLOAT16_VALUE( - 13.024), SIMDE_FLOAT16_VALUE( 25.753), SIMDE_FLOAT16_VALUE( 29.222), SIMDE_FLOAT16_VALUE( - 64.544), + SIMDE_FLOAT16_VALUE( - 68.164), SIMDE_FLOAT16_VALUE( 72.855), SIMDE_FLOAT16_VALUE( 18.830), SIMDE_FLOAT16_VALUE( - 5.615), + SIMDE_FLOAT16_VALUE( - 38.493), SIMDE_FLOAT16_VALUE( 28.394), SIMDE_FLOAT16_VALUE( 78.006), SIMDE_FLOAT16_VALUE( - 79.529), + SIMDE_FLOAT16_VALUE( - 96.972), SIMDE_FLOAT16_VALUE( 58.315), SIMDE_FLOAT16_VALUE( - 34.477), SIMDE_FLOAT16_VALUE( - 20.122) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8x4_t r_ = { { simde_vld1q_f16(test_vec[i].r0), + simde_vld1q_f16(test_vec[i].r1), + simde_vld1q_f16(test_vec[i].r2), + simde_vld1q_f16(test_vec[i].r3), } }; + + simde_float16 a_[32]; + simde_vst4q_f16(a_, r_); + simde_assert_equal_i(0, simde_memcmp(a_, test_vec[i].a, sizeof(test_vec[i].a))); + + r_ = simde_vld4q_f16(a_); + simde_test_arm_neon_assert_equal_f16x8(r_.val[0], simde_vld1q_f16(test_vec[i].r0), 1); + simde_test_arm_neon_assert_equal_f16x8(r_.val[1], simde_vld1q_f16(test_vec[i].r1), 1); + simde_test_arm_neon_assert_equal_f16x8(r_.val[2], simde_vld1q_f16(test_vec[i].r2), 1); + simde_test_arm_neon_assert_equal_f16x8(r_.val[3], simde_vld1q_f16(test_vec[i].r3), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t r0 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r1 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r2 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r3 = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x32_t a = simde_vst4q_f16(r0, r1, r2, r3); + + simde_test_arm_neon_write_f16x8(2, r0, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, r1, SIMDE_TEST_VEC_POS_MIDDLE); 
+ simde_test_arm_neon_write_f16x8(2, r2, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r3, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x32(2, a, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vst4q_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -5480,6 +5772,7 @@ test_simde_vst4q_u64 (SIMDE_MUNIT_TEST_ARGS) { SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vst4_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_s8) @@ -5491,6 +5784,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_s8) diff --git a/test/arm/neon/st4_lane.c b/test/arm/neon/st4_lane.c index 223b5e19b..bc0cd96de 100644 --- a/test/arm/neon/st4_lane.c +++ b/test/arm/neon/st4_lane.c @@ -527,6 +527,81 @@ test_simde_vst4_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst4_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Test for simde_vst4_lane_f16: for every entry of test_vec, loads val[0..3] into a simde_float16x4x4_t, stores lane `lane` through the intrinsic into a local 4-element buffer and compares that buffer element by element with a[]. Returns 0 once every entry has been checked. */ + struct { + simde_float16_t a[4]; /* expected stored values; in the entries below a[k] equals val[k][lane] */ + simde_float16_t val[4][4]; /* the four 4-lane input vectors */ + int lane; /* lane index handed to the intrinsic */ + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 60.083), SIMDE_FLOAT16_VALUE( 58.683), SIMDE_FLOAT16_VALUE( 26.432), SIMDE_FLOAT16_VALUE( 26.942) }, + { { SIMDE_FLOAT16_VALUE( 51.260), SIMDE_FLOAT16_VALUE( 43.575), SIMDE_FLOAT16_VALUE( 60.083), SIMDE_FLOAT16_VALUE( 6.426) }, + { SIMDE_FLOAT16_VALUE( - 54.660), SIMDE_FLOAT16_VALUE( - 98.247), SIMDE_FLOAT16_VALUE( 58.683), SIMDE_FLOAT16_VALUE( - 59.385) }, + { SIMDE_FLOAT16_VALUE( - 2.564), SIMDE_FLOAT16_VALUE( 8.979), SIMDE_FLOAT16_VALUE( 26.432), SIMDE_FLOAT16_VALUE( 65.683) }, + { SIMDE_FLOAT16_VALUE( - 27.627), SIMDE_FLOAT16_VALUE( - 39.374), SIMDE_FLOAT16_VALUE( 26.942), SIMDE_FLOAT16_VALUE( 46.466) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( - 37.067), SIMDE_FLOAT16_VALUE( 55.701),
SIMDE_FLOAT16_VALUE( - 31.523), SIMDE_FLOAT16_VALUE( 65.176) }, + { { SIMDE_FLOAT16_VALUE( - 6.453), SIMDE_FLOAT16_VALUE( 99.709), SIMDE_FLOAT16_VALUE( - 82.488), SIMDE_FLOAT16_VALUE( - 37.067) }, + { SIMDE_FLOAT16_VALUE( 54.797), SIMDE_FLOAT16_VALUE( 11.286), SIMDE_FLOAT16_VALUE( - 73.371), SIMDE_FLOAT16_VALUE( 55.701) }, + { SIMDE_FLOAT16_VALUE( - 24.340), SIMDE_FLOAT16_VALUE( 63.362), SIMDE_FLOAT16_VALUE( 5.424), SIMDE_FLOAT16_VALUE( - 31.523) }, + { SIMDE_FLOAT16_VALUE( - 43.884), SIMDE_FLOAT16_VALUE( 18.129), SIMDE_FLOAT16_VALUE( 58.203), SIMDE_FLOAT16_VALUE( 65.176) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( - 28.720), SIMDE_FLOAT16_VALUE( - 49.573), SIMDE_FLOAT16_VALUE( 24.949), SIMDE_FLOAT16_VALUE( 49.280) }, + { { SIMDE_FLOAT16_VALUE( 61.070), SIMDE_FLOAT16_VALUE( - 5.625), SIMDE_FLOAT16_VALUE( - 72.174), SIMDE_FLOAT16_VALUE( - 28.720) }, + { SIMDE_FLOAT16_VALUE( 40.072), SIMDE_FLOAT16_VALUE( - 46.993), SIMDE_FLOAT16_VALUE( - 84.710), SIMDE_FLOAT16_VALUE( - 49.573) }, + { SIMDE_FLOAT16_VALUE( 77.467), SIMDE_FLOAT16_VALUE( - 72.848), SIMDE_FLOAT16_VALUE( - 83.441), SIMDE_FLOAT16_VALUE( 24.949) }, + { SIMDE_FLOAT16_VALUE( - 55.436), SIMDE_FLOAT16_VALUE( - 24.861), SIMDE_FLOAT16_VALUE( 3.198), SIMDE_FLOAT16_VALUE( 49.280) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( 51.625), SIMDE_FLOAT16_VALUE( 97.892), SIMDE_FLOAT16_VALUE( - 17.597), SIMDE_FLOAT16_VALUE( 5.577) }, + { { SIMDE_FLOAT16_VALUE( 33.135), SIMDE_FLOAT16_VALUE( 16.293), SIMDE_FLOAT16_VALUE( - 26.543), SIMDE_FLOAT16_VALUE( 51.625) }, + { SIMDE_FLOAT16_VALUE( 81.686), SIMDE_FLOAT16_VALUE( 51.696), SIMDE_FLOAT16_VALUE( - 3.389), SIMDE_FLOAT16_VALUE( 97.892) }, + { SIMDE_FLOAT16_VALUE( - 88.988), SIMDE_FLOAT16_VALUE( 33.960), SIMDE_FLOAT16_VALUE( - 25.360), SIMDE_FLOAT16_VALUE( - 17.597) }, + { SIMDE_FLOAT16_VALUE( - 9.130), SIMDE_FLOAT16_VALUE( - 32.157), SIMDE_FLOAT16_VALUE( 22.866), SIMDE_FLOAT16_VALUE( 5.577) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( - 15.048), SIMDE_FLOAT16_VALUE(
26.311), SIMDE_FLOAT16_VALUE( - 35.219), SIMDE_FLOAT16_VALUE( - 43.204) }, + { { SIMDE_FLOAT16_VALUE( - 15.048), SIMDE_FLOAT16_VALUE( 61.508), SIMDE_FLOAT16_VALUE( 97.472), SIMDE_FLOAT16_VALUE( 66.229) }, + { SIMDE_FLOAT16_VALUE( 26.311), SIMDE_FLOAT16_VALUE( - 60.377), SIMDE_FLOAT16_VALUE( 34.214), SIMDE_FLOAT16_VALUE( - 88.647) }, + { SIMDE_FLOAT16_VALUE( - 35.219), SIMDE_FLOAT16_VALUE( - 74.942), SIMDE_FLOAT16_VALUE( - 75.514), SIMDE_FLOAT16_VALUE( 85.620) }, + { SIMDE_FLOAT16_VALUE( - 43.204), SIMDE_FLOAT16_VALUE( - 55.337), SIMDE_FLOAT16_VALUE( 10.157), SIMDE_FLOAT16_VALUE( 62.595) } }, + INT8_C( 0) }, + { { SIMDE_FLOAT16_VALUE( 38.903), SIMDE_FLOAT16_VALUE( - 77.987), SIMDE_FLOAT16_VALUE( 20.926), SIMDE_FLOAT16_VALUE( 9.521) }, + { { SIMDE_FLOAT16_VALUE( 7.294), SIMDE_FLOAT16_VALUE( 38.903), SIMDE_FLOAT16_VALUE( 66.733), SIMDE_FLOAT16_VALUE( 44.944) }, + { SIMDE_FLOAT16_VALUE( - 96.975), SIMDE_FLOAT16_VALUE( - 77.987), SIMDE_FLOAT16_VALUE( - 39.390), SIMDE_FLOAT16_VALUE( - 30.978) }, + { SIMDE_FLOAT16_VALUE( 28.002), SIMDE_FLOAT16_VALUE( 20.926), SIMDE_FLOAT16_VALUE( - 14.789), SIMDE_FLOAT16_VALUE( 24.786) }, + { SIMDE_FLOAT16_VALUE( - 99.879), SIMDE_FLOAT16_VALUE( 9.521), SIMDE_FLOAT16_VALUE( - 88.263), SIMDE_FLOAT16_VALUE( 1.612) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( - 7.449), SIMDE_FLOAT16_VALUE( 16.069), SIMDE_FLOAT16_VALUE( 25.206), SIMDE_FLOAT16_VALUE( 48.665) }, + { { SIMDE_FLOAT16_VALUE( - 94.903), SIMDE_FLOAT16_VALUE( - 7.449), SIMDE_FLOAT16_VALUE( 42.096), SIMDE_FLOAT16_VALUE( - 68.648) }, + { SIMDE_FLOAT16_VALUE( - 6.593), SIMDE_FLOAT16_VALUE( 16.069), SIMDE_FLOAT16_VALUE( 0.945), SIMDE_FLOAT16_VALUE( - 9.970) }, + { SIMDE_FLOAT16_VALUE( - 77.854), SIMDE_FLOAT16_VALUE( 25.206), SIMDE_FLOAT16_VALUE( - 53.484), SIMDE_FLOAT16_VALUE( 32.713) }, + { SIMDE_FLOAT16_VALUE( - 73.085), SIMDE_FLOAT16_VALUE( 48.665), SIMDE_FLOAT16_VALUE( - 24.966), SIMDE_FLOAT16_VALUE( 80.918) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( 69.111),
SIMDE_FLOAT16_VALUE( - 96.976), SIMDE_FLOAT16_VALUE( - 44.445), SIMDE_FLOAT16_VALUE( - 63.048) }, + { { SIMDE_FLOAT16_VALUE( 13.879), SIMDE_FLOAT16_VALUE( - 95.193), SIMDE_FLOAT16_VALUE( 98.700), SIMDE_FLOAT16_VALUE( 69.111) }, + { SIMDE_FLOAT16_VALUE( - 87.658), SIMDE_FLOAT16_VALUE( - 62.304), SIMDE_FLOAT16_VALUE( 59.211), SIMDE_FLOAT16_VALUE( - 96.976) }, + { SIMDE_FLOAT16_VALUE( 27.387), SIMDE_FLOAT16_VALUE( 47.095), SIMDE_FLOAT16_VALUE( - 84.285), SIMDE_FLOAT16_VALUE( - 44.445) }, + { SIMDE_FLOAT16_VALUE( - 79.622), SIMDE_FLOAT16_VALUE( - 44.543), SIMDE_FLOAT16_VALUE( 19.639), SIMDE_FLOAT16_VALUE( - 63.048) } }, + INT8_C( 3) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x4x4_t val = {{simde_vld1_f16(test_vec[i].val[0]), + simde_vld1_f16(test_vec[i].val[1]), + simde_vld1_f16(test_vec[i].val[2]), + simde_vld1_f16(test_vec[i].val[3])}}; + simde_float16_t a[4]; /* receives the four values written by the store */ + SIMDE_CONSTIFY_4_NO_RESULT_(simde_vst4_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); /* NOTE(review): presumably the macro maps the runtime lane (0..3) onto a constant lane argument and takes the HEDLEY_UNREACHABLE() branch otherwise - confirm against its definition */ + + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + simde_assert_equal_f16(a[2], test_vec[i].a[2], 1); + simde_assert_equal_f16(a[3], test_vec[i].a[3], 1); + } + + return 0; +} + static int test_simde_vst4_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1216,6 +1291,113 @@ test_simde_vst4q_lane_u64 (SIMDE_MUNIT_TEST_ARGS) { #endif } +static int +test_simde_vst4q_lane_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Test for simde_vst4q_lane_f16: same procedure as the 4-lane test, but val[0..3] are 8-lane vectors loaded into a simde_float16x8x4_t. Returns 0 once every entry has been checked. */ + struct { + simde_float16_t a[4]; /* expected stored values; in the entries below a[k] equals val[k][lane] */ + simde_float16_t val[4][8]; /* the four 8-lane input vectors */ + int lane; /* lane index handed to the intrinsic */ + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 98.770), SIMDE_FLOAT16_VALUE( 14.613), SIMDE_FLOAT16_VALUE( - 84.301), SIMDE_FLOAT16_VALUE( 5.731) }, + { { SIMDE_FLOAT16_VALUE( - 98.770), SIMDE_FLOAT16_VALUE( 72.336), SIMDE_FLOAT16_VALUE( 54.135), SIMDE_FLOAT16_VALUE( 2.463), + SIMDE_FLOAT16_VALUE( 22.308), SIMDE_FLOAT16_VALUE( 96.431), SIMDE_FLOAT16_VALUE( - 70.869), SIMDE_FLOAT16_VALUE( - 60.824) }, + { SIMDE_FLOAT16_VALUE( 14.613),
SIMDE_FLOAT16_VALUE( - 44.363), SIMDE_FLOAT16_VALUE( 22.697), SIMDE_FLOAT16_VALUE( - 35.818), + SIMDE_FLOAT16_VALUE( 3.415), SIMDE_FLOAT16_VALUE( - 49.499), SIMDE_FLOAT16_VALUE( 88.012), SIMDE_FLOAT16_VALUE( - 62.649) }, + { SIMDE_FLOAT16_VALUE( - 84.301), SIMDE_FLOAT16_VALUE( 83.781), SIMDE_FLOAT16_VALUE( 77.145), SIMDE_FLOAT16_VALUE( 90.152), + SIMDE_FLOAT16_VALUE( - 39.448), SIMDE_FLOAT16_VALUE( - 61.272), SIMDE_FLOAT16_VALUE( - 36.164), SIMDE_FLOAT16_VALUE( - 80.754) }, + { SIMDE_FLOAT16_VALUE( 5.731), SIMDE_FLOAT16_VALUE( 35.555), SIMDE_FLOAT16_VALUE( 50.103), SIMDE_FLOAT16_VALUE( 5.404), + SIMDE_FLOAT16_VALUE( - 91.869), SIMDE_FLOAT16_VALUE( - 74.373), SIMDE_FLOAT16_VALUE( 44.386), SIMDE_FLOAT16_VALUE( 93.547) } }, + INT8_C( 0) }, + { { SIMDE_FLOAT16_VALUE( 33.229), SIMDE_FLOAT16_VALUE( 61.127), SIMDE_FLOAT16_VALUE( 84.941), SIMDE_FLOAT16_VALUE( - 60.142) }, + { { SIMDE_FLOAT16_VALUE( 19.711), SIMDE_FLOAT16_VALUE( 18.155), SIMDE_FLOAT16_VALUE( 57.075), SIMDE_FLOAT16_VALUE( 33.229), + SIMDE_FLOAT16_VALUE( 76.025), SIMDE_FLOAT16_VALUE( 51.787), SIMDE_FLOAT16_VALUE( - 94.944), SIMDE_FLOAT16_VALUE( - 15.758) }, + { SIMDE_FLOAT16_VALUE( - 66.362), SIMDE_FLOAT16_VALUE( - 34.575), SIMDE_FLOAT16_VALUE( - 83.725), SIMDE_FLOAT16_VALUE( 61.127), + SIMDE_FLOAT16_VALUE( - 69.232), SIMDE_FLOAT16_VALUE( - 49.809), SIMDE_FLOAT16_VALUE( - 88.043), SIMDE_FLOAT16_VALUE( - 6.273) }, + { SIMDE_FLOAT16_VALUE( 99.864), SIMDE_FLOAT16_VALUE( 93.575), SIMDE_FLOAT16_VALUE( - 44.208), SIMDE_FLOAT16_VALUE( 84.941), + SIMDE_FLOAT16_VALUE( 88.583), SIMDE_FLOAT16_VALUE( - 27.809), SIMDE_FLOAT16_VALUE( 82.999), SIMDE_FLOAT16_VALUE( 43.553) }, + { SIMDE_FLOAT16_VALUE( - 9.077), SIMDE_FLOAT16_VALUE( - 27.498), SIMDE_FLOAT16_VALUE( - 91.044), SIMDE_FLOAT16_VALUE( - 60.142), + SIMDE_FLOAT16_VALUE( - 8.960), SIMDE_FLOAT16_VALUE( - 42.727), SIMDE_FLOAT16_VALUE( 83.794), SIMDE_FLOAT16_VALUE( - 14.626) } }, + INT8_C( 3) }, + { { SIMDE_FLOAT16_VALUE( - 52.785), SIMDE_FLOAT16_VALUE( 50.278),
SIMDE_FLOAT16_VALUE( 80.135), SIMDE_FLOAT16_VALUE( 70.735) }, + { { SIMDE_FLOAT16_VALUE( - 94.777), SIMDE_FLOAT16_VALUE( - 52.785), SIMDE_FLOAT16_VALUE( 1.541), SIMDE_FLOAT16_VALUE( 6.163), + SIMDE_FLOAT16_VALUE( 42.017), SIMDE_FLOAT16_VALUE( 1.317), SIMDE_FLOAT16_VALUE( 16.236), SIMDE_FLOAT16_VALUE( - 96.325) }, + { SIMDE_FLOAT16_VALUE( - 54.833), SIMDE_FLOAT16_VALUE( 50.278), SIMDE_FLOAT16_VALUE( 36.420), SIMDE_FLOAT16_VALUE( 7.185), + SIMDE_FLOAT16_VALUE( - 47.910), SIMDE_FLOAT16_VALUE( - 92.387), SIMDE_FLOAT16_VALUE( - 70.291), SIMDE_FLOAT16_VALUE( - 87.456) }, + { SIMDE_FLOAT16_VALUE( 98.589), SIMDE_FLOAT16_VALUE( 80.135), SIMDE_FLOAT16_VALUE( 90.248), SIMDE_FLOAT16_VALUE( - 57.291), + SIMDE_FLOAT16_VALUE( 78.782), SIMDE_FLOAT16_VALUE( - 18.645), SIMDE_FLOAT16_VALUE( 44.035), SIMDE_FLOAT16_VALUE( - 69.060) }, + { SIMDE_FLOAT16_VALUE( - 4.907), SIMDE_FLOAT16_VALUE( 70.735), SIMDE_FLOAT16_VALUE( 60.495), SIMDE_FLOAT16_VALUE( 46.837), + SIMDE_FLOAT16_VALUE( 48.727), SIMDE_FLOAT16_VALUE( 11.956), SIMDE_FLOAT16_VALUE( 63.488), SIMDE_FLOAT16_VALUE( 54.092) } }, + INT8_C( 1) }, + { { SIMDE_FLOAT16_VALUE( 44.476), SIMDE_FLOAT16_VALUE( 37.690), SIMDE_FLOAT16_VALUE( - 71.366), SIMDE_FLOAT16_VALUE( 65.597) }, + { { SIMDE_FLOAT16_VALUE( 7.966), SIMDE_FLOAT16_VALUE( - 89.954), SIMDE_FLOAT16_VALUE( 44.476), SIMDE_FLOAT16_VALUE( 93.979), + SIMDE_FLOAT16_VALUE( - 92.909), SIMDE_FLOAT16_VALUE( - 70.967), SIMDE_FLOAT16_VALUE( - 95.756), SIMDE_FLOAT16_VALUE( 4.963) }, + { SIMDE_FLOAT16_VALUE( - 29.745), SIMDE_FLOAT16_VALUE( - 67.014), SIMDE_FLOAT16_VALUE( 37.690), SIMDE_FLOAT16_VALUE( - 76.656), + SIMDE_FLOAT16_VALUE( - 51.753), SIMDE_FLOAT16_VALUE( 7.929), SIMDE_FLOAT16_VALUE( - 70.420), SIMDE_FLOAT16_VALUE( 43.684) }, + { SIMDE_FLOAT16_VALUE( 44.722), SIMDE_FLOAT16_VALUE( 15.111), SIMDE_FLOAT16_VALUE( - 71.366), SIMDE_FLOAT16_VALUE( - 97.766), + SIMDE_FLOAT16_VALUE( 89.998), SIMDE_FLOAT16_VALUE( 21.407), SIMDE_FLOAT16_VALUE( - 19.366), SIMDE_FLOAT16_VALUE( - 60.287) }, + {
SIMDE_FLOAT16_VALUE( - 34.807), SIMDE_FLOAT16_VALUE( - 87.033), SIMDE_FLOAT16_VALUE( 65.597), SIMDE_FLOAT16_VALUE( - 27.301), + SIMDE_FLOAT16_VALUE( - 70.444), SIMDE_FLOAT16_VALUE( 60.262), SIMDE_FLOAT16_VALUE( - 19.955), SIMDE_FLOAT16_VALUE( - 71.415) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 22.061), SIMDE_FLOAT16_VALUE( - 54.172), SIMDE_FLOAT16_VALUE( - 54.779), SIMDE_FLOAT16_VALUE( - 43.246) }, + { { SIMDE_FLOAT16_VALUE( 43.494), SIMDE_FLOAT16_VALUE( 62.681), SIMDE_FLOAT16_VALUE( 38.498), SIMDE_FLOAT16_VALUE( - 23.142), + SIMDE_FLOAT16_VALUE( 57.776), SIMDE_FLOAT16_VALUE( - 16.032), SIMDE_FLOAT16_VALUE( 22.061), SIMDE_FLOAT16_VALUE( - 47.440) }, + { SIMDE_FLOAT16_VALUE( 8.881), SIMDE_FLOAT16_VALUE( - 4.877), SIMDE_FLOAT16_VALUE( 93.213), SIMDE_FLOAT16_VALUE( - 68.090), + SIMDE_FLOAT16_VALUE( - 32.443), SIMDE_FLOAT16_VALUE( - 46.325), SIMDE_FLOAT16_VALUE( - 54.172), SIMDE_FLOAT16_VALUE( - 13.517) }, + { SIMDE_FLOAT16_VALUE( 77.177), SIMDE_FLOAT16_VALUE( - 77.041), SIMDE_FLOAT16_VALUE( 32.358), SIMDE_FLOAT16_VALUE( - 42.327), + SIMDE_FLOAT16_VALUE( 95.672), SIMDE_FLOAT16_VALUE( 94.045), SIMDE_FLOAT16_VALUE( - 54.779), SIMDE_FLOAT16_VALUE( - 2.682) }, + { SIMDE_FLOAT16_VALUE( - 84.388), SIMDE_FLOAT16_VALUE( - 25.734), SIMDE_FLOAT16_VALUE( 86.870), SIMDE_FLOAT16_VALUE( 59.016), + SIMDE_FLOAT16_VALUE( 59.342), SIMDE_FLOAT16_VALUE( 28.672), SIMDE_FLOAT16_VALUE( - 43.246), SIMDE_FLOAT16_VALUE( 97.343) } }, + INT8_C( 6) }, + { { SIMDE_FLOAT16_VALUE( - 20.805), SIMDE_FLOAT16_VALUE( 71.371), SIMDE_FLOAT16_VALUE( - 89.862), SIMDE_FLOAT16_VALUE( 25.312) }, + { { SIMDE_FLOAT16_VALUE( - 71.695), SIMDE_FLOAT16_VALUE( - 40.345), SIMDE_FLOAT16_VALUE( - 20.805), SIMDE_FLOAT16_VALUE( 38.667), + SIMDE_FLOAT16_VALUE( 20.408), SIMDE_FLOAT16_VALUE( - 86.972), SIMDE_FLOAT16_VALUE( - 56.312), SIMDE_FLOAT16_VALUE( 76.247) }, + { SIMDE_FLOAT16_VALUE( 39.998), SIMDE_FLOAT16_VALUE( - 79.719), SIMDE_FLOAT16_VALUE( 71.371), SIMDE_FLOAT16_VALUE( 7.846), + SIMDE_FLOAT16_VALUE( - 94.570),
SIMDE_FLOAT16_VALUE( - 53.312), SIMDE_FLOAT16_VALUE( - 72.990), SIMDE_FLOAT16_VALUE( 58.388) }, + { SIMDE_FLOAT16_VALUE( 8.013), SIMDE_FLOAT16_VALUE( - 9.782), SIMDE_FLOAT16_VALUE( - 89.862), SIMDE_FLOAT16_VALUE( - 67.543), + SIMDE_FLOAT16_VALUE( 13.264), SIMDE_FLOAT16_VALUE( 29.069), SIMDE_FLOAT16_VALUE( - 49.153), SIMDE_FLOAT16_VALUE( 26.139) }, + { SIMDE_FLOAT16_VALUE( - 18.425), SIMDE_FLOAT16_VALUE( - 61.420), SIMDE_FLOAT16_VALUE( 25.312), SIMDE_FLOAT16_VALUE( - 25.551), + SIMDE_FLOAT16_VALUE( 6.080), SIMDE_FLOAT16_VALUE( 38.418), SIMDE_FLOAT16_VALUE( 13.340), SIMDE_FLOAT16_VALUE( - 56.124) } }, + INT8_C( 2) }, + { { SIMDE_FLOAT16_VALUE( 78.600), SIMDE_FLOAT16_VALUE( 61.792), SIMDE_FLOAT16_VALUE( - 25.973), SIMDE_FLOAT16_VALUE( 76.425) }, + { { SIMDE_FLOAT16_VALUE( 18.958), SIMDE_FLOAT16_VALUE( 64.729), SIMDE_FLOAT16_VALUE( 1.914), SIMDE_FLOAT16_VALUE( 85.223), + SIMDE_FLOAT16_VALUE( 96.281), SIMDE_FLOAT16_VALUE( 78.600), SIMDE_FLOAT16_VALUE( 71.180), SIMDE_FLOAT16_VALUE( 43.076) }, + { SIMDE_FLOAT16_VALUE( - 82.493), SIMDE_FLOAT16_VALUE( - 47.753), SIMDE_FLOAT16_VALUE( - 9.341), SIMDE_FLOAT16_VALUE( 31.656), + SIMDE_FLOAT16_VALUE( - 38.949), SIMDE_FLOAT16_VALUE( 61.792), SIMDE_FLOAT16_VALUE( 84.671), SIMDE_FLOAT16_VALUE( 92.457) }, + { SIMDE_FLOAT16_VALUE( - 78.210), SIMDE_FLOAT16_VALUE( 24.820), SIMDE_FLOAT16_VALUE( 65.528), SIMDE_FLOAT16_VALUE( 46.437), + SIMDE_FLOAT16_VALUE( 13.524), SIMDE_FLOAT16_VALUE( - 25.973), SIMDE_FLOAT16_VALUE( - 0.656), SIMDE_FLOAT16_VALUE( - 10.110) }, + { SIMDE_FLOAT16_VALUE( 50.870), SIMDE_FLOAT16_VALUE( - 85.824), SIMDE_FLOAT16_VALUE( - 65.392), SIMDE_FLOAT16_VALUE( 38.653), + SIMDE_FLOAT16_VALUE( 91.978), SIMDE_FLOAT16_VALUE( 76.425), SIMDE_FLOAT16_VALUE( 16.142), SIMDE_FLOAT16_VALUE( 29.744) } }, + INT8_C( 5) }, + { { SIMDE_FLOAT16_VALUE( - 42.908), SIMDE_FLOAT16_VALUE( 47.841), SIMDE_FLOAT16_VALUE( 45.710), SIMDE_FLOAT16_VALUE( - 13.391) }, + { { SIMDE_FLOAT16_VALUE( - 33.961), SIMDE_FLOAT16_VALUE( 18.906),
SIMDE_FLOAT16_VALUE( - 56.106), SIMDE_FLOAT16_VALUE( 57.120), + SIMDE_FLOAT16_VALUE( - 69.166), SIMDE_FLOAT16_VALUE( 2.556), SIMDE_FLOAT16_VALUE( - 71.425), SIMDE_FLOAT16_VALUE( - 42.908) }, + { SIMDE_FLOAT16_VALUE( 4.074), SIMDE_FLOAT16_VALUE( 74.119), SIMDE_FLOAT16_VALUE( 78.984), SIMDE_FLOAT16_VALUE( - 24.515), + SIMDE_FLOAT16_VALUE( - 21.240), SIMDE_FLOAT16_VALUE( - 59.624), SIMDE_FLOAT16_VALUE( 91.931), SIMDE_FLOAT16_VALUE( 47.841) }, + { SIMDE_FLOAT16_VALUE( 74.533), SIMDE_FLOAT16_VALUE( - 22.329), SIMDE_FLOAT16_VALUE( - 3.682), SIMDE_FLOAT16_VALUE( 71.406), + SIMDE_FLOAT16_VALUE( 65.214), SIMDE_FLOAT16_VALUE( 18.547), SIMDE_FLOAT16_VALUE( - 64.480), SIMDE_FLOAT16_VALUE( 45.710) }, + { SIMDE_FLOAT16_VALUE( 99.987), SIMDE_FLOAT16_VALUE( 73.572), SIMDE_FLOAT16_VALUE( 43.785), SIMDE_FLOAT16_VALUE( 25.305), + SIMDE_FLOAT16_VALUE( - 43.854), SIMDE_FLOAT16_VALUE( 87.175), SIMDE_FLOAT16_VALUE( 6.221), SIMDE_FLOAT16_VALUE( - 13.391) } }, + INT8_C( 7) }, + }; + + for (size_t i = 0; i < (sizeof(test_vec) / sizeof(test_vec[0])); i++) { + simde_float16x8x4_t val = {{simde_vld1q_f16(test_vec[i].val[0]), + simde_vld1q_f16(test_vec[i].val[1]), + simde_vld1q_f16(test_vec[i].val[2]), + simde_vld1q_f16(test_vec[i].val[3])}}; + simde_float16_t a[4]; /* receives the four values written by the store */ + SIMDE_CONSTIFY_8_NO_RESULT_(simde_vst4q_lane_f16, HEDLEY_UNREACHABLE(), + test_vec[i].lane, a, val); /* NOTE(review): presumably the macro maps the runtime lane (0..7) onto a constant lane argument and takes the HEDLEY_UNREACHABLE() branch otherwise - confirm against its definition */ + + simde_assert_equal_f16(a[0], test_vec[i].a[0], 1); + simde_assert_equal_f16(a[1], test_vec[i].a[1], 1); + simde_assert_equal_f16(a[2], test_vec[i].a[2], 1); + simde_assert_equal_f16(a[3], test_vec[i].a[3], 1); + } + + return 0; +} + static int test_simde_vst4q_lane_f32 (SIMDE_MUNIT_TEST_ARGS) { #if 1 @@ -1336,6 +1518,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4_lane_f64) @@ -1347,6
+1530,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_u32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_u64) +SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vst4q_lane_f64) #endif /* !defined(SIMDE_BUG_INTEL_857088) */ diff --git a/test/arm/neon/trn.c b/test/arm/neon/trn.c index ccc336101..0461016fe 100644 --- a/test/arm/neon/trn.c +++ b/test/arm/neon/trn.c @@ -5,8 +5,77 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vtrn_f16 (SIMDE_MUNIT_TEST_ARGS) { /* Test for simde_vtrn_f16: loads a and b, transposes them and compares both result vectors with r[0] and r[1]. In every entry below r[0] = { a0, b0, a2, b2 } and r[1] = { a1, b1, a3, b3 }. The checking branch returns 0. */ +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 66.320), SIMDE_FLOAT16_VALUE( 36.495), SIMDE_FLOAT16_VALUE( - 89.790), SIMDE_FLOAT16_VALUE( 71.726) }, + { SIMDE_FLOAT16_VALUE( 5.106), SIMDE_FLOAT16_VALUE( 19.453), SIMDE_FLOAT16_VALUE( 93.337), SIMDE_FLOAT16_VALUE( 76.081) }, + { { SIMDE_FLOAT16_VALUE( 66.320), SIMDE_FLOAT16_VALUE( 5.106), SIMDE_FLOAT16_VALUE( - 89.790), SIMDE_FLOAT16_VALUE( 93.337) }, + { SIMDE_FLOAT16_VALUE( 36.495), SIMDE_FLOAT16_VALUE( 19.453), SIMDE_FLOAT16_VALUE( 71.726), SIMDE_FLOAT16_VALUE( 76.081) } } }, + { { SIMDE_FLOAT16_VALUE( - 70.663), SIMDE_FLOAT16_VALUE( - 22.827), SIMDE_FLOAT16_VALUE( - 46.632), SIMDE_FLOAT16_VALUE( 86.654) }, + { SIMDE_FLOAT16_VALUE( 82.387), SIMDE_FLOAT16_VALUE( 11.566), SIMDE_FLOAT16_VALUE( - 69.654), SIMDE_FLOAT16_VALUE( 8.208) }, + { { SIMDE_FLOAT16_VALUE( - 70.663), SIMDE_FLOAT16_VALUE( 82.387), SIMDE_FLOAT16_VALUE( - 46.632), SIMDE_FLOAT16_VALUE( - 69.654) }, + { SIMDE_FLOAT16_VALUE( - 22.827), SIMDE_FLOAT16_VALUE( 11.566), SIMDE_FLOAT16_VALUE( 86.654), SIMDE_FLOAT16_VALUE( 8.208) } } }, + { { SIMDE_FLOAT16_VALUE( - 96.352), SIMDE_FLOAT16_VALUE( 29.146), SIMDE_FLOAT16_VALUE( 85.005), SIMDE_FLOAT16_VALUE( - 71.890) }, + { SIMDE_FLOAT16_VALUE( - 52.922), SIMDE_FLOAT16_VALUE( 46.133), SIMDE_FLOAT16_VALUE( - 81.316),
SIMDE_FLOAT16_VALUE( - 14.760) }, + { { SIMDE_FLOAT16_VALUE( - 96.352), SIMDE_FLOAT16_VALUE( - 52.922), SIMDE_FLOAT16_VALUE( 85.005), SIMDE_FLOAT16_VALUE( - 81.316) }, + { SIMDE_FLOAT16_VALUE( 29.146), SIMDE_FLOAT16_VALUE( 46.133), SIMDE_FLOAT16_VALUE( - 71.890), SIMDE_FLOAT16_VALUE( - 14.760) } } }, + { { SIMDE_FLOAT16_VALUE( 43.402), SIMDE_FLOAT16_VALUE( 54.648), SIMDE_FLOAT16_VALUE( 97.496), SIMDE_FLOAT16_VALUE( 14.606) }, + { SIMDE_FLOAT16_VALUE( 22.113), SIMDE_FLOAT16_VALUE( - 11.759), SIMDE_FLOAT16_VALUE( 36.678), SIMDE_FLOAT16_VALUE( - 73.712) }, + { { SIMDE_FLOAT16_VALUE( 43.402), SIMDE_FLOAT16_VALUE( 22.113), SIMDE_FLOAT16_VALUE( 97.496), SIMDE_FLOAT16_VALUE( 36.678) }, + { SIMDE_FLOAT16_VALUE( 54.648), SIMDE_FLOAT16_VALUE( - 11.759), SIMDE_FLOAT16_VALUE( 14.606), SIMDE_FLOAT16_VALUE( - 73.712) } } }, + { { SIMDE_FLOAT16_VALUE( - 68.260), SIMDE_FLOAT16_VALUE( 59.137), SIMDE_FLOAT16_VALUE( 29.196), SIMDE_FLOAT16_VALUE( - 69.831) }, + { SIMDE_FLOAT16_VALUE( 54.264), SIMDE_FLOAT16_VALUE( 8.480), SIMDE_FLOAT16_VALUE( - 72.393), SIMDE_FLOAT16_VALUE( 63.870) }, + { { SIMDE_FLOAT16_VALUE( - 68.260), SIMDE_FLOAT16_VALUE( 54.264), SIMDE_FLOAT16_VALUE( 29.196), SIMDE_FLOAT16_VALUE( - 72.393) }, + { SIMDE_FLOAT16_VALUE( 59.137), SIMDE_FLOAT16_VALUE( 8.480), SIMDE_FLOAT16_VALUE( - 69.831), SIMDE_FLOAT16_VALUE( 63.870) } } }, + { { SIMDE_FLOAT16_VALUE( 59.923), SIMDE_FLOAT16_VALUE( 35.065), SIMDE_FLOAT16_VALUE( 6.417), SIMDE_FLOAT16_VALUE( - 85.371) }, + { SIMDE_FLOAT16_VALUE( 19.002), SIMDE_FLOAT16_VALUE( 90.454), SIMDE_FLOAT16_VALUE( 57.432), SIMDE_FLOAT16_VALUE( 80.462) }, + { { SIMDE_FLOAT16_VALUE( 59.923), SIMDE_FLOAT16_VALUE( 19.002), SIMDE_FLOAT16_VALUE( 6.417), SIMDE_FLOAT16_VALUE( 57.432) }, + { SIMDE_FLOAT16_VALUE( 35.065), SIMDE_FLOAT16_VALUE( 90.454), SIMDE_FLOAT16_VALUE( - 85.371), SIMDE_FLOAT16_VALUE( 80.462) } } }, + { { SIMDE_FLOAT16_VALUE( - 33.733), SIMDE_FLOAT16_VALUE( - 87.485), SIMDE_FLOAT16_VALUE( - 77.296), SIMDE_FLOAT16_VALUE( - 17.408) }, + {
SIMDE_FLOAT16_VALUE( 97.524), SIMDE_FLOAT16_VALUE( 69.470), SIMDE_FLOAT16_VALUE( 73.726), SIMDE_FLOAT16_VALUE( 84.744) }, + { { SIMDE_FLOAT16_VALUE( - 33.733), SIMDE_FLOAT16_VALUE( 97.524), SIMDE_FLOAT16_VALUE( - 77.296), SIMDE_FLOAT16_VALUE( 73.726) }, + { SIMDE_FLOAT16_VALUE( - 87.485), SIMDE_FLOAT16_VALUE( 69.470), SIMDE_FLOAT16_VALUE( - 17.408), SIMDE_FLOAT16_VALUE( 84.744) } } }, + { { SIMDE_FLOAT16_VALUE( - 55.591), SIMDE_FLOAT16_VALUE( 61.251), SIMDE_FLOAT16_VALUE( - 65.213), SIMDE_FLOAT16_VALUE( - 77.400) }, + { SIMDE_FLOAT16_VALUE( 25.667), SIMDE_FLOAT16_VALUE( 90.005), SIMDE_FLOAT16_VALUE( - 46.475), SIMDE_FLOAT16_VALUE( 28.899) }, + { { SIMDE_FLOAT16_VALUE( - 55.591), SIMDE_FLOAT16_VALUE( 25.667), SIMDE_FLOAT16_VALUE( - 65.213), SIMDE_FLOAT16_VALUE( - 46.475) }, + { SIMDE_FLOAT16_VALUE( 61.251), SIMDE_FLOAT16_VALUE( 90.005), SIMDE_FLOAT16_VALUE( - 77.400), SIMDE_FLOAT16_VALUE( 28.899) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4x2_t r = simde_vtrn_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], simde_vld1_f16(test_vec[i].r[0]), 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], simde_vld1_f16(test_vec[i].r[1]), 1); + } + + return 0; + +#else /* Generator branch, compiled only when the #if 1 above is flipped: prints eight random a/b pairs together with their simde_vtrn_f16 result and returns 1 rather than the 0 of the checking branch. */ + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4x2_t r = simde_vtrn_f16(a, b); /* same x2 result type as the checking branch; the former r[2] array of a 2-lane type could not hold it */ + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4x2(2, r, SIMDE_TEST_VEC_POS_LAST); /* writes both halves instead of reading r[2] past the end; NOTE(review): assumes the test harness provides an f16x4x2 writer - confirm */ + } + return 1; +#endif +} + static int test_simde_vtrn_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -81,10 +150,25 @@
test_simde_vtrn_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r[2] = simde_vtrn_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -159,10 +243,25 @@ test_simde_vtrn_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x2_t r[2] = simde_vtrn_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -237,10 +336,25 @@ test_simde_vtrn_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x2_t r[2] = simde_vtrn_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -315,10 +429,25 @@ test_simde_vtrn_s32 (SIMDE_MUNIT_TEST_ARGS) { 
} return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r[2] = simde_vtrn_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -393,10 +522,25 @@ test_simde_vtrn_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x2_t r[2] = simde_vtrn_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; uint16_t b[4]; @@ -470,10 +614,25 @@ test_simde_vtrn_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x2_t r[2] = simde_vtrn_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -547,10 +706,124 @@ test_simde_vtrn_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; 
i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r[2] = simde_vtrn_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vtrnq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[2][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 72.402), SIMDE_FLOAT16_VALUE( - 27.275), SIMDE_FLOAT16_VALUE( - 87.963), SIMDE_FLOAT16_VALUE( - 3.927), + SIMDE_FLOAT16_VALUE( 27.751), SIMDE_FLOAT16_VALUE( - 91.569), SIMDE_FLOAT16_VALUE( - 72.494), SIMDE_FLOAT16_VALUE( - 63.937) }, + { SIMDE_FLOAT16_VALUE( 31.848), SIMDE_FLOAT16_VALUE( 16.416), SIMDE_FLOAT16_VALUE( - 23.281), SIMDE_FLOAT16_VALUE( 29.168), + SIMDE_FLOAT16_VALUE( - 37.304), SIMDE_FLOAT16_VALUE( - 59.759), SIMDE_FLOAT16_VALUE( 51.039), SIMDE_FLOAT16_VALUE( 87.698) }, + { { SIMDE_FLOAT16_VALUE( 72.402), SIMDE_FLOAT16_VALUE( 31.848), SIMDE_FLOAT16_VALUE( - 87.963), SIMDE_FLOAT16_VALUE( - 23.281), + SIMDE_FLOAT16_VALUE( 27.751), SIMDE_FLOAT16_VALUE( - 37.304), SIMDE_FLOAT16_VALUE( - 72.494), SIMDE_FLOAT16_VALUE( 51.039) }, + { SIMDE_FLOAT16_VALUE( - 27.275), SIMDE_FLOAT16_VALUE( 16.416), SIMDE_FLOAT16_VALUE( - 3.927), SIMDE_FLOAT16_VALUE( 29.168), + SIMDE_FLOAT16_VALUE( - 91.569), SIMDE_FLOAT16_VALUE( - 59.759), SIMDE_FLOAT16_VALUE( - 63.937), SIMDE_FLOAT16_VALUE( 87.698) } } }, + { { SIMDE_FLOAT16_VALUE( 88.336), SIMDE_FLOAT16_VALUE( - 46.110), SIMDE_FLOAT16_VALUE( 53.634), SIMDE_FLOAT16_VALUE( 1.307), + SIMDE_FLOAT16_VALUE( 41.335), SIMDE_FLOAT16_VALUE( - 93.616), SIMDE_FLOAT16_VALUE( 39.662), SIMDE_FLOAT16_VALUE( 95.339) }, + { SIMDE_FLOAT16_VALUE( 66.944), SIMDE_FLOAT16_VALUE( 3.046), SIMDE_FLOAT16_VALUE( 64.921), SIMDE_FLOAT16_VALUE( 14.665), + 
SIMDE_FLOAT16_VALUE( 44.219), SIMDE_FLOAT16_VALUE( - 22.965), SIMDE_FLOAT16_VALUE( - 95.371), SIMDE_FLOAT16_VALUE( 40.338) }, + { { SIMDE_FLOAT16_VALUE( 88.336), SIMDE_FLOAT16_VALUE( 66.944), SIMDE_FLOAT16_VALUE( 53.634), SIMDE_FLOAT16_VALUE( 64.921), + SIMDE_FLOAT16_VALUE( 41.335), SIMDE_FLOAT16_VALUE( 44.219), SIMDE_FLOAT16_VALUE( 39.662), SIMDE_FLOAT16_VALUE( - 95.371) }, + { SIMDE_FLOAT16_VALUE( - 46.110), SIMDE_FLOAT16_VALUE( 3.046), SIMDE_FLOAT16_VALUE( 1.307), SIMDE_FLOAT16_VALUE( 14.665), + SIMDE_FLOAT16_VALUE( - 93.616), SIMDE_FLOAT16_VALUE( - 22.965), SIMDE_FLOAT16_VALUE( 95.339), SIMDE_FLOAT16_VALUE( 40.338) } } }, + { { SIMDE_FLOAT16_VALUE( - 73.328), SIMDE_FLOAT16_VALUE( 78.977), SIMDE_FLOAT16_VALUE( 37.493), SIMDE_FLOAT16_VALUE( - 93.150), + SIMDE_FLOAT16_VALUE( 57.450), SIMDE_FLOAT16_VALUE( 94.268), SIMDE_FLOAT16_VALUE( - 95.908), SIMDE_FLOAT16_VALUE( 22.514) }, + { SIMDE_FLOAT16_VALUE( - 79.810), SIMDE_FLOAT16_VALUE( - 64.617), SIMDE_FLOAT16_VALUE( 26.363), SIMDE_FLOAT16_VALUE( - 66.259), + SIMDE_FLOAT16_VALUE( - 22.866), SIMDE_FLOAT16_VALUE( - 77.509), SIMDE_FLOAT16_VALUE( - 94.614), SIMDE_FLOAT16_VALUE( 39.092) }, + { { SIMDE_FLOAT16_VALUE( - 73.328), SIMDE_FLOAT16_VALUE( - 79.810), SIMDE_FLOAT16_VALUE( 37.493), SIMDE_FLOAT16_VALUE( 26.363), + SIMDE_FLOAT16_VALUE( 57.450), SIMDE_FLOAT16_VALUE( - 22.866), SIMDE_FLOAT16_VALUE( - 95.908), SIMDE_FLOAT16_VALUE( - 94.614) }, + { SIMDE_FLOAT16_VALUE( 78.977), SIMDE_FLOAT16_VALUE( - 64.617), SIMDE_FLOAT16_VALUE( - 93.150), SIMDE_FLOAT16_VALUE( - 66.259), + SIMDE_FLOAT16_VALUE( 94.268), SIMDE_FLOAT16_VALUE( - 77.509), SIMDE_FLOAT16_VALUE( 22.514), SIMDE_FLOAT16_VALUE( 39.092) } } }, + { { SIMDE_FLOAT16_VALUE( - 16.026), SIMDE_FLOAT16_VALUE( - 71.194), SIMDE_FLOAT16_VALUE( - 85.000), SIMDE_FLOAT16_VALUE( 93.544), + SIMDE_FLOAT16_VALUE( 88.015), SIMDE_FLOAT16_VALUE( - 93.133), SIMDE_FLOAT16_VALUE( - 50.972), SIMDE_FLOAT16_VALUE( - 13.782) }, + { SIMDE_FLOAT16_VALUE( 53.753), SIMDE_FLOAT16_VALUE( - 85.486), 
SIMDE_FLOAT16_VALUE( - 7.617), SIMDE_FLOAT16_VALUE( - 78.630), + SIMDE_FLOAT16_VALUE( 90.258), SIMDE_FLOAT16_VALUE( - 49.347), SIMDE_FLOAT16_VALUE( - 53.331), SIMDE_FLOAT16_VALUE( - 75.926) }, + { { SIMDE_FLOAT16_VALUE( - 16.026), SIMDE_FLOAT16_VALUE( 53.753), SIMDE_FLOAT16_VALUE( - 85.000), SIMDE_FLOAT16_VALUE( - 7.617), + SIMDE_FLOAT16_VALUE( 88.015), SIMDE_FLOAT16_VALUE( 90.258), SIMDE_FLOAT16_VALUE( - 50.972), SIMDE_FLOAT16_VALUE( - 53.331) }, + { SIMDE_FLOAT16_VALUE( - 71.194), SIMDE_FLOAT16_VALUE( - 85.486), SIMDE_FLOAT16_VALUE( 93.544), SIMDE_FLOAT16_VALUE( - 78.630), + SIMDE_FLOAT16_VALUE( - 93.133), SIMDE_FLOAT16_VALUE( - 49.347), SIMDE_FLOAT16_VALUE( - 13.782), SIMDE_FLOAT16_VALUE( - 75.926) } } }, + { { SIMDE_FLOAT16_VALUE( - 35.438), SIMDE_FLOAT16_VALUE( 55.067), SIMDE_FLOAT16_VALUE( 44.514), SIMDE_FLOAT16_VALUE( 13.056), + SIMDE_FLOAT16_VALUE( - 7.241), SIMDE_FLOAT16_VALUE( 30.924), SIMDE_FLOAT16_VALUE( 96.856), SIMDE_FLOAT16_VALUE( 46.739) }, + { SIMDE_FLOAT16_VALUE( 2.315), SIMDE_FLOAT16_VALUE( - 96.213), SIMDE_FLOAT16_VALUE( 97.921), SIMDE_FLOAT16_VALUE( - 14.954), + SIMDE_FLOAT16_VALUE( - 37.120), SIMDE_FLOAT16_VALUE( - 36.198), SIMDE_FLOAT16_VALUE( - 5.285), SIMDE_FLOAT16_VALUE( - 47.281) }, + { { SIMDE_FLOAT16_VALUE( - 35.438), SIMDE_FLOAT16_VALUE( 2.315), SIMDE_FLOAT16_VALUE( 44.514), SIMDE_FLOAT16_VALUE( 97.921), + SIMDE_FLOAT16_VALUE( - 7.241), SIMDE_FLOAT16_VALUE( - 37.120), SIMDE_FLOAT16_VALUE( 96.856), SIMDE_FLOAT16_VALUE( - 5.285) }, + { SIMDE_FLOAT16_VALUE( 55.067), SIMDE_FLOAT16_VALUE( - 96.213), SIMDE_FLOAT16_VALUE( 13.056), SIMDE_FLOAT16_VALUE( - 14.954), + SIMDE_FLOAT16_VALUE( 30.924), SIMDE_FLOAT16_VALUE( - 36.198), SIMDE_FLOAT16_VALUE( 46.739), SIMDE_FLOAT16_VALUE( - 47.281) } } }, + { { SIMDE_FLOAT16_VALUE( - 31.935), SIMDE_FLOAT16_VALUE( - 35.016), SIMDE_FLOAT16_VALUE( 62.766), SIMDE_FLOAT16_VALUE( - 76.317), + SIMDE_FLOAT16_VALUE( - 36.406), SIMDE_FLOAT16_VALUE( 52.789), SIMDE_FLOAT16_VALUE( 25.725), SIMDE_FLOAT16_VALUE( - 
45.278) }, + { SIMDE_FLOAT16_VALUE( - 31.370), SIMDE_FLOAT16_VALUE( - 54.045), SIMDE_FLOAT16_VALUE( 10.498), SIMDE_FLOAT16_VALUE( - 13.009), + SIMDE_FLOAT16_VALUE( - 6.750), SIMDE_FLOAT16_VALUE( 32.058), SIMDE_FLOAT16_VALUE( 40.207), SIMDE_FLOAT16_VALUE( 43.579) }, + { { SIMDE_FLOAT16_VALUE( - 31.935), SIMDE_FLOAT16_VALUE( - 31.370), SIMDE_FLOAT16_VALUE( 62.766), SIMDE_FLOAT16_VALUE( 10.498), + SIMDE_FLOAT16_VALUE( - 36.406), SIMDE_FLOAT16_VALUE( - 6.750), SIMDE_FLOAT16_VALUE( 25.725), SIMDE_FLOAT16_VALUE( 40.207) }, + { SIMDE_FLOAT16_VALUE( - 35.016), SIMDE_FLOAT16_VALUE( - 54.045), SIMDE_FLOAT16_VALUE( - 76.317), SIMDE_FLOAT16_VALUE( - 13.009), + SIMDE_FLOAT16_VALUE( 52.789), SIMDE_FLOAT16_VALUE( 32.058), SIMDE_FLOAT16_VALUE( - 45.278), SIMDE_FLOAT16_VALUE( 43.579) } } }, + { { SIMDE_FLOAT16_VALUE( - 40.893), SIMDE_FLOAT16_VALUE( - 77.395), SIMDE_FLOAT16_VALUE( 91.385), SIMDE_FLOAT16_VALUE( - 33.021), + SIMDE_FLOAT16_VALUE( 87.997), SIMDE_FLOAT16_VALUE( 48.892), SIMDE_FLOAT16_VALUE( - 22.163), SIMDE_FLOAT16_VALUE( - 82.676) }, + { SIMDE_FLOAT16_VALUE( 34.999), SIMDE_FLOAT16_VALUE( 34.155), SIMDE_FLOAT16_VALUE( - 52.404), SIMDE_FLOAT16_VALUE( - 53.410), + SIMDE_FLOAT16_VALUE( - 96.720), SIMDE_FLOAT16_VALUE( - 99.174), SIMDE_FLOAT16_VALUE( 31.360), SIMDE_FLOAT16_VALUE( 73.126) }, + { { SIMDE_FLOAT16_VALUE( - 40.893), SIMDE_FLOAT16_VALUE( 34.999), SIMDE_FLOAT16_VALUE( 91.385), SIMDE_FLOAT16_VALUE( - 52.404), + SIMDE_FLOAT16_VALUE( 87.997), SIMDE_FLOAT16_VALUE( - 96.720), SIMDE_FLOAT16_VALUE( - 22.163), SIMDE_FLOAT16_VALUE( 31.360) }, + { SIMDE_FLOAT16_VALUE( - 77.395), SIMDE_FLOAT16_VALUE( 34.155), SIMDE_FLOAT16_VALUE( - 33.021), SIMDE_FLOAT16_VALUE( - 53.410), + SIMDE_FLOAT16_VALUE( 48.892), SIMDE_FLOAT16_VALUE( - 99.174), SIMDE_FLOAT16_VALUE( - 82.676), SIMDE_FLOAT16_VALUE( 73.126) } } }, + { { SIMDE_FLOAT16_VALUE( - 85.597), SIMDE_FLOAT16_VALUE( 2.289), SIMDE_FLOAT16_VALUE( 38.799), SIMDE_FLOAT16_VALUE( 7.382), + SIMDE_FLOAT16_VALUE( - 8.076), 
SIMDE_FLOAT16_VALUE( - 66.132), SIMDE_FLOAT16_VALUE( 4.632), SIMDE_FLOAT16_VALUE( 35.004) }, + { SIMDE_FLOAT16_VALUE( 64.622), SIMDE_FLOAT16_VALUE( 7.608), SIMDE_FLOAT16_VALUE( 81.664), SIMDE_FLOAT16_VALUE( - 22.375), + SIMDE_FLOAT16_VALUE( - 85.186), SIMDE_FLOAT16_VALUE( - 85.063), SIMDE_FLOAT16_VALUE( 48.648), SIMDE_FLOAT16_VALUE( 0.615) }, + { { SIMDE_FLOAT16_VALUE( - 85.597), SIMDE_FLOAT16_VALUE( 64.622), SIMDE_FLOAT16_VALUE( 38.799), SIMDE_FLOAT16_VALUE( 81.664), + SIMDE_FLOAT16_VALUE( - 8.076), SIMDE_FLOAT16_VALUE( - 85.186), SIMDE_FLOAT16_VALUE( 4.632), SIMDE_FLOAT16_VALUE( 48.648) }, + { SIMDE_FLOAT16_VALUE( 2.289), SIMDE_FLOAT16_VALUE( 7.608), SIMDE_FLOAT16_VALUE( 7.382), SIMDE_FLOAT16_VALUE( - 22.375), + SIMDE_FLOAT16_VALUE( - 66.132), SIMDE_FLOAT16_VALUE( - 85.063), SIMDE_FLOAT16_VALUE( 35.004), SIMDE_FLOAT16_VALUE( 0.615) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8x2_t r = simde_vtrnq_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r.val[0], simde_vld1q_f16(test_vec[i].r[0]), 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], simde_vld1q_f16(test_vec[i].r[1]), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x2_t r[2] = simde_vtrnq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -624,10 +897,25 @@ test_simde_vtrnq_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + 
fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r[2] = simde_vtrnq_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -734,10 +1022,25 @@ test_simde_vtrnq_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x2_t r[2] = simde_vtrnq_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -812,10 +1115,25 @@ test_simde_vtrnq_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x2_t r[2] = simde_vtrnq_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -889,10 +1207,25 @@ test_simde_vtrnq_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for 
(int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x2_t r[2] = simde_vtrnq_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -999,10 +1332,25 @@ test_simde_vtrnq_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x2_t r[2] = simde_vtrnq_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -1076,10 +1424,25 @@ test_simde_vtrnq_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x2_t r[2] = simde_vtrnq_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrnq_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -1154,12 +1517,27 @@ test_simde_vtrnq_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = 
simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x2_t r[2] = simde_vtrnq_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_s16) @@ -1168,6 +1546,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrnq_s16) diff --git a/test/arm/neon/trn1.c b/test/arm/neon/trn1.c index fcc91644e..32733b0d7 100644 --- a/test/arm/neon/trn1.c +++ b/test/arm/neon/trn1.c @@ -3,8 +3,68 @@ #include #include +static int +test_simde_vtrn1_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 11.105), SIMDE_FLOAT16_VALUE( - 63.267), SIMDE_FLOAT16_VALUE( - 15.443), SIMDE_FLOAT16_VALUE( - 77.497) }, + { SIMDE_FLOAT16_VALUE( 99.692), SIMDE_FLOAT16_VALUE( - 68.474), SIMDE_FLOAT16_VALUE( - 55.887), SIMDE_FLOAT16_VALUE( - 21.106) }, + { SIMDE_FLOAT16_VALUE( 11.105), SIMDE_FLOAT16_VALUE( 99.692), SIMDE_FLOAT16_VALUE( - 15.443), SIMDE_FLOAT16_VALUE( - 55.887) } }, + { { SIMDE_FLOAT16_VALUE( - 52.862), SIMDE_FLOAT16_VALUE( - 88.095), SIMDE_FLOAT16_VALUE( 55.948), SIMDE_FLOAT16_VALUE( - 84.989) }, + { SIMDE_FLOAT16_VALUE( - 27.479), SIMDE_FLOAT16_VALUE( - 14.155), SIMDE_FLOAT16_VALUE( 73.644), SIMDE_FLOAT16_VALUE( - 71.624) }, + { SIMDE_FLOAT16_VALUE( - 52.862), SIMDE_FLOAT16_VALUE( - 27.479), 
SIMDE_FLOAT16_VALUE( 55.948), SIMDE_FLOAT16_VALUE( 73.644) } }, + { { SIMDE_FLOAT16_VALUE( 57.467), SIMDE_FLOAT16_VALUE( - 90.025), SIMDE_FLOAT16_VALUE( 11.725), SIMDE_FLOAT16_VALUE( 82.468) }, + { SIMDE_FLOAT16_VALUE( 85.913), SIMDE_FLOAT16_VALUE( - 65.687), SIMDE_FLOAT16_VALUE( - 18.534), SIMDE_FLOAT16_VALUE( - 19.093) }, + { SIMDE_FLOAT16_VALUE( 57.467), SIMDE_FLOAT16_VALUE( 85.913), SIMDE_FLOAT16_VALUE( 11.725), SIMDE_FLOAT16_VALUE( - 18.534) } }, + { { SIMDE_FLOAT16_VALUE( - 27.762), SIMDE_FLOAT16_VALUE( 58.179), SIMDE_FLOAT16_VALUE( - 53.292), SIMDE_FLOAT16_VALUE( - 33.595) }, + { SIMDE_FLOAT16_VALUE( - 49.071), SIMDE_FLOAT16_VALUE( - 66.212), SIMDE_FLOAT16_VALUE( - 65.087), SIMDE_FLOAT16_VALUE( 86.615) }, + { SIMDE_FLOAT16_VALUE( - 27.762), SIMDE_FLOAT16_VALUE( - 49.071), SIMDE_FLOAT16_VALUE( - 53.292), SIMDE_FLOAT16_VALUE( - 65.087) } }, + { { SIMDE_FLOAT16_VALUE( 38.277), SIMDE_FLOAT16_VALUE( - 95.558), SIMDE_FLOAT16_VALUE( 26.475), SIMDE_FLOAT16_VALUE( - 96.782) }, + { SIMDE_FLOAT16_VALUE( 68.464), SIMDE_FLOAT16_VALUE( - 65.232), SIMDE_FLOAT16_VALUE( 26.296), SIMDE_FLOAT16_VALUE( 11.376) }, + { SIMDE_FLOAT16_VALUE( 38.277), SIMDE_FLOAT16_VALUE( 68.464), SIMDE_FLOAT16_VALUE( 26.475), SIMDE_FLOAT16_VALUE( 26.296) } }, + { { SIMDE_FLOAT16_VALUE( - 48.285), SIMDE_FLOAT16_VALUE( 24.604), SIMDE_FLOAT16_VALUE( 69.617), SIMDE_FLOAT16_VALUE( 66.002) }, + { SIMDE_FLOAT16_VALUE( 0.920), SIMDE_FLOAT16_VALUE( - 90.358), SIMDE_FLOAT16_VALUE( 2.468), SIMDE_FLOAT16_VALUE( - 37.903) }, + { SIMDE_FLOAT16_VALUE( - 48.285), SIMDE_FLOAT16_VALUE( 0.920), SIMDE_FLOAT16_VALUE( 69.617), SIMDE_FLOAT16_VALUE( 2.468) } }, + { { SIMDE_FLOAT16_VALUE( - 46.735), SIMDE_FLOAT16_VALUE( - 76.326), SIMDE_FLOAT16_VALUE( - 33.544), SIMDE_FLOAT16_VALUE( 0.423) }, + { SIMDE_FLOAT16_VALUE( - 37.682), SIMDE_FLOAT16_VALUE( - 51.421), SIMDE_FLOAT16_VALUE( 98.911), SIMDE_FLOAT16_VALUE( 97.679) }, + { SIMDE_FLOAT16_VALUE( - 46.735), SIMDE_FLOAT16_VALUE( - 37.682), SIMDE_FLOAT16_VALUE( - 33.544), 
SIMDE_FLOAT16_VALUE( 98.911) } }, + { { SIMDE_FLOAT16_VALUE( - 98.472), SIMDE_FLOAT16_VALUE( - 2.386), SIMDE_FLOAT16_VALUE( - 66.666), SIMDE_FLOAT16_VALUE( 27.183) }, + { SIMDE_FLOAT16_VALUE( 23.202), SIMDE_FLOAT16_VALUE( 1.710), SIMDE_FLOAT16_VALUE( 58.950), SIMDE_FLOAT16_VALUE( - 44.528) }, + { SIMDE_FLOAT16_VALUE( - 98.472), SIMDE_FLOAT16_VALUE( 23.202), SIMDE_FLOAT16_VALUE( - 66.666), SIMDE_FLOAT16_VALUE( 58.950) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r = simde_vtrn1_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vtrn1_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vtrn1_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -45,10 +105,25 @@ test_simde_vtrn1_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vtrn1_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_s8 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -89,10 +164,25 @@ test_simde_vtrn1_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vtrn1_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -133,10 +223,25 @@ test_simde_vtrn1_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vtrn1_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -177,10 +282,25 @@ test_simde_vtrn1_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vtrn1_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -221,10 +341,25 @@ 
test_simde_vtrn1_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t r = simde_vtrn1_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; uint16_t b[4]; @@ -264,10 +399,25 @@ test_simde_vtrn1_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t r = simde_vtrn1_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -307,10 +457,107 @@ test_simde_vtrn1_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r = simde_vtrn1_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vtrn1q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 
50.625), SIMDE_FLOAT16_VALUE( 30.747), SIMDE_FLOAT16_VALUE( 18.487), SIMDE_FLOAT16_VALUE( 57.491), + SIMDE_FLOAT16_VALUE( - 21.129), SIMDE_FLOAT16_VALUE( 15.440), SIMDE_FLOAT16_VALUE( - 52.593), SIMDE_FLOAT16_VALUE( - 4.727) }, + { SIMDE_FLOAT16_VALUE( 44.277), SIMDE_FLOAT16_VALUE( - 25.789), SIMDE_FLOAT16_VALUE( - 25.136), SIMDE_FLOAT16_VALUE( 33.647), + SIMDE_FLOAT16_VALUE( - 95.841), SIMDE_FLOAT16_VALUE( 52.244), SIMDE_FLOAT16_VALUE( - 21.627), SIMDE_FLOAT16_VALUE( 66.122) }, + { SIMDE_FLOAT16_VALUE( - 50.625), SIMDE_FLOAT16_VALUE( 44.277), SIMDE_FLOAT16_VALUE( 18.487), SIMDE_FLOAT16_VALUE( - 25.136), + SIMDE_FLOAT16_VALUE( - 21.129), SIMDE_FLOAT16_VALUE( - 95.841), SIMDE_FLOAT16_VALUE( - 52.593), SIMDE_FLOAT16_VALUE( - 21.627) } }, + { { SIMDE_FLOAT16_VALUE( - 6.913), SIMDE_FLOAT16_VALUE( - 94.272), SIMDE_FLOAT16_VALUE( 11.297), SIMDE_FLOAT16_VALUE( 3.058), + SIMDE_FLOAT16_VALUE( - 5.001), SIMDE_FLOAT16_VALUE( - 7.243), SIMDE_FLOAT16_VALUE( 48.542), SIMDE_FLOAT16_VALUE( - 21.366) }, + { SIMDE_FLOAT16_VALUE( 18.542), SIMDE_FLOAT16_VALUE( 92.836), SIMDE_FLOAT16_VALUE( - 53.782), SIMDE_FLOAT16_VALUE( - 54.711), + SIMDE_FLOAT16_VALUE( 46.595), SIMDE_FLOAT16_VALUE( 50.270), SIMDE_FLOAT16_VALUE( 64.443), SIMDE_FLOAT16_VALUE( - 32.873) }, + { SIMDE_FLOAT16_VALUE( - 6.913), SIMDE_FLOAT16_VALUE( 18.542), SIMDE_FLOAT16_VALUE( 11.297), SIMDE_FLOAT16_VALUE( - 53.782), + SIMDE_FLOAT16_VALUE( - 5.001), SIMDE_FLOAT16_VALUE( 46.595), SIMDE_FLOAT16_VALUE( 48.542), SIMDE_FLOAT16_VALUE( 64.443) } }, + { { SIMDE_FLOAT16_VALUE( 92.715), SIMDE_FLOAT16_VALUE( 49.477), SIMDE_FLOAT16_VALUE( 85.205), SIMDE_FLOAT16_VALUE( 90.565), + SIMDE_FLOAT16_VALUE( - 47.228), SIMDE_FLOAT16_VALUE( - 61.293), SIMDE_FLOAT16_VALUE( 80.419), SIMDE_FLOAT16_VALUE( 32.831) }, + { SIMDE_FLOAT16_VALUE( - 59.790), SIMDE_FLOAT16_VALUE( 90.991), SIMDE_FLOAT16_VALUE( 9.151), SIMDE_FLOAT16_VALUE( 81.903), + SIMDE_FLOAT16_VALUE( 36.535), SIMDE_FLOAT16_VALUE( - 5.406), SIMDE_FLOAT16_VALUE( - 73.592), 
SIMDE_FLOAT16_VALUE( 52.041) }, + { SIMDE_FLOAT16_VALUE( 92.715), SIMDE_FLOAT16_VALUE( - 59.790), SIMDE_FLOAT16_VALUE( 85.205), SIMDE_FLOAT16_VALUE( 9.151), + SIMDE_FLOAT16_VALUE( - 47.228), SIMDE_FLOAT16_VALUE( 36.535), SIMDE_FLOAT16_VALUE( 80.419), SIMDE_FLOAT16_VALUE( - 73.592) } }, + { { SIMDE_FLOAT16_VALUE( - 5.048), SIMDE_FLOAT16_VALUE( - 15.367), SIMDE_FLOAT16_VALUE( - 85.673), SIMDE_FLOAT16_VALUE( 94.930), + SIMDE_FLOAT16_VALUE( 82.348), SIMDE_FLOAT16_VALUE( - 3.141), SIMDE_FLOAT16_VALUE( 83.294), SIMDE_FLOAT16_VALUE( 25.936) }, + { SIMDE_FLOAT16_VALUE( 96.385), SIMDE_FLOAT16_VALUE( - 79.579), SIMDE_FLOAT16_VALUE( - 4.256), SIMDE_FLOAT16_VALUE( - 34.022), + SIMDE_FLOAT16_VALUE( 82.922), SIMDE_FLOAT16_VALUE( - 1.174), SIMDE_FLOAT16_VALUE( 40.213), SIMDE_FLOAT16_VALUE( 55.100) }, + { SIMDE_FLOAT16_VALUE( - 5.048), SIMDE_FLOAT16_VALUE( 96.385), SIMDE_FLOAT16_VALUE( - 85.673), SIMDE_FLOAT16_VALUE( - 4.256), + SIMDE_FLOAT16_VALUE( 82.348), SIMDE_FLOAT16_VALUE( 82.922), SIMDE_FLOAT16_VALUE( 83.294), SIMDE_FLOAT16_VALUE( 40.213) } }, + { { SIMDE_FLOAT16_VALUE( 19.062), SIMDE_FLOAT16_VALUE( 43.816), SIMDE_FLOAT16_VALUE( - 30.166), SIMDE_FLOAT16_VALUE( - 39.978), + SIMDE_FLOAT16_VALUE( - 39.433), SIMDE_FLOAT16_VALUE( 11.691), SIMDE_FLOAT16_VALUE( 7.837), SIMDE_FLOAT16_VALUE( 17.290) }, + { SIMDE_FLOAT16_VALUE( 8.433), SIMDE_FLOAT16_VALUE( 16.426), SIMDE_FLOAT16_VALUE( - 43.558), SIMDE_FLOAT16_VALUE( 96.327), + SIMDE_FLOAT16_VALUE( - 25.953), SIMDE_FLOAT16_VALUE( - 22.456), SIMDE_FLOAT16_VALUE( - 81.824), SIMDE_FLOAT16_VALUE( 87.942) }, + { SIMDE_FLOAT16_VALUE( 19.062), SIMDE_FLOAT16_VALUE( 8.433), SIMDE_FLOAT16_VALUE( - 30.166), SIMDE_FLOAT16_VALUE( - 43.558), + SIMDE_FLOAT16_VALUE( - 39.433), SIMDE_FLOAT16_VALUE( - 25.953), SIMDE_FLOAT16_VALUE( 7.837), SIMDE_FLOAT16_VALUE( - 81.824) } }, + { { SIMDE_FLOAT16_VALUE( - 11.648), SIMDE_FLOAT16_VALUE( - 29.171), SIMDE_FLOAT16_VALUE( 80.053), SIMDE_FLOAT16_VALUE( - 68.578), + SIMDE_FLOAT16_VALUE( - 48.654), 
SIMDE_FLOAT16_VALUE( - 99.755), SIMDE_FLOAT16_VALUE( - 90.352), SIMDE_FLOAT16_VALUE( 89.383) }, + { SIMDE_FLOAT16_VALUE( - 33.866), SIMDE_FLOAT16_VALUE( - 47.474), SIMDE_FLOAT16_VALUE( - 66.626), SIMDE_FLOAT16_VALUE( - 97.760), + SIMDE_FLOAT16_VALUE( - 84.997), SIMDE_FLOAT16_VALUE( 67.434), SIMDE_FLOAT16_VALUE( - 97.087), SIMDE_FLOAT16_VALUE( - 32.962) }, + { SIMDE_FLOAT16_VALUE( - 11.648), SIMDE_FLOAT16_VALUE( - 33.866), SIMDE_FLOAT16_VALUE( 80.053), SIMDE_FLOAT16_VALUE( - 66.626), + SIMDE_FLOAT16_VALUE( - 48.654), SIMDE_FLOAT16_VALUE( - 84.997), SIMDE_FLOAT16_VALUE( - 90.352), SIMDE_FLOAT16_VALUE( - 97.087) } }, + { { SIMDE_FLOAT16_VALUE( 99.618), SIMDE_FLOAT16_VALUE( - 77.016), SIMDE_FLOAT16_VALUE( - 80.554), SIMDE_FLOAT16_VALUE( - 81.723), + SIMDE_FLOAT16_VALUE( 10.169), SIMDE_FLOAT16_VALUE( 23.695), SIMDE_FLOAT16_VALUE( - 19.190), SIMDE_FLOAT16_VALUE( 95.941) }, + { SIMDE_FLOAT16_VALUE( - 51.304), SIMDE_FLOAT16_VALUE( - 96.429), SIMDE_FLOAT16_VALUE( 13.144), SIMDE_FLOAT16_VALUE( - 7.245), + SIMDE_FLOAT16_VALUE( 29.744), SIMDE_FLOAT16_VALUE( 29.133), SIMDE_FLOAT16_VALUE( 74.282), SIMDE_FLOAT16_VALUE( - 74.127) }, + { SIMDE_FLOAT16_VALUE( 99.618), SIMDE_FLOAT16_VALUE( - 51.304), SIMDE_FLOAT16_VALUE( - 80.554), SIMDE_FLOAT16_VALUE( 13.144), + SIMDE_FLOAT16_VALUE( 10.169), SIMDE_FLOAT16_VALUE( 29.744), SIMDE_FLOAT16_VALUE( - 19.190), SIMDE_FLOAT16_VALUE( 74.282) } }, + { { SIMDE_FLOAT16_VALUE( 83.161), SIMDE_FLOAT16_VALUE( - 1.215), SIMDE_FLOAT16_VALUE( 78.312), SIMDE_FLOAT16_VALUE( - 84.448), + SIMDE_FLOAT16_VALUE( 81.889), SIMDE_FLOAT16_VALUE( 3.516), SIMDE_FLOAT16_VALUE( 29.221), SIMDE_FLOAT16_VALUE( - 43.978) }, + { SIMDE_FLOAT16_VALUE( 5.808), SIMDE_FLOAT16_VALUE( - 80.158), SIMDE_FLOAT16_VALUE( - 16.019), SIMDE_FLOAT16_VALUE( 94.441), + SIMDE_FLOAT16_VALUE( 76.241), SIMDE_FLOAT16_VALUE( 17.818), SIMDE_FLOAT16_VALUE( - 76.931), SIMDE_FLOAT16_VALUE( 91.708) }, + { SIMDE_FLOAT16_VALUE( 83.161), SIMDE_FLOAT16_VALUE( 5.808), SIMDE_FLOAT16_VALUE( 78.312), 
SIMDE_FLOAT16_VALUE( - 16.019), + SIMDE_FLOAT16_VALUE( 81.889), SIMDE_FLOAT16_VALUE( 76.241), SIMDE_FLOAT16_VALUE( 29.221), SIMDE_FLOAT16_VALUE( - 76.931) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vtrn1q_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vtrn1q_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -350,10 +597,25 @@ test_simde_vtrn1q_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vtrn1q_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float64 a[2]; simde_float64 b[2]; @@ -394,10 +656,25 @@ test_simde_vtrn1q_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = 
simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r = simde_vtrn1q_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -462,10 +739,25 @@ test_simde_vtrn1q_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = simde_vtrn1q_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -506,10 +798,25 @@ test_simde_vtrn1q_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vtrn1q_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -549,10 +856,25 @@ test_simde_vtrn1q_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; 
i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vtrn1q_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[2]; int64_t b[2]; @@ -592,10 +914,25 @@ test_simde_vtrn1q_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vtrn1q_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -660,10 +997,25 @@ test_simde_vtrn1q_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t r = simde_vtrn1q_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -703,10 +1055,25 @@ test_simde_vtrn1q_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + 
simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t r = simde_vtrn1q_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -747,10 +1114,25 @@ test_simde_vtrn1q_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t r = simde_vtrn1q_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn1q_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[2]; uint64_t b[2]; @@ -790,9 +1172,24 @@ test_simde_vtrn1q_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t r = simde_vtrn1q_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_s16) @@ -801,6 +1198,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_f16) 
SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn1q_s8) diff --git a/test/arm/neon/trn2.c b/test/arm/neon/trn2.c index 72b95c8ed..8e186c7ee 100644 --- a/test/arm/neon/trn2.c +++ b/test/arm/neon/trn2.c @@ -3,8 +3,68 @@ #include #include +static int +test_simde_vtrn2_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 r[4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 56.300), SIMDE_FLOAT16_VALUE( - 9.279), SIMDE_FLOAT16_VALUE( - 19.078), SIMDE_FLOAT16_VALUE( 91.073) }, + { SIMDE_FLOAT16_VALUE( 85.295), SIMDE_FLOAT16_VALUE( 3.112), SIMDE_FLOAT16_VALUE( - 91.891), SIMDE_FLOAT16_VALUE( 4.800) }, + { SIMDE_FLOAT16_VALUE( - 9.279), SIMDE_FLOAT16_VALUE( 3.112), SIMDE_FLOAT16_VALUE( 91.073), SIMDE_FLOAT16_VALUE( 4.800) } }, + { { SIMDE_FLOAT16_VALUE( 74.709), SIMDE_FLOAT16_VALUE( - 26.125), SIMDE_FLOAT16_VALUE( - 27.765), SIMDE_FLOAT16_VALUE( - 99.458) }, + { SIMDE_FLOAT16_VALUE( 89.694), SIMDE_FLOAT16_VALUE( - 71.014), SIMDE_FLOAT16_VALUE( 48.493), SIMDE_FLOAT16_VALUE( 30.421) }, + { SIMDE_FLOAT16_VALUE( - 26.125), SIMDE_FLOAT16_VALUE( - 71.014), SIMDE_FLOAT16_VALUE( - 99.458), SIMDE_FLOAT16_VALUE( 30.421) } }, + { { SIMDE_FLOAT16_VALUE( 73.102), SIMDE_FLOAT16_VALUE( 41.016), SIMDE_FLOAT16_VALUE( - 35.965), SIMDE_FLOAT16_VALUE( - 89.249) }, + { SIMDE_FLOAT16_VALUE( 85.101), SIMDE_FLOAT16_VALUE( 59.074), SIMDE_FLOAT16_VALUE( 55.450), SIMDE_FLOAT16_VALUE( - 85.858) }, + { SIMDE_FLOAT16_VALUE( 41.016), SIMDE_FLOAT16_VALUE( 59.074), SIMDE_FLOAT16_VALUE( - 89.249), SIMDE_FLOAT16_VALUE( - 85.858) } }, + { { SIMDE_FLOAT16_VALUE( 74.180), SIMDE_FLOAT16_VALUE( 76.843), SIMDE_FLOAT16_VALUE( - 60.751), SIMDE_FLOAT16_VALUE( - 41.114) }, + { SIMDE_FLOAT16_VALUE( 10.403), SIMDE_FLOAT16_VALUE( 54.517), SIMDE_FLOAT16_VALUE( - 73.384), SIMDE_FLOAT16_VALUE( 65.706) }, + { SIMDE_FLOAT16_VALUE( 76.843), SIMDE_FLOAT16_VALUE( 54.517), SIMDE_FLOAT16_VALUE( - 41.114), 
SIMDE_FLOAT16_VALUE( 65.706) } }, + { { SIMDE_FLOAT16_VALUE( - 73.108), SIMDE_FLOAT16_VALUE( - 71.731), SIMDE_FLOAT16_VALUE( - 42.349), SIMDE_FLOAT16_VALUE( - 33.103) }, + { SIMDE_FLOAT16_VALUE( - 37.091), SIMDE_FLOAT16_VALUE( - 4.379), SIMDE_FLOAT16_VALUE( - 56.871), SIMDE_FLOAT16_VALUE( - 36.204) }, + { SIMDE_FLOAT16_VALUE( - 71.731), SIMDE_FLOAT16_VALUE( - 4.379), SIMDE_FLOAT16_VALUE( - 33.103), SIMDE_FLOAT16_VALUE( - 36.204) } }, + { { SIMDE_FLOAT16_VALUE( - 12.826), SIMDE_FLOAT16_VALUE( 44.335), SIMDE_FLOAT16_VALUE( - 3.623), SIMDE_FLOAT16_VALUE( - 89.419) }, + { SIMDE_FLOAT16_VALUE( - 91.221), SIMDE_FLOAT16_VALUE( - 5.349), SIMDE_FLOAT16_VALUE( - 8.450), SIMDE_FLOAT16_VALUE( 34.071) }, + { SIMDE_FLOAT16_VALUE( 44.335), SIMDE_FLOAT16_VALUE( - 5.349), SIMDE_FLOAT16_VALUE( - 89.419), SIMDE_FLOAT16_VALUE( 34.071) } }, + { { SIMDE_FLOAT16_VALUE( 14.291), SIMDE_FLOAT16_VALUE( - 9.864), SIMDE_FLOAT16_VALUE( 72.841), SIMDE_FLOAT16_VALUE( - 90.631) }, + { SIMDE_FLOAT16_VALUE( 40.737), SIMDE_FLOAT16_VALUE( - 40.555), SIMDE_FLOAT16_VALUE( - 53.928), SIMDE_FLOAT16_VALUE( 45.466) }, + { SIMDE_FLOAT16_VALUE( - 9.864), SIMDE_FLOAT16_VALUE( - 40.555), SIMDE_FLOAT16_VALUE( - 90.631), SIMDE_FLOAT16_VALUE( 45.466) } }, + { { SIMDE_FLOAT16_VALUE( 77.115), SIMDE_FLOAT16_VALUE( 37.620), SIMDE_FLOAT16_VALUE( 28.892), SIMDE_FLOAT16_VALUE( - 64.989) }, + { SIMDE_FLOAT16_VALUE( - 18.627), SIMDE_FLOAT16_VALUE( - 85.234), SIMDE_FLOAT16_VALUE( 78.130), SIMDE_FLOAT16_VALUE( - 13.349) }, + { SIMDE_FLOAT16_VALUE( 37.620), SIMDE_FLOAT16_VALUE( - 85.234), SIMDE_FLOAT16_VALUE( - 64.989), SIMDE_FLOAT16_VALUE( - 13.349) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = simde_vld1_f16(test_vec[i].b); + simde_float16x4_t r = simde_vtrn2_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r, simde_vld1_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for 
(int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vtrn2_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vtrn2_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -45,10 +105,25 @@ test_simde_vtrn2_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vtrn2_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -89,10 +164,25 @@ test_simde_vtrn2_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vtrn2_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -133,10 +223,25 @@ test_simde_vtrn2_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i 
= 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vtrn2_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -177,10 +282,25 @@ test_simde_vtrn2_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vtrn2_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -221,10 +341,25 @@ test_simde_vtrn2_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t r = simde_vtrn2_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; uint16_t b[4]; @@ -264,10 +399,25 @@ test_simde_vtrn2_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + 
simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t r = simde_vtrn2_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -307,10 +457,107 @@ test_simde_vtrn2_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r = simde_vtrn2_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vtrn2q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 72.335), SIMDE_FLOAT16_VALUE( 7.554), SIMDE_FLOAT16_VALUE( - 33.008), SIMDE_FLOAT16_VALUE( 90.304), + SIMDE_FLOAT16_VALUE( - 99.993), SIMDE_FLOAT16_VALUE( - 50.693), SIMDE_FLOAT16_VALUE( - 79.762), SIMDE_FLOAT16_VALUE( 7.872) }, + { SIMDE_FLOAT16_VALUE( - 8.557), SIMDE_FLOAT16_VALUE( 96.325), SIMDE_FLOAT16_VALUE( - 89.713), SIMDE_FLOAT16_VALUE( 36.819), + SIMDE_FLOAT16_VALUE( 5.553), SIMDE_FLOAT16_VALUE( 14.489), SIMDE_FLOAT16_VALUE( 5.427), SIMDE_FLOAT16_VALUE( 32.305) }, + { SIMDE_FLOAT16_VALUE( 7.554), SIMDE_FLOAT16_VALUE( 96.325), SIMDE_FLOAT16_VALUE( 90.304), SIMDE_FLOAT16_VALUE( 36.819), + SIMDE_FLOAT16_VALUE( - 50.693), SIMDE_FLOAT16_VALUE( 14.489), SIMDE_FLOAT16_VALUE( 7.872), SIMDE_FLOAT16_VALUE( 32.305) } }, + { { SIMDE_FLOAT16_VALUE( 74.913), SIMDE_FLOAT16_VALUE( - 11.970), SIMDE_FLOAT16_VALUE( - 42.824), 
SIMDE_FLOAT16_VALUE( - 82.513), + SIMDE_FLOAT16_VALUE( - 55.926), SIMDE_FLOAT16_VALUE( 86.502), SIMDE_FLOAT16_VALUE( - 85.855), SIMDE_FLOAT16_VALUE( 25.208) }, + { SIMDE_FLOAT16_VALUE( 6.209), SIMDE_FLOAT16_VALUE( 90.752), SIMDE_FLOAT16_VALUE( - 82.451), SIMDE_FLOAT16_VALUE( - 91.435), + SIMDE_FLOAT16_VALUE( 66.777), SIMDE_FLOAT16_VALUE( - 74.104), SIMDE_FLOAT16_VALUE( - 66.222), SIMDE_FLOAT16_VALUE( - 49.415) }, + { SIMDE_FLOAT16_VALUE( - 11.970), SIMDE_FLOAT16_VALUE( 90.752), SIMDE_FLOAT16_VALUE( - 82.513), SIMDE_FLOAT16_VALUE( - 91.435), + SIMDE_FLOAT16_VALUE( 86.502), SIMDE_FLOAT16_VALUE( - 74.104), SIMDE_FLOAT16_VALUE( 25.208), SIMDE_FLOAT16_VALUE( - 49.415) } }, + { { SIMDE_FLOAT16_VALUE( - 63.196), SIMDE_FLOAT16_VALUE( - 52.466), SIMDE_FLOAT16_VALUE( - 73.706), SIMDE_FLOAT16_VALUE( 40.555), + SIMDE_FLOAT16_VALUE( - 76.544), SIMDE_FLOAT16_VALUE( 87.237), SIMDE_FLOAT16_VALUE( 84.511), SIMDE_FLOAT16_VALUE( - 75.225) }, + { SIMDE_FLOAT16_VALUE( 44.539), SIMDE_FLOAT16_VALUE( 21.319), SIMDE_FLOAT16_VALUE( 66.231), SIMDE_FLOAT16_VALUE( - 19.440), + SIMDE_FLOAT16_VALUE( 72.319), SIMDE_FLOAT16_VALUE( - 77.793), SIMDE_FLOAT16_VALUE( 71.130), SIMDE_FLOAT16_VALUE( 32.971) }, + { SIMDE_FLOAT16_VALUE( - 52.466), SIMDE_FLOAT16_VALUE( 21.319), SIMDE_FLOAT16_VALUE( 40.555), SIMDE_FLOAT16_VALUE( - 19.440), + SIMDE_FLOAT16_VALUE( 87.237), SIMDE_FLOAT16_VALUE( - 77.793), SIMDE_FLOAT16_VALUE( - 75.225), SIMDE_FLOAT16_VALUE( 32.971) } }, + { { SIMDE_FLOAT16_VALUE( - 44.126), SIMDE_FLOAT16_VALUE( 81.561), SIMDE_FLOAT16_VALUE( - 38.052), SIMDE_FLOAT16_VALUE( 19.498), + SIMDE_FLOAT16_VALUE( - 93.987), SIMDE_FLOAT16_VALUE( 83.750), SIMDE_FLOAT16_VALUE( - 87.122), SIMDE_FLOAT16_VALUE( 72.370) }, + { SIMDE_FLOAT16_VALUE( 57.508), SIMDE_FLOAT16_VALUE( - 39.509), SIMDE_FLOAT16_VALUE( - 62.778), SIMDE_FLOAT16_VALUE( 95.740), + SIMDE_FLOAT16_VALUE( - 53.685), SIMDE_FLOAT16_VALUE( 86.996), SIMDE_FLOAT16_VALUE( - 59.984), SIMDE_FLOAT16_VALUE( 75.447) }, + { SIMDE_FLOAT16_VALUE( 81.561), 
SIMDE_FLOAT16_VALUE( - 39.509), SIMDE_FLOAT16_VALUE( 19.498), SIMDE_FLOAT16_VALUE( 95.740), + SIMDE_FLOAT16_VALUE( 83.750), SIMDE_FLOAT16_VALUE( 86.996), SIMDE_FLOAT16_VALUE( 72.370), SIMDE_FLOAT16_VALUE( 75.447) } }, + { { SIMDE_FLOAT16_VALUE( 84.124), SIMDE_FLOAT16_VALUE( 57.505), SIMDE_FLOAT16_VALUE( - 95.462), SIMDE_FLOAT16_VALUE( 81.748), + SIMDE_FLOAT16_VALUE( 24.643), SIMDE_FLOAT16_VALUE( 70.285), SIMDE_FLOAT16_VALUE( - 69.576), SIMDE_FLOAT16_VALUE( 73.340) }, + { SIMDE_FLOAT16_VALUE( 17.355), SIMDE_FLOAT16_VALUE( 5.498), SIMDE_FLOAT16_VALUE( 29.349), SIMDE_FLOAT16_VALUE( 93.614), + SIMDE_FLOAT16_VALUE( 78.556), SIMDE_FLOAT16_VALUE( - 70.390), SIMDE_FLOAT16_VALUE( 53.898), SIMDE_FLOAT16_VALUE( 55.877) }, + { SIMDE_FLOAT16_VALUE( 57.505), SIMDE_FLOAT16_VALUE( 5.498), SIMDE_FLOAT16_VALUE( 81.748), SIMDE_FLOAT16_VALUE( 93.614), + SIMDE_FLOAT16_VALUE( 70.285), SIMDE_FLOAT16_VALUE( - 70.390), SIMDE_FLOAT16_VALUE( 73.340), SIMDE_FLOAT16_VALUE( 55.877) } }, + { { SIMDE_FLOAT16_VALUE( - 9.431), SIMDE_FLOAT16_VALUE( - 67.084), SIMDE_FLOAT16_VALUE( 3.039), SIMDE_FLOAT16_VALUE( - 23.339), + SIMDE_FLOAT16_VALUE( - 70.932), SIMDE_FLOAT16_VALUE( - 71.857), SIMDE_FLOAT16_VALUE( 23.002), SIMDE_FLOAT16_VALUE( - 46.362) }, + { SIMDE_FLOAT16_VALUE( 43.482), SIMDE_FLOAT16_VALUE( 21.428), SIMDE_FLOAT16_VALUE( - 1.230), SIMDE_FLOAT16_VALUE( - 40.972), + SIMDE_FLOAT16_VALUE( - 47.974), SIMDE_FLOAT16_VALUE( 73.628), SIMDE_FLOAT16_VALUE( - 29.720), SIMDE_FLOAT16_VALUE( - 90.383) }, + { SIMDE_FLOAT16_VALUE( - 67.084), SIMDE_FLOAT16_VALUE( 21.428), SIMDE_FLOAT16_VALUE( - 23.339), SIMDE_FLOAT16_VALUE( - 40.972), + SIMDE_FLOAT16_VALUE( - 71.857), SIMDE_FLOAT16_VALUE( 73.628), SIMDE_FLOAT16_VALUE( - 46.362), SIMDE_FLOAT16_VALUE( - 90.383) } }, + { { SIMDE_FLOAT16_VALUE( 95.642), SIMDE_FLOAT16_VALUE( - 87.854), SIMDE_FLOAT16_VALUE( - 44.833), SIMDE_FLOAT16_VALUE( 50.993), + SIMDE_FLOAT16_VALUE( - 1.937), SIMDE_FLOAT16_VALUE( 79.506), SIMDE_FLOAT16_VALUE( 71.543), SIMDE_FLOAT16_VALUE( 
92.534) }, + { SIMDE_FLOAT16_VALUE( - 89.081), SIMDE_FLOAT16_VALUE( - 58.662), SIMDE_FLOAT16_VALUE( - 70.366), SIMDE_FLOAT16_VALUE( - 82.181), + SIMDE_FLOAT16_VALUE( - 14.473), SIMDE_FLOAT16_VALUE( 16.003), SIMDE_FLOAT16_VALUE( - 24.386), SIMDE_FLOAT16_VALUE( 17.897) }, + { SIMDE_FLOAT16_VALUE( - 87.854), SIMDE_FLOAT16_VALUE( - 58.662), SIMDE_FLOAT16_VALUE( 50.993), SIMDE_FLOAT16_VALUE( - 82.181), + SIMDE_FLOAT16_VALUE( 79.506), SIMDE_FLOAT16_VALUE( 16.003), SIMDE_FLOAT16_VALUE( 92.534), SIMDE_FLOAT16_VALUE( 17.897) } }, + { { SIMDE_FLOAT16_VALUE( 7.217), SIMDE_FLOAT16_VALUE( - 3.575), SIMDE_FLOAT16_VALUE( - 54.338), SIMDE_FLOAT16_VALUE( - 88.934), + SIMDE_FLOAT16_VALUE( - 45.428), SIMDE_FLOAT16_VALUE( - 96.685), SIMDE_FLOAT16_VALUE( - 19.259), SIMDE_FLOAT16_VALUE( - 78.009) }, + { SIMDE_FLOAT16_VALUE( 59.084), SIMDE_FLOAT16_VALUE( - 63.373), SIMDE_FLOAT16_VALUE( - 74.830), SIMDE_FLOAT16_VALUE( 65.237), + SIMDE_FLOAT16_VALUE( 6.826), SIMDE_FLOAT16_VALUE( 80.287), SIMDE_FLOAT16_VALUE( - 55.550), SIMDE_FLOAT16_VALUE( - 99.593) }, + { SIMDE_FLOAT16_VALUE( - 3.575), SIMDE_FLOAT16_VALUE( - 63.373), SIMDE_FLOAT16_VALUE( - 88.934), SIMDE_FLOAT16_VALUE( 65.237), + SIMDE_FLOAT16_VALUE( - 96.685), SIMDE_FLOAT16_VALUE( 80.287), SIMDE_FLOAT16_VALUE( - 78.009), SIMDE_FLOAT16_VALUE( - 99.593) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vtrn2q_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vtrn2q_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -350,10 +597,25 @@ test_simde_vtrn2q_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vtrn2q_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float64 a[2]; simde_float64 b[2]; @@ -394,10 +656,25 @@ test_simde_vtrn2q_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r = simde_vtrn2q_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -462,10 +739,25 @@ test_simde_vtrn2q_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = 
simde_vtrn2q_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -506,10 +798,25 @@ test_simde_vtrn2q_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vtrn2q_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -549,10 +856,25 @@ test_simde_vtrn2q_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vtrn2q_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[2]; int64_t b[2]; @@ -592,10 +914,25 @@ test_simde_vtrn2q_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vtrn2q_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -660,10 +997,25 @@ test_simde_vtrn2q_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t r = simde_vtrn2q_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -703,10 +1055,25 @@ test_simde_vtrn2q_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t r = simde_vtrn2q_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -747,10 +1114,25 @@ test_simde_vtrn2q_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t r = simde_vtrn2q_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + 
simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vtrn2q_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[2]; uint64_t b[2]; @@ -790,9 +1172,24 @@ test_simde_vtrn2q_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t r = simde_vtrn2q_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN +SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_s16) @@ -801,6 +1198,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vtrn2q_s8) diff --git a/test/arm/neon/uzp.c b/test/arm/neon/uzp.c index e87e1534a..6a2fd6e28 100644 --- a/test/arm/neon/uzp.c +++ b/test/arm/neon/uzp.c @@ -5,8 +5,77 @@ #if !defined(SIMDE_BUG_INTEL_857088) +static int +test_simde_vuzp_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[4]; + simde_float16 b[4]; + simde_float16 r[2][4]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 59.585), SIMDE_FLOAT16_VALUE( - 87.027), SIMDE_FLOAT16_VALUE( - 74.361), SIMDE_FLOAT16_VALUE( - 30.649) }, + { SIMDE_FLOAT16_VALUE( 93.073), SIMDE_FLOAT16_VALUE( - 31.922), SIMDE_FLOAT16_VALUE( 17.320), SIMDE_FLOAT16_VALUE( - 55.446) }, + { { SIMDE_FLOAT16_VALUE( 59.585), SIMDE_FLOAT16_VALUE( - 74.361), SIMDE_FLOAT16_VALUE( 93.073), 
SIMDE_FLOAT16_VALUE( 17.320) }, + { SIMDE_FLOAT16_VALUE( - 87.027), SIMDE_FLOAT16_VALUE( - 30.649), SIMDE_FLOAT16_VALUE( - 31.922), SIMDE_FLOAT16_VALUE( - 55.446) } } }, + { { SIMDE_FLOAT16_VALUE( 9.958), SIMDE_FLOAT16_VALUE( 85.170), SIMDE_FLOAT16_VALUE( - 72.074), SIMDE_FLOAT16_VALUE( - 27.806) }, + { SIMDE_FLOAT16_VALUE( 78.386), SIMDE_FLOAT16_VALUE( 6.905), SIMDE_FLOAT16_VALUE( 69.743), SIMDE_FLOAT16_VALUE( 45.300) }, + { { SIMDE_FLOAT16_VALUE( 9.958), SIMDE_FLOAT16_VALUE( - 72.074), SIMDE_FLOAT16_VALUE( 78.386), SIMDE_FLOAT16_VALUE( 69.743) }, + { SIMDE_FLOAT16_VALUE( 85.170), SIMDE_FLOAT16_VALUE( - 27.806), SIMDE_FLOAT16_VALUE( 6.905), SIMDE_FLOAT16_VALUE( 45.300) } } }, + { { SIMDE_FLOAT16_VALUE( - 69.822), SIMDE_FLOAT16_VALUE( - 45.174), SIMDE_FLOAT16_VALUE( - 30.230), SIMDE_FLOAT16_VALUE( - 86.717) }, + { SIMDE_FLOAT16_VALUE( 40.263), SIMDE_FLOAT16_VALUE( 89.268), SIMDE_FLOAT16_VALUE( 94.905), SIMDE_FLOAT16_VALUE( 59.037) }, + { { SIMDE_FLOAT16_VALUE( - 69.822), SIMDE_FLOAT16_VALUE( - 30.230), SIMDE_FLOAT16_VALUE( 40.263), SIMDE_FLOAT16_VALUE( 94.905) }, + { SIMDE_FLOAT16_VALUE( - 45.174), SIMDE_FLOAT16_VALUE( - 86.717), SIMDE_FLOAT16_VALUE( 89.268), SIMDE_FLOAT16_VALUE( 59.037) } } }, + { { SIMDE_FLOAT16_VALUE( 0.579), SIMDE_FLOAT16_VALUE( 53.168), SIMDE_FLOAT16_VALUE( - 49.281), SIMDE_FLOAT16_VALUE( - 2.295) }, + { SIMDE_FLOAT16_VALUE( 64.385), SIMDE_FLOAT16_VALUE( 51.685), SIMDE_FLOAT16_VALUE( 44.562), SIMDE_FLOAT16_VALUE( 6.481) }, + { { SIMDE_FLOAT16_VALUE( 0.579), SIMDE_FLOAT16_VALUE( - 49.281), SIMDE_FLOAT16_VALUE( 64.385), SIMDE_FLOAT16_VALUE( 44.562) }, + { SIMDE_FLOAT16_VALUE( 53.168), SIMDE_FLOAT16_VALUE( - 2.295), SIMDE_FLOAT16_VALUE( 51.685), SIMDE_FLOAT16_VALUE( 6.481) } } }, + { { SIMDE_FLOAT16_VALUE( - 86.361), SIMDE_FLOAT16_VALUE( - 69.966), SIMDE_FLOAT16_VALUE( 26.111), SIMDE_FLOAT16_VALUE( 5.103) }, + { SIMDE_FLOAT16_VALUE( - 54.134), SIMDE_FLOAT16_VALUE( - 87.125), SIMDE_FLOAT16_VALUE( 29.118), SIMDE_FLOAT16_VALUE( - 4.129) }, + { { 
SIMDE_FLOAT16_VALUE( - 86.361), SIMDE_FLOAT16_VALUE( 26.111), SIMDE_FLOAT16_VALUE( - 54.134), SIMDE_FLOAT16_VALUE( 29.118) }, + { SIMDE_FLOAT16_VALUE( - 69.966), SIMDE_FLOAT16_VALUE( 5.103), SIMDE_FLOAT16_VALUE( - 87.125), SIMDE_FLOAT16_VALUE( - 4.129) } } }, + { { SIMDE_FLOAT16_VALUE( 88.320), SIMDE_FLOAT16_VALUE( 36.543), SIMDE_FLOAT16_VALUE( 16.274), SIMDE_FLOAT16_VALUE( 89.586) }, + { SIMDE_FLOAT16_VALUE( - 43.919), SIMDE_FLOAT16_VALUE( - 42.540), SIMDE_FLOAT16_VALUE( 43.983), SIMDE_FLOAT16_VALUE( 48.582) }, + { { SIMDE_FLOAT16_VALUE( 88.320), SIMDE_FLOAT16_VALUE( 16.274), SIMDE_FLOAT16_VALUE( - 43.919), SIMDE_FLOAT16_VALUE( 43.983) }, + { SIMDE_FLOAT16_VALUE( 36.543), SIMDE_FLOAT16_VALUE( 89.586), SIMDE_FLOAT16_VALUE( - 42.540), SIMDE_FLOAT16_VALUE( 48.582) } } }, + { { SIMDE_FLOAT16_VALUE( 41.012), SIMDE_FLOAT16_VALUE( 97.838), SIMDE_FLOAT16_VALUE( 17.977), SIMDE_FLOAT16_VALUE( 3.700) }, + { SIMDE_FLOAT16_VALUE( - 3.857), SIMDE_FLOAT16_VALUE( - 64.852), SIMDE_FLOAT16_VALUE( - 33.562), SIMDE_FLOAT16_VALUE( 17.944) }, + { { SIMDE_FLOAT16_VALUE( 41.012), SIMDE_FLOAT16_VALUE( 17.977), SIMDE_FLOAT16_VALUE( - 3.857), SIMDE_FLOAT16_VALUE( - 33.562) }, + { SIMDE_FLOAT16_VALUE( 97.838), SIMDE_FLOAT16_VALUE( 3.700), SIMDE_FLOAT16_VALUE( - 64.852), SIMDE_FLOAT16_VALUE( 17.944) } } }, + { { SIMDE_FLOAT16_VALUE( - 74.409), SIMDE_FLOAT16_VALUE( - 5.545), SIMDE_FLOAT16_VALUE( - 17.427), SIMDE_FLOAT16_VALUE( - 74.159) }, + { SIMDE_FLOAT16_VALUE( - 93.810), SIMDE_FLOAT16_VALUE( 50.112), SIMDE_FLOAT16_VALUE( 65.086), SIMDE_FLOAT16_VALUE( 63.782) }, + { { SIMDE_FLOAT16_VALUE( - 74.409), SIMDE_FLOAT16_VALUE( - 17.427), SIMDE_FLOAT16_VALUE( - 93.810), SIMDE_FLOAT16_VALUE( 65.086) }, + { SIMDE_FLOAT16_VALUE( - 5.545), SIMDE_FLOAT16_VALUE( - 74.159), SIMDE_FLOAT16_VALUE( 50.112), SIMDE_FLOAT16_VALUE( 63.782) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x4_t a = simde_vld1_f16(test_vec[i].a); + simde_float16x4_t b = 
simde_vld1_f16(test_vec[i].b); + simde_float16x4x2_t r = simde_vuzp_f16(a, b); + + simde_test_arm_neon_assert_equal_f16x4(r.val[0], simde_vld1_f16(test_vec[i].r[0]), 1); + simde_test_arm_neon_assert_equal_f16x4(r.val[1], simde_vld1_f16(test_vec[i].r[1]), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x2_t r[2] = simde_vuzp_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + static int test_simde_vuzp_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -81,10 +150,25 @@ test_simde_vuzp_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r[2] = simde_vuzp_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -159,10 +243,25 @@ test_simde_vuzp_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x2_t r[2] = simde_vuzp_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -237,10 +336,25 @@ test_simde_vuzp_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x2_t r[2] = simde_vuzp_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -315,10 +429,25 @@ test_simde_vuzp_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r[2] = simde_vuzp_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -393,10 +522,25 @@ test_simde_vuzp_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x2_t r[2] = simde_vuzp_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x2(2, r[2], 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; uint16_t b[4]; @@ -470,10 +614,25 @@ test_simde_vuzp_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x2_t r[2] = simde_vuzp_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -547,10 +706,124 @@ test_simde_vuzp_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r[2] = simde_vuzp_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vuzpq_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[2][8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 73.721), SIMDE_FLOAT16_VALUE( 46.339), SIMDE_FLOAT16_VALUE( - 10.427), SIMDE_FLOAT16_VALUE( - 38.234), + SIMDE_FLOAT16_VALUE( - 26.097), SIMDE_FLOAT16_VALUE( 16.887), SIMDE_FLOAT16_VALUE( - 35.759), SIMDE_FLOAT16_VALUE( 44.147) }, + { SIMDE_FLOAT16_VALUE( - 60.955), SIMDE_FLOAT16_VALUE( - 22.797), SIMDE_FLOAT16_VALUE( 18.481), SIMDE_FLOAT16_VALUE( 74.071), + SIMDE_FLOAT16_VALUE( - 22.559), SIMDE_FLOAT16_VALUE( - 85.762), SIMDE_FLOAT16_VALUE( 19.915), 
SIMDE_FLOAT16_VALUE( 70.645) }, + { { SIMDE_FLOAT16_VALUE( 73.721), SIMDE_FLOAT16_VALUE( - 10.427), SIMDE_FLOAT16_VALUE( - 26.097), SIMDE_FLOAT16_VALUE( - 35.759), + SIMDE_FLOAT16_VALUE( - 60.955), SIMDE_FLOAT16_VALUE( 18.481), SIMDE_FLOAT16_VALUE( - 22.559), SIMDE_FLOAT16_VALUE( 19.915) }, + { SIMDE_FLOAT16_VALUE( 46.339), SIMDE_FLOAT16_VALUE( - 38.234), SIMDE_FLOAT16_VALUE( 16.887), SIMDE_FLOAT16_VALUE( 44.147), + SIMDE_FLOAT16_VALUE( - 22.797), SIMDE_FLOAT16_VALUE( 74.071), SIMDE_FLOAT16_VALUE( - 85.762), SIMDE_FLOAT16_VALUE( 70.645) } } }, + { { SIMDE_FLOAT16_VALUE( - 40.265), SIMDE_FLOAT16_VALUE( - 2.182), SIMDE_FLOAT16_VALUE( 72.917), SIMDE_FLOAT16_VALUE( 13.517), + SIMDE_FLOAT16_VALUE( - 43.058), SIMDE_FLOAT16_VALUE( - 92.395), SIMDE_FLOAT16_VALUE( 74.671), SIMDE_FLOAT16_VALUE( 43.330) }, + { SIMDE_FLOAT16_VALUE( - 30.362), SIMDE_FLOAT16_VALUE( 8.299), SIMDE_FLOAT16_VALUE( - 59.100), SIMDE_FLOAT16_VALUE( 42.001), + SIMDE_FLOAT16_VALUE( 3.619), SIMDE_FLOAT16_VALUE( - 9.776), SIMDE_FLOAT16_VALUE( 59.224), SIMDE_FLOAT16_VALUE( - 70.831) }, + { { SIMDE_FLOAT16_VALUE( - 40.265), SIMDE_FLOAT16_VALUE( 72.917), SIMDE_FLOAT16_VALUE( - 43.058), SIMDE_FLOAT16_VALUE( 74.671), + SIMDE_FLOAT16_VALUE( - 30.362), SIMDE_FLOAT16_VALUE( - 59.100), SIMDE_FLOAT16_VALUE( 3.619), SIMDE_FLOAT16_VALUE( 59.224) }, + { SIMDE_FLOAT16_VALUE( - 2.182), SIMDE_FLOAT16_VALUE( 13.517), SIMDE_FLOAT16_VALUE( - 92.395), SIMDE_FLOAT16_VALUE( 43.330), + SIMDE_FLOAT16_VALUE( 8.299), SIMDE_FLOAT16_VALUE( 42.001), SIMDE_FLOAT16_VALUE( - 9.776), SIMDE_FLOAT16_VALUE( - 70.831) } } }, + { { SIMDE_FLOAT16_VALUE( 5.621), SIMDE_FLOAT16_VALUE( - 81.247), SIMDE_FLOAT16_VALUE( 88.883), SIMDE_FLOAT16_VALUE( - 67.993), + SIMDE_FLOAT16_VALUE( - 42.899), SIMDE_FLOAT16_VALUE( 93.583), SIMDE_FLOAT16_VALUE( - 78.748), SIMDE_FLOAT16_VALUE( - 91.309) }, + { SIMDE_FLOAT16_VALUE( 81.834), SIMDE_FLOAT16_VALUE( - 9.827), SIMDE_FLOAT16_VALUE( 45.970), SIMDE_FLOAT16_VALUE( - 17.700), + SIMDE_FLOAT16_VALUE( 29.375), 
SIMDE_FLOAT16_VALUE( 29.732), SIMDE_FLOAT16_VALUE( 6.694), SIMDE_FLOAT16_VALUE( - 39.767) }, + { { SIMDE_FLOAT16_VALUE( 5.621), SIMDE_FLOAT16_VALUE( 88.883), SIMDE_FLOAT16_VALUE( - 42.899), SIMDE_FLOAT16_VALUE( - 78.748), + SIMDE_FLOAT16_VALUE( 81.834), SIMDE_FLOAT16_VALUE( 45.970), SIMDE_FLOAT16_VALUE( 29.375), SIMDE_FLOAT16_VALUE( 6.694) }, + { SIMDE_FLOAT16_VALUE( - 81.247), SIMDE_FLOAT16_VALUE( - 67.993), SIMDE_FLOAT16_VALUE( 93.583), SIMDE_FLOAT16_VALUE( - 91.309), + SIMDE_FLOAT16_VALUE( - 9.827), SIMDE_FLOAT16_VALUE( - 17.700), SIMDE_FLOAT16_VALUE( 29.732), SIMDE_FLOAT16_VALUE( - 39.767) } } }, + { { SIMDE_FLOAT16_VALUE( - 46.220), SIMDE_FLOAT16_VALUE( - 82.527), SIMDE_FLOAT16_VALUE( 16.982), SIMDE_FLOAT16_VALUE( - 71.688), + SIMDE_FLOAT16_VALUE( 87.960), SIMDE_FLOAT16_VALUE( 65.171), SIMDE_FLOAT16_VALUE( - 7.922), SIMDE_FLOAT16_VALUE( 7.006) }, + { SIMDE_FLOAT16_VALUE( - 65.689), SIMDE_FLOAT16_VALUE( 3.075), SIMDE_FLOAT16_VALUE( - 37.945), SIMDE_FLOAT16_VALUE( 93.491), + SIMDE_FLOAT16_VALUE( - 96.072), SIMDE_FLOAT16_VALUE( - 91.531), SIMDE_FLOAT16_VALUE( - 19.597), SIMDE_FLOAT16_VALUE( - 43.425) }, + { { SIMDE_FLOAT16_VALUE( - 46.220), SIMDE_FLOAT16_VALUE( 16.982), SIMDE_FLOAT16_VALUE( 87.960), SIMDE_FLOAT16_VALUE( - 7.922), + SIMDE_FLOAT16_VALUE( - 65.689), SIMDE_FLOAT16_VALUE( - 37.945), SIMDE_FLOAT16_VALUE( - 96.072), SIMDE_FLOAT16_VALUE( - 19.597) }, + { SIMDE_FLOAT16_VALUE( - 82.527), SIMDE_FLOAT16_VALUE( - 71.688), SIMDE_FLOAT16_VALUE( 65.171), SIMDE_FLOAT16_VALUE( 7.006), + SIMDE_FLOAT16_VALUE( 3.075), SIMDE_FLOAT16_VALUE( 93.491), SIMDE_FLOAT16_VALUE( - 91.531), SIMDE_FLOAT16_VALUE( - 43.425) } } }, + { { SIMDE_FLOAT16_VALUE( 2.917), SIMDE_FLOAT16_VALUE( 56.631), SIMDE_FLOAT16_VALUE( - 8.652), SIMDE_FLOAT16_VALUE( - 52.253), + SIMDE_FLOAT16_VALUE( - 58.261), SIMDE_FLOAT16_VALUE( - 54.516), SIMDE_FLOAT16_VALUE( - 72.534), SIMDE_FLOAT16_VALUE( - 2.400) }, + { SIMDE_FLOAT16_VALUE( 16.476), SIMDE_FLOAT16_VALUE( - 79.379), SIMDE_FLOAT16_VALUE( - 98.048), 
SIMDE_FLOAT16_VALUE( 82.106), + SIMDE_FLOAT16_VALUE( 39.266), SIMDE_FLOAT16_VALUE( - 94.860), SIMDE_FLOAT16_VALUE( 35.909), SIMDE_FLOAT16_VALUE( - 75.766) }, + { { SIMDE_FLOAT16_VALUE( 2.917), SIMDE_FLOAT16_VALUE( - 8.652), SIMDE_FLOAT16_VALUE( - 58.261), SIMDE_FLOAT16_VALUE( - 72.534), + SIMDE_FLOAT16_VALUE( 16.476), SIMDE_FLOAT16_VALUE( - 98.048), SIMDE_FLOAT16_VALUE( 39.266), SIMDE_FLOAT16_VALUE( 35.909) }, + { SIMDE_FLOAT16_VALUE( 56.631), SIMDE_FLOAT16_VALUE( - 52.253), SIMDE_FLOAT16_VALUE( - 54.516), SIMDE_FLOAT16_VALUE( - 2.400), + SIMDE_FLOAT16_VALUE( - 79.379), SIMDE_FLOAT16_VALUE( 82.106), SIMDE_FLOAT16_VALUE( - 94.860), SIMDE_FLOAT16_VALUE( - 75.766) } } }, + { { SIMDE_FLOAT16_VALUE( 44.120), SIMDE_FLOAT16_VALUE( 3.642), SIMDE_FLOAT16_VALUE( - 74.784), SIMDE_FLOAT16_VALUE( 65.438), + SIMDE_FLOAT16_VALUE( 4.092), SIMDE_FLOAT16_VALUE( 98.436), SIMDE_FLOAT16_VALUE( 58.246), SIMDE_FLOAT16_VALUE( - 60.461) }, + { SIMDE_FLOAT16_VALUE( 76.015), SIMDE_FLOAT16_VALUE( - 62.906), SIMDE_FLOAT16_VALUE( - 27.211), SIMDE_FLOAT16_VALUE( - 53.115), + SIMDE_FLOAT16_VALUE( - 19.255), SIMDE_FLOAT16_VALUE( 64.714), SIMDE_FLOAT16_VALUE( - 24.123), SIMDE_FLOAT16_VALUE( - 77.006) }, + { { SIMDE_FLOAT16_VALUE( 44.120), SIMDE_FLOAT16_VALUE( - 74.784), SIMDE_FLOAT16_VALUE( 4.092), SIMDE_FLOAT16_VALUE( 58.246), + SIMDE_FLOAT16_VALUE( 76.015), SIMDE_FLOAT16_VALUE( - 27.211), SIMDE_FLOAT16_VALUE( - 19.255), SIMDE_FLOAT16_VALUE( - 24.123) }, + { SIMDE_FLOAT16_VALUE( 3.642), SIMDE_FLOAT16_VALUE( 65.438), SIMDE_FLOAT16_VALUE( 98.436), SIMDE_FLOAT16_VALUE( - 60.461), + SIMDE_FLOAT16_VALUE( - 62.906), SIMDE_FLOAT16_VALUE( - 53.115), SIMDE_FLOAT16_VALUE( 64.714), SIMDE_FLOAT16_VALUE( - 77.006) } } }, + { { SIMDE_FLOAT16_VALUE( 28.681), SIMDE_FLOAT16_VALUE( 95.430), SIMDE_FLOAT16_VALUE( - 1.556), SIMDE_FLOAT16_VALUE( 56.474), + SIMDE_FLOAT16_VALUE( - 8.834), SIMDE_FLOAT16_VALUE( 57.675), SIMDE_FLOAT16_VALUE( - 74.749), SIMDE_FLOAT16_VALUE( 76.983) }, + { SIMDE_FLOAT16_VALUE( - 99.195), 
SIMDE_FLOAT16_VALUE( - 28.023), SIMDE_FLOAT16_VALUE( 87.270), SIMDE_FLOAT16_VALUE( - 1.591), + SIMDE_FLOAT16_VALUE( 11.636), SIMDE_FLOAT16_VALUE( - 6.493), SIMDE_FLOAT16_VALUE( - 32.712), SIMDE_FLOAT16_VALUE( - 72.389) }, + { { SIMDE_FLOAT16_VALUE( 28.681), SIMDE_FLOAT16_VALUE( - 1.556), SIMDE_FLOAT16_VALUE( - 8.834), SIMDE_FLOAT16_VALUE( - 74.749), + SIMDE_FLOAT16_VALUE( - 99.195), SIMDE_FLOAT16_VALUE( 87.270), SIMDE_FLOAT16_VALUE( 11.636), SIMDE_FLOAT16_VALUE( - 32.712) }, + { SIMDE_FLOAT16_VALUE( 95.430), SIMDE_FLOAT16_VALUE( 56.474), SIMDE_FLOAT16_VALUE( 57.675), SIMDE_FLOAT16_VALUE( 76.983), + SIMDE_FLOAT16_VALUE( - 28.023), SIMDE_FLOAT16_VALUE( - 1.591), SIMDE_FLOAT16_VALUE( - 6.493), SIMDE_FLOAT16_VALUE( - 72.389) } } }, + { { SIMDE_FLOAT16_VALUE( 77.839), SIMDE_FLOAT16_VALUE( - 60.894), SIMDE_FLOAT16_VALUE( - 29.168), SIMDE_FLOAT16_VALUE( - 10.467), + SIMDE_FLOAT16_VALUE( - 99.954), SIMDE_FLOAT16_VALUE( 6.428), SIMDE_FLOAT16_VALUE( - 12.713), SIMDE_FLOAT16_VALUE( 92.257) }, + { SIMDE_FLOAT16_VALUE( 5.688), SIMDE_FLOAT16_VALUE( 43.890), SIMDE_FLOAT16_VALUE( 48.122), SIMDE_FLOAT16_VALUE( - 2.823), + SIMDE_FLOAT16_VALUE( 65.174), SIMDE_FLOAT16_VALUE( 27.456), SIMDE_FLOAT16_VALUE( 71.730), SIMDE_FLOAT16_VALUE( 27.816) }, + { { SIMDE_FLOAT16_VALUE( 77.839), SIMDE_FLOAT16_VALUE( - 29.168), SIMDE_FLOAT16_VALUE( - 99.954), SIMDE_FLOAT16_VALUE( - 12.713), + SIMDE_FLOAT16_VALUE( 5.688), SIMDE_FLOAT16_VALUE( 48.122), SIMDE_FLOAT16_VALUE( 65.174), SIMDE_FLOAT16_VALUE( 71.730) }, + { SIMDE_FLOAT16_VALUE( - 60.894), SIMDE_FLOAT16_VALUE( - 10.467), SIMDE_FLOAT16_VALUE( 6.428), SIMDE_FLOAT16_VALUE( 92.257), + SIMDE_FLOAT16_VALUE( 43.890), SIMDE_FLOAT16_VALUE( - 2.823), SIMDE_FLOAT16_VALUE( 27.456), SIMDE_FLOAT16_VALUE( 27.816) } } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8x2_t r = simde_vuzpq_f16(a, b); 
+ simde_test_arm_neon_assert_equal_f16x8(r.val[0], simde_vld1q_f16(test_vec[i].r[0]), 1); + simde_test_arm_neon_assert_equal_f16x8(r.val[1], simde_vld1q_f16(test_vec[i].r[1]), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x2_t r[2] = simde_vuzpq_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -624,10 +897,25 @@ test_simde_vuzpq_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x2_t r[2] = simde_vuzpq_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -734,10 +1022,25 @@ test_simde_vuzpq_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x2_t r[2] = simde_vuzpq_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x2(2, r[2], 
SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -812,10 +1115,25 @@ test_simde_vuzpq_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x2_t r[2] = simde_vuzpq_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -889,10 +1207,25 @@ test_simde_vuzpq_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x2_t r[2] = simde_vuzpq_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -999,10 +1332,25 @@ test_simde_vuzpq_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x2_t r[2] = simde_vuzpq_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static 
int test_simde_vuzpq_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -1076,10 +1424,25 @@ test_simde_vuzpq_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x2_t r[2] = simde_vuzpq_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzpq_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -1154,12 +1517,27 @@ test_simde_vuzpq_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x2_t r[2] = simde_vuzpq_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r[2], SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } #endif /* !defined(SIMDE_BUG_INTEL_857088) */ SIMDE_TEST_FUNC_LIST_BEGIN #if !defined(SIMDE_BUG_INTEL_857088) +SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_s16) @@ -1168,6 +1546,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_s8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzpq_s16) diff --git a/test/arm/neon/uzp1.c b/test/arm/neon/uzp1.c index 644402287..fc61c0622 100644 --- a/test/arm/neon/uzp1.c +++ b/test/arm/neon/uzp1.c @@ -5,6 +5,7 @@ 
static int test_simde_vuzp1_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[4]; simde_float16 b[4]; @@ -47,10 +48,25 @@ test_simde_vuzp1_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vuzp1_f16(a, b); + + simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -93,10 +109,25 @@ test_simde_vuzp1_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vuzp1_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -137,10 +168,25 @@ test_simde_vuzp1_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vuzp1_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif 
} static int test_simde_vuzp1_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -181,10 +227,25 @@ test_simde_vuzp1_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vuzp1_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -225,10 +286,25 @@ test_simde_vuzp1_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vuzp1_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -269,10 +345,25 @@ test_simde_vuzp1_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t r = simde_vuzp1_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; 
uint16_t b[4]; @@ -312,10 +403,25 @@ test_simde_vuzp1_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t r = simde_vuzp1_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -355,10 +461,107 @@ test_simde_vuzp1_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r = simde_vuzp1_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vuzp1q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( - 97.687), SIMDE_FLOAT16_VALUE( - 79.004), SIMDE_FLOAT16_VALUE( - 89.890), SIMDE_FLOAT16_VALUE( 40.738), + SIMDE_FLOAT16_VALUE( - 65.793), SIMDE_FLOAT16_VALUE( - 23.168), SIMDE_FLOAT16_VALUE( - 5.072), SIMDE_FLOAT16_VALUE( 43.484) }, + { SIMDE_FLOAT16_VALUE( - 84.405), SIMDE_FLOAT16_VALUE( 65.922), SIMDE_FLOAT16_VALUE( - 58.324), SIMDE_FLOAT16_VALUE( - 27.090), + SIMDE_FLOAT16_VALUE( 15.688), SIMDE_FLOAT16_VALUE( - 30.522), SIMDE_FLOAT16_VALUE( 24.014), SIMDE_FLOAT16_VALUE( - 74.725) }, + { SIMDE_FLOAT16_VALUE( - 97.687), SIMDE_FLOAT16_VALUE( - 89.890), SIMDE_FLOAT16_VALUE( - 65.793), 
SIMDE_FLOAT16_VALUE( - 5.072), + SIMDE_FLOAT16_VALUE( - 84.405), SIMDE_FLOAT16_VALUE( - 58.324), SIMDE_FLOAT16_VALUE( 15.688), SIMDE_FLOAT16_VALUE( 24.014) } }, + { { SIMDE_FLOAT16_VALUE( - 75.826), SIMDE_FLOAT16_VALUE( - 94.734), SIMDE_FLOAT16_VALUE( - 19.540), SIMDE_FLOAT16_VALUE( 46.550), + SIMDE_FLOAT16_VALUE( 82.958), SIMDE_FLOAT16_VALUE( 77.773), SIMDE_FLOAT16_VALUE( 19.467), SIMDE_FLOAT16_VALUE( 62.504) }, + { SIMDE_FLOAT16_VALUE( 18.734), SIMDE_FLOAT16_VALUE( 47.806), SIMDE_FLOAT16_VALUE( 3.703), SIMDE_FLOAT16_VALUE( - 50.129), + SIMDE_FLOAT16_VALUE( - 8.807), SIMDE_FLOAT16_VALUE( - 57.665), SIMDE_FLOAT16_VALUE( 19.634), SIMDE_FLOAT16_VALUE( - 49.723) }, + { SIMDE_FLOAT16_VALUE( - 75.826), SIMDE_FLOAT16_VALUE( - 19.540), SIMDE_FLOAT16_VALUE( 82.958), SIMDE_FLOAT16_VALUE( 19.467), + SIMDE_FLOAT16_VALUE( 18.734), SIMDE_FLOAT16_VALUE( 3.703), SIMDE_FLOAT16_VALUE( - 8.807), SIMDE_FLOAT16_VALUE( 19.634) } }, + { { SIMDE_FLOAT16_VALUE( - 22.928), SIMDE_FLOAT16_VALUE( - 24.929), SIMDE_FLOAT16_VALUE( - 92.576), SIMDE_FLOAT16_VALUE( 66.926), + SIMDE_FLOAT16_VALUE( - 61.278), SIMDE_FLOAT16_VALUE( - 16.213), SIMDE_FLOAT16_VALUE( 46.549), SIMDE_FLOAT16_VALUE( 41.416) }, + { SIMDE_FLOAT16_VALUE( 2.906), SIMDE_FLOAT16_VALUE( 40.984), SIMDE_FLOAT16_VALUE( 33.772), SIMDE_FLOAT16_VALUE( 94.502), + SIMDE_FLOAT16_VALUE( 29.055), SIMDE_FLOAT16_VALUE( 48.251), SIMDE_FLOAT16_VALUE( - 53.444), SIMDE_FLOAT16_VALUE( - 10.038) }, + { SIMDE_FLOAT16_VALUE( - 22.928), SIMDE_FLOAT16_VALUE( - 92.576), SIMDE_FLOAT16_VALUE( - 61.278), SIMDE_FLOAT16_VALUE( 46.549), + SIMDE_FLOAT16_VALUE( 2.906), SIMDE_FLOAT16_VALUE( 33.772), SIMDE_FLOAT16_VALUE( 29.055), SIMDE_FLOAT16_VALUE( - 53.444) } }, + { { SIMDE_FLOAT16_VALUE( - 43.188), SIMDE_FLOAT16_VALUE( - 66.409), SIMDE_FLOAT16_VALUE( 35.235), SIMDE_FLOAT16_VALUE( - 72.208), + SIMDE_FLOAT16_VALUE( - 97.543), SIMDE_FLOAT16_VALUE( 90.132), SIMDE_FLOAT16_VALUE( - 47.254), SIMDE_FLOAT16_VALUE( 89.145) }, + { SIMDE_FLOAT16_VALUE( - 92.430), 
SIMDE_FLOAT16_VALUE( - 24.436), SIMDE_FLOAT16_VALUE( - 87.694), SIMDE_FLOAT16_VALUE( 77.935), + SIMDE_FLOAT16_VALUE( - 17.203), SIMDE_FLOAT16_VALUE( - 52.392), SIMDE_FLOAT16_VALUE( 38.656), SIMDE_FLOAT16_VALUE( 26.074) }, + { SIMDE_FLOAT16_VALUE( - 43.188), SIMDE_FLOAT16_VALUE( 35.235), SIMDE_FLOAT16_VALUE( - 97.543), SIMDE_FLOAT16_VALUE( - 47.254), + SIMDE_FLOAT16_VALUE( - 92.430), SIMDE_FLOAT16_VALUE( - 87.694), SIMDE_FLOAT16_VALUE( - 17.203), SIMDE_FLOAT16_VALUE( 38.656) } }, + { { SIMDE_FLOAT16_VALUE( - 65.868), SIMDE_FLOAT16_VALUE( 10.356), SIMDE_FLOAT16_VALUE( - 59.186), SIMDE_FLOAT16_VALUE( 59.722), + SIMDE_FLOAT16_VALUE( - 89.649), SIMDE_FLOAT16_VALUE( - 81.545), SIMDE_FLOAT16_VALUE( 7.064), SIMDE_FLOAT16_VALUE( 29.273) }, + { SIMDE_FLOAT16_VALUE( 99.268), SIMDE_FLOAT16_VALUE( 59.929), SIMDE_FLOAT16_VALUE( 54.246), SIMDE_FLOAT16_VALUE( - 9.983), + SIMDE_FLOAT16_VALUE( 23.574), SIMDE_FLOAT16_VALUE( - 94.576), SIMDE_FLOAT16_VALUE( - 16.893), SIMDE_FLOAT16_VALUE( 24.448) }, + { SIMDE_FLOAT16_VALUE( - 65.868), SIMDE_FLOAT16_VALUE( - 59.186), SIMDE_FLOAT16_VALUE( - 89.649), SIMDE_FLOAT16_VALUE( 7.064), + SIMDE_FLOAT16_VALUE( 99.268), SIMDE_FLOAT16_VALUE( 54.246), SIMDE_FLOAT16_VALUE( 23.574), SIMDE_FLOAT16_VALUE( - 16.893) } }, + { { SIMDE_FLOAT16_VALUE( - 24.032), SIMDE_FLOAT16_VALUE( - 24.501), SIMDE_FLOAT16_VALUE( - 16.286), SIMDE_FLOAT16_VALUE( - 43.379), + SIMDE_FLOAT16_VALUE( - 89.719), SIMDE_FLOAT16_VALUE( - 44.948), SIMDE_FLOAT16_VALUE( - 26.410), SIMDE_FLOAT16_VALUE( 47.907) }, + { SIMDE_FLOAT16_VALUE( 80.290), SIMDE_FLOAT16_VALUE( - 61.861), SIMDE_FLOAT16_VALUE( 99.823), SIMDE_FLOAT16_VALUE( - 33.955), + SIMDE_FLOAT16_VALUE( - 96.942), SIMDE_FLOAT16_VALUE( - 21.953), SIMDE_FLOAT16_VALUE( - 83.239), SIMDE_FLOAT16_VALUE( - 80.026) }, + { SIMDE_FLOAT16_VALUE( - 24.032), SIMDE_FLOAT16_VALUE( - 16.286), SIMDE_FLOAT16_VALUE( - 89.719), SIMDE_FLOAT16_VALUE( - 26.410), + SIMDE_FLOAT16_VALUE( 80.290), SIMDE_FLOAT16_VALUE( 99.823), SIMDE_FLOAT16_VALUE( - 
96.942), SIMDE_FLOAT16_VALUE( - 83.239) } }, + { { SIMDE_FLOAT16_VALUE( - 55.230), SIMDE_FLOAT16_VALUE( - 23.393), SIMDE_FLOAT16_VALUE( 12.967), SIMDE_FLOAT16_VALUE( - 8.488), + SIMDE_FLOAT16_VALUE( 32.610), SIMDE_FLOAT16_VALUE( - 20.642), SIMDE_FLOAT16_VALUE( - 49.605), SIMDE_FLOAT16_VALUE( 85.964) }, + { SIMDE_FLOAT16_VALUE( 86.315), SIMDE_FLOAT16_VALUE( 91.729), SIMDE_FLOAT16_VALUE( - 6.684), SIMDE_FLOAT16_VALUE( 31.160), + SIMDE_FLOAT16_VALUE( - 10.924), SIMDE_FLOAT16_VALUE( - 90.352), SIMDE_FLOAT16_VALUE( 33.044), SIMDE_FLOAT16_VALUE( - 55.391) }, + { SIMDE_FLOAT16_VALUE( - 55.230), SIMDE_FLOAT16_VALUE( 12.967), SIMDE_FLOAT16_VALUE( 32.610), SIMDE_FLOAT16_VALUE( - 49.605), + SIMDE_FLOAT16_VALUE( 86.315), SIMDE_FLOAT16_VALUE( - 6.684), SIMDE_FLOAT16_VALUE( - 10.924), SIMDE_FLOAT16_VALUE( 33.044) } }, + { { SIMDE_FLOAT16_VALUE( 63.752), SIMDE_FLOAT16_VALUE( - 40.489), SIMDE_FLOAT16_VALUE( - 58.676), SIMDE_FLOAT16_VALUE( - 83.592), + SIMDE_FLOAT16_VALUE( 74.115), SIMDE_FLOAT16_VALUE( - 76.605), SIMDE_FLOAT16_VALUE( 25.040), SIMDE_FLOAT16_VALUE( 69.197) }, + { SIMDE_FLOAT16_VALUE( 16.183), SIMDE_FLOAT16_VALUE( 14.003), SIMDE_FLOAT16_VALUE( - 41.902), SIMDE_FLOAT16_VALUE( - 14.309), + SIMDE_FLOAT16_VALUE( - 63.990), SIMDE_FLOAT16_VALUE( - 92.928), SIMDE_FLOAT16_VALUE( - 54.163), SIMDE_FLOAT16_VALUE( 94.671) }, + { SIMDE_FLOAT16_VALUE( 63.752), SIMDE_FLOAT16_VALUE( - 58.676), SIMDE_FLOAT16_VALUE( 74.115), SIMDE_FLOAT16_VALUE( 25.040), + SIMDE_FLOAT16_VALUE( 16.183), SIMDE_FLOAT16_VALUE( - 41.902), SIMDE_FLOAT16_VALUE( - 63.990), SIMDE_FLOAT16_VALUE( - 54.163) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vuzp1q_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + 
simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vuzp1q_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -398,10 +601,25 @@ test_simde_vuzp1q_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vuzp1q_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float64 a[2]; simde_float64 b[2]; @@ -442,10 +660,25 @@ test_simde_vuzp1q_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r = simde_vuzp1q_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -510,10 +743,25 
@@ test_simde_vuzp1q_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = simde_vuzp1q_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -554,10 +802,25 @@ test_simde_vuzp1q_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vuzp1q_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -597,10 +860,25 @@ test_simde_vuzp1q_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vuzp1q_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[2]; int64_t b[2]; @@ -640,10 +918,25 @@ test_simde_vuzp1q_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + 
fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vuzp1q_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -708,10 +1001,25 @@ test_simde_vuzp1q_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t r = simde_vuzp1q_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -751,10 +1059,25 @@ test_simde_vuzp1q_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t r = simde_vuzp1q_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -795,10 +1118,25 @@ test_simde_vuzp1q_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + 
simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t r = simde_vuzp1q_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp1q_u64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[2]; uint64_t b[2]; @@ -838,6 +1176,20 @@ test_simde_vuzp1q_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t r = simde_vuzp1q_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN @@ -850,6 +1202,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp1q_s8) diff --git a/test/arm/neon/uzp2.c b/test/arm/neon/uzp2.c index 00a6ff873..c2c604b7f 100644 --- a/test/arm/neon/uzp2.c +++ b/test/arm/neon/uzp2.c @@ -5,6 +5,7 @@ static int test_simde_vuzp2_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float16 a[4]; simde_float16 b[4]; @@ -35,10 +36,25 @@ test_simde_vuzp2_f16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x4_t a = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t b = simde_test_arm_neon_random_f16x4(-100.0f, 100.0f); + simde_float16x4_t r = simde_vuzp2_f16(a, b); + + 
simde_test_arm_neon_write_f16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_f32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[2]; simde_float32 b[2]; @@ -81,10 +97,25 @@ test_simde_vuzp2_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x2_t a = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t b = simde_test_arm_neon_random_f32x2(-1000.0f, 1000.0f); + simde_float32x2_t r = simde_vuzp2_f32(a, b); + + simde_test_arm_neon_write_f32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[8]; int8_t b[8]; @@ -125,10 +156,25 @@ test_simde_vuzp2_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x8_t a = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t b = simde_test_arm_neon_random_i8x8(); + simde_int8x8_t r = simde_vuzp2_s8(a, b); + + simde_test_arm_neon_write_i8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[4]; int16_t b[4]; @@ -169,10 +215,25 @@ test_simde_vuzp2_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x4_t a = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t b = simde_test_arm_neon_random_i16x4(); + simde_int16x4_t r = simde_vuzp2_s16(a, b); + + simde_test_arm_neon_write_i16x4(2, a, 
SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[2]; int32_t b[2]; @@ -213,10 +274,25 @@ test_simde_vuzp2_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x2_t a = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t b = simde_test_arm_neon_random_i32x2(); + simde_int32x2_t r = simde_vuzp2_s32(a, b); + + simde_test_arm_neon_write_i32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[8]; uint8_t b[8]; @@ -257,10 +333,25 @@ test_simde_vuzp2_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x8_t a = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t b = simde_test_arm_neon_random_u8x8(); + simde_uint8x8_t r = simde_vuzp2_u8(a, b); + + simde_test_arm_neon_write_u8x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[4]; uint16_t b[4]; @@ -300,10 +391,25 @@ test_simde_vuzp2_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x4_t a = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t b = simde_test_arm_neon_random_u16x4(); + simde_uint16x4_t r = simde_vuzp2_u16(a, b); + + simde_test_arm_neon_write_u16x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x4(2, b, 
SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[2]; uint32_t b[2]; @@ -343,10 +449,107 @@ test_simde_vuzp2_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x2_t a = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t b = simde_test_arm_neon_random_u32x2(); + simde_uint32x2_t r = simde_vuzp2_u32(a, b); + + simde_test_arm_neon_write_u32x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif +} + +static int +test_simde_vuzp2q_f16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 + struct { + simde_float16 a[8]; + simde_float16 b[8]; + simde_float16 r[8]; + } test_vec[] = { + { { SIMDE_FLOAT16_VALUE( 90.452), SIMDE_FLOAT16_VALUE( 17.701), SIMDE_FLOAT16_VALUE( - 49.009), SIMDE_FLOAT16_VALUE( - 88.786), + SIMDE_FLOAT16_VALUE( - 29.009), SIMDE_FLOAT16_VALUE( - 37.203), SIMDE_FLOAT16_VALUE( - 58.225), SIMDE_FLOAT16_VALUE( 98.368) }, + { SIMDE_FLOAT16_VALUE( 19.440), SIMDE_FLOAT16_VALUE( - 1.659), SIMDE_FLOAT16_VALUE( 42.542), SIMDE_FLOAT16_VALUE( - 99.688), + SIMDE_FLOAT16_VALUE( - 88.443), SIMDE_FLOAT16_VALUE( 34.706), SIMDE_FLOAT16_VALUE( - 85.533), SIMDE_FLOAT16_VALUE( - 56.781) }, + { SIMDE_FLOAT16_VALUE( 17.701), SIMDE_FLOAT16_VALUE( - 88.786), SIMDE_FLOAT16_VALUE( - 37.203), SIMDE_FLOAT16_VALUE( 98.368), + SIMDE_FLOAT16_VALUE( - 1.659), SIMDE_FLOAT16_VALUE( - 99.688), SIMDE_FLOAT16_VALUE( 34.706), SIMDE_FLOAT16_VALUE( - 56.781) } }, + { { SIMDE_FLOAT16_VALUE( 33.998), SIMDE_FLOAT16_VALUE( 9.488), SIMDE_FLOAT16_VALUE( - 57.721), SIMDE_FLOAT16_VALUE( - 25.485), + SIMDE_FLOAT16_VALUE( - 94.073), SIMDE_FLOAT16_VALUE( - 82.899), SIMDE_FLOAT16_VALUE( 44.365), SIMDE_FLOAT16_VALUE( - 99.238) }, + { SIMDE_FLOAT16_VALUE( 9.551), 
SIMDE_FLOAT16_VALUE( 89.969), SIMDE_FLOAT16_VALUE( - 86.075), SIMDE_FLOAT16_VALUE( - 77.835), + SIMDE_FLOAT16_VALUE( 0.704), SIMDE_FLOAT16_VALUE( - 29.406), SIMDE_FLOAT16_VALUE( 20.191), SIMDE_FLOAT16_VALUE( - 38.311) }, + { SIMDE_FLOAT16_VALUE( 9.488), SIMDE_FLOAT16_VALUE( - 25.485), SIMDE_FLOAT16_VALUE( - 82.899), SIMDE_FLOAT16_VALUE( - 99.238), + SIMDE_FLOAT16_VALUE( 89.969), SIMDE_FLOAT16_VALUE( - 77.835), SIMDE_FLOAT16_VALUE( - 29.406), SIMDE_FLOAT16_VALUE( - 38.311) } }, + { { SIMDE_FLOAT16_VALUE( 77.669), SIMDE_FLOAT16_VALUE( 71.372), SIMDE_FLOAT16_VALUE( 54.402), SIMDE_FLOAT16_VALUE( 77.171), + SIMDE_FLOAT16_VALUE( 37.815), SIMDE_FLOAT16_VALUE( 41.042), SIMDE_FLOAT16_VALUE( - 13.386), SIMDE_FLOAT16_VALUE( 20.748) }, + { SIMDE_FLOAT16_VALUE( - 98.103), SIMDE_FLOAT16_VALUE( - 63.759), SIMDE_FLOAT16_VALUE( - 16.397), SIMDE_FLOAT16_VALUE( - 83.256), + SIMDE_FLOAT16_VALUE( - 23.909), SIMDE_FLOAT16_VALUE( - 69.358), SIMDE_FLOAT16_VALUE( 53.308), SIMDE_FLOAT16_VALUE( 92.091) }, + { SIMDE_FLOAT16_VALUE( 71.372), SIMDE_FLOAT16_VALUE( 77.171), SIMDE_FLOAT16_VALUE( 41.042), SIMDE_FLOAT16_VALUE( 20.748), + SIMDE_FLOAT16_VALUE( - 63.759), SIMDE_FLOAT16_VALUE( - 83.256), SIMDE_FLOAT16_VALUE( - 69.358), SIMDE_FLOAT16_VALUE( 92.091) } }, + { { SIMDE_FLOAT16_VALUE( 0.764), SIMDE_FLOAT16_VALUE( - 93.449), SIMDE_FLOAT16_VALUE( 78.083), SIMDE_FLOAT16_VALUE( 10.810), + SIMDE_FLOAT16_VALUE( 94.958), SIMDE_FLOAT16_VALUE( 38.118), SIMDE_FLOAT16_VALUE( 48.405), SIMDE_FLOAT16_VALUE( 98.273) }, + { SIMDE_FLOAT16_VALUE( - 11.121), SIMDE_FLOAT16_VALUE( - 18.789), SIMDE_FLOAT16_VALUE( - 60.496), SIMDE_FLOAT16_VALUE( - 49.152), + SIMDE_FLOAT16_VALUE( - 38.385), SIMDE_FLOAT16_VALUE( - 40.755), SIMDE_FLOAT16_VALUE( 93.481), SIMDE_FLOAT16_VALUE( 93.409) }, + { SIMDE_FLOAT16_VALUE( - 93.449), SIMDE_FLOAT16_VALUE( 10.810), SIMDE_FLOAT16_VALUE( 38.118), SIMDE_FLOAT16_VALUE( 98.273), + SIMDE_FLOAT16_VALUE( - 18.789), SIMDE_FLOAT16_VALUE( - 49.152), SIMDE_FLOAT16_VALUE( - 40.755), 
SIMDE_FLOAT16_VALUE( 93.409) } }, + { { SIMDE_FLOAT16_VALUE( 60.980), SIMDE_FLOAT16_VALUE( 44.087), SIMDE_FLOAT16_VALUE( 4.420), SIMDE_FLOAT16_VALUE( - 86.379), + SIMDE_FLOAT16_VALUE( - 21.956), SIMDE_FLOAT16_VALUE( 66.211), SIMDE_FLOAT16_VALUE( 40.919), SIMDE_FLOAT16_VALUE( 44.341) }, + { SIMDE_FLOAT16_VALUE( - 6.654), SIMDE_FLOAT16_VALUE( 74.703), SIMDE_FLOAT16_VALUE( 90.296), SIMDE_FLOAT16_VALUE( - 32.224), + SIMDE_FLOAT16_VALUE( - 62.184), SIMDE_FLOAT16_VALUE( 60.075), SIMDE_FLOAT16_VALUE( 54.181), SIMDE_FLOAT16_VALUE( - 32.279) }, + { SIMDE_FLOAT16_VALUE( 44.087), SIMDE_FLOAT16_VALUE( - 86.379), SIMDE_FLOAT16_VALUE( 66.211), SIMDE_FLOAT16_VALUE( 44.341), + SIMDE_FLOAT16_VALUE( 74.703), SIMDE_FLOAT16_VALUE( - 32.224), SIMDE_FLOAT16_VALUE( 60.075), SIMDE_FLOAT16_VALUE( - 32.279) } }, + { { SIMDE_FLOAT16_VALUE( - 18.086), SIMDE_FLOAT16_VALUE( - 93.397), SIMDE_FLOAT16_VALUE( 80.704), SIMDE_FLOAT16_VALUE( - 25.237), + SIMDE_FLOAT16_VALUE( 67.082), SIMDE_FLOAT16_VALUE( 45.500), SIMDE_FLOAT16_VALUE( - 42.821), SIMDE_FLOAT16_VALUE( 30.047) }, + { SIMDE_FLOAT16_VALUE( 33.473), SIMDE_FLOAT16_VALUE( - 16.060), SIMDE_FLOAT16_VALUE( 82.753), SIMDE_FLOAT16_VALUE( - 97.939), + SIMDE_FLOAT16_VALUE( 55.855), SIMDE_FLOAT16_VALUE( 32.741), SIMDE_FLOAT16_VALUE( 82.947), SIMDE_FLOAT16_VALUE( - 16.162) }, + { SIMDE_FLOAT16_VALUE( - 93.397), SIMDE_FLOAT16_VALUE( - 25.237), SIMDE_FLOAT16_VALUE( 45.500), SIMDE_FLOAT16_VALUE( 30.047), + SIMDE_FLOAT16_VALUE( - 16.060), SIMDE_FLOAT16_VALUE( - 97.939), SIMDE_FLOAT16_VALUE( 32.741), SIMDE_FLOAT16_VALUE( - 16.162) } }, + { { SIMDE_FLOAT16_VALUE( 39.481), SIMDE_FLOAT16_VALUE( - 43.751), SIMDE_FLOAT16_VALUE( 46.517), SIMDE_FLOAT16_VALUE( 73.973), + SIMDE_FLOAT16_VALUE( - 63.824), SIMDE_FLOAT16_VALUE( - 81.545), SIMDE_FLOAT16_VALUE( 80.086), SIMDE_FLOAT16_VALUE( 5.275) }, + { SIMDE_FLOAT16_VALUE( 47.213), SIMDE_FLOAT16_VALUE( 95.067), SIMDE_FLOAT16_VALUE( - 53.211), SIMDE_FLOAT16_VALUE( 47.692), + SIMDE_FLOAT16_VALUE( 71.401), 
SIMDE_FLOAT16_VALUE( - 54.682), SIMDE_FLOAT16_VALUE( 74.308), SIMDE_FLOAT16_VALUE( - 33.162) }, + { SIMDE_FLOAT16_VALUE( - 43.751), SIMDE_FLOAT16_VALUE( 73.973), SIMDE_FLOAT16_VALUE( - 81.545), SIMDE_FLOAT16_VALUE( 5.275), + SIMDE_FLOAT16_VALUE( 95.067), SIMDE_FLOAT16_VALUE( 47.692), SIMDE_FLOAT16_VALUE( - 54.682), SIMDE_FLOAT16_VALUE( - 33.162) } }, + { { SIMDE_FLOAT16_VALUE( 93.150), SIMDE_FLOAT16_VALUE( - 92.626), SIMDE_FLOAT16_VALUE( 71.362), SIMDE_FLOAT16_VALUE( - 72.353), + SIMDE_FLOAT16_VALUE( 24.600), SIMDE_FLOAT16_VALUE( - 79.615), SIMDE_FLOAT16_VALUE( - 49.427), SIMDE_FLOAT16_VALUE( - 48.559) }, + { SIMDE_FLOAT16_VALUE( - 35.290), SIMDE_FLOAT16_VALUE( - 24.706), SIMDE_FLOAT16_VALUE( 98.487), SIMDE_FLOAT16_VALUE( 58.278), + SIMDE_FLOAT16_VALUE( - 53.949), SIMDE_FLOAT16_VALUE( - 40.152), SIMDE_FLOAT16_VALUE( 29.818), SIMDE_FLOAT16_VALUE( 53.465) }, + { SIMDE_FLOAT16_VALUE( - 92.626), SIMDE_FLOAT16_VALUE( - 72.353), SIMDE_FLOAT16_VALUE( - 79.615), SIMDE_FLOAT16_VALUE( - 48.559), + SIMDE_FLOAT16_VALUE( - 24.706), SIMDE_FLOAT16_VALUE( 58.278), SIMDE_FLOAT16_VALUE( - 40.152), SIMDE_FLOAT16_VALUE( 53.465) } }, + }; + + for (size_t i = 0 ; i < (sizeof(test_vec) / sizeof(test_vec[0])) ; i++) { + simde_float16x8_t a = simde_vld1q_f16(test_vec[i].a); + simde_float16x8_t b = simde_vld1q_f16(test_vec[i].b); + simde_float16x8_t r = simde_vuzp2q_f16(a, b); + simde_test_arm_neon_assert_equal_f16x8(r, simde_vld1q_f16(test_vec[i].r), 1); + } + + return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float16x8_t a = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t b = simde_test_arm_neon_random_f16x8(-100.0f, 100.0f); + simde_float16x8_t r = simde_vuzp2q_f16(a, b); + + simde_test_arm_neon_write_f16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_f32 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float32 a[4]; simde_float32 b[4]; @@ -386,10 +589,25 @@ test_simde_vuzp2q_f32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float32x4_t a = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t b = simde_test_arm_neon_random_f32x4(-1000.0f, 1000.0f); + simde_float32x4_t r = simde_vuzp2q_f32(a, b); + + simde_test_arm_neon_write_f32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_f64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { simde_float64 a[2]; simde_float64 b[2]; @@ -430,10 +648,25 @@ test_simde_vuzp2q_f64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_float64x2_t a = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t b = simde_test_arm_neon_random_f64x2(SIMDE_FLOAT64_C(-1000.0), SIMDE_FLOAT64_C(1000.0)); + simde_float64x2_t r = simde_vuzp2q_f64(a, b); + + simde_test_arm_neon_write_f64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_f64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_f64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_s8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int8_t a[16]; int8_t b[16]; @@ -498,10 +731,25 @@ test_simde_vuzp2q_s8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int8x16_t a = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t b = simde_test_arm_neon_random_i8x16(); + simde_int8x16_t r = simde_vuzp2q_s8(a, b); + + simde_test_arm_neon_write_i8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + 
simde_test_arm_neon_write_i8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_s16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int16_t a[8]; int16_t b[8]; @@ -542,10 +790,25 @@ test_simde_vuzp2q_s16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int16x8_t a = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t b = simde_test_arm_neon_random_i16x8(); + simde_int16x8_t r = simde_vuzp2q_s16(a, b); + + simde_test_arm_neon_write_i16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_s32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int32_t a[4]; int32_t b[4]; @@ -585,10 +848,25 @@ test_simde_vuzp2q_s32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int32x4_t a = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t b = simde_test_arm_neon_random_i32x4(); + simde_int32x4_t r = simde_vuzp2q_s32(a, b); + + simde_test_arm_neon_write_i32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_s64 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { int64_t a[2]; int64_t b[2]; @@ -628,10 +906,25 @@ test_simde_vuzp2q_s64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_int64x2_t a = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t b = simde_test_arm_neon_random_i64x2(); + simde_int64x2_t r = simde_vuzp2q_s64(a, b); + + simde_test_arm_neon_write_i64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_i64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_i64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + 
return 1; +#endif } static int test_simde_vuzp2q_u8 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint8_t a[16]; uint8_t b[16]; @@ -696,10 +989,25 @@ test_simde_vuzp2q_u8 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint8x16_t a = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t b = simde_test_arm_neon_random_u8x16(); + simde_uint8x16_t r = simde_vuzp2q_u8(a, b); + + simde_test_arm_neon_write_u8x16(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u8x16(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u8x16(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_u16 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint16_t a[8]; uint16_t b[8]; @@ -739,10 +1047,25 @@ test_simde_vuzp2q_u16 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint16x8_t a = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t b = simde_test_arm_neon_random_u16x8(); + simde_uint16x8_t r = simde_vuzp2q_u16(a, b); + + simde_test_arm_neon_write_u16x8(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u16x8(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u16x8(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_u32 (SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint32_t a[4]; uint32_t b[4]; @@ -783,10 +1106,25 @@ test_simde_vuzp2q_u32 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint32x4_t a = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t b = simde_test_arm_neon_random_u32x4(); + simde_uint32x4_t r = simde_vuzp2q_u32(a, b); + + simde_test_arm_neon_write_u32x4(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u32x4(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u32x4(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } static int test_simde_vuzp2q_u64 
(SIMDE_MUNIT_TEST_ARGS) { +#if 1 struct { uint64_t a[2]; uint64_t b[2]; @@ -826,6 +1164,20 @@ test_simde_vuzp2q_u64 (SIMDE_MUNIT_TEST_ARGS) { } return 0; + +#else + fputc('\n', stdout); + for (int i = 0 ; i < 8 ; i++) { + simde_uint64x2_t a = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t b = simde_test_arm_neon_random_u64x2(); + simde_uint64x2_t r = simde_vuzp2q_u64(a, b); + + simde_test_arm_neon_write_u64x2(2, a, SIMDE_TEST_VEC_POS_FIRST); + simde_test_arm_neon_write_u64x2(2, b, SIMDE_TEST_VEC_POS_MIDDLE); + simde_test_arm_neon_write_u64x2(2, r, SIMDE_TEST_VEC_POS_LAST); + } + return 1; +#endif } SIMDE_TEST_FUNC_LIST_BEGIN @@ -838,6 +1190,7 @@ SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u8) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2_u32) +SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_f16) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_f32) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_f64) SIMDE_TEST_FUNC_LIST_ENTRY(vuzp2q_s8)